-rw-r--r--  Documentation/devicetree/bindings/net/anarion-gmac.txt | 25
-rw-r--r--  Documentation/devicetree/bindings/net/broadcom-bluetooth.txt | 35
-rw-r--r--  Documentation/devicetree/bindings/net/marvell-pp2.txt | 29
-rw-r--r--  Documentation/devicetree/bindings/net/mediatek-net.txt | 12
-rw-r--r--  Documentation/devicetree/bindings/net/phy.txt | 5
-rw-r--r--  Documentation/devicetree/bindings/net/renesas,ravb.txt | 30
-rw-r--r--  Documentation/devicetree/bindings/net/rockchip-dwmac.txt | 1
-rw-r--r--  Documentation/devicetree/bindings/net/xilinx_axienet.txt | 55
-rw-r--r--  Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt | 43
-rw-r--r--  Documentation/networking/00-INDEX | 2
-rw-r--r--  Documentation/networking/batman-adv.rst | 220
-rw-r--r--  Documentation/networking/batman-adv.txt | 215
-rw-r--r--  Documentation/networking/dpaa.txt | 68
-rw-r--r--  Documentation/networking/filter.txt | 130
-rw-r--r--  Documentation/networking/hinic.txt | 125
-rw-r--r--  Documentation/networking/index.rst | 1
-rw-r--r--  Documentation/networking/ip-sysctl.txt | 29
-rw-r--r--  Documentation/networking/msg_zerocopy.rst | 257
-rw-r--r--  Documentation/networking/netdev-FAQ.txt | 8
-rw-r--r--  Documentation/networking/netvsc.txt | 75
-rw-r--r--  Documentation/networking/nf_conntrack-sysctl.txt | 11
-rw-r--r--  Documentation/networking/rmnet.txt | 82
-rw-r--r--  Documentation/networking/rxrpc.txt | 57
-rw-r--r--  Documentation/networking/strparser.txt | 209
-rw-r--r--  Documentation/sysctl/net.txt | 2
-rw-r--r--  MAINTAINERS | 28
-rw-r--r--  arch/alpha/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/arm/Kconfig | 2
-rw-r--r--  arch/arm/boot/dts/rk3228-evb.dts | 34
-rw-r--r--  arch/arm/configs/multi_v7_defconfig | 1
-rw-r--r--  arch/arm/net/bpf_jit_32.c | 2448
-rw-r--r--  arch/arm/net/bpf_jit_32.h | 108
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3328-evb.dts | 17
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3328.dtsi | 39
-rw-r--r--  arch/arm64/configs/defconfig | 1
-rw-r--r--  arch/arm64/net/bpf_jit.h | 4
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 20
-rw-r--r--  arch/frv/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/ia64/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/m32r/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/mips/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/mips/net/ebpf_jit.c | 163
-rw-r--r--  arch/mn10300/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/parisc/configs/c3000_defconfig | 1
-rw-r--r--  arch/parisc/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/powerpc/net/bpf_jit.h | 1
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 20
-rw-r--r--  arch/s390/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/s390/net/bpf_jit_comp.c | 24
-rw-r--r--  arch/sh/configs/se7751_defconfig | 1
-rw-r--r--  arch/sparc/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/sparc/net/bpf_jit_comp_64.c | 34
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 35
-rw-r--r--  arch/xtensa/include/uapi/asm/socket.h | 2
-rw-r--r--  drivers/atm/adummy.c | 4
-rw-r--r--  drivers/atm/ambassador.c | 2
-rw-r--r--  drivers/atm/atmtcp.c | 2
-rw-r--r--  drivers/atm/eni.c | 2
-rw-r--r--  drivers/atm/firestream.c | 2
-rw-r--r--  drivers/atm/fore200e.c | 2
-rw-r--r--  drivers/atm/he.c | 4
-rw-r--r--  drivers/atm/horizon.c | 2
-rw-r--r--  drivers/atm/idt77252.c | 4
-rw-r--r--  drivers/atm/iphase.c | 2
-rw-r--r--  drivers/atm/lanai.c | 2
-rw-r--r--  drivers/atm/nicstar.c | 4
-rw-r--r--  drivers/atm/solos-pci.c | 8
-rw-r--r--  drivers/atm/zatm.c | 2
-rw-r--r--  drivers/bcma/Kconfig | 9
-rw-r--r--  drivers/bcma/driver_gpio.c | 1
-rw-r--r--  drivers/bluetooth/Kconfig | 2
-rw-r--r--  drivers/bluetooth/ath3k.c | 3
-rw-r--r--  drivers/bluetooth/bluecard_cs.c | 58
-rw-r--r--  drivers/bluetooth/bt3c_cs.c | 8
-rw-r--r--  drivers/bluetooth/btbcm.c | 69
-rw-r--r--  drivers/bluetooth/btmrvl_sdio.c | 6
-rw-r--r--  drivers/bluetooth/btqca.c | 2
-rw-r--r--  drivers/bluetooth/btrtl.c | 2
-rw-r--r--  drivers/bluetooth/btsdio.c | 3
-rw-r--r--  drivers/bluetooth/btuart_cs.c | 8
-rw-r--r--  drivers/bluetooth/btusb.c | 68
-rw-r--r--  drivers/bluetooth/btwilink.c | 8
-rw-r--r--  drivers/bluetooth/hci_bcm.c | 133
-rw-r--r--  drivers/bluetooth/hci_h4.c | 2
-rw-r--r--  drivers/bluetooth/hci_ldisc.c | 3
-rw-r--r--  drivers/bluetooth/hci_ll.c | 11
-rw-r--r--  drivers/bluetooth/hci_nokia.c | 10
-rw-r--r--  drivers/bluetooth/hci_serdev.c | 13
-rw-r--r--  drivers/bluetooth/hci_uart.h | 1
-rw-r--r--  drivers/hv/channel.c | 129
-rw-r--r--  drivers/infiniband/hw/bnxt_re/Kconfig | 1
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 26
-rw-r--r--  drivers/isdn/capi/kcapi.c | 2
-rw-r--r--  drivers/isdn/hardware/eicon/divacapi.h | 16
-rw-r--r--  drivers/isdn/hardware/eicon/message.c | 247
-rw-r--r--  drivers/isdn/hardware/mISDN/hfcsusb.h | 2
-rw-r--r--  drivers/isdn/hisax/hfc_usb.c | 2
-rw-r--r--  drivers/net/Makefile | 1
-rw-r--r--  drivers/net/appletalk/ipddp.c | 4
-rw-r--r--  drivers/net/arcnet/arcdevice.h | 2
-rw-r--r--  drivers/net/arcnet/com20020-pci.c | 2
-rw-r--r--  drivers/net/bonding/bond_sysfs.c | 2
-rw-r--r--  drivers/net/can/at91_can.c | 2
-rw-r--r--  drivers/net/can/c_can/c_can_platform.c | 2
-rw-r--r--  drivers/net/can/janz-ican3.c | 2
-rw-r--r--  drivers/net/dsa/b53/b53_common.c | 83
-rw-r--r--  drivers/net/dsa/b53/b53_priv.h | 16
-rw-r--r--  drivers/net/dsa/bcm_sf2.c | 48
-rw-r--r--  drivers/net/dsa/bcm_sf2.h | 12
-rw-r--r--  drivers/net/dsa/bcm_sf2_regs.h | 3
-rw-r--r--  drivers/net/dsa/dsa_loop.c | 42
-rw-r--r--  drivers/net/dsa/lan9303-core.c | 137
-rw-r--r--  drivers/net/dsa/lan9303.h | 11
-rw-r--r--  drivers/net/dsa/lan9303_i2c.c | 2
-rw-r--r--  drivers/net/dsa/lan9303_mdio.c | 23
-rw-r--r--  drivers/net/dsa/microchip/ksz_common.c | 124
-rw-r--r--  drivers/net/dsa/mt7530.c | 43
-rw-r--r--  drivers/net/dsa/mv88e6xxx/chip.c | 416
-rw-r--r--  drivers/net/dsa/mv88e6xxx/chip.h | 146
-rw-r--r--  drivers/net/dsa/mv88e6xxx/global2.c | 104
-rw-r--r--  drivers/net/dsa/mv88e6xxx/global2.h | 41
-rw-r--r--  drivers/net/dsa/mv88e6xxx/phy.c | 1
-rw-r--r--  drivers/net/dsa/mv88e6xxx/port.h | 3
-rw-r--r--  drivers/net/dsa/qca8k.c | 112
-rw-r--r--  drivers/net/dsa/qca8k.h | 1
-rw-r--r--  drivers/net/dummy.c | 2
-rw-r--r--  drivers/net/ethernet/3com/3c509.c | 4
-rw-r--r--  drivers/net/ethernet/3com/3c59x.c | 2
-rw-r--r--  drivers/net/ethernet/8390/ax88796.c | 2
-rw-r--r--  drivers/net/ethernet/Kconfig | 1
-rw-r--r--  drivers/net/ethernet/Makefile | 1
-rw-r--r--  drivers/net/ethernet/adi/bfin_mac.c | 2
-rw-r--r--  drivers/net/ethernet/amd/a2065.c | 2
-rw-r--r--  drivers/net/ethernet/amd/ariadne.c | 2
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-common.h | 33
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c | 25
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 207
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 501
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c | 86
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-main.c | 97
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 81
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-pci.c | 4
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c | 54
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 352
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe.h | 92
-rw-r--r--  drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 7
-rw-r--r--  drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 6
-rw-r--r--  drivers/net/ethernet/apple/mace.c | 8
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 7
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 5
-rw-r--r--  drivers/net/ethernet/aquantia/atlantic/aq_vec.c | 1
-rw-r--r--  drivers/net/ethernet/arc/emac_main.c | 13
-rw-r--r--  drivers/net/ethernet/broadcom/Kconfig | 12
-rw-r--r--  drivers/net/ethernet/broadcom/bcm63xx_enet.c | 2
-rw-r--r--  drivers/net/ethernet/broadcom/bcmsysport.c | 111
-rw-r--r--  drivers/net/ethernet/broadcom/bcmsysport.h | 24
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 14
-rw-r--r--  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 4
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/Makefile | 2
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 437
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.h | 95
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 17
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h | 1
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 38
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 500
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 15
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 834
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h | 158
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 513
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h | 89
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 4
-rw-r--r--  drivers/net/ethernet/broadcom/genet/bcmgenet.c | 75
-rw-r--r--  drivers/net/ethernet/broadcom/genet/bcmgenet.h | 14
-rw-r--r--  drivers/net/ethernet/broadcom/genet/bcmmii.c | 256
-rw-r--r--  drivers/net/ethernet/broadcom/sb1250-mac.c | 12
-rw-r--r--  drivers/net/ethernet/broadcom/tg3.c | 8
-rw-r--r--  drivers/net/ethernet/cadence/macb_main.c | 2
-rw-r--r--  drivers/net/ethernet/cadence/macb_pci.c | 2
-rwxr-xr-x  drivers/net/ethernet/cadence/macb_ptp.c | 2
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c | 82
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h | 2
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h | 2
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/lio_core.c | 728
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/lio_ethtool.c | 352
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/lio_main.c | 956
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 598
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/liquidio_common.h | 11
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_config.h | 13
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_console.c | 153
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_device.c | 27
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_device.h | 20
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_droq.c | 10
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_main.h | 6
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/octeon_network.h | 35
-rw-r--r--  drivers/net/ethernet/cavium/liquidio/request_manager.c | 11
-rw-r--r--  drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 1
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 8
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 58
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 192
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 188
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 9
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h | 6
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/sched.c | 6
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 972
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h | 177
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 50
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h | 86
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 456
-rw-r--r--  drivers/net/ethernet/dec/tulip/de4x5.c | 2
-rw-r--r--  drivers/net/ethernet/dec/tulip/tulip_core.c | 2
-rw-r--r--  drivers/net/ethernet/ec_bhf.c | 2
-rw-r--r--  drivers/net/ethernet/emulex/benet/be_roce.c | 3
-rw-r--r--  drivers/net/ethernet/faraday/ftgmac100.c | 5
-rw-r--r--  drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 95
-rw-r--r--  drivers/net/ethernet/freescale/dpaa/dpaa_eth.h | 2
-rw-r--r--  drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c | 3
-rw-r--r--  drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 118
-rw-r--r--  drivers/net/ethernet/freescale/fec_main.c | 13
-rw-r--r--  drivers/net/ethernet/freescale/fec_mpc52xx.c | 4
-rw-r--r--  drivers/net/ethernet/freescale/fman/Makefile | 2
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman.c | 114
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman.h | 77
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_dtsec.c | 118
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_keygen.c | 783
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_keygen.h | 46
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_port.c | 63
-rw-r--r--  drivers/net/ethernet/freescale/fman/fman_port.h | 7
-rw-r--r--  drivers/net/ethernet/freescale/fman/mac.c | 50
-rw-r--r--  drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c | 2
-rw-r--r--  drivers/net/ethernet/freescale/fsl_pq_mdio.c | 20
-rw-r--r--  drivers/net/ethernet/freescale/gianfar.c | 2
-rw-r--r--  drivers/net/ethernet/freescale/gianfar_ptp.c | 2
-rw-r--r--  drivers/net/ethernet/hisilicon/Kconfig | 27
-rw-r--r--  drivers/net/ethernet/hisilicon/Makefile | 1
-rw-r--r--  drivers/net/ethernet/hisilicon/hns/hnae.c | 1
-rw-r--r--  drivers/net/ethernet/hisilicon/hns/hnae.h | 15
-rw-r--r--  drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 1
-rw-r--r--  drivers/net/ethernet/hisilicon/hns/hns_enet.c | 135
-rw-r--r--  drivers/net/ethernet/hisilicon/hns/hns_enet.h | 2
-rw-r--r--  drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 9
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/Makefile | 7
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hnae3.c | 300
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hnae3.h | 444
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile | 11
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 356
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 740
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4265
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 519
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 213
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h | 17
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 1015
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 106
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2891
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h | 593
-rw-r--r--  drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c | 493
-rw-r--r--  drivers/net/ethernet/hp/hp100.c | 2
-rw-r--r--  drivers/net/ethernet/huawei/Kconfig | 19
-rw-r--r--  drivers/net/ethernet/huawei/Makefile | 5
-rw-r--r--  drivers/net/ethernet/huawei/hinic/Kconfig | 12
-rw-r--r--  drivers/net/ethernet/huawei/hinic/Makefile | 6
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_common.c | 80
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_common.h | 38
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_dev.h | 64
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c | 978
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h | 208
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c | 946
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h | 187
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h | 149
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c | 1013
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h | 239
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c | 886
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h | 265
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_if.c | 351
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_if.h | 272
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_io.c | 533
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_io.h | 97
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c | 597
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h | 153
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c | 887
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h | 201
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h | 214
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c | 878
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h | 117
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h | 368
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_main.c | 1112
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_port.c | 379
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_port.h | 198
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_rx.c | 509
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_rx.h | 55
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_tx.c | 504
-rw-r--r--  drivers/net/ethernet/huawei/hinic/hinic_tx.h | 62
-rw-r--r--  drivers/net/ethernet/ibm/ehea/ehea_main.c | 5
-rw-r--r--  drivers/net/ethernet/ibm/emac/core.c | 63
-rw-r--r--  drivers/net/ethernet/ibm/emac/core.h | 1
-rw-r--r--  drivers/net/ethernet/ibm/emac/debug.h | 2
-rw-r--r--  drivers/net/ethernet/ibm/emac/mal.c | 8
-rw-r--r--  drivers/net/ethernet/ibm/emac/rgmii.c | 18
-rw-r--r--  drivers/net/ethernet/ibm/emac/tah.c | 12
-rw-r--r--  drivers/net/ethernet/ibm/emac/zmii.c | 17
-rw-r--r--  drivers/net/ethernet/ibm/ibmveth.c | 2
-rw-r--r--  drivers/net/ethernet/ibm/ibmvnic.c | 214
-rw-r--r--  drivers/net/ethernet/ibm/ibmvnic.h | 17
-rw-r--r--  drivers/net/ethernet/intel/e1000e/hw.h | 4
-rw-r--r--  drivers/net/ethernet/intel/e1000e/ich8lan.c | 2
-rw-r--r--  drivers/net/ethernet/intel/e1000e/netdev.c | 4
-rw-r--r--  drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 14
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e.h | 44
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_common.c | 8
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 160
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_main.c | 318
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_nvm.c | 134
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_prototype.h | 2
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_ptp.c | 11
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 96
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.h | 5
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_type.h | 1
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 124
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_common.c | 6
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_osdep.h | 4
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 75
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_txrx.h | 7
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_type.h | 1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf.h | 31
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 45
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf_main.c | 116
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 44
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_82575.c | 6
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_defines.h | 1
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_hw.h | 18
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_mbx.c | 57
-rw-r--r--  drivers/net/ethernet/intel/igb/e1000_mbx.h | 14
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c | 23
-rw-r--r--  drivers/net/ethernet/intel/igbvf/ethtool.c | 4
-rw-r--r--  drivers/net/ethernet/intel/igbvf/mbx.c | 4
-rw-r--r--  drivers/net/ethernet/intel/igbvf/netdev.c | 47
-rw-r--r--  drivers/net/ethernet/intel/igbvf/vf.c | 12
-rw-r--r--  drivers/net/ethernet/intel/igbvf/vf.h | 1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 30
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 8
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 132
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 9
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 4
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 102
-rw-r--r--  drivers/net/ethernet/marvell/mv643xx_eth.c | 6
-rw-r--r--  drivers/net/ethernet/marvell/mvneta.c | 5
-rw-r--r--  drivers/net/ethernet/marvell/mvpp2.c | 1591
-rw-r--r--  drivers/net/ethernet/marvell/skge.c | 2
-rw-r--r--  drivers/net/ethernet/mediatek/Kconfig | 6
-rw-r--r--  drivers/net/ethernet/mediatek/mtk_eth_soc.c | 183
-rw-r--r--  drivers/net/ethernet/mediatek/mtk_eth_soc.h | 80
-rw-r--r--  drivers/net/ethernet/mellanox/Kconfig | 5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/alloc.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/cmd.c | 20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_cq.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_main.c | 9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 40
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_resources.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_tx.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/eq.c | 20
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/fw.c | 26
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/fw.h | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/fw_qos.c | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/fw_qos.h | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/icm.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/icm.h | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/intf.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/main.c | 260
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mcg.c | 12
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mr.c | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/port.c | 25
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/qp.c | 31
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 24
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 21
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/Makefile | 16
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/dev.c | 73
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 261
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h | 282
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en.h | 67
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 78
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 298
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 318
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 80
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 227
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 50
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tc.h | 11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 18
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 88
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c | 9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 211
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 78
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 14
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 476
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 25
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 13
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 135
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 201
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h | 95
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 40
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 15
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/Kconfig | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/Makefile | 5
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/core.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/reg.h | 422
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 194
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 15
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 80
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h | 3
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 18
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 578
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h | 17
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 78
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c | 214
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h | 79
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3223
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 51
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/switchib.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlxsw/trap.h | 26
-rw-r--r--  drivers/net/ethernet/moxa/moxart_ether.c | 15
-rw-r--r--  drivers/net/ethernet/moxa/moxart_ether.h | 8
-rw-r--r--  drivers/net/ethernet/neterion/s2io.c | 45
-rw-r--r--  drivers/net/ethernet/netronome/Kconfig | 1
-rw-r--r--  drivers/net/ethernet/netronome/nfp/Makefile | 1
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/jit.c | 24
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.c | 30
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/offload.c | 4
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 24
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 77
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 22
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/main.c | 144
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/main.h | 11
-rw-r--r--  drivers/net/ethernet/netronome/nfp/flower/offload.c | 15
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_app.c | 22
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_app.h | 45
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_app_nic.c | 4
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_main.c | 48
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_main.h | 4
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net.h | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 18
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h | 1
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c | 13
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 593
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 68
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 88
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c | 243
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h | 86
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_port.c | 39
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_port.h | 60
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c | 18
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nic/main.c | 14
-rw-r--r--  drivers/net/ethernet/nvidia/forcedeth.c | 5
-rw-r--r--  drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 4
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_dev.c | 145
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 43
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_hsi.h | 49
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_l2.c | 115
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_l2.h | 18
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_main.c | 58
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_mcp.c | 66
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_mcp.h | 37
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_sriov.c | 157
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_sriov.h | 3
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_vf.c | 75
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_vf.h | 53
-rw-r--r--  drivers/net/ethernet/qlogic/qede/qede.h | 19
-rw-r--r--  drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 205
-rw-r--r--  drivers/net/ethernet/qlogic/qede/qede_filter.c | 483
-rw-r--r--  drivers/net/ethernet/qlogic/qede/qede_main.c | 9
-rw-r--r--  drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c | 10
-rw-r--r--  drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c | 2
-rw-r--r--  drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c | 8
-rw-r--r--  drivers/net/ethernet/qlogic/qlge/qlge_dbg.c | 47
-rw-r--r--  drivers/net/ethernet/qualcomm/Kconfig | 2
-rw-r--r--  drivers/net/ethernet/qualcomm/Makefile | 2
-rw-r--r--  drivers/net/ethernet/qualcomm/rmnet/Kconfig | 12
-rw-r--r--  drivers/net/ethernet/qualcomm/rmnet/Makefile | 10
-rw-r--r--  drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 356
-rw-r--r--  drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h | 55
-rw-r--r--  drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 271
-rw-r--r--  drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h | 26
-rw-r--r--  drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h | 86
-rw-r--r--  drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c | 106
-rw-r--r--  drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 104
-rw-r--r--  drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h | 44
-rw-r--r--  drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 174
-rw-r--r--  drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h | 29
-rw-r--r--  drivers/net/ethernet/renesas/ravb.h | 2
-rw-r--r--  drivers/net/ethernet/renesas/ravb_main.c | 131
-rw-r--r--  drivers/net/ethernet/renesas/sh_eth.c | 2
-rw-r--r--  drivers/net/ethernet/rocker/rocker_main.c | 5
-rw-r--r--  drivers/net/ethernet/rocker/rocker_ofdpa.c | 10
-rw-r--r--  drivers/net/ethernet/sfc/efx.h | 4
-rw-r--r--  drivers/net/ethernet/sfc/falcon/efx.h | 4
-rw-r--r--  drivers/net/ethernet/sfc/falcon/tx.c | 13
-rw-r--r--  drivers/net/ethernet/sfc/mcdi_port.c | 224
-rw-r--r--  drivers/net/ethernet/sfc/tx.c | 13
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/Kconfig | 9
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/Makefile | 1
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c | 152
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 193
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 16
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 2
-rw-r--r--  drivers/net/ethernet/sun/ldmvsw.c | 2
-rw-r--r--  drivers/net/ethernet/sun/niu.c | 24
-rw-r--r--  drivers/net/ethernet/sun/sunvnet.c | 2
-rw-r--r--  drivers/net/ethernet/sun/sunvnet_common.c | 90
-rw-r--r--  drivers/net/ethernet/synopsys/dwc-xlgmac-net.c | 1
-rw-r--r--  drivers/net/ethernet/ti/cpsw.c | 10
-rw-r--r--  drivers/net/ethernet/ti/cpts.c | 2
-rw-r--r--  drivers/net/ethernet/ti/davinci_emac.c | 4
-rw-r--r--  drivers/net/ethernet/ti/davinci_mdio.c | 10
-rw-r--r--  drivers/net/ethernet/ti/netcp_core.c | 13
-rw-r--r--  drivers/net/ethernet/tile/tilegx.c | 2
-rw-r--r--  drivers/net/ethernet/xilinx/ll_temac_main.c | 2
-rw-r--r--  drivers/net/ethernet/xilinx/xilinx_axienet.h | 4
-rw-r--r--  drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 48
-rw-r--r--  drivers/net/ethernet/xircom/xirc2ps_cs.c | 2
-rw-r--r--  drivers/net/fddi/defxx.c | 2
-rw-r--r--  drivers/net/geneve.c | 322
-rw-r--r--  drivers/net/hamradio/baycom_par.c | 2
-rw-r--r--  drivers/net/hamradio/baycom_ser_fdx.c | 2
-rw-r--r--  drivers/net/hamradio/baycom_ser_hdx.c | 2
-rw-r--r--  drivers/net/hamradio/dmascc.c | 2
-rw-r--r--  drivers/net/hyperv/hyperv_net.h | 74
-rw-r--r--  drivers/net/hyperv/netvsc.c | 459
-rw-r--r--  drivers/net/hyperv/netvsc_drv.c | 816
-rw-r--r--  drivers/net/hyperv/rndis_filter.c | 162
-rw-r--r--  drivers/net/ieee802154/ca8210.c | 6
-rw-r--r--  drivers/net/ieee802154/mrf24j40.c | 3
-rw-r--r--  drivers/net/ipvlan/ipvlan_main.c | 4
-rw-r--r--  drivers/net/ipvlan/ipvtap.c | 2
-rw-r--r--  drivers/net/macvlan.c | 5
-rw-r--r--  drivers/net/macvtap.c | 2
-rw-r--r--  drivers/net/phy/Kconfig | 74
-rw-r--r--  drivers/net/phy/Makefile | 7
-rw-r--r--  drivers/net/phy/bcm7xxx.c | 2
-rw-r--r--  drivers/net/phy/dp83640.c | 7
-rw-r--r--  drivers/net/phy/marvell.c | 320
-rw-r--r--  drivers/net/phy/mdio-bcm-unimac.c | 103
-rw-r--r--  drivers/net/phy/mdio-gpio.c | 2
-rw-r--r--  drivers/net/phy/mdio-i2c.c | 109
-rw-r--r--  drivers/net/phy/mdio-i2c.h | 19
-rw-r--r--  drivers/net/phy/mdio-mux-bcm-iproc.c | 2
-rw-r--r--  drivers/net/phy/mdio-mux-gpio.c | 2
-rw-r--r--  drivers/net/phy/mdio-mux-mmioreg.c | 21
-rw-r--r--  drivers/net/phy/mdio-mux.c | 34
-rw-r--r--  drivers/net/phy/mdio_bus.c | 6
-rw-r--r--  drivers/net/phy/phy-core.c | 180
-rw-r--r--  drivers/net/phy/phy.c | 235
-rw-r--r--  drivers/net/phy/phy_device.c | 37
-rw-r--r--  drivers/net/phy/phylink.c | 1462
-rw-r--r--  drivers/net/phy/rockchip.c | 233
-rw-r--r--  drivers/net/phy/sfp-bus.c | 475
-rw-r--r--  drivers/net/phy/sfp.c | 915
-rw-r--r--  drivers/net/phy/sfp.h | 28
-rw-r--r--  drivers/net/tap.c | 11
-rw-r--r--  drivers/net/tun.c | 267
-rw-r--r--  drivers/net/usb/catc.c | 2
-rw-r--r--  drivers/net/usb/cdc-phonet.c | 2
-rw-r--r--  drivers/net/usb/cdc_ncm.c | 2
-rw-r--r--  drivers/net/usb/ipheth.c | 2
-rw-r--r--  drivers/net/usb/kaweth.c | 2
-rw-r--r--  drivers/net/usb/r8152.c | 2
-rw-r--r--  drivers/net/usb/rtl8150.c | 2
-rw-r--r--  drivers/net/virtio_net.c | 350
-rw-r--r--  drivers/net/vrf.c | 139
-rw-r--r--  drivers/net/vxlan.c | 162
-rw-r--r--  drivers/net/wan/dscc4.c | 129
-rw-r--r--  drivers/net/wireless/ath/ar5523/ar5523.c | 2
-rw-r--r--  drivers/net/wireless/ath/ath10k/Kconfig | 7
-rw-r--r--  drivers/net/wireless/ath/ath10k/Makefile | 3
-rw-r--r--  drivers/net/wireless/ath/ath10k/ahb.c | 18
-rw-r--r--  drivers/net/wireless/ath/ath10k/ce.c | 299
-rw-r--r--  drivers/net/wireless/ath/ath10k/ce.h | 30
-rw-r--r--  drivers/net/wireless/ath/ath10k/core.c | 29
-rw-r--r--  drivers/net/wireless/ath/ath10k/core.h | 9
-rw-r--r--  drivers/net/wireless/ath/ath10k/debug.c | 8
-rw-r--r--  drivers/net/wireless/ath/ath10k/debug.h | 2
-rw-r--r--  drivers/net/wireless/ath/ath10k/htt_rx.c | 31
-rw-r--r--  drivers/net/wireless/ath/ath10k/hw.c | 150
-rw-r--r--  drivers/net/wireless/ath/ath10k/hw.h | 14
-rw-r--r--  drivers/net/wireless/ath/ath10k/mac.c | 6
-rw-r--r--  drivers/net/wireless/ath/ath10k/pci.c | 105
-rw-r--r--  drivers/net/wireless/ath/ath10k/pci.h | 14
-rw-r--r--  drivers/net/wireless/ath/ath10k/sdio.c | 12
-rw-r--r--  drivers/net/wireless/ath/ath10k/sdio.h | 2
-rw-r--r--  drivers/net/wireless/ath/ath10k/usb.c | 1106
-rw-r--r--  drivers/net/wireless/ath/ath10k/usb.h | 128
-rw-r--r--  drivers/net/wireless/ath/ath10k/wmi.c | 166
-rw-r--r--  drivers/net/wireless/ath/ath10k/wmi.h | 271
-rw-r--r--  drivers/net/wireless/ath/ath10k/wow.c | 14
-rw-r--r--  drivers/net/wireless/ath/ath10k/wow.h | 1
-rw-r--r--  drivers/net/wireless/ath/ath6kl/usb.c | 2
-rw-r--r--  drivers/net/wireless/ath/ath9k/debug.c | 2
-rw-r--r--  drivers/net/wireless/ath/ath9k/hif_usb.c | 2
-rw-r--r--  drivers/net/wireless/ath/ath9k/htc_drv_init.c | 2
-rw-r--r--  drivers/net/wireless/ath/ath9k/init.c | 2
-rw-r--r--  drivers/net/wireless/ath/ath9k/pci.c | 5
-rw-r--r--  drivers/net/wireless/ath/carl9170/usb.c | 2
-rw-r--r--  drivers/net/wireless/ath/wcn36xx/dxe.c | 5
-rw-r--r--  drivers/net/wireless/ath/wcn36xx/main.c | 52
-rw-r--r--  drivers/net/wireless/ath/wcn36xx/wcn36xx.h | 3
-rw-r--r--  drivers/net/wireless/ath/wil6210/Kconfig | 12
-rw-r--r--  drivers/net/wireless/ath/wil6210/Makefile | 2
-rw-r--r--  drivers/net/wireless/ath/wil6210/cfg80211.c | 84
-rw-r--r--  drivers/net/wireless/ath/wil6210/debugfs.c | 27
-rw-r--r--  drivers/net/wireless/ath/wil6210/interrupt.c | 14
-rw-r--r--  drivers/net/wireless/ath/wil6210/main.c | 42
-rw-r--r--  drivers/net/wireless/ath/wil6210/pcie_bus.c | 3
-rw-r--r--  drivers/net/wireless/ath/wil6210/pm.c | 27
-rw-r--r--  drivers/net/wireless/ath/wil6210/txrx.c | 6
-rw-r--r--  drivers/net/wireless/ath/wil6210/wil6210.h | 20
-rw-r--r--  drivers/net/wireless/ath/wil6210/wmi.c | 14
-rw-r--r--  drivers/net/wireless/ath/wil6210/wmi.h | 720
-rw-r--r--  drivers/net/wireless/atmel/at76c50x-usb.c | 2
-rw-r--r--  drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 1
-rw-r--r--  drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 18
-rw-r--r--  drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c | 2
-rw-r--r--  drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c | 5
-rw-r--r--  drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c | 3
-rw-r--r--  drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c | 2
-rw-r--r--  drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 4
-rw-r--r--  drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 11
-rw-r--r--  drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h | 3
-rw-r--r--  drivers/net/wireless/cisco/airo.c | 2
-rw-r--r--  drivers/net/wireless/intel/ipw2x00/ipw2100.c | 34
-rw-r--r--  drivers/net/wireless/intel/ipw2x00/ipw2200.c | 17
-rw-r--r--  drivers/net/wireless/intel/iwlegacy/3945-mac.c | 2
-rw-r--r--  drivers/net/wireless/intel/iwlegacy/4965-mac.c | 2
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/Makefile | 2
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/cfg/8000.c | 4
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/cfg/9000.c | 2
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/cfg/a000.c | 44
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/dvm/commands.h | 24
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/alive.h | 206
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/binding.h | 144
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/cmdhdr.h (renamed from drivers/net/wireless/intel/iwlwifi/fw/api.h) | 78
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/coex.h (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-api-coex.h) | 73
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/commands.h | 657
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/config.h | 184
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/context.h | 94
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/d3.h (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-api-d3.h) | 11
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h | 127
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/debug.h | 345
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/filter.h | 183
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/led.h | 71
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h | 152
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/mac.h (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-api-mac.h) | 33
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h | 386
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/offload.h | 101
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/paging.h | 108
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h | 164
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/phy.h | 258
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/power.h (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h) | 13
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/rs.h (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rs.h) | 13
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/rx.h (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rx.h) | 31
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/scan.h (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h) | 11
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/sf.h | 138
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/sta.h (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h) | 15
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/stats.h (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-api-stats.h) | 13
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/tdls.h | 208
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h | 386
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/tof.h (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tof.h) | 9
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/tx.h (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h) | 66
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/api/txq.h | 163
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/common_rx.c | 88
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/dbg.c (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c) | 474
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/dbg.h (renamed from drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h) | 125
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/error-dump.h | 30
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/file.h | 4
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/init.c | 75
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/notif-wait.c | 25
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/nvm.c | 162
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/paging.c | 414
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/runtime.h | 158
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/fw/smem.c | 155
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/iwl-config.h | 3
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 11
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 36
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/iwl-io.c | 14
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 123
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h | 5
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/iwl-prph.h | 13
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 3
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/Makefile | 2
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/coex.c | 310
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/constants.h | 4
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c | 2
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 75
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h | 2846
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 553
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/led.c | 59
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 413
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 217
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 158
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 200
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 157
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c | 2
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/power.c | 25
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 142
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/rx.c | 11
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 9
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 4
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 558
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 25
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 54
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/tof.c | 2
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/tof.h | 2
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/tt.c | 10
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 164
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 18
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c | 2
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 28
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 17
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 2
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c | 4
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 30
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 63
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 22
-rw-r--r--  drivers/net/wireless/intersil/hostap/hostap_main.c | 4
-rw-r--r--  drivers/net/wireless/intersil/orinoco/orinoco_usb.c | 2
-rw-r--r--  drivers/net/wireless/intersil/p54/p54usb.c | 2
-rw-r--r--  drivers/net/wireless/marvell/libertas/if_usb.c | 2
-rw-r--r--  drivers/net/wireless/marvell/libertas_tf/if_usb.c | 2
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/11n.c | 2
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/cfg80211.c | 35
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/cfp.c | 4
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/cmdevt.c | 15
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/debugfs.c | 2
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/init.c | 32
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/join.c | 2
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/main.c | 173
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/main.h | 14
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/pcie.c | 126
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/scan.c | 15
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/sdio.c | 3
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/sta_cmd.c | 19
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c | 5
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/sta_ioctl.c | 121
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/tdls.c | 2
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/uap_cmd.c | 34
-rw-r--r--  drivers/net/wireless/marvell/mwifiex/usb.c | 5
-rw-r--r--  drivers/net/wireless/mediatek/mt7601u/dma.c | 5
-rw-r--r--  drivers/net/wireless/mediatek/mt7601u/usb.c | 2
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/Makefile | 4
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/bus.h | 1
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 315
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/cfg80211.h | 4
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/commands.c | 486
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/commands.h | 5
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/core.c | 5
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/core.h | 27
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/event.c | 67
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c | 408
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h | 15
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h | 11
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h | 2
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/qlink.h | 202
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/qlink_util.c | 26
-rw-r--r--  drivers/net/wireless/quantenna/qtnfmac/qlink_util.h | 10
-rw-r--r--  drivers/net/wireless/ralink/rt2x00/rt2500usb.c | 2
-rw-r--r--  drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 5
-rw-r--r--  drivers/net/wireless/ralink/rt2x00/rt2800mmio.c | 13
-rw-r--r--  drivers/net/wireless/ralink/rt2x00/rt2800usb.c | 17
-rw-r--r--  drivers/net/wireless/ralink/rt2x00/rt73usb.c | 2
-rw-r--r--  drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c | 2
-rw-r--r--  drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 2
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/base.c | 22
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/base.h | 2
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/btcoexist/halbt_precomp.h | 16
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c | 10
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c | 17
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c | 28
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.h | 1
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/core.c | 2
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/pci.c | 4
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rc.c | 2
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c | 4
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c | 2
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c | 5
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c | 3
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c | 8
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c | 12
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.h | 3
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c | 4
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c | 9
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c | 5
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c | 4
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c | 6
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c | 4
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c | 4
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c | 8
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c | 9
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c | 7
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c | 365
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c | 17
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8723be/table.c | 192
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8723be/table.h | 10
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c | 15
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 43
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c | 5
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c | 21
-rw-r--r--  drivers/net/wireless/realtek/rtlwifi/wifi.h | 49
-rw-r--r--  drivers/net/wireless/rsi/Makefile | 1
-rw-r--r--  drivers/net/wireless/rsi/rsi_91x_core.c | 80
-rw-r--r--  drivers/net/wireless/rsi/rsi_91x_debugfs.c | 3
-rw-r--r--  drivers/net/wireless/rsi/rsi_91x_hal.c | 368
-rw-r--r--  drivers/net/wireless/rsi/rsi_91x_mac80211.c | 495
-rw-r--r--  drivers/net/wireless/rsi/rsi_91x_main.c | 5
-rw-r--r--  drivers/net/wireless/rsi/rsi_91x_mgmt.c | 741
-rw-r--r--  drivers/net/wireless/rsi/rsi_91x_ps.c | 146
-rw-r--r--  drivers/net/wireless/rsi/rsi_91x_sdio.c | 157
-rw-r--r--  drivers/net/wireless/rsi/rsi_91x_sdio_ops.c | 84
-rw-r--r--  drivers/net/wireless/rsi/rsi_91x_usb.c | 138
-rw-r--r--  drivers/net/wireless/rsi/rsi_91x_usb_ops.c | 6
-rw-r--r--  drivers/net/wireless/rsi/rsi_common.h | 1
-rw-r--r--  drivers/net/wireless/rsi/rsi_hal.h | 66
-rw-r--r--  drivers/net/wireless/rsi/rsi_main.h | 88
-rw-r--r--  drivers/net/wireless/rsi/rsi_mgmt.h | 258
-rw-r--r--  drivers/net/wireless/rsi/rsi_ps.h | 64
-rw-r--r--  drivers/net/wireless/rsi/rsi_sdio.h | 7
-rw-r--r--  drivers/net/wireless/rsi/rsi_usb.h | 6
-rw-r--r--  drivers/net/wireless/ti/wlcore/main.c | 23
-rw-r--r--  drivers/net/wireless/ti/wlcore/sdio.c | 1
-rw-r--r--  drivers/net/wireless/ti/wlcore/spi.c | 1
-rw-r--r--  drivers/net/wireless/ti/wlcore/sysfs.c | 2
-rw-r--r--  drivers/net/wireless/ti/wlcore/wlcore.h | 3
-rw-r--r--  drivers/net/wireless/wl3501_cs.c | 2
-rw-r--r--  drivers/net/wireless/zydas/zd1201.c | 2
-rw-r--r--  drivers/net/wireless/zydas/zd1211rw/zd_rf_rf2959.c | 2
-rw-r--r--  drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 2
-rw-r--r--  drivers/net/xen-netback/interface.c | 4
-rw-r--r--  drivers/net/xen-netfront.c | 2
-rw-r--r--  drivers/of/of_mdio.c | 39
-rw-r--r--  drivers/phy/marvell/Kconfig | 11
-rw-r--r--  drivers/phy/marvell/Makefile | 1
-rw-r--r--  drivers/phy/marvell/phy-mvebu-cp110-comphy.c | 644
-rw-r--r--  drivers/ptp/ptp_dte.c | 2
-rw-r--r--  drivers/ptp/ptp_ixp46x.c | 2
-rw-r--r--  drivers/ptp/ptp_kvm.c | 2
-rw-r--r--  drivers/ptp/ptp_pch.c | 2
-rw-r--r--  drivers/s390/net/ctcm_main.c | 2
-rw-r--r--  drivers/s390/net/lcs.c | 28
-rw-r--r--  drivers/s390/net/netiucv.c | 4
-rw-r--r--  drivers/s390/net/qeth_core.h | 17
-rw-r--r--  drivers/s390/net/qeth_core_main.c | 205
-rw-r--r--  drivers/s390/net/qeth_core_sys.c | 2
-rw-r--r--  drivers/s390/net/qeth_l2_main.c | 343
-rw-r--r--  drivers/s390/net/qeth_l3_main.c | 67
-rw-r--r--  drivers/s390/net/qeth_l3_sys.c | 17
-rw-r--r--  drivers/staging/Kconfig | 2
-rw-r--r--  drivers/staging/Makefile | 2
-rw-r--r--  drivers/staging/irda/TODO | 4
-rw-r--r--  drivers/staging/irda/drivers/Kconfig (renamed from drivers/net/irda/Kconfig) | 0
-rw-r--r--  drivers/staging/irda/drivers/Makefile (renamed from drivers/net/irda/Makefile) | 2
-rw-r--r--  drivers/staging/irda/drivers/act200l-sir.c (renamed from drivers/net/irda/act200l-sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/actisys-sir.c (renamed from drivers/net/irda/actisys-sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/ali-ircc.c (renamed from drivers/net/irda/ali-ircc.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/ali-ircc.h (renamed from drivers/net/irda/ali-ircc.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/au1k_ir.c (renamed from drivers/net/irda/au1k_ir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/bfin_sir.c (renamed from drivers/net/irda/bfin_sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/bfin_sir.h (renamed from drivers/net/irda/bfin_sir.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/donauboe.c (renamed from drivers/net/irda/donauboe.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/donauboe.h (renamed from drivers/net/irda/donauboe.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/esi-sir.c (renamed from drivers/net/irda/esi-sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/girbil-sir.c (renamed from drivers/net/irda/girbil-sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/irda-usb.c (renamed from drivers/net/irda/irda-usb.c) | 2
-rw-r--r--  drivers/staging/irda/drivers/irda-usb.h (renamed from drivers/net/irda/irda-usb.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/irtty-sir.c (renamed from drivers/net/irda/irtty-sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/irtty-sir.h (renamed from drivers/net/irda/irtty-sir.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/kingsun-sir.c (renamed from drivers/net/irda/kingsun-sir.c) | 2
-rw-r--r--  drivers/staging/irda/drivers/ks959-sir.c (renamed from drivers/net/irda/ks959-sir.c) | 2
-rw-r--r--  drivers/staging/irda/drivers/ksdazzle-sir.c (renamed from drivers/net/irda/ksdazzle-sir.c) | 2
-rw-r--r--  drivers/staging/irda/drivers/litelink-sir.c (renamed from drivers/net/irda/litelink-sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/ma600-sir.c (renamed from drivers/net/irda/ma600-sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/mcp2120-sir.c (renamed from drivers/net/irda/mcp2120-sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/mcs7780.c (renamed from drivers/net/irda/mcs7780.c) | 2
-rw-r--r--  drivers/staging/irda/drivers/mcs7780.h (renamed from drivers/net/irda/mcs7780.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/nsc-ircc.c (renamed from drivers/net/irda/nsc-ircc.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/nsc-ircc.h (renamed from drivers/net/irda/nsc-ircc.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/old_belkin-sir.c (renamed from drivers/net/irda/old_belkin-sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/pxaficp_ir.c (renamed from drivers/net/irda/pxaficp_ir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/sa1100_ir.c (renamed from drivers/net/irda/sa1100_ir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/sh_sir.c (renamed from drivers/net/irda/sh_sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/sir-dev.h (renamed from drivers/net/irda/sir-dev.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/sir_dev.c (renamed from drivers/net/irda/sir_dev.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/sir_dongle.c (renamed from drivers/net/irda/sir_dongle.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/smsc-ircc2.c (renamed from drivers/net/irda/smsc-ircc2.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/smsc-ircc2.h (renamed from drivers/net/irda/smsc-ircc2.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/smsc-sio.h (renamed from drivers/net/irda/smsc-sio.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/stir4200.c (renamed from drivers/net/irda/stir4200.c) | 2
-rw-r--r--  drivers/staging/irda/drivers/tekram-sir.c (renamed from drivers/net/irda/tekram-sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/toim3232-sir.c (renamed from drivers/net/irda/toim3232-sir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/via-ircc.c (renamed from drivers/net/irda/via-ircc.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/via-ircc.h (renamed from drivers/net/irda/via-ircc.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/vlsi_ir.c (renamed from drivers/net/irda/vlsi_ir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/vlsi_ir.h (renamed from drivers/net/irda/vlsi_ir.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/w83977af.h (renamed from drivers/net/irda/w83977af.h) | 0
-rw-r--r--  drivers/staging/irda/drivers/w83977af_ir.c (renamed from drivers/net/irda/w83977af_ir.c) | 0
-rw-r--r--  drivers/staging/irda/drivers/w83977af_ir.h (renamed from drivers/net/irda/w83977af_ir.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/af_irda.h (renamed from include/net/irda/af_irda.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/crc.h (renamed from include/net/irda/crc.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/discovery.h (renamed from include/net/irda/discovery.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/ircomm_core.h (renamed from include/net/irda/ircomm_core.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/ircomm_event.h (renamed from include/net/irda/ircomm_event.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/ircomm_lmp.h (renamed from include/net/irda/ircomm_lmp.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/ircomm_param.h (renamed from include/net/irda/ircomm_param.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/ircomm_ttp.h (renamed from include/net/irda/ircomm_ttp.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/ircomm_tty.h (renamed from include/net/irda/ircomm_tty.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/ircomm_tty_attach.h (renamed from include/net/irda/ircomm_tty_attach.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irda.h (renamed from include/net/irda/irda.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irda_device.h (renamed from include/net/irda/irda_device.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/iriap.h (renamed from include/net/irda/iriap.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/iriap_event.h (renamed from include/net/irda/iriap_event.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irias_object.h (renamed from include/net/irda/irias_object.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irlan_client.h (renamed from include/net/irda/irlan_client.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irlan_common.h (renamed from include/net/irda/irlan_common.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irlan_eth.h (renamed from include/net/irda/irlan_eth.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irlan_event.h (renamed from include/net/irda/irlan_event.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irlan_filter.h (renamed from include/net/irda/irlan_filter.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irlan_provider.h (renamed from include/net/irda/irlan_provider.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irlap.h (renamed from include/net/irda/irlap.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irlap_event.h (renamed from include/net/irda/irlap_event.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irlap_frame.h (renamed from include/net/irda/irlap_frame.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irlmp.h (renamed from include/net/irda/irlmp.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irlmp_event.h (renamed from include/net/irda/irlmp_event.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irlmp_frame.h (renamed from include/net/irda/irlmp_frame.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irmod.h (renamed from include/net/irda/irmod.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irqueue.h (renamed from include/net/irda/irqueue.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/irttp.h (renamed from include/net/irda/irttp.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/parameters.h (renamed from include/net/irda/parameters.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/qos.h (renamed from include/net/irda/qos.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/timer.h (renamed from include/net/irda/timer.h) | 0
-rw-r--r--  drivers/staging/irda/include/net/irda/wrapper.h (renamed from include/net/irda/wrapper.h) | 0
-rw-r--r--  drivers/staging/irda/net/Kconfig (renamed from net/irda/Kconfig) | 8
-rw-r--r--  drivers/staging/irda/net/Makefile (renamed from net/irda/Makefile) | 2
-rw-r--r--  drivers/staging/irda/net/af_irda.c (renamed from net/irda/af_irda.c) | 0
-rw-r--r--  drivers/staging/irda/net/discovery.c (renamed from net/irda/discovery.c) | 0
-rw-r--r--  drivers/staging/irda/net/ircomm/Kconfig (renamed from net/irda/ircomm/Kconfig) | 0
-rw-r--r--  drivers/staging/irda/net/ircomm/Makefile (renamed from net/irda/ircomm/Makefile) | 0
-rw-r--r--  drivers/staging/irda/net/ircomm/ircomm_core.c (renamed from net/irda/ircomm/ircomm_core.c) | 0
-rw-r--r--  drivers/staging/irda/net/ircomm/ircomm_event.c (renamed from net/irda/ircomm/ircomm_event.c) | 0
-rw-r--r--  drivers/staging/irda/net/ircomm/ircomm_lmp.c (renamed from net/irda/ircomm/ircomm_lmp.c) | 0
-rw-r--r--  drivers/staging/irda/net/ircomm/ircomm_param.c (renamed from net/irda/ircomm/ircomm_param.c) | 0
-rw-r--r--  drivers/staging/irda/net/ircomm/ircomm_ttp.c (renamed from net/irda/ircomm/ircomm_ttp.c) | 0
-rw-r--r--  drivers/staging/irda/net/ircomm/ircomm_tty.c (renamed from net/irda/ircomm/ircomm_tty.c) | 0
-rw-r--r--  drivers/staging/irda/net/ircomm/ircomm_tty_attach.c (renamed from net/irda/ircomm/ircomm_tty_attach.c) | 0
-rw-r--r--  drivers/staging/irda/net/ircomm/ircomm_tty_ioctl.c (renamed from net/irda/ircomm/ircomm_tty_ioctl.c) | 0
-rw-r--r--  drivers/staging/irda/net/irda_device.c (renamed from net/irda/irda_device.c) | 0
-rw-r--r--  drivers/staging/irda/net/iriap.c (renamed from net/irda/iriap.c) | 0
-rw-r--r--  drivers/staging/irda/net/iriap_event.c (renamed from net/irda/iriap_event.c) | 0
-rw-r--r--  drivers/staging/irda/net/irias_object.c (renamed from net/irda/irias_object.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlan/Kconfig (renamed from net/irda/irlan/Kconfig) | 0
-rw-r--r--  drivers/staging/irda/net/irlan/Makefile (renamed from net/irda/irlan/Makefile) | 0
-rw-r--r--  drivers/staging/irda/net/irlan/irlan_client.c (renamed from net/irda/irlan/irlan_client.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlan/irlan_client_event.c (renamed from net/irda/irlan/irlan_client_event.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlan/irlan_common.c (renamed from net/irda/irlan/irlan_common.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlan/irlan_eth.c (renamed from net/irda/irlan/irlan_eth.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlan/irlan_event.c (renamed from net/irda/irlan/irlan_event.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlan/irlan_filter.c (renamed from net/irda/irlan/irlan_filter.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlan/irlan_provider.c (renamed from net/irda/irlan/irlan_provider.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlan/irlan_provider_event.c (renamed from net/irda/irlan/irlan_provider_event.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlap.c (renamed from net/irda/irlap.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlap_event.c (renamed from net/irda/irlap_event.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlap_frame.c (renamed from net/irda/irlap_frame.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlmp.c (renamed from net/irda/irlmp.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlmp_event.c (renamed from net/irda/irlmp_event.c) | 0
-rw-r--r--  drivers/staging/irda/net/irlmp_frame.c (renamed from net/irda/irlmp_frame.c) | 0
-rw-r--r--  drivers/staging/irda/net/irmod.c (renamed from net/irda/irmod.c) | 2
-rw-r--r--  drivers/staging/irda/net/irnet/Kconfig (renamed from net/irda/irnet/Kconfig) | 0
-rw-r--r--  drivers/staging/irda/net/irnet/Makefile (renamed from net/irda/irnet/Makefile) | 0
-rw-r--r--  drivers/staging/irda/net/irnet/irnet.h (renamed from net/irda/irnet/irnet.h) | 0
-rw-r--r--  drivers/staging/irda/net/irnet/irnet_irda.c (renamed from net/irda/irnet/irnet_irda.c) | 0
-rw-r--r--  drivers/staging/irda/net/irnet/irnet_irda.h (renamed from net/irda/irnet/irnet_irda.h) | 0
-rw-r--r--  drivers/staging/irda/net/irnet/irnet_ppp.c (renamed from net/irda/irnet/irnet_ppp.c) | 0
-rw-r--r--  drivers/staging/irda/net/irnet/irnet_ppp.h (renamed from net/irda/irnet/irnet_ppp.h) | 0
-rw-r--r--  drivers/staging/irda/net/irnetlink.c (renamed from net/irda/irnetlink.c) | 0
-rw-r--r--  drivers/staging/irda/net/irproc.c (renamed from net/irda/irproc.c) | 0
-rw-r--r--  drivers/staging/irda/net/irqueue.c (renamed from net/irda/irqueue.c) | 0
-rw-r--r--  drivers/staging/irda/net/irsysctl.c (renamed from net/irda/irsysctl.c) | 0
-rw-r--r--  drivers/staging/irda/net/irttp.c (renamed from net/irda/irttp.c) | 0
-rw-r--r--  drivers/staging/irda/net/parameters.c (renamed from net/irda/parameters.c) | 0
-rw-r--r--  drivers/staging/irda/net/qos.c (renamed from net/irda/qos.c) | 0
-rw-r--r--drivers/staging/irda/net/timer.c (renamed from net/irda/timer.c)0
-rw-r--r--drivers/staging/irda/net/wrapper.c (renamed from net/irda/wrapper.c)0
-rw-r--r--drivers/vhost/net.c8
-rw-r--r--drivers/virtio/virtio_ring.c2
-rw-r--r--fs/afs/misc.c1
-rw-r--r--fs/afs/rxrpc.c47
-rw-r--r--include/keys/rxrpc-type.h23
-rw-r--r--include/linux/avf/virtchnl.h9
-rw-r--r--include/linux/bpf.h93
-rw-r--r--include/linux/bpf_types.h7
-rw-r--r--include/linux/bpf_verifier.h78
-rw-r--r--include/linux/ethtool.h15
-rw-r--r--include/linux/filter.h17
-rw-r--r--include/linux/hyperv.h21
-rw-r--r--include/linux/idr.h69
-rw-r--r--include/linux/igmp.h3
-rw-r--r--include/linux/inet_diag.h7
-rw-r--r--include/linux/ipv6.h10
-rw-r--r--include/linux/mdio-mux.h9
-rw-r--r--include/linux/mlx4/device.h16
-rw-r--r--include/linux/mlx5/device.h4
-rw-r--r--include/linux/mlx5/driver.h7
-rw-r--r--include/linux/mlx5/mlx5_ifc.h53
-rw-r--r--include/linux/mmc/sdio_ids.h1
-rw-r--r--include/linux/net.h12
-rw-r--r--include/linux/netdev_features.h6
-rw-r--r--include/linux/netdevice.h67
-rw-r--r--include/linux/netfilter.h45
-rw-r--r--include/linux/netfilter/xt_hashlimit.h3
-rw-r--r--include/linux/netfilter_ingress.h4
-rw-r--r--include/linux/phy.h21
-rw-r--r--include/linux/phy/phy.h2
-rw-r--r--include/linux/phylink.h148
-rw-r--r--include/linux/platform_data/mdio-bcm-unimac.h13
-rw-r--r--include/linux/qed/qed_eth_if.h1
-rw-r--r--include/linux/qed/qed_if.h37
-rw-r--r--include/linux/radix-tree.h21
-rw-r--r--include/linux/sched/user.h3
-rw-r--r--include/linux/sctp.h171
-rw-r--r--include/linux/seg6_local.h6
-rw-r--r--include/linux/sfp.h434
-rw-r--r--include/linux/skbuff.h201
-rw-r--r--include/linux/soc/ti/knav_dma.h2
-rw-r--r--include/linux/socket.h1
-rw-r--r--include/linux/syscalls.h12
-rw-r--r--include/linux/tcp.h11
-rw-r--r--include/linux/tnum.h81
-rw-r--r--include/linux/virtio_net.h5
-rw-r--r--include/net/act_api.h76
-rw-r--r--include/net/af_rxrpc.h21
-rw-r--r--include/net/af_unix.h1
-rw-r--r--include/net/bluetooth/bluetooth.h2
-rw-r--r--include/net/devlink.h19
-rw-r--r--include/net/dsa.h53
-rw-r--r--include/net/dst.h3
-rw-r--r--include/net/erspan.h61
-rw-r--r--include/net/fib_notifier.h46
-rw-r--r--include/net/fib_rules.h9
-rw-r--r--include/net/flow.h35
-rw-r--r--include/net/flow_dissector.h8
-rw-r--r--include/net/flowcache.h25
-rw-r--r--include/net/inet6_hashtables.h22
-rw-r--r--include/net/inet_frag.h35
-rw-r--r--include/net/inet_hashtables.h31
-rw-r--r--include/net/inetpeer.h11
-rw-r--r--include/net/ip.h19
-rw-r--r--include/net/ip6_fib.h53
-rw-r--r--include/net/ip6_route.h13
-rw-r--r--include/net/ip_fib.h67
-rw-r--r--include/net/ip_tunnels.h7
-rw-r--r--include/net/ncsi.h12
-rw-r--r--include/net/net_namespace.h1
-rw-r--r--include/net/netfilter/nf_conntrack.h11
-rw-r--r--include/net/netfilter/nf_conntrack_expect.h5
-rw-r--r--include/net/netfilter/nf_conntrack_l3proto.h45
-rw-r--r--include/net/netfilter/nf_conntrack_l4proto.h37
-rw-r--r--include/net/netfilter/nf_conntrack_timeout.h4
-rw-r--r--include/net/netfilter/nf_queue.h2
-rw-r--r--include/net/netfilter/nf_tables.h45
-rw-r--r--include/net/netfilter/nf_tables_core.h2
-rw-r--r--include/net/netlink.h17
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/netns/ipv6.h3
-rw-r--r--include/net/netns/netfilter.h2
-rw-r--r--include/net/netns/xfrm.h11
-rw-r--r--include/net/nsh.h307
-rw-r--r--include/net/pkt_cls.h145
-rw-r--r--include/net/pkt_sched.h14
-rw-r--r--include/net/raw.h2
-rw-r--r--include/net/rawv6.h2
-rw-r--r--include/net/route.h7
-rw-r--r--include/net/rtnetlink.h9
-rw-r--r--include/net/sch_generic.h17
-rw-r--r--include/net/sctp/command.h70
-rw-r--r--include/net/sctp/constants.h89
-rw-r--r--include/net/sctp/sctp.h20
-rw-r--r--include/net/sctp/sm.h205
-rw-r--r--include/net/sctp/structs.h74
-rw-r--r--include/net/seg6.h5
-rw-r--r--include/net/sock.h17
-rw-r--r--include/net/strparser.h121
-rw-r--r--include/net/switchdev.h87
-rw-r--r--include/net/tc_act/tc_gact.h20
-rw-r--r--include/net/tcp.h57
-rw-r--r--include/net/tso.h2
-rw-r--r--include/net/tun_proto.h49
-rw-r--r--include/net/udp.h4
-rw-r--r--include/net/udp_tunnel.h8
-rw-r--r--include/net/vxlan.h6
-rw-r--r--include/net/xfrm.h42
-rw-r--r--include/trace/events/bridge.h129
-rw-r--r--include/trace/events/qdisc.h50
-rw-r--r--include/trace/events/xdp.h118
-rw-r--r--include/uapi/asm-generic/socket.h2
-rw-r--r--include/uapi/linux/bpf.h74
-rw-r--r--include/uapi/linux/devlink.h18
-rw-r--r--include/uapi/linux/errqueue.h3
-rw-r--r--include/uapi/linux/ethtool.h48
-rw-r--r--include/uapi/linux/if_arp.h1
-rw-r--r--include/uapi/linux/if_ether.h6
-rw-r--r--include/uapi/linux/if_tunnel.h1
-rw-r--r--include/uapi/linux/inet_diag.h2
-rw-r--r--include/uapi/linux/lwtunnel.h1
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h20
-rw-r--r--include/uapi/linux/netfilter/xt_hashlimit.h36
-rw-r--r--include/uapi/linux/netlink.h20
-rw-r--r--include/uapi/linux/rtnetlink.h23
-rw-r--r--include/uapi/linux/rxrpc.h (renamed from include/linux/rxrpc.h)57
-rw-r--r--include/uapi/linux/seg6_iptunnel.h18
-rw-r--r--include/uapi/linux/seg6_local.h68
-rw-r--r--include/uapi/linux/snmp.h7
-rw-r--r--include/uapi/linux/tcp.h17
-rw-r--r--include/uapi/linux/xfrm.h1
-rw-r--r--kernel/bpf/Makefile8
-rw-r--r--kernel/bpf/arraymap.c33
-rw-r--r--kernel/bpf/bpf_lru_list.h3
-rw-r--r--kernel/bpf/core.c61
-rw-r--r--kernel/bpf/devmap.c409
-rw-r--r--kernel/bpf/hashtab.c60
-rw-r--r--kernel/bpf/lpm_trie.c9
-rw-r--r--kernel/bpf/sockmap.c873
-rw-r--r--kernel/bpf/stackmap.c8
-rw-r--r--kernel/bpf/syscall.c129
-rw-r--r--kernel/bpf/tnum.c180
-rw-r--r--kernel/bpf/verifier.c2448
-rw-r--r--kernel/events/core.c10
-rw-r--r--kernel/trace/trace_syscalls.c53
-rw-r--r--lib/idr.c66
-rw-r--r--lib/nlattr.c54
-rw-r--r--lib/radix-tree.c6
-rw-r--r--lib/test_bpf.c364
-rw-r--r--net/Kconfig21
-rw-r--r--net/Makefile2
-rw-r--r--net/atm/clip.c2
-rw-r--r--net/atm/lec.c2
-rw-r--r--net/atm/mpc.c2
-rw-r--r--net/atm/signaling.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.c6
-rw-r--r--net/batman-adv/bat_v_ogm.c16
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/send.c2
-rw-r--r--net/bluetooth/6lowpan.c9
-rw-r--r--net/bluetooth/Kconfig22
-rw-r--r--net/bluetooth/hci_sock.c6
-rw-r--r--net/bluetooth/hci_sysfs.c4
-rw-r--r--net/bluetooth/lib.c6
-rw-r--r--net/bluetooth/selftest.c2
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_fdb.c30
-rw-r--r--net/bridge/br_mdb.c6
-rw-r--r--net/bridge/br_netfilter_hooks.c21
-rw-r--r--net/bridge/br_private.h9
-rw-r--r--net/bridge/netfilter/ebt_ip.c4
-rw-r--r--net/bridge/netfilter/ebt_ip6.c2
-rw-r--r--net/bridge/netfilter/ebtable_filter.c2
-rw-r--r--net/bridge/netfilter/ebtable_nat.c4
-rw-r--r--net/bridge/netfilter/ebtables.c33
-rw-r--r--net/can/gw.c6
-rw-r--r--net/core/Makefile4
-rw-r--r--net/core/datagram.c55
-rw-r--r--net/core/dev.c307
-rw-r--r--net/core/devlink.c85
-rw-r--r--net/core/dst.c9
-rw-r--r--net/core/ethtool.c42
-rw-r--r--net/core/fib_notifier.c173
-rw-r--r--net/core/fib_rules.c69
-rw-r--r--net/core/filter.c478
-rw-r--r--net/core/flow.c516
-rw-r--r--net/core/flow_dissector.c301
-rw-r--r--net/core/lwtunnel.c28
-rw-r--r--net/core/neighbour.c10
-rw-r--r--net/core/net-sysfs.c222
-rw-r--r--net/core/net-traces.c8
-rw-r--r--net/core/net_namespace.c5
-rw-r--r--net/core/rtnetlink.c249
-rw-r--r--net/core/skbuff.c560
-rw-r--r--net/core/sock.c84
-rw-r--r--net/dcb/dcbnl.c4
-rw-r--r--net/dccp/ipv4.c4
-rw-r--r--net/dccp/ipv6.c51
-rw-r--r--net/decnet/dn_dev.c6
-rw-r--r--net/decnet/dn_fib.c4
-rw-r--r--net/decnet/dn_neigh.c2
-rw-r--r--net/decnet/dn_route.c4
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c2
-rw-r--r--net/dsa/dsa.c42
-rw-r--r--net/dsa/dsa2.c6
-rw-r--r--net/dsa/dsa_priv.h44
-rw-r--r--net/dsa/legacy.c40
-rw-r--r--net/dsa/port.c51
-rw-r--r--net/dsa/slave.c419
-rw-r--r--net/dsa/switch.c21
-rw-r--r--net/dsa/tag_brcm.c6
-rw-r--r--net/dsa/tag_dsa.c3
-rw-r--r--net/dsa/tag_edsa.c3
-rw-r--r--net/dsa/tag_ksz.c3
-rw-r--r--net/dsa/tag_lan9303.c5
-rw-r--r--net/dsa/tag_mtk.c17
-rw-r--r--net/dsa/tag_qca.c3
-rw-r--r--net/dsa/tag_trailer.c3
-rw-r--r--net/ieee802154/6lowpan/core.c2
-rw-r--r--net/ieee802154/6lowpan/reassembly.c11
-rw-r--r--net/ipv4/af_inet.c28
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/devinet.c8
-rw-r--r--net/ipv4/esp4.c64
-rw-r--r--net/ipv4/esp4_offload.c5
-rw-r--r--net/ipv4/fib_frontend.c23
-rw-r--r--net/ipv4/fib_lookup.h1
-rw-r--r--net/ipv4/fib_notifier.c101
-rw-r--r--net/ipv4/fib_rules.c44
-rw-r--r--net/ipv4/fib_semantics.c49
-rw-r--r--net/ipv4/fib_trie.c8
-rw-r--r--net/ipv4/gre_offload.c14
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/igmp.c6
-rw-r--r--net/ipv4/inet_diag.c33
-rw-r--r--net/ipv4/inet_fragment.c4
-rw-r--r--net/ipv4/inet_hashtables.c27
-rw-r--r--net/ipv4/inetpeer.c431
-rw-r--r--net/ipv4/ip_fragment.c12
-rw-r--r--net/ipv4/ip_gre.c421
-rw-r--r--net/ipv4/ip_options.c9
-rw-r--r--net/ipv4/ip_output.c92
-rw-r--r--net/ipv4/ip_sockglue.c19
-rw-r--r--net/ipv4/ip_vti.c31
-rw-r--r--net/ipv4/ipmr.c8
-rw-r--r--net/ipv4/netfilter/arp_tables.c2
-rw-r--r--net/ipv4/netfilter/ip_tables.c16
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c2
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c2
-rw-r--r--net/ipv4/netfilter/iptable_nat.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c40
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c15
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_log_arp.c2
-rw-r--r--net/ipv4/netfilter/nf_log_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c57
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c8
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c20
-rw-r--r--net/ipv4/proc.c7
-rw-r--r--net/ipv4/raw.c18
-rw-r--r--net/ipv4/raw_diag.c4
-rw-r--r--net/ipv4/route.c15
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c3
-rw-r--r--net/ipv4/tcp.c287
-rw-r--r--net/ipv4/tcp_bic.c14
-rw-r--r--net/ipv4/tcp_cdg.c12
-rw-r--r--net/ipv4/tcp_cong.c2
-rw-r--r--net/ipv4/tcp_cubic.c13
-rw-r--r--net/ipv4/tcp_diag.c109
-rw-r--r--net/ipv4/tcp_fastopen.c6
-rw-r--r--net/ipv4/tcp_highspeed.c11
-rw-r--r--net/ipv4/tcp_htcp.c3
-rw-r--r--net/ipv4/tcp_illinois.c11
-rw-r--r--net/ipv4/tcp_input.c170
-rw-r--r--net/ipv4/tcp_ipv4.c82
-rw-r--r--net/ipv4/tcp_minisocks.c1
-rw-r--r--net/ipv4/tcp_nv.c13
-rw-r--r--net/ipv4/tcp_output.c17
-rw-r--r--net/ipv4/tcp_probe.c5
-rw-r--r--net/ipv4/tcp_recovery.c2
-rw-r--r--net/ipv4/tcp_scalable.c16
-rw-r--r--net/ipv4/tcp_timer.c12
-rw-r--r--net/ipv4/tcp_veno.c11
-rw-r--r--net/ipv4/tcp_yeah.c11
-rw-r--r--net/ipv4/udp.c81
-rw-r--r--net/ipv4/udp_diag.c10
-rw-r--r--net/ipv4/udp_offload.c64
-rw-r--r--net/ipv4/udp_tunnel.c25
-rw-r--r--net/ipv4/xfrm4_policy.c25
-rw-r--r--net/ipv6/Kconfig16
-rw-r--r--net/ipv6/Makefile4
-rw-r--r--net/ipv6/addrconf.c60
-rw-r--r--net/ipv6/addrlabel.c22
-rw-r--r--net/ipv6/af_inet6.c5
-rw-r--r--net/ipv6/esp6.c61
-rw-r--r--net/ipv6/esp6_offload.c5
-rw-r--r--net/ipv6/exthdrs.c4
-rw-r--r--net/ipv6/fib6_notifier.c63
-rw-r--r--net/ipv6/fib6_rules.c69
-rw-r--r--net/ipv6/icmp.c27
-rw-r--r--net/ipv6/ila/ila_xlat.c2
-rw-r--r--net/ipv6/inet6_hashtables.c28
-rw-r--r--net/ipv6/ip6_fib.c134
-rw-r--r--net/ipv6/ip6_output.c77
-rw-r--r--net/ipv6/ip6_vti.c31
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ndisc.c3
-rw-r--r--net/ipv6/netfilter/ip6_tables.c14
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c2
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c42
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c25
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c12
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c2
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c6
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c29
-rw-r--r--net/ipv6/raw.c13
-rw-r--r--net/ipv6/reassembly.c12
-rw-r--r--net/ipv6/route.c138
-rw-r--r--net/ipv6/seg6.c7
-rw-r--r--net/ipv6/seg6_hmac.c7
-rw-r--r--net/ipv6/seg6_iptunnel.c82
-rw-r--r--net/ipv6/seg6_local.c938
-rw-r--r--net/ipv6/sysctl_net_ipv6.c8
-rw-r--r--net/ipv6/tcp_ipv6.c23
-rw-r--r--net/ipv6/udp.c50
-rw-r--r--net/ipv6/udp_offload.c100
-rw-r--r--net/ipv6/xfrm6_input.c4
-rw-r--r--net/ipv6/xfrm6_policy.c20
-rw-r--r--net/kcm/kcmproc.c34
-rw-r--r--net/kcm/kcmsock.c49
-rw-r--r--net/key/af_key.c6
-rw-r--r--net/l2tp/l2tp_core.c41
-rw-r--r--net/l2tp/l2tp_core.h8
-rw-r--r--net/l2tp/l2tp_eth.c11
-rw-r--r--net/l2tp/l2tp_netlink.c8
-rw-r--r--net/l2tp/l2tp_ppp.c19
-rw-r--r--net/mpls/af_mpls.c8
-rw-r--r--net/ncsi/internal.h11
-rw-r--r--net/ncsi/ncsi-cmd.c10
-rw-r--r--net/ncsi/ncsi-manage.c310
-rw-r--r--net/ncsi/ncsi-pkt.h2
-rw-r--r--net/ncsi/ncsi-rsp.c12
-rw-r--r--net/netfilter/Kconfig9
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/core.c351
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c11
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c46
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c57
-rw-r--r--net/netfilter/nf_conntrack_expect.c70
-rw-r--r--net/netfilter/nf_conntrack_extend.c2
-rw-r--r--net/netfilter/nf_conntrack_helper.c34
-rw-r--r--net/netfilter/nf_conntrack_l3proto_generic.c7
-rw-r--r--net/netfilter/nf_conntrack_netlink.c106
-rw-r--r--net/netfilter/nf_conntrack_pptp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto.c90
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c20
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c21
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c16
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c20
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c20
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c18
-rw-r--r--net/netfilter/nf_conntrack_sip.c6
-rw-r--r--net/netfilter/nf_conntrack_standalone.c103
-rw-r--r--net/netfilter/nf_internals.h10
-rw-r--r--net/netfilter/nf_nat_core.c4
-rw-r--r--net/netfilter/nf_nat_redirect.c6
-rw-r--r--net/netfilter/nf_queue.c68
-rw-r--r--net/netfilter/nf_sockopt.c2
-rw-r--r--net/netfilter/nf_tables_api.c515
-rw-r--r--net/netfilter/nf_tables_core.c28
-rw-r--r--net/netfilter/nf_tables_trace.c42
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c22
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c21
-rw-r--r--net/netfilter/nft_counter.c20
-rw-r--r--net/netfilter/nft_ct.c18
-rw-r--r--net/netfilter/nft_exthdr.c213
-rw-r--r--net/netfilter/nft_fib_netdev.c87
-rw-r--r--net/netfilter/nft_limit.c148
-rw-r--r--net/netfilter/nft_objref.c7
-rw-r--r--net/netfilter/nft_payload.c2
-rw-r--r--net/netfilter/nft_quota.c20
-rw-r--r--net/netfilter/nft_rt.c73
-rw-r--r--net/netfilter/nft_set_rbtree.c49
-rw-r--r--net/netfilter/x_tables.c14
-rw-r--r--net/netfilter/xt_CT.c2
-rw-r--r--net/netfilter/xt_NETMAP.c8
-rw-r--r--net/netfilter/xt_TCPMSS.c2
-rw-r--r--net/netfilter/xt_TPROXY.c10
-rw-r--r--net/netfilter/xt_addrtype.c3
-rw-r--r--net/netfilter/xt_connlimit.c26
-rw-r--r--net/netfilter/xt_hashlimit.c285
-rw-r--r--net/netfilter/xt_nat.c20
-rw-r--r--net/netfilter/xt_osf.c2
-rw-r--r--net/netfilter/xt_recent.c2
-rw-r--r--net/nsh/Kconfig9
-rw-r--r--net/nsh/Makefile1
-rw-r--r--net/nsh/nsh.c91
-rw-r--r--net/openvswitch/conntrack.c18
-rw-r--r--net/openvswitch/datapath.c14
-rw-r--r--net/openvswitch/flow.c16
-rw-r--r--net/openvswitch/flow.h2
-rw-r--r--net/openvswitch/flow_netlink.c2
-rw-r--r--net/openvswitch/flow_table.c4
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/phonet/pn_netlink.c12
-rw-r--r--net/qrtr/qrtr.c2
-rw-r--r--net/rds/bind.c2
-rw-r--r--net/rds/connection.c50
-rw-r--r--net/rds/rds.h4
-rw-r--r--net/rds/send.c4
-rw-r--r--net/rds/tcp.c2
-rw-r--r--net/rds/tcp_connect.c4
-rw-r--r--net/rds/tcp_send.c2
-rw-r--r--net/rds/threads.c2
-rw-r--r--net/rxrpc/af_rxrpc.c75
-rw-r--r--net/rxrpc/ar-internal.h27
-rw-r--r--net/rxrpc/call_accept.c2
-rw-r--r--net/rxrpc/call_object.c102
-rw-r--r--net/rxrpc/conn_client.c17
-rw-r--r--net/rxrpc/conn_object.c2
-rw-r--r--net/rxrpc/conn_service.c3
-rw-r--r--net/rxrpc/key.c22
-rw-r--r--net/rxrpc/local_event.c2
-rw-r--r--net/rxrpc/output.c2
-rw-r--r--net/rxrpc/peer_event.c6
-rw-r--r--net/rxrpc/protocol.h (renamed from include/rxrpc/packet.h)45
-rw-r--r--net/rxrpc/rxkad.c22
-rw-r--r--net/rxrpc/sendmsg.c62
-rw-r--r--net/rxrpc/utils.c23
-rw-r--r--net/sched/act_api.c321
-rw-r--r--net/sched/act_bpf.c17
-rw-r--r--net/sched/act_connmark.c16
-rw-r--r--net/sched/act_csum.c22
-rw-r--r--net/sched/act_gact.c16
-rw-r--r--net/sched/act_ife.c35
-rw-r--r--net/sched/act_ipt.c26
-rw-r--r--net/sched/act_mirred.c19
-rw-r--r--net/sched/act_nat.c16
-rw-r--r--net/sched/act_pedit.c18
-rw-r--r--net/sched/act_police.c18
-rw-r--r--net/sched/act_sample.c17
-rw-r--r--net/sched/act_simple.c20
-rw-r--r--net/sched/act_skbedit.c18
-rw-r--r--net/sched/act_skbmod.c18
-rw-r--r--net/sched/act_tunnel_key.c20
-rw-r--r--net/sched/act_vlan.c22
-rw-r--r--net/sched/cls_api.c228
-rw-r--r--net/sched/cls_basic.c48
-rw-r--r--net/sched/cls_bpf.c99
-rw-r--r--net/sched/cls_cgroup.c30
-rw-r--r--net/sched/cls_flow.c71
-rw-r--r--net/sched/cls_flower.c163
-rw-r--r--net/sched/cls_fw.c69
-rw-r--r--net/sched/cls_matchall.c74
-rw-r--r--net/sched/cls_route.c67
-rw-r--r--net/sched/cls_rsvp.h37
-rw-r--r--net/sched/cls_tcindex.c53
-rw-r--r--net/sched/cls_u32.c213
-rw-r--r--net/sched/sch_api.c520
-rw-r--r--net/sched/sch_atm.c42
-rw-r--r--net/sched/sch_cbq.c44
-rw-r--r--net/sched/sch_drr.c33
-rw-r--r--net/sched/sch_dsmark.c17
-rw-r--r--net/sched/sch_fq_codel.c9
-rw-r--r--net/sched/sch_generic.c8
-rw-r--r--net/sched/sch_hfsc.c81
-rw-r--r--net/sched/sch_htb.c36
-rw-r--r--net/sched/sch_ingress.c32
-rw-r--r--net/sched/sch_mq.c9
-rw-r--r--net/sched/sch_mqprio.c25
-rw-r--r--net/sched/sch_multiq.c11
-rw-r--r--net/sched/sch_netem.c9
-rw-r--r--net/sched/sch_prio.c11
-rw-r--r--net/sched/sch_qfq.c33
-rw-r--r--net/sched/sch_red.c9
-rw-r--r--net/sched/sch_sfb.c9
-rw-r--r--net/sched/sch_sfq.c29
-rw-r--r--net/sched/sch_tbf.c9
-rw-r--r--net/sctp/associola.c21
-rw-r--r--net/sctp/auth.c13
-rw-r--r--net/sctp/bind_addr.c20
-rw-r--r--net/sctp/chunk.c4
-rw-r--r--net/sctp/debug.c8
-rw-r--r--net/sctp/endpointola.c12
-rw-r--r--net/sctp/input.c2
-rw-r--r--net/sctp/ipv6.c8
-rw-r--r--net/sctp/objcnt.c2
-rw-r--r--net/sctp/output.c60
-rw-r--r--net/sctp/outqueue.c20
-rw-r--r--net/sctp/primitive.c4
-rw-r--r--net/sctp/probe.c13
-rw-r--r--net/sctp/protocol.c8
-rw-r--r--net/sctp/sm_make_chunk.c524
-rw-r--r--net/sctp/sm_sideeffect.c154
-rw-r--r--net/sctp/sm_statefuns.c1555
-rw-r--r--net/sctp/sm_statetable.c59
-rw-r--r--net/sctp/socket.c20
-rw-r--r--net/sctp/sysctl.c2
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/sctp/ulpevent.c10
-rw-r--r--net/smc/Kconfig4
-rw-r--r--net/smc/af_smc.c64
-rw-r--r--net/smc/smc_clc.c12
-rw-r--r--net/smc/smc_core.c388
-rw-r--r--net/smc/smc_core.h31
-rw-r--r--net/smc/smc_ib.c128
-rw-r--r--net/smc/smc_ib.h19
-rw-r--r--net/smc/smc_rx.c3
-rw-r--r--net/smc/smc_tx.c9
-rw-r--r--net/smc/smc_wr.c63
-rw-r--r--net/smc/smc_wr.h1
-rw-r--r--net/socket.c33
-rw-r--r--net/strparser/strparser.c315
-rw-r--r--net/switchdev/switchdev.c519
-rw-r--r--net/tipc/bearer.c24
-rw-r--r--net/tipc/bearer.h1
-rw-r--r--net/tipc/link.c23
-rw-r--r--net/tipc/netlink_compat.c2
-rw-r--r--net/tipc/node.c14
-rw-r--r--net/unix/af_unix.c24
-rw-r--r--net/vmw_vsock/Kconfig12
-rw-r--r--net/vmw_vsock/Makefile3
-rw-r--r--net/vmw_vsock/hyperv_transport.c904
-rw-r--r--net/x25/af_x25.c2
-rw-r--r--net/xfrm/xfrm_device.c9
-rw-r--r--net/xfrm/xfrm_input.c7
-rw-r--r--net/xfrm/xfrm_output.c3
-rw-r--r--net/xfrm/xfrm_policy.c435
-rw-r--r--net/xfrm/xfrm_state.c21
-rw-r--r--net/xfrm/xfrm_user.c16
-rw-r--r--samples/bpf/Makefile16
-rw-r--r--samples/bpf/bpf_load.c29
-rw-r--r--samples/bpf/bpf_load.h1
-rw-r--r--samples/bpf/map_perf_test_kern.c46
-rw-r--r--samples/bpf/map_perf_test_user.c89
-rw-r--r--samples/bpf/sock_flags_kern.c5
-rw-r--r--samples/bpf/syscall_tp_kern.c62
-rw-r--r--samples/bpf/syscall_tp_user.c71
-rw-r--r--samples/bpf/tcbpf2_kern.c63
-rw-r--r--samples/bpf/test_cgrp2_sock.c255
-rwxr-xr-xsamples/bpf/test_cgrp2_sock.sh162
-rwxr-xr-xsamples/bpf/test_tunnel_bpf.sh29
-rw-r--r--samples/bpf/xdp_monitor_kern.c88
-rw-r--r--samples/bpf/xdp_monitor_user.c295
-rw-r--r--samples/bpf/xdp_redirect_kern.c90
-rw-r--r--samples/bpf/xdp_redirect_map_kern.c92
-rw-r--r--samples/bpf/xdp_redirect_map_user.c145
-rw-r--r--samples/bpf/xdp_redirect_user.c143
-rw-r--r--samples/sockmap/Makefile78
-rw-r--r--samples/sockmap/sockmap_kern.c108
-rw-r--r--samples/sockmap/sockmap_user.c294
-rw-r--r--security/selinux/hooks.c2
-rw-r--r--security/selinux/include/xfrm.h4
-rw-r--r--security/smack/smack_netfilter.c2
-rwxr-xr-xtools/hv/bondvf.sh232
-rw-r--r--tools/include/uapi/linux/bpf.h60
-rw-r--r--tools/lib/bpf/Makefile4
-rw-r--r--tools/lib/bpf/bpf.c32
-rw-r--r--tools/lib/bpf/bpf.h6
-rw-r--r--tools/lib/bpf/libbpf.c29
-rw-r--r--tools/lib/bpf/libbpf.h2
-rw-r--r--tools/testing/selftests/bpf/Makefile4
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h9
-rw-r--r--tools/testing/selftests/bpf/sockmap_parse_prog.c38
-rw-r--r--tools/testing/selftests/bpf/sockmap_verdict_prog.c68
-rw-r--r--tools/testing/selftests/bpf/test_align.c462
-rw-r--r--tools/testing/selftests/bpf/test_maps.c393
-rw-r--r--tools/testing/selftests/bpf/test_progs.c71
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c994
-rw-r--r--tools/testing/selftests/bpf/test_xdp_redirect.c28
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect.sh59
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile4
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c697
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh112
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh272
-rw-r--r--tools/testing/selftests/networking/timestamping/.gitignore1
-rw-r--r--tools/testing/selftests/networking/timestamping/Makefile4
-rw-r--r--tools/testing/selftests/networking/timestamping/rxtimestamp.c389
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/tests.json50
1592 files changed, 99174 insertions, 30402 deletions
diff --git a/Documentation/devicetree/bindings/net/anarion-gmac.txt b/Documentation/devicetree/bindings/net/anarion-gmac.txt
new file mode 100644
index 0000000..fe67896
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/anarion-gmac.txt
@@ -0,0 +1,25 @@
+* Adaptrum Anarion ethernet controller
+
+This device is a platform glue layer for stmmac.
+Please see stmmac.txt for the other unchanged properties.
+
+Required properties:
+ - compatible: Should be "adaptrum,anarion-gmac", "snps,dwmac"
+ - phy-mode: Should be "rgmii". Other modes are not currently supported.
+
+
+Examples:
+
+ gmac1: ethernet@f2014000 {
+ compatible = "adaptrum,anarion-gmac", "snps,dwmac";
+ reg = <0xf2014000 0x4000>, <0xf2018100 8>;
+
+ interrupt-parent = <&core_intc>;
+ interrupts = <21>;
+ interrupt-names = "macirq";
+
+ clocks = <&core_clk>;
+ clock-names = "stmmaceth";
+
+ phy-mode = "rgmii";
+ };
diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt b/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt
new file mode 100644
index 0000000..4194ff7
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt
@@ -0,0 +1,35 @@
+Broadcom Bluetooth Chips
+------------------------
+
+This documents the binding structure and common properties for serial
+attached Broadcom devices.
+
+Serial-attached Broadcom devices shall be represented as a child node of
+the host UART device to which they are attached.
+
+Required properties:
+
+ - compatible: should contain one of the following:
+ * "brcm,bcm43438-bt"
+
+Optional properties:
+
+ - max-speed: see Documentation/devicetree/bindings/serial/slave-device.txt
+ - shutdown-gpios: GPIO specifier, used to enable the BT module
+ - device-wakeup-gpios: GPIO specifier, used to wakeup the controller
+ - host-wakeup-gpios: GPIO specifier, used to wakeup the host processor
+ - clocks: clock specifier if external clock provided to the controller
+ - clock-names: should be "extclk"
+
+
+Example:
+
+&uart2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart2_pins>;
+
+ bluetooth {
+ compatible = "brcm,bcm43438-bt";
+ max-speed = <921600>;
+ };
+};
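
For boards that also wire up the optional control lines listed above, the
child node can carry the GPIO and clock properties as well. A minimal
sketch, assuming a hypothetical board (the GPIO controller phandles, pin
numbers, polarities and clock phandle are illustrative, not taken from the
binding):

&uart2 {
	bluetooth {
		compatible = "brcm,bcm43438-bt";
		max-speed = <921600>;
		/* pins and polarities below are board-specific assumptions */
		shutdown-gpios = <&gpio4 30 GPIO_ACTIVE_HIGH>;
		device-wakeup-gpios = <&gpio4 31 GPIO_ACTIVE_HIGH>;
		host-wakeup-gpios = <&gpio0 8 GPIO_ACTIVE_HIGH>;
		clocks = <&ext_clock>;
		clock-names = "extclk";
	};
};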
diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt b/Documentation/devicetree/bindings/net/marvell-pp2.txt
index 6b4956b..c78f318 100644
--- a/Documentation/devicetree/bindings/net/marvell-pp2.txt
+++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt
@@ -41,6 +41,11 @@ Optional properties (port):
- marvell,loopback: port is loopback mode
- phy: a phandle to a phy node defining the PHY address (as the reg
property, a single integer).
+- interrupt-names: if more than a single interrupt for rx is given, must
+ list the names associated with the interrupts given. Valid
+ names are: "tx-cpu0", "tx-cpu1", "tx-cpu2", "tx-cpu3",
+ "rx-shared", "link".
+- marvell,system-controller: a phandle to the system controller.
Example for marvell,armada-375-pp2:
@@ -80,19 +85,37 @@ cpm_ethernet: ethernet@0 {
clock-names = "pp_clk", "gop_clk", "gp_clk";
eth0: eth0 {
- interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <ICU_GRP_NSR 39 IRQ_TYPE_LEVEL_HIGH>,
+ <ICU_GRP_NSR 43 IRQ_TYPE_LEVEL_HIGH>,
+ <ICU_GRP_NSR 47 IRQ_TYPE_LEVEL_HIGH>,
+ <ICU_GRP_NSR 51 IRQ_TYPE_LEVEL_HIGH>,
+ <ICU_GRP_NSR 55 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tx-cpu0", "tx-cpu1", "tx-cpu2",
+ "tx-cpu3", "rx-shared";
port-id = <0>;
gop-port-id = <0>;
};
eth1: eth1 {
- interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <ICU_GRP_NSR 40 IRQ_TYPE_LEVEL_HIGH>,
+ <ICU_GRP_NSR 44 IRQ_TYPE_LEVEL_HIGH>,
+ <ICU_GRP_NSR 48 IRQ_TYPE_LEVEL_HIGH>,
+ <ICU_GRP_NSR 52 IRQ_TYPE_LEVEL_HIGH>,
+ <ICU_GRP_NSR 56 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tx-cpu0", "tx-cpu1", "tx-cpu2",
+ "tx-cpu3", "rx-shared";
port-id = <1>;
gop-port-id = <2>;
};
eth2: eth2 {
- interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <ICU_GRP_NSR 41 IRQ_TYPE_LEVEL_HIGH>,
+ <ICU_GRP_NSR 45 IRQ_TYPE_LEVEL_HIGH>,
+ <ICU_GRP_NSR 49 IRQ_TYPE_LEVEL_HIGH>,
+ <ICU_GRP_NSR 53 IRQ_TYPE_LEVEL_HIGH>,
+ <ICU_GRP_NSR 57 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tx-cpu0", "tx-cpu1", "tx-cpu2",
+ "tx-cpu3", "rx-shared";
port-id = <2>;
gop-port-id = <3>;
};
diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt
index c7194e8..1d1168b 100644
--- a/Documentation/devicetree/bindings/net/mediatek-net.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-net.txt
@@ -7,24 +7,30 @@ have dual GMAC each represented by a child node..
* Ethernet controller node
Required properties:
-- compatible: Should be "mediatek,mt2701-eth"
+- compatible: Should be
+ "mediatek,mt2701-eth": for MT2701 SoC
+ "mediatek,mt7623-eth", "mediatek,mt2701-eth": for MT7623 SoC
+ "mediatek,mt7622-eth": for MT7622 SoC
- reg: Address and length of the register set for the device
- interrupts: Should contain the three frame engines interrupts in numeric
order. These are fe_int0, fe_int1 and fe_int2.
- clocks: the clock used by the core
- clock-names: the names of the clock listed in the clocks property. These are
- "ethif", "esw", "gp2", "gp1"
+ "ethif", "esw", "gp2", "gp1" : For MT2701 and MT7623 SoC
+ "ethif", "esw", "gp0", "gp1", "gp2", "sgmii_tx250m", "sgmii_rx250m",
+ "sgmii_cdr_ref", "sgmii_cdr_fb", "sgmii_ck", "eth2pll" : For MT7622 SoC
- power-domains: phandle to the power domain that the ethernet is part of
- resets: Should contain a phandle to the ethsys reset signal
- reset-names: Should contain the reset signal name "eth"
- mediatek,ethsys: phandle to the syscon node that handles the port setup
+- mediatek,sgmiisys: phandle to the syscon node that handles the SGMII setup
+ which is required for SoCs equipped with SGMII, such as the MT7622.
- mediatek,pctl: phandle to the syscon node that handles the ports slew rate
and driver current
Optional properties:
- interrupt-parent: Should be the phandle for the interrupt controller
that services interrupts for this device
-
* Ethernet MAC node
Required properties:
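
To make the MT7622 clock list above concrete, a controller node would look
roughly like the sketch below; the phandle labels, clock/reset indices,
addresses and interrupt numbers are placeholders for illustration, not
values defined by this binding:

	eth: ethernet@1b100000 {
		compatible = "mediatek,mt7622-eth";
		reg = <0 0x1b100000 0 0x20000>;
		interrupts = <GIC_SPI 223 IRQ_TYPE_LEVEL_LOW>,
			     <GIC_SPI 224 IRQ_TYPE_LEVEL_LOW>,
			     <GIC_SPI 225 IRQ_TYPE_LEVEL_LOW>;
		/* one specifier per clock name; providers are made up here */
		clocks = <&topckgen 0>, <&ethsys 0>, <&ethsys 1>,
			 <&ethsys 2>, <&ethsys 3>, <&sgmiisys 0>,
			 <&sgmiisys 1>, <&sgmiisys 2>, <&sgmiisys 3>,
			 <&sgmiisys 4>, <&apmixedsys 0>;
		clock-names = "ethif", "esw", "gp0", "gp1", "gp2",
			      "sgmii_tx250m", "sgmii_rx250m",
			      "sgmii_cdr_ref", "sgmii_cdr_fb",
			      "sgmii_ck", "eth2pll";
		power-domains = <&scpsys 0>;
		resets = <&ethsys 6>;
		reset-names = "eth";
		mediatek,ethsys = <&ethsys>;
		mediatek,sgmiisys = <&sgmiisys>;
		mediatek,pctl = <&syscfg_pctl_a>;
	};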
diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt
index b558576..d3c24d5 100644
--- a/Documentation/devicetree/bindings/net/phy.txt
+++ b/Documentation/devicetree/bindings/net/phy.txt
@@ -52,6 +52,11 @@ Optional Properties:
Mark the corresponding energy efficient ethernet mode as broken and
request the ethernet to stop advertising it.
+- phy-is-integrated: If set, indicates that the PHY is integrated into the same
+ physical package as the Ethernet MAC. If needed, muxers should be configured
+ to ensure the integrated PHY is used. The absence of this property indicates
+ the muxers should be configured so that the external PHY is used.
+
Example:
ethernet-phy@0 {
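
A minimal sketch of how the new property might be used, with the PHY
address assumed for illustration:

	ethernet-phy@1 {
		reg = <1>;
		/* the PHY shares a package with the MAC; muxers, if any,
		 * should be set up to route traffic to the internal PHY */
		phy-is-integrated;
	};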
diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt
index b519503..1672353 100644
--- a/Documentation/devicetree/bindings/net/renesas,ravb.txt
+++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt
@@ -4,19 +4,25 @@ This file provides information on what the device node for the Ethernet AVB
interface contains.
Required properties:
-- compatible: "renesas,etheravb-r8a7790" if the device is a part of R8A7790 SoC.
- "renesas,etheravb-r8a7791" if the device is a part of R8A7791 SoC.
- "renesas,etheravb-r8a7792" if the device is a part of R8A7792 SoC.
- "renesas,etheravb-r8a7793" if the device is a part of R8A7793 SoC.
- "renesas,etheravb-r8a7794" if the device is a part of R8A7794 SoC.
- "renesas,etheravb-r8a7795" if the device is a part of R8A7795 SoC.
- "renesas,etheravb-r8a7796" if the device is a part of R8A7796 SoC.
- "renesas,etheravb-rcar-gen2" for generic R-Car Gen 2 compatible interface.
- "renesas,etheravb-rcar-gen3" for generic R-Car Gen 3 compatible interface.
+- compatible: Must contain one or more of the following:
+ - "renesas,etheravb-r8a7743" for the R8A7743 SoC.
+ - "renesas,etheravb-r8a7745" for the R8A7745 SoC.
+ - "renesas,etheravb-r8a7790" for the R8A7790 SoC.
+ - "renesas,etheravb-r8a7791" for the R8A7791 SoC.
+ - "renesas,etheravb-r8a7792" for the R8A7792 SoC.
+ - "renesas,etheravb-r8a7793" for the R8A7793 SoC.
+ - "renesas,etheravb-r8a7794" for the R8A7794 SoC.
+ - "renesas,etheravb-rcar-gen2" as a fallback for the above
+ R-Car Gen2 and RZ/G1 devices.
- When compatible with the generic version, nodes must list the
- SoC-specific version corresponding to the platform first
- followed by the generic version.
+ - "renesas,etheravb-r8a7795" for the R8A7795 SoC.
+ - "renesas,etheravb-r8a7796" for the R8A7796 SoC.
+ - "renesas,etheravb-rcar-gen3" as a fallback for the above
+ R-Car Gen3 devices.
+
+ When compatible with the generic version, nodes must list the
+ SoC-specific version corresponding to the platform first followed by
+ the generic version.
- reg: offset and length of (1) the register block and (2) the stream buffer.
- interrupts: A list of interrupt-specifiers, one for each entry in
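
As a worked example of the ordering rule, a node for an R8A7795 board names
the SoC-specific string first and the Gen3 fallback second; the label and
register values in this sketch are placeholders, not taken from the binding:

	avb: ethernet@e6800000 {
		compatible = "renesas,etheravb-r8a7795",
			     "renesas,etheravb-rcar-gen3";
		/* placeholder values: (1) register block, (2) stream buffer */
		reg = <0 0xe6800000 0 0x800>, <0 0xe6a00000 0 0x10000>;
	};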
diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
index 8f42755..c132538 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
@@ -10,6 +10,7 @@ Required properties:
"rockchip,rk3366-gmac": found on RK3366 SoCs
"rockchip,rk3368-gmac": found on RK3368 SoCs
"rockchip,rk3399-gmac": found on RK3399 SoCs
+ "rockchip,rv1108-gmac": found on RV1108 SoCs
- reg: addresses and length of the register sets for the device.
- interrupts: Should contain the GMAC interrupts.
- interrupt-names: Should contain the interrupt names "macirq".
diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
new file mode 100644
index 0000000..38f9ec0
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
@@ -0,0 +1,55 @@
+XILINX AXI ETHERNET Device Tree Bindings
+--------------------------------------------------------
+
+Also called the AXI 1G/2.5G Ethernet Subsystem, the Xilinx AXI Ethernet IP core
+provides connectivity to an external Ethernet PHY supporting different
+interfaces: MII, GMII, RGMII, SGMII, 1000BaseX. It also includes two
+segments of memory for buffering TX and RX, as well as the capability of
+offloading TX/RX checksum calculation from the processor.
+
+Management configuration is done through the AXI interface, while payload is
+sent and received by means of an AXI DMA controller. This driver
+includes the DMA driver code, so it is incompatible with the separate AXI DMA
+driver.
+
+For more details about MDIO please refer to the phy.txt file in the same directory.
+
+Required properties:
+- compatible : Must be one of "xlnx,axi-ethernet-1.00.a",
+ "xlnx,axi-ethernet-1.01.a", "xlnx,axi-ethernet-2.01.a"
+- reg : Address and length of the IO space.
+- interrupts : Should be a list of two interrupts, TX and RX.
+- phy-handle : Should point to the external phy device.
+ See ethernet.txt file in the same directory.
+- xlnx,rxmem : Size of the memory buffer allocated for Rx/Tx in the hardware
+
+Optional properties:
+- phy-mode : See ethernet.txt
+- xlnx,phy-type : Deprecated, do not use, but still accepted in preference
+ to phy-mode.
+- xlnx,txcsum : 0 or empty for disabling TX checksum offload,
+ 1 to enable partial TX checksum offload,
+ 2 to enable full TX checksum offload
+- xlnx,rxcsum : Same values as xlnx,txcsum but for RX checksum offload
+
+Example:
+ axi_ethernet_eth: ethernet@40c00000 {
+ compatible = "xlnx,axi-ethernet-1.00.a";
+ device_type = "network";
+ interrupt-parent = <&microblaze_0_axi_intc>;
+ interrupts = <2 0>;
+ phy-mode = "mii";
+ reg = <0x40c00000 0x40000>;
+ xlnx,rxcsum = <0x2>;
+ xlnx,rxmem = <0x800>;
+ xlnx,txcsum = <0x2>;
+ phy-handle = <&phy0>;
+ axi_ethernetlite_0_mdio: mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: phy@1 {
+ device_type = "ethernet-phy";
+ reg = <1>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt b/Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt
new file mode 100644
index 0000000..bfcf803
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt
@@ -0,0 +1,43 @@
+mvebu comphy driver
+-------------------
+
+A comphy controller can be found on Marvell Armada 7k/8k on the CP110. It
+provides a number of shared PHYs used by various interfaces (network, SATA,
+USB, PCIe...).
+
+Required properties:
+
+- compatible: should be "marvell,comphy-cp110"
+- reg: should contain the comphy register location and length.
+- marvell,system-controller: should contain a phandle to the
+ system controller node.
+- #address-cells: should be 1.
+- #size-cells: should be 0.
+
+A sub-node is required for each comphy lane provided by the comphy.
+
+Required properties (child nodes):
+
+- reg: comphy lane number.
+- #phy-cells : from the generic phy bindings, must be 1. Defines the
+ input port to use for a given comphy lane.
+
+Example:
+
+ cpm_comphy: phy@120000 {
+ compatible = "marvell,comphy-cp110";
+ reg = <0x120000 0x6000>;
+ marvell,system-controller = <&cpm_syscon0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpm_comphy0: phy@0 {
+ reg = <0>;
+ #phy-cells = <1>;
+ };
+
+ cpm_comphy1: phy@1 {
+ reg = <1>;
+ #phy-cells = <1>;
+ };
+ };
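
Because #phy-cells is 1, a consumer selects the lane through the phandle and
the input port through the extra cell. A hypothetical consumer node (its
compatible string and registers are assumptions for illustration) could
reference lane 0, port 1 like this:

	eth0: ethernet@30000 {
		compatible = "vendor,some-ethernet"; /* hypothetical */
		reg = <0x30000 0x4000>;
		/* the cell after the phandle selects the comphy input port */
		phys = <&cpm_comphy0 1>;
	};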
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index c6beb5f..7a79b35 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -30,8 +30,6 @@ atm.txt
- info on where to get ATM programs and support for Linux.
ax25.txt
- info on using AX.25 and NET/ROM code for Linux
-batman-adv.txt
- - B.A.T.M.A.N routing protocol on top of layer 2 Ethernet Frames.
baycom.txt
- info on the driver for Baycom style amateur radio modems
bonding.txt
diff --git a/Documentation/networking/batman-adv.rst b/Documentation/networking/batman-adv.rst
new file mode 100644
index 0000000..a342b2c
--- /dev/null
+++ b/Documentation/networking/batman-adv.rst
@@ -0,0 +1,220 @@
+==========
+batman-adv
+==========
+
+Batman advanced is a new approach to wireless networking which no longer
+operates on the IP basis. Unlike the batman daemon, which exchanges information
+using UDP packets and sets routing tables, batman-advanced operates on ISO/OSI
+Layer 2 only and uses and routes (or better: bridges) Ethernet Frames. It
+emulates a virtual network switch of all participating nodes. Therefore all
+nodes appear to be link local, and higher-layer protocols won't be
+affected by any changes within the network. You can run almost any protocol
+on top of batman advanced; prominent examples are IPv4, IPv6, DHCP and IPX.
+
+Batman advanced was implemented as a Linux kernel driver to reduce the overhead
+to a minimum. It does not depend on any (other) network driver, and can be used
+on wifi as well as ethernet lan, vpn, etc ... (anything with ethernet-style
+layer 2).
+
+
+Configuration
+=============
+
+Load the batman-adv module into your kernel::
+
+ $ insmod batman-adv.ko
+
+The module is now waiting for activation. You must add some interfaces on which
+batman can operate. After loading the module, batman advanced will scan your
+system's interfaces to search for compatible ones. Once found, it will
+create subfolders in the ``/sys`` directories of each supported interface,
+e.g.::
+
+ $ ls /sys/class/net/eth0/batman_adv/
+ elp_interval iface_status mesh_iface throughput_override
+
+If an interface does not have the ``batman_adv`` subfolder, it is probably not
+supported. Unsupported interfaces are: loopback, non-ethernet and batman's
+own interfaces.
+
+Note: After the module has been loaded it will continuously watch for new
+interfaces and verify their compatibility. There is no need to reload the module
+if you plug your USB wifi adapter into your machine after batman advanced was
+initially loaded.
+
+The batman-adv soft-interface can be created using the iproute2 tool ``ip``::
+
+ $ ip link add name bat0 type batadv
+
+To activate a given interface simply attach it to the ``bat0`` interface::
+
+ $ ip link set dev eth0 master bat0
+
+Repeat this step for all interfaces you wish to add. Now batman starts
+using/broadcasting on these interfaces.
+
+By reading the "iface_status" file you can check its status::
+
+ $ cat /sys/class/net/eth0/batman_adv/iface_status
+ active
+
+To deactivate an interface you have to detach it from the "bat0" interface::
+
+ $ ip link set dev eth0 nomaster
+
+
+All mesh wide settings can be found in batman's own interface folder::
+
+ $ ls /sys/class/net/bat0/mesh/
+ aggregated_ogms fragmentation isolation_mark routing_algo
+ ap_isolation gw_bandwidth log_level vlan0
+ bonding gw_mode multicast_mode
+ bridge_loop_avoidance gw_sel_class network_coding
+ distributed_arp_table hop_penalty orig_interval
+
+There is a special folder for debugging information::
+
+ $ ls /sys/kernel/debug/batman_adv/bat0/
+ bla_backbone_table log neighbors transtable_local
+ bla_claim_table mcast_flags originators
+ dat_cache nc socket
+ gateways nc_nodes transtable_global
+
+Some of the files contain all sorts of status information regarding the mesh
+network. For example, you can view the table of originators (mesh
+participants) with::
+
+ $ cat /sys/kernel/debug/batman_adv/bat0/originators
+
+Other files let you change batman's behaviour to better fit your requirements.
+For instance, you can check the current originator interval (value in
+milliseconds which determines how often batman sends its broadcast packets)::
+
+ $ cat /sys/class/net/bat0/mesh/orig_interval
+ 1000
+
+and also change its value::
+
+ $ echo 3000 > /sys/class/net/bat0/mesh/orig_interval
+
+In very mobile scenarios, you might want to adjust the originator interval to a
+lower value. This will make the mesh more responsive to topology changes, but
+will also increase the overhead.
+
+
+Usage
+=====
+
+To make use of your newly created mesh, batman advanced provides a new
+interface "bat0" which you should use from this point on. All interfaces added
+to batman advanced are not relevant any longer because batman handles them for
+you. Basically, one "hands over" the data by using the batman interface and
+batman will make sure it reaches its destination.
+
+The "bat0" interface can be used like any other regular interface. It needs an
+IP address, which can be configured either statically or dynamically (by using
+DHCP or similar services)::
+
+ NodeA: ip link set up dev bat0
+ NodeA: ip addr add 192.168.0.1/24 dev bat0
+
+ NodeB: ip link set up dev bat0
+ NodeB: ip addr add 192.168.0.2/24 dev bat0
+ NodeB: ping 192.168.0.1
+
+Note: In order to avoid problems, remove all IP addresses previously assigned to
+interfaces now used by batman advanced, e.g.::
+
+ $ ip addr flush dev eth0
+
+
+Logging/Debugging
+=================
+
+All error messages, warnings and information messages are sent to the kernel
+log. Depending on your operating system distribution this can be read in one of
+a number of ways. Try using the commands: ``dmesg``, ``logread``, or looking in
+the files ``/var/log/kern.log`` or ``/var/log/syslog``. All batman-adv messages
+are prefixed with "batman-adv:", so to see just these messages try::
+
+ $ dmesg | grep batman-adv
+
+When investigating problems with your mesh network, it is sometimes necessary to
+see more detailed debug messages. This must be enabled when compiling the
+batman-adv module. When building batman-adv as part of the kernel, use "make
+menuconfig" and enable the option ``B.A.T.M.A.N. debugging``
+(``CONFIG_BATMAN_ADV_DEBUG=y``).
+
+Those additional debug messages can be accessed using a special file in
+debugfs::
+
+ $ cat /sys/kernel/debug/batman_adv/bat0/log
+
+The additional debug output is disabled by default. It can be enabled at
+run time. The following log_level values are defined:
+
+.. flat-table::
+
+ * - 0
+ - All debug output disabled
+ * - 1
+ - Enable messages related to routing / flooding / broadcasting
+ * - 2
+ - Enable messages related to route added / changed / deleted
+ * - 4
+ - Enable messages related to translation table operations
+ * - 8
+ - Enable messages related to bridge loop avoidance
+ * - 16
+ - Enable messages related to DAT, ARP snooping and parsing
+ * - 32
+ - Enable messages related to network coding
+ * - 64
+ - Enable messages related to multicast
+ * - 128
+ - Enable messages related to throughput meter
+ * - 255
+ - Enable all messages
+
+The debug output can be changed at runtime using the file
+``/sys/class/net/bat0/mesh/log_level``. e.g.::
+
+ $ echo 6 > /sys/class/net/bat0/mesh/log_level
+
+will enable debug messages for route changes and translation table operations
+(the log level is a bitmask, and 6 = 2 + 4).
+
+Counters for different types of packets entering and leaving the batman-adv
+module are available through ethtool::
+
+ $ ethtool --statistics bat0
+
+
+batctl
+======
+
+As batman advanced operates on layer 2, all hosts participating in the virtual
+switch are completely transparent to all protocols above layer 2. Therefore
+the common diagnosis tools do not work as expected. To overcome these problems,
+batctl was created. At the moment batctl contains ping, traceroute, tcpdump
+and interfaces to the kernel module settings.
+
+For more information, please see the manpage (``man batctl``).
+
+batctl is available at https://www.open-mesh.org/
+
+
+Contact
+=======
+
+Please send us comments, experiences, questions, anything :)
+
+IRC:
+ #batman on irc.freenode.org
+Mailing-list:
+ b.a.t.m.a.n@open-mesh.org (optional subscription at
+ https://lists.open-mesh.org/mm/listinfo/b.a.t.m.a.n)
+
+You can also contact the Authors:
+
+* Marek Lindner <mareklindner@neomailbox.ch>
+* Simon Wunderlich <sw@simonwunderlich.de>
diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt
deleted file mode 100644
index ccf9467..0000000
--- a/Documentation/networking/batman-adv.txt
+++ /dev/null
@@ -1,215 +0,0 @@
-BATMAN-ADV
-----------
-
-Batman advanced is a new approach to wireless networking which
-does no longer operate on the IP basis. Unlike the batman daemon,
-which exchanges information using UDP packets and sets routing
-tables, batman-advanced operates on ISO/OSI Layer 2 only and uses
-and routes (or better: bridges) Ethernet Frames. It emulates a
-virtual network switch of all nodes participating. Therefore all
-nodes appear to be link local, thus all higher operating proto-
-cols won't be affected by any changes within the network. You can
-run almost any protocol above batman advanced, prominent examples
-are: IPv4, IPv6, DHCP, IPX.
-
-Batman advanced was implemented as a Linux kernel driver to re-
-duce the overhead to a minimum. It does not depend on any (other)
-network driver, and can be used on wifi as well as ethernet lan,
-vpn, etc ... (anything with ethernet-style layer 2).
-
-
-CONFIGURATION
--------------
-
-Load the batman-adv module into your kernel:
-
-# insmod batman-adv.ko
-
-The module is now waiting for activation. You must add some in-
-terfaces on which batman can operate. After loading the module
-batman advanced will scan your systems interfaces to search for
-compatible interfaces. Once found, it will create subfolders in
-the /sys directories of each supported interface, e.g.
-
-# ls /sys/class/net/eth0/batman_adv/
-# elp_interval iface_status mesh_iface throughput_override
-
-If an interface does not have the "batman_adv" subfolder it prob-
-ably is not supported. Not supported interfaces are: loopback,
-non-ethernet and batman's own interfaces.
-
-Note: After the module was loaded it will continuously watch for
-new interfaces to verify the compatibility. There is no need to
-reload the module if you plug your USB wifi adapter into your ma-
-chine after batman advanced was initially loaded.
-
-The batman-adv soft-interface can be created using the iproute2
-tool "ip"
-
-# ip link add name bat0 type batadv
-
-To activate a given interface simply attach it to the "bat0"
-interface
-
-# ip link set dev eth0 master bat0
-
-Repeat this step for all interfaces you wish to add. Now batman
-starts using/broadcasting on this/these interface(s).
-
-By reading the "iface_status" file you can check its status:
-
-# cat /sys/class/net/eth0/batman_adv/iface_status
-# active
-
-To deactivate an interface you have to detach it from the
-"bat0" interface:
-
-# ip link set dev eth0 nomaster
-
-
-All mesh wide settings can be found in batman's own interface
-folder:
-
-# ls /sys/class/net/bat0/mesh/
-# aggregated_ogms fragmentation isolation_mark routing_algo
-# ap_isolation gw_bandwidth log_level vlan0
-# bonding gw_mode multicast_mode
-# bridge_loop_avoidance gw_sel_class network_coding
-# distributed_arp_table hop_penalty orig_interval
-
-There is a special folder for debugging information:
-
-# ls /sys/kernel/debug/batman_adv/bat0/
-# bla_backbone_table log neighbors transtable_local
-# bla_claim_table mcast_flags originators
-# dat_cache nc socket
-# gateways nc_nodes transtable_global
-
-Some of the files contain all sort of status information regard-
-ing the mesh network. For example, you can view the table of
-originators (mesh participants) with:
-
-# cat /sys/kernel/debug/batman_adv/bat0/originators
-
-Other files allow to change batman's behaviour to better fit your
-requirements. For instance, you can check the current originator
-interval (value in milliseconds which determines how often batman
-sends its broadcast packets):
-
-# cat /sys/class/net/bat0/mesh/orig_interval
-# 1000
-
-and also change its value:
-
-# echo 3000 > /sys/class/net/bat0/mesh/orig_interval
-
-In very mobile scenarios, you might want to adjust the originator
-interval to a lower value. This will make the mesh more respon-
-sive to topology changes, but will also increase the overhead.
-
-
-USAGE
------
-
-To make use of your newly created mesh, batman advanced provides
-a new interface "bat0" which you should use from this point on.
-All interfaces added to batman advanced are not relevant any
-longer because batman handles them for you. Basically, one "hands
-over" the data by using the batman interface and batman will make
-sure it reaches its destination.
-
-The "bat0" interface can be used like any other regular inter-
-face. It needs an IP address which can be either statically con-
-figured or dynamically (by using DHCP or similar services):
-
-# NodeA: ip link set up dev bat0
-# NodeA: ip addr add 192.168.0.1/24 dev bat0
-
-# NodeB: ip link set up dev bat0
-# NodeB: ip addr add 192.168.0.2/24 dev bat0
-# NodeB: ping 192.168.0.1
-
-Note: In order to avoid problems remove all IP addresses previ-
-ously assigned to interfaces now used by batman advanced, e.g.
-
-# ip addr flush dev eth0
-
-
-LOGGING/DEBUGGING
------------------
-
-All error messages, warnings and information messages are sent to
-the kernel log. Depending on your operating system distribution
-this can be read in one of a number of ways. Try using the com-
-mands: dmesg, logread, or looking in the files /var/log/kern.log
-or /var/log/syslog. All batman-adv messages are prefixed with
-"batman-adv:" So to see just these messages try
-
-# dmesg | grep batman-adv
-
-When investigating problems with your mesh network it is some-
-times necessary to see more detail debug messages. This must be
-enabled when compiling the batman-adv module. When building bat-
-man-adv as part of kernel, use "make menuconfig" and enable the
-option "B.A.T.M.A.N. debugging".
-
-Those additional debug messages can be accessed using a special
-file in debugfs
-
-# cat /sys/kernel/debug/batman_adv/bat0/log
-
-The additional debug output is by default disabled. It can be en-
-abled during run time. Following log_levels are defined:
-
- 0 - All debug output disabled
- 1 - Enable messages related to routing / flooding / broadcasting
- 2 - Enable messages related to route added / changed / deleted
- 4 - Enable messages related to translation table operations
- 8 - Enable messages related to bridge loop avoidance
- 16 - Enable messages related to DAT, ARP snooping and parsing
- 32 - Enable messages related to network coding
- 64 - Enable messages related to multicast
-128 - Enable messages related to throughput meter
-255 - Enable all messages
-
-The debug output can be changed at runtime using the file
-/sys/class/net/bat0/mesh/log_level. e.g.
-
-# echo 6 > /sys/class/net/bat0/mesh/log_level
-
-will enable debug messages for when routes change.
-
-Counters for different types of packets entering and leaving the
-batman-adv module are available through ethtool:
-
-# ethtool --statistics bat0
-
-
-BATCTL
-------
-
-As batman advanced operates on layer 2 all hosts participating in
-the virtual switch are completely transparent for all protocols
-above layer 2. Therefore the common diagnosis tools do not work
-as expected. To overcome these problems batctl was created. At
-the moment the batctl contains ping, traceroute, tcpdump and
-interfaces to the kernel module settings.
-
-For more information, please see the manpage (man batctl).
-
-batctl is available on https://www.open-mesh.org/
-
-
-CONTACT
--------
-
-Please send us comments, experiences, questions, anything :)
-
-IRC: #batman on irc.freenode.org
-Mailing-list: b.a.t.m.a.n@open-mesh.org (optional subscription
- at https://lists.open-mesh.org/mm/listinfo/b.a.t.m.a.n)
-
-You can also contact the Authors:
-
-Marek Lindner <mareklindner@neomailbox.ch>
-Simon Wunderlich <sw@simonwunderlich.de>
diff --git a/Documentation/networking/dpaa.txt b/Documentation/networking/dpaa.txt
index 76e016d..f88194f 100644
--- a/Documentation/networking/dpaa.txt
+++ b/Documentation/networking/dpaa.txt
@@ -13,6 +13,7 @@ Contents
- Configuring DPAA Ethernet in your kernel
- DPAA Ethernet Frame Processing
- DPAA Ethernet Features
+ - DPAA IRQ Affinity and Receive Side Scaling
- Debugging
DPAA Ethernet Overview
@@ -147,7 +148,10 @@ gradually.
The driver has Rx and Tx checksum offloading for UDP and TCP. Currently the Rx
checksum offload feature is enabled by default and cannot be controlled through
-ethtool.
+ethtool. Also, rx-flow-hash and rx-hashing were added. The addition of RSS
+provides a big performance boost for forwarding scenarios, allowing
+different traffic flows received by one interface to be processed by different
+CPUs in parallel.
The driver has support for multiple prioritized Tx traffic classes. Priorities
range from 0 (lowest) to 3 (highest). These are mapped to HW workqueues with
@@ -166,6 +170,68 @@ classes as follows:
tc qdisc add dev <int> root handle 1: \
mqprio num_tc 4 map 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 hw 1
+DPAA IRQ Affinity and Receive Side Scaling
+==========================================
+
+Traffic coming on the DPAA Rx queues or on the DPAA Tx confirmation
+queues is seen by the CPU as ingress traffic on a certain portal.
+The DPAA QMan portal interrupts are each affined to a certain CPU.
+The same portal interrupt services all the QMan portal consumers.
+
+By default the DPAA Ethernet driver enables RSS, making use of the
+DPAA FMan Parser and Keygen blocks to distribute traffic on 128
+hardware frame queues using a hash on IP v4/v6 source and destination
+and L4 source and destination ports, if present in the received frame.
+When RSS is disabled, all traffic received by a certain interface is
+received on the default Rx frame queue. The default DPAA Rx frame
+queues are configured to put the received traffic into a pool channel
+that allows any available CPU portal to dequeue the ingress traffic.
+The default frame queues have the HOLDACTIVE option set, ensuring that
+traffic bursts from a certain queue are serviced by the same CPU.
+This ensures a very low rate of frame reordering. A drawback of this
+is that only one CPU at a time can service the traffic received by a
+certain interface when RSS is not enabled.
+
+To implement RSS, the DPAA Ethernet driver allocates an extra set of
+128 Rx frame queues that are configured to dedicated channels, in a
+round-robin manner. The mapping of the frame queues to CPUs is now
+hardcoded; there is no indirection table to move traffic for a certain
+FQ (hash result) to another CPU. The ingress traffic arriving on one
+of these frame queues will arrive at the same portal and will always
+be processed by the same CPU. This ensures intra-flow order preservation
+and workload distribution for multiple traffic flows.
+
+RSS can be turned off for a certain interface using ethtool, e.g.:
+
+ # ethtool -N fm1-mac9 rx-flow-hash tcp4 ""
+
+To turn it back on, one needs to set rx-flow-hash for tcp4/6 or udp4/6:
+
+ # ethtool -N fm1-mac9 rx-flow-hash udp4 sfdn
+
+There is no independent control for individual protocols; any command
+run for one of tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 is
+going to control the rx-flow-hashing for all protocols on that interface.
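+
+The current setting can be queried with, e.g.:
+
+ # ethtool -n fm1-mac9 rx-flow-hash tcp4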
+
+Besides using the FMan Keygen computed hash for spreading traffic on the
+128 Rx FQs, the DPAA Ethernet driver also sets the skb hash value when
+the NETIF_F_RXHASH feature is on (active by default). This can be turned
+on or off through ethtool, e.g.:
+
+ # ethtool -K fm1-mac9 rx-hashing off
+ # ethtool -k fm1-mac9 | grep hash
+ receive-hashing: off
+ # ethtool -K fm1-mac9 rx-hashing on
+ Actual changes:
+ receive-hashing: on
+ # ethtool -k fm1-mac9 | grep hash
+ receive-hashing: on
+
+Please note that Rx hashing depends upon the rx-flow-hashing being on
+for that interface - turning off rx-flow-hashing will also disable the
+rx-hashing (without ethtool reporting it as off as that depends on the
+NETIF_F_RXHASH feature flag).
+
Debugging
=========
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index b69b205..e5e33ba 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -596,8 +596,8 @@ skb pointer). All constraints and restrictions from bpf_check_classic() apply
before a conversion to the new layout is being done behind the scenes!
Currently, the classic BPF format is being used for JITing on most 32-bit
-architectures, whereas x86-64, aarch64, s390x, powerpc64, sparc64 perform JIT
-compilation from eBPF instruction set.
+architectures, whereas x86-64, aarch64, s390x, powerpc64, sparc64, arm32 perform
+JIT compilation from eBPF instruction set.
Some core changes of the new internal format:
@@ -793,7 +793,7 @@ Some core changes of the new internal format:
bpf_exit
After the call the registers R1-R5 contain junk values and cannot be read.
- In the future an eBPF verifier can be used to validate internal BPF programs.
+ An in-kernel eBPF verifier is used to validate internal BPF programs.
Also in the new design, eBPF is limited to 4096 insns, which means that any
program will terminate quickly and will only call a fixed number of kernel
@@ -906,6 +906,10 @@ If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
BPF_JSGE 0x70 /* eBPF only: signed '>=' */
BPF_CALL 0x80 /* eBPF only: function call */
BPF_EXIT 0x90 /* eBPF only: function return */
+ BPF_JLT 0xa0 /* eBPF only: unsigned '<' */
+ BPF_JLE 0xb0 /* eBPF only: unsigned '<=' */
+ BPF_JSLT 0xc0 /* eBPF only: signed '<' */
+ BPF_JSLE 0xd0 /* eBPF only: signed '<=' */
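+
+For example, the new unsigned '<' test could be encoded as follows (a sketch
+using the uapi struct bpf_insn; the operands are illustrative):
+
+  struct bpf_insn jlt = {
+	.code    = BPF_JMP | BPF_JLT | BPF_K,
+	.dst_reg = BPF_REG_1,
+	.off     = 2,	/* skip two insns when r1 < 42 (unsigned) */
+	.imm     = 42,
+  };
+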
So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF
and eBPF. There are only two registers in classic BPF, so it means A += X.
@@ -1017,7 +1021,7 @@ At the start of the program the register R1 contains a pointer to context
and has type PTR_TO_CTX.
If verifier sees an insn that does R2=R1, then R2 has now type
PTR_TO_CTX as well and can be used on the right hand side of expression.
-If R1=PTR_TO_CTX and insn is R2=R1+R1, then R2=UNKNOWN_VALUE,
+If R1=PTR_TO_CTX and insn is R2=R1+R1, then R2=SCALAR_VALUE,
since addition of two valid pointers makes invalid pointer.
(In 'secure' mode verifier will reject any type of pointer arithmetic to make
sure that kernel addresses don't leak to unprivileged users)
@@ -1039,7 +1043,7 @@ is a correct program. If there was R1 instead of R6, it would have
been rejected.
load/store instructions are allowed only with registers of valid types, which
-are PTR_TO_CTX, PTR_TO_MAP, FRAME_PTR. They are bounds and alignment checked.
+are PTR_TO_CTX, PTR_TO_MAP, PTR_TO_STACK. They are bounds and alignment checked.
For example:
bpf_mov R1 = 1
bpf_mov R2 = 2
@@ -1058,7 +1062,7 @@ intends to load a word from address R6 + 8 and store it into R0
If R6=PTR_TO_CTX, via is_valid_access() callback the verifier will know
that offset 8 of size 4 bytes can be accessed for reading, otherwise
the verifier will reject the program.
-If R6=FRAME_PTR, then access should be aligned and be within
+If R6=PTR_TO_STACK, then access should be aligned and be within
stack bounds, which are [-MAX_BPF_STACK, 0). In this example offset is 8,
so it will fail verification, since it's out of bounds.
@@ -1069,7 +1073,7 @@ For example:
bpf_ld R0 = *(u32 *)(R10 - 4)
bpf_exit
is invalid program.
-Though R10 is correct read-only register and has type FRAME_PTR
+Though R10 is correct read-only register and has type PTR_TO_STACK
and R10 - 4 is within stack bounds, there were no stores into that location.
Pointer register spill/fill is tracked as well, since four (R6-R9)
@@ -1094,6 +1098,71 @@ all use cases.
See details of eBPF verifier in kernel/bpf/verifier.c
+Register value tracking
+-----------------------
+In order to determine the safety of an eBPF program, the verifier must track
+the range of possible values in each register and also in each stack slot.
+This is done with 'struct bpf_reg_state', defined in include/linux/
+bpf_verifier.h, which unifies tracking of scalar and pointer values. Each
+register state has a type, which is either NOT_INIT (the register has not been
+written to), SCALAR_VALUE (some value which is not usable as a pointer), or a
+pointer type. The types of pointers describe their base, as follows:
+ PTR_TO_CTX Pointer to bpf_context.
+ CONST_PTR_TO_MAP Pointer to struct bpf_map. "Const" because arithmetic
+ on these pointers is forbidden.
+ PTR_TO_MAP_VALUE Pointer to the value stored in a map element.
+ PTR_TO_MAP_VALUE_OR_NULL
+ Either a pointer to a map value, or NULL; map accesses
+ (see section 'eBPF maps', below) return this type,
+ which becomes a PTR_TO_MAP_VALUE when checked != NULL.
+ Arithmetic on these pointers is forbidden.
+ PTR_TO_STACK Frame pointer.
+ PTR_TO_PACKET skb->data.
+ PTR_TO_PACKET_END skb->data + headlen; arithmetic forbidden.
+However, a pointer may be offset from this base (as a result of pointer
+arithmetic), and this is tracked in two parts: the 'fixed offset' and 'variable
+offset'. The former is used when an exactly-known value (e.g. an immediate
+operand) is added to a pointer, while the latter is used for values which are
+not exactly known. The variable offset is also used in SCALAR_VALUEs, to track
+the range of possible values in the register.
+The verifier's knowledge about the variable offset consists of:
+* minimum and maximum values as unsigned
+* minimum and maximum values as signed
+* knowledge of the values of individual bits, in the form of a 'tnum': a u64
+'mask' and a u64 'value'. 1s in the mask represent bits whose value is unknown;
+1s in the value represent bits known to be 1. Bits known to be 0 have 0 in both
+mask and value; no bit should ever be 1 in both. For example, if a byte is read
+into a register from memory, the register's top 56 bits are known zero, while
+the low 8 are unknown - which is represented as the tnum (0x0; 0xff). If we
+then OR this with 0x40, we get (0x40; 0xbf), then if we add 1 we get (0x0;
+0x1ff), because of potential carries.
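+
+As a sketch, the OR and carry handling can be written out in plain C; this
+mirrors the helpers in kernel/bpf/tnum.c (the standalone struct here is for
+illustration only):
+
+  struct tnum { u64 value; u64 mask; };
+
+  /* OR: bits set in either side become known ones */
+  static struct tnum tnum_or(struct tnum a, struct tnum b)
+  {
+	u64 v = a.value | b.value;
+	u64 mu = a.mask | b.mask;
+
+	return (struct tnum){ .value = v, .mask = mu & ~v };
+  }
+
+  /* ADD: unknown bits can generate carries into higher bits */
+  static struct tnum tnum_add(struct tnum a, struct tnum b)
+  {
+	u64 sm = a.mask + b.mask;
+	u64 sv = a.value + b.value;
+	u64 sigma = sm + sv;
+	u64 chi = sigma ^ sv;
+	u64 mu = chi | a.mask | b.mask;
+
+	return (struct tnum){ .value = sv & ~mu, .mask = mu };
+  }
+
+Feeding the example above through these helpers reproduces the results:
+tnum_or((0x0; 0xff), (0x40; 0x0)) yields (0x40; 0xbf), and adding (0x1; 0x0)
+yields (0x0; 0x1ff).
+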
+Besides arithmetic, the register state can also be updated by conditional
+branches. For instance, if a SCALAR_VALUE is compared > 8, in the 'true' branch
+it will have a umin_value (unsigned minimum value) of 9, whereas in the 'false'
+branch it will have a umax_value of 8. A signed compare (with BPF_JSGT or
+BPF_JSGE) would instead update the signed minimum/maximum values. Information
+from the signed and unsigned bounds can be combined; for instance if a value is
+first tested < 8 and then tested s> 4, the verifier will conclude that the value
+is also > 4 and s< 8, since the bounds prevent crossing the sign boundary.
+PTR_TO_PACKETs with a variable offset part have an 'id', which is common to all
+pointers sharing that same variable offset. This is important for packet range
+checks: after adding some variable to a packet pointer, if you then copy it to
+another register and (say) add a constant 4, both registers will share the same
+'id' but one will have a fixed offset of +4. Then if it is bounds-checked and
+found to be less than a PTR_TO_PACKET_END, the other register is now known to
+have a safe range of at least 4 bytes. See 'Direct packet access', below, for
+more on PTR_TO_PACKET ranges.
+The 'id' field is also used on PTR_TO_MAP_VALUE_OR_NULL, common to all copies of
+the pointer returned from a map lookup. This means that when one copy is
+checked and found to be non-NULL, all copies can become PTR_TO_MAP_VALUEs.
+As well as range-checking, the tracked information is also used for enforcing
+alignment of pointer accesses. For instance, on most systems the packet pointer
+is 2 bytes after a 4-byte alignment. If a program adds 14 bytes to that to jump
+over the Ethernet header, then reads IHL and adds (IHL * 4), the resulting
+pointer will have a variable offset known to be 4n+2 for some n, so adding the 2
+bytes (NET_IP_ALIGN) gives a 4-byte alignment and so word-sized accesses through
+that pointer are safe.
+
Direct packet access
--------------------
In cls_bpf and act_bpf programs the verifier allows direct access to the packet
@@ -1121,7 +1190,7 @@ it now points to 'skb->data + 14' and accessible range is [R5, R5 + 14 - 14)
which is zero bytes.
More complex packet access may look like:
- R0=imm1 R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
+ R0=inv1 R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
6: r0 = *(u8 *)(r3 +7) /* load 7th byte from the packet */
7: r4 = *(u8 *)(r3 +12)
8: r4 *= 14
@@ -1135,26 +1204,31 @@ More complex packet access may look like:
16: r2 += 8
17: r1 = *(u32 *)(r1 +80) /* load skb->data_end */
18: if r2 > r1 goto pc+2
- R0=inv56 R1=pkt_end R2=pkt(id=2,off=8,r=8) R3=pkt(id=2,off=0,r=8) R4=inv52 R5=pkt(id=0,off=14,r=14) R10=fp
+ R0=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R1=pkt_end R2=pkt(id=2,off=8,r=8) R3=pkt(id=2,off=0,r=8) R4=inv(id=0,umax_value=3570,var_off=(0x0; 0xfffe)) R5=pkt(id=0,off=14,r=14) R10=fp
19: r1 = *(u8 *)(r3 +4)
The state of the register R3 is R3=pkt(id=2,off=0,r=8)
id=2 means that two 'r3 += rX' instructions were seen, so r3 points to some
offset within a packet and since the program author did
'if (r3 + 8 > r1) goto err' at insn #18, the safe range is [R3, R3 + 8).
-The verifier only allows 'add' operation on packet registers. Any other
-operation will set the register state to 'unknown_value' and it won't be
+The verifier only allows 'add'/'sub' operations on packet registers. Any other
+operation will set the register state to 'SCALAR_VALUE' and it won't be
available for direct packet access.
Operation 'r3 += rX' may overflow and become less than original skb->data,
-therefore the verifier has to prevent that. So it tracks the number of
-upper zero bits in all 'uknown_value' registers, so when it sees
-'r3 += rX' instruction and rX is more than 16-bit value, it will error as:
-"cannot add integer value with N upper zero bits to ptr_to_packet"
+therefore the verifier has to prevent that. So when it sees 'r3 += rX'
+instruction and rX is more than 16-bit value, any subsequent bounds-check of r3
+against skb->data_end will not give us 'range' information, so attempts to read
+through the pointer will give "invalid access to packet" error.
Ex. after insn 'r4 = *(u8 *)(r3 +12)' (insn #7 above) the state of r4 is
-R4=inv56 which means that upper 56 bits on the register are guaranteed
-to be zero. After insn 'r4 *= 14' the state becomes R4=inv52, since
-multiplying 8-bit value by constant 14 will keep upper 52 bits as zero.
-Similarly 'r2 >>= 48' will make R2=inv48, since the shift is not sign
-extending. This logic is implemented in evaluate_reg_alu() function.
+R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) which means that upper 56 bits
+of the register are guaranteed to be zero, and nothing is known about the lower
+8 bits. After insn 'r4 *= 14' the state becomes
+R4=inv(id=0,umax_value=3570,var_off=(0x0; 0xfffe)), since multiplying an 8-bit
+value by constant 14 will keep upper 52 bits as zero, also the least significant
+bit will be zero as 14 is even. Similarly 'r2 >>= 48' will make
+R2=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)), since the shift is not sign
+extending. This logic is implemented in adjust_reg_min_max_vals() function,
+which calls adjust_ptr_min_max_vals() for adding pointer to scalar (or vice
+versa) and adjust_scalar_min_max_vals() for operations on two scalars.
The end result is that bpf program author can access packet directly
using normal C code as:
@@ -1214,6 +1288,22 @@ The map is defined by:
. key size in bytes
. value size in bytes
+Pruning
+-------
+The verifier does not actually walk all possible paths through the program. For
+each new branch to analyse, the verifier looks at all the states it's previously
+been in when at this instruction. If any of them contain the current state as a
+subset, the branch is 'pruned' - that is, the fact that the previous state was
+accepted implies the current state would be as well. For instance, if in the
+previous state, r1 held a packet-pointer, and in the current state, r1 holds a
+packet-pointer with a range as long or longer and at least as strict an
+alignment, then r1 is safe. Similarly, if r2 was NOT_INIT before then it can't
+have been used by any path from that point, so any value in r2 (including
+another NOT_INIT) is safe. The implementation is in the function regsafe().
+Pruning considers not only the registers but also the stack (and any spilled
+registers it may hold). They must all be safe for the branch to be pruned.
+This is implemented in states_equal().
+
Understanding eBPF verifier messages
------------------------------------
diff --git a/Documentation/networking/hinic.txt b/Documentation/networking/hinic.txt
new file mode 100644
index 0000000..989366a
--- /dev/null
+++ b/Documentation/networking/hinic.txt
@@ -0,0 +1,125 @@
+Linux Kernel Driver for Huawei Intelligent NIC(HiNIC) family
+============================================================
+
+Overview:
+=========
+HiNIC is a network interface card for data center deployments.
+
+The driver supports a range of link-speed devices (10GbE, 25GbE, 40GbE, etc.).
+The driver also supports a negotiated and extendable feature set.
+
+Some HiNIC devices support SR-IOV. This driver is used for Physical Function
+(PF).
+
+HiNIC devices support an MSI-X interrupt vector for each Tx/Rx queue and
+adaptive interrupt moderation.
+
+HiNIC devices also support various offload features such as checksum offload,
+TCP Transmit Segmentation Offload(TSO), Receive-Side Scaling(RSS) and
+LRO(Large Receive Offload).
+
+
+Supported PCI vendor ID/device IDs:
+===================================
+
+19e5:1822 - HiNIC PF
+
+
+Driver Architecture and Source Code:
+====================================
+
+hinic_dev - Implements a logical network device that is independent of
+specific HW details such as data structure formats.
+
+hinic_hwdev - Implements the HW details of the device and includes the
+components for accessing the PCI NIC.
+
+hinic_hwdev contains the following components:
+===============================================
+
+HW Interface:
+=============
+
+The interface for accessing the pci device (DMA memory and PCI BARs).
+(hinic_hw_if.c, hinic_hw_if.h)
+
+Configuration Status Registers Area that describes the HW Registers on the
+configuration and status BAR0. (hinic_hw_csr.h)
+
+MGMT components:
+================
+
+Asynchronous Event Queues(AEQs) - The event queues for receiving messages from
+the MGMT modules on the cards. (hinic_hw_eqs.c, hinic_hw_eqs.h)
+
+Application Programmable Interface commands(API CMD) - Interface for sending
+MGMT commands to the card. (hinic_hw_api_cmd.c, hinic_hw_api_cmd.h)
+
+Management (MGMT) - the PF to MGMT channel that uses API CMD for sending MGMT
+commands to the card and receives notifications from the MGMT modules on the
+card by AEQs. Also sets the addresses of the IO CMDQs in HW.
+(hinic_hw_mgmt.c, hinic_hw_mgmt.h)
+
+IO components:
+==============
+
+Completion Event Queues(CEQs) - The completion Event Queues that describe IO
+tasks that are finished. (hinic_hw_eqs.c, hinic_hw_eqs.h)
+
+Work Queues(WQ) - Contain the memory and operations for use by CMD queues and
+the Queue Pairs. The WQ is a Memory Block in a Page. The Block contains
+pointers to Memory Areas that are the Memory for the Work Queue Elements(WQEs).
+(hinic_hw_wq.c, hinic_hw_wq.h)
+
+Command Queues(CMDQ) - The queues for sending commands for IO management; also
+used to set the addresses of the QPs in HW. The command completion events are
+accumulated on the CEQ that is configured to receive the CMDQ completion events.
+(hinic_hw_cmdq.c, hinic_hw_cmdq.h)
+
+Queue Pairs(QPs) - The HW Receive and Send queues for Receiving and Transmitting
+Data. (hinic_hw_qp.c, hinic_hw_qp.h, hinic_hw_qp_ctxt.h)
+
+IO - de/constructs all the IO components. (hinic_hw_io.c, hinic_hw_io.h)
+
+HW device:
+==========
+
+HW device - de/constructs the HW Interface and the MGMT components on
+initialization of the driver, and the IO components in the case of Interface
+UP/DOWN Events. (hinic_hw_dev.c, hinic_hw_dev.h)
+
+
+hinic_dev contains the following components:
+===============================================
+
+PCI ID table - Contains the supported PCI Vendor/Device IDs.
+(hinic_pci_tbl.h)
+
+Port Commands - Send commands to the HW device for port management
+(MAC, Vlan, MTU, ...). (hinic_port.c, hinic_port.h)
+
+Tx Queues - Logical Tx Queues that use the HW Send Queues for transmit.
+The Logical Tx queue is not dependent on the format of the HW Send Queue.
+(hinic_tx.c, hinic_tx.h)
+
+Rx Queues - Logical Rx Queues that use the HW Receive Queues for receive.
+The Logical Rx queue is not dependent on the format of the HW Receive Queue.
+(hinic_rx.c, hinic_rx.h)
+
+hinic_dev - de/constructs the Logical Tx and Rx Queues.
+(hinic_main.c, hinic_dev.h)
+
+
+Miscellaneous:
+==============
+
+Common functions that are used by HW and Logical Device.
+(hinic_common.c, hinic_common.h)
+
+
+Support
+=======
+
+If an issue is identified with the released source code on the supported kernel
+with a supported adapter, email the specific information related to the issue to
+aviad.krawczyk@huawei.com.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index b5bd87e..66e6208 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -6,6 +6,7 @@ Contents:
.. toctree::
:maxdepth: 2
+ batman-adv
kapi
z8530book
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 974ab47..b3345d0 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -109,7 +109,10 @@ neigh/default/unres_qlen_bytes - INTEGER
queued for each unresolved address by other network layers.
(added in linux 3.3)
Setting negative value is meaningless and will return error.
- Default: 65536 Bytes(64KB)
+	Default: SK_WMEM_MAX (same as net.core.wmem_default).
+ Exact value depends on architecture and kernel options,
+ but should be enough to allow queuing 256 packets
+ of medium size.
neigh/default/unres_qlen - INTEGER
The maximum number of packets which may be queued for each
@@ -119,7 +122,7 @@ neigh/default/unres_qlen - INTEGER
unexpected packet loss. The current default value is calculated
according to default value of unres_qlen_bytes and true size of
packet.
- Default: 31
+ Default: 101
mtu_expires - INTEGER
Time, in seconds, that cached PMTU information is kept.
@@ -353,12 +356,7 @@ tcp_l3mdev_accept - BOOLEAN
compiled with CONFIG_NET_L3_MASTER_DEV.
tcp_low_latency - BOOLEAN
- If set, the TCP stack makes decisions that prefer lower
- latency as opposed to higher throughput. By default, this
- option is not set meaning that higher throughput is preferred.
- An example of an application where this default should be
- changed would be a Beowulf compute cluster.
- Default: 0
+ This is a legacy option, it has no effect anymore.
tcp_max_orphans - INTEGER
Maximal number of TCP sockets not attached to any user file handle,
@@ -1291,8 +1289,7 @@ tag - INTEGER
xfrm4_gc_thresh - INTEGER
The threshold at which we will start garbage collecting for IPv4
destination cache entries. At twice this value the system will
- refuse new allocations. The value must be set below the flowcache
- limit (4096 * number of online cpus) to take effect.
+ refuse new allocations.
igmp_link_local_mcast_reports - BOOLEAN
Enable IGMP reports for link local multicast groups in the
@@ -1356,6 +1353,15 @@ flowlabel_state_ranges - BOOLEAN
FALSE: disabled
Default: true
+flowlabel_reflect - BOOLEAN
+ Automatically reflect the flow label. Needed for Path MTU
+ Discovery to work with Equal Cost Multipath Routing in anycast
+ environments. See RFC 7690 and:
+ https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01
+ TRUE: enabled
+ FALSE: disabled
+ Default: FALSE
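+
+	A runtime example: # sysctl -w net.ipv6.flowlabel_reflect=1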
+
anycast_src_echo_reply - BOOLEAN
Controls the use of anycast addresses as source addresses for ICMPv6
echo reply
@@ -1778,8 +1784,7 @@ ratelimit - INTEGER
xfrm6_gc_thresh - INTEGER
The threshold at which we will start garbage collecting for IPv6
destination cache entries. At twice this value the system will
- refuse new allocations. The value must be set below the flowcache
- limit (4096 * number of online cpus) to take effect.
+ refuse new allocations.
IPv6 Update by:
diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst
new file mode 100644
index 0000000..77f6d7e
--- /dev/null
+++ b/Documentation/networking/msg_zerocopy.rst
@@ -0,0 +1,257 @@
+
+============
+MSG_ZEROCOPY
+============
+
+Intro
+=====
+
+The MSG_ZEROCOPY flag enables copy avoidance for socket send calls.
+The feature is currently implemented for TCP sockets.
+
+
+Opportunity and Caveats
+-----------------------
+
+Copying large buffers between user process and kernel can be
+expensive. Linux supports various interfaces that eschew copying,
+such as sendpage and splice. The MSG_ZEROCOPY flag extends the
+underlying copy avoidance mechanism to common socket send calls.
+
+Copy avoidance is not a free lunch. As implemented, with page pinning,
+it replaces per byte copy cost with page accounting and completion
+notification overhead. As a result, MSG_ZEROCOPY is generally only
+effective at writes over around 10 KB.
+
+Page pinning also changes system call semantics. It temporarily shares
+the buffer between process and network stack. Unlike with copying, the
+process cannot immediately overwrite the buffer after system call
+return without possibly modifying the data in flight. Kernel integrity
+is not affected, but a buggy program can possibly corrupt its own data
+stream.
+
+The kernel returns a notification when it is safe to modify data.
+Converting an existing application to MSG_ZEROCOPY is not always as
+trivial as just passing the flag, then.
+
+
+More Info
+---------
+
+Much of this document was derived from a longer paper presented at
+netdev 2.1. For more in-depth information see that paper and talk,
+the excellent reporting over at LWN.net or read the original code.
+
+ paper, slides, video
+ https://netdevconf.org/2.1/session.html?debruijn
+
+ LWN article
+ https://lwn.net/Articles/726917/
+
+ patchset
+ [PATCH net-next v4 0/9] socket sendmsg MSG_ZEROCOPY
+ http://lkml.kernel.org/r/20170803202945.70750-1-willemdebruijn.kernel@gmail.com
+
+
+Interface
+=========
+
+Passing the MSG_ZEROCOPY flag is the most obvious step to enable copy
+avoidance, but not the only one.
+
+Socket Setup
+------------
+
+The kernel is permissive when applications pass undefined flags to the
+send system call. By default it simply ignores these. To avoid enabling
+copy avoidance mode for legacy processes that accidentally already pass
+this flag, a process must first signal intent by setting a socket option:
+
+::
+
+	int one = 1;
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
+		error(1, errno, "setsockopt zerocopy");
+
+
+Transmission
+------------
+
+The change to send (or sendto, sendmsg, sendmmsg) itself is trivial.
+Pass the new flag.
+
+::
+
+ ret = send(fd, buf, sizeof(buf), MSG_ZEROCOPY);
+
+A zerocopy failure will return -1 with errno ENOBUFS. This happens if
+the socket option was not set, the socket exceeds its optmem limit or
+the user exceeds its ulimit on locked pages.
+
+
+Mixing copy avoidance and copying
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Many workloads have a mixture of large and small buffers. Because copy
+avoidance is more expensive than copying for small packets, the
+feature is implemented as a flag. It is safe to mix calls with the flag
+with those without.
+
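+A sketch of such mixing (the threshold shown is illustrative, not a tuned
+value):
+
+::
+
+	if (len >= 16 * 1024)
+		ret = send(fd, buf, len, MSG_ZEROCOPY);
+	else
+		ret = send(fd, buf, len, 0);
+
+	if (ret == -1 && errno == ENOBUFS)
+		ret = send(fd, buf, len, 0);	/* fall back to copying */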
+
+Notifications
+-------------
+
+The kernel has to notify the process when it is safe to reuse a
+previously passed buffer. It queues completion notifications on the
+socket error queue, akin to the transmit timestamping interface.
+
+The notification itself is a simple scalar value. Each socket
+maintains an internal unsigned 32-bit counter. Each send call with
+MSG_ZEROCOPY that successfully sends data increments the counter. The
+counter is not incremented on failure or if called with length zero.
+The counter counts system call invocations, not bytes. It wraps after
+UINT_MAX calls.
+
+
+Notification Reception
+~~~~~~~~~~~~~~~~~~~~~~
+
+The below snippet demonstrates the API. In the simplest case, each
+send syscall is followed by a poll and recvmsg on the error queue.
+
+Reading from the error queue is always a non-blocking operation. The
+poll call is there to block until an error is outstanding. It will set
+POLLERR in its output flags. That flag does not have to be set in the
+events field. Errors are signaled unconditionally.
+
+::
+
+	struct pollfd pfd;
+
+	pfd.fd = fd;
+	pfd.events = 0;
+	if (poll(&pfd, 1, -1) != 1 || !(pfd.revents & POLLERR))
+		error(1, errno, "poll");
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1)
+ error(1, errno, "recvmsg");
+
+ read_notification(msg);
+
+The example is for demonstration purposes only. In practice, it is more
+efficient to not wait for notifications, but read without blocking
+every couple of send calls.
+
+Notifications can be processed out of order with other operations on
+the socket. A socket that has an error queued would normally block
+other operations until the error is read. Zerocopy notifications have
+a zero error code, however, to not block send and recv calls.
+
+
+Notification Batching
+~~~~~~~~~~~~~~~~~~~~~
+
+Multiple outstanding packets can be read at once using the recvmmsg
+call. This is often not needed. In each message the kernel returns not
+a single value, but a range. It coalesces consecutive notifications
+while one is outstanding for reception on the error queue.
+
+When a new notification is about to be queued, the stack checks whether the
+new value extends the range of the notification at the tail of the
+queue. If so, it drops the new notification packet and instead increases
+the range upper value of the outstanding notification.
+
+For protocols that acknowledge data in-order, like TCP, each
+notification can be squashed into the previous one, so that no more
+than one notification is outstanding at any one point.
+
+Ordered delivery is the common case, but not guaranteed. Notifications
+may arrive out of order on retransmission and socket teardown.
+
+
+Notification Parsing
+~~~~~~~~~~~~~~~~~~~~
+
+The below snippet demonstrates how to parse the control message: the
+read_notification() call in the previous snippet. A notification
+is encoded in the standard error format, sock_extended_err.
+
+The level and type fields in the control data are protocol family
+specific, IP_RECVERR or IPV6_RECVERR.
+
+Error origin is the new type SO_EE_ORIGIN_ZEROCOPY. ee_errno is zero,
+as explained before, to avoid blocking read and write system calls on
+the socket.
+
+The 32-bit notification range is encoded as [ee_info, ee_data]. This
+range is inclusive. Other fields in the struct must be treated as
+undefined, bar for ee_code, as discussed below.
+
+::
+
+ struct sock_extended_err *serr;
+ struct cmsghdr *cm;
+
+ cm = CMSG_FIRSTHDR(msg);
+	if (cm->cmsg_level != SOL_IP ||
+	    cm->cmsg_type != IP_RECVERR)
+ error(1, 0, "cmsg");
+
+ serr = (void *) CMSG_DATA(cm);
+ if (serr->ee_errno != 0 ||
+ serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+ error(1, 0, "serr");
+
+ printf("completed: %u..%u\n", serr->ee_info, serr->ee_data);
+
+
+Deferred copies
+~~~~~~~~~~~~~~~
+
+Passing flag MSG_ZEROCOPY is a hint to the kernel to apply copy
+avoidance, and a contract that the kernel will queue a completion
+notification. It is not a guarantee that the copy is elided.
+
+Copy avoidance is not always feasible. Devices that do not support
+scatter-gather I/O cannot send packets made up of kernel generated
+protocol headers plus zerocopy user data. A packet may need to be
+converted to a private copy of data deep in the stack, say to compute
+a checksum.
+
+In all these cases, the kernel returns a completion notification when
+it releases its hold on the shared pages. That notification may arrive
+before the (copied) data is fully transmitted. A zerocopy completion
+notification is not a transmit completion notification, therefore.
+
+Deferred copies can be more expensive than a copy immediately in the
+system call, if the data is no longer warm in the cache. The process
+also incurs notification processing cost for no benefit. For this
+reason, the kernel signals if data was completed with a copy, by
+setting flag SO_EE_CODE_ZEROCOPY_COPIED in field ee_code on return.
+A process may use this signal to stop passing flag MSG_ZEROCOPY on
+subsequent requests on the same socket.
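+
+A sketch of acting on that signal (zerocopy_enabled is an illustrative
+application-level flag):
+
+::
+
+	if (serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED)
+		zerocopy_enabled = false;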
+
+
+Implementation
+==============
+
+Loopback
+--------
+
+Data sent to local sockets can be queued indefinitely if the receive
+process does not read its socket. Unbound notification latency is not
+acceptable. For this reason all packets generated with MSG_ZEROCOPY
+that are looped to a local socket will incur a deferred copy. This
+includes looping onto packet sockets (e.g., tcpdump) and tun devices.
+
+
+Testing
+=======
+
+More realistic example code can be found in the kernel source under
+tools/testing/selftests/net/msg_zerocopy.c.
+
+Be cognizant of the loopback constraint. The test can be run between
+a pair of hosts. But if run between a local pair of processes, for
+instance when run with msg_zerocopy.sh between a veth pair across
+namespaces, the test will not show any improvement. For testing, the
+loopback restriction can be temporarily relaxed by making
+skb_orphan_frags_rx identical to skb_orphan_frags.
diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt
index 247a30b..cfc66ea 100644
--- a/Documentation/networking/netdev-FAQ.txt
+++ b/Documentation/networking/netdev-FAQ.txt
@@ -111,6 +111,14 @@ A: Generally speaking, the patches get triaged quickly (in less than 48h).
patch is a good way to ensure your patch is ignored or pushed to
the bottom of the priority list.
+Q: I submitted multiple versions of a patch series; should I directly update
+   patchwork for the previous versions of the series?
+
+A: No, please don't interfere with the patch status on patchwork; leave it to
+   the maintainer to figure out which is the most recent and current version
+   that should be applied. If there is any doubt, the maintainer will reply
+   and ask what should be done.
+
Q: How can I tell what patches are queued up for backporting to the
various stable releases?
diff --git a/Documentation/networking/netvsc.txt b/Documentation/networking/netvsc.txt
new file mode 100644
index 0000000..93560fb
--- /dev/null
+++ b/Documentation/networking/netvsc.txt
@@ -0,0 +1,75 @@
+Hyper-V network driver
+======================
+
+Compatibility
+=============
+
+This driver is compatible with Windows Server 2012 R2, 2016 and
+Windows 10.
+
+Features
+========
+
+ Checksum offload
+ ----------------
+ The netvsc driver supports checksum offload as long as the
+ Hyper-V host version does. Windows Server 2016 and Azure
+ support checksum offload for TCP and UDP for both IPv4 and
+ IPv6. Windows Server 2012 only supports checksum offload for TCP.
+
+ Receive Side Scaling
+ --------------------
+ Hyper-V supports receive side scaling. For TCP, packets are
+ distributed among available queues based on IP address and port
+ number.
+
+  For UDP, we can switch the UDP hash level between L3 and L4 with the
+  ethtool command. UDP over IPv4 and v6 can be set differently. The default
+ hash level is L4. We currently only allow switching TX hash level
+ from within the guests.
+
+ On Azure, fragmented UDP packets have high loss rate with L4
+ hashing. Using L3 hashing is recommended in this case.
+
+ For example, for UDP over IPv4 on eth0:
+ To include UDP port numbers in hashing:
+ ethtool -N eth0 rx-flow-hash udp4 sdfn
+ To exclude UDP port numbers in hashing:
+ ethtool -N eth0 rx-flow-hash udp4 sd
+ To show UDP hash level:
+ ethtool -n eth0 rx-flow-hash udp4
+
+ Generic Receive Offload, aka GRO
+ --------------------------------
+ The driver supports GRO and it is enabled by default. GRO coalesces
+ like packets and significantly reduces CPU usage under heavy Rx
+ load.
+
+ SR-IOV support
+ --------------
+ Hyper-V supports SR-IOV as a hardware acceleration option. If SR-IOV
+ is enabled in both the vSwitch and the guest configuration, then the
+ Virtual Function (VF) device is passed to the guest as a PCI
+ device. In this case, both a synthetic (netvsc) and VF device are
+  visible in the guest OS and both NICs have the same MAC address.
+
+  The VF is enslaved by the netvsc device.  The netvsc driver will transparently
+ switch the data path to the VF when it is available and up.
+ Network state (addresses, firewall, etc) should be applied only to the
+ netvsc device; the slave device should not be accessed directly in
+  most cases. The exceptions are when some special queue discipline or
+  flow direction is desired; such settings should be applied directly to the
+  VF slave device.
+
+ Receive Buffer
+ --------------
+  Packets are received into a receive area which is created when the device
+  is probed. The receive area is broken into MTU-sized chunks and each may
+ contain one or more packets. The number of receive sections may be changed
+ via ethtool Rx ring parameters.
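+
+  For example, a sketch of resizing (the ring size shown is illustrative):
+	ethtool -G eth0 rx 1024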
+
+ There is a similar send buffer which is used to aggregate packets for sending.
+  The send area is broken into chunks of 6144 bytes, and each section may
+  contain one or more packets. The send buffer is an optimization; the driver
+  will fall back to a slower method to handle very large packets or when the
+  send buffer area is exhausted.
diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt
index 497d668..433b672 100644
--- a/Documentation/networking/nf_conntrack-sysctl.txt
+++ b/Documentation/networking/nf_conntrack-sysctl.txt
@@ -96,17 +96,6 @@ nf_conntrack_max - INTEGER
Size of connection tracking table. Default value is
nf_conntrack_buckets value * 4.
-nf_conntrack_default_on - BOOLEAN
- 0 - don't register conntrack in new net namespaces
- 1 - register conntrack in new net namespaces (default)
-
- This controls wheter newly created network namespaces have connection
- tracking enabled by default. It will be enabled automatically
- regardless of this setting if the new net namespace requires
- connection tracking, e.g. when NAT rules are created.
- This setting is only visible in initial user namespace, it has no
- effect on existing namespaces.
-
nf_conntrack_tcp_be_liberal - BOOLEAN
0 - disabled (default)
not 0 - enabled
diff --git a/Documentation/networking/rmnet.txt b/Documentation/networking/rmnet.txt
new file mode 100644
index 0000000..6b341ea
--- /dev/null
+++ b/Documentation/networking/rmnet.txt
@@ -0,0 +1,82 @@
+1. Introduction
+
+The rmnet driver is used to support the Multiplexing and Aggregation
+Protocol (MAP). This protocol is used by all recent chipsets using Qualcomm
+Technologies, Inc. modems.
+
+This driver can be used to register onto any physical network device in
+IP mode. Physical transports include USB, HSIC, PCIe and IP accelerator.
+
+Multiplexing allows for creation of logical netdevices (rmnet devices) to
+handle multiple private data networks (PDN) like a default internet, tethering,
+multimedia messaging service (MMS) or IP media subsystem (IMS). Hardware sends
+packets with MAP headers to rmnet. Based on the multiplexer id, rmnet
+routes to the appropriate PDN after removing the MAP header.
+
+Aggregation is required to achieve high data rates. This involves the
+hardware sending an aggregated bunch of MAP frames. The rmnet driver will
+de-aggregate these MAP frames and send them to the appropriate PDNs.
+
+2. Packet format
+
+a. MAP packet (data / control)
+
+The MAP header has the same endianness as the IP packet.
+
+Packet format -
+
+Bit             0              1         2-7   8-15            16-31
+Function        Command/Data   Reserved  Pad   Multiplexer ID  Payload length
+Bit             32-x
+Function        Raw Bytes
+
+The Command (1) / Data (0) bit indicates whether the packet is a MAP command
+or data packet. Control packets are used for transport-level flow control.
+Data packets are standard IP packets.
+
+Reserved bits are usually zeroed out and are to be ignored by the receiver.
+
+Padding is the number of bytes to be added for 4-byte alignment if required
+by the hardware.
+
+The Multiplexer ID indicates the PDN on which the data is to be sent.
+
+Payload length includes the padding length but does not include MAP header
+length.
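+
+A C sketch of this layout (assuming little-endian bit-field allocation as on
+common builds; the field names are illustrative, not necessarily the
+driver's):
+
+	struct map_header {
+		u8     pad_len:6;
+		u8     reserved:1;
+		u8     cd_bit:1;	/* wire bit 0: command (1) / data (0) */
+		u8     mux_id;
+		__be16 pkt_len;		/* payload length, includes padding */
+	} __attribute__((packed));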
+
+b. MAP packet (command specific)
+
+Bit             0         1         2-7   8-15            16-31
+Function        Command   Reserved  Pad   Multiplexer ID  Payload length
+Bit             32-39          40-45     46-47          48-63
+Function        Command name   Reserved  Command Type   Reserved
+Bit             64-95
+Function        Transaction ID
+Bit             96-127
+Function        Command data
+
+A command value of 1 indicates disabling flow, while 2 indicates enabling flow.
+
+Command types -
+0 for MAP command request
+1 is to acknowledge the receipt of a command
+2 is for unsupported commands
+3 is for error during processing of commands
+
+c. Aggregation
+
+Aggregation is multiple MAP packets (can be data or command) delivered to
+rmnet in a single linear skb. rmnet will process the individual
+packets and either ACK the MAP command or deliver the IP packet to the
+network stack as needed.
+
+MAP header|IP Packet|Optional padding|MAP header|IP Packet|Optional padding....
+MAP header|IP Packet|Optional padding|MAP header|Command Packet|Optional pad...
+
+3. Userspace configuration
+
+rmnet userspace configuration is done through the netlink library librmnetctl
+and the command line utility rmnetcli. The utility is hosted in the
+codeaurora forum git. The driver uses rtnl_link_ops for communication.
+
+https://source.codeaurora.org/quic/la/platform/vendor/qcom-opensource/dataservices/tree/rmnetctl
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 8c70ba5..8106201 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -818,10 +818,15 @@ The kernel interface functions are as follows:
(*) Send data through a call.
+ typedef void (*rxrpc_notify_end_tx_t)(struct sock *sk,
+ unsigned long user_call_ID,
+ struct sk_buff *skb);
+
int rxrpc_kernel_send_data(struct socket *sock,
struct rxrpc_call *call,
struct msghdr *msg,
- size_t len);
+ size_t len,
+			       rxrpc_notify_end_tx_t notify_end_tx);
This is used to supply either the request part of a client call or the
reply part of a server call. msg.msg_iovlen and msg.msg_iov specify the
@@ -832,6 +837,11 @@ The kernel interface functions are as follows:
The msg must not specify a destination address, control data or any flags
other than MSG_MORE. len is the total amount of data to transmit.
+     notify_end_tx can be NULL or it can be used to specify a function to be
+ called when the call changes state to end the Tx phase. This function is
+ called with the call-state spinlock held to prevent any reply or final ACK
+ from being delivered first.
+
(*) Receive data from a call.
int rxrpc_kernel_recv_data(struct socket *sock,
@@ -965,6 +975,51 @@ The kernel interface functions are as follows:
size should be set when the call is begun. tx_total_len may not be less
than zero.
+ (*) Check to see the completion state of a call so that the caller can assess
+ whether it needs to be retried.
+
+ enum rxrpc_call_completion {
+ RXRPC_CALL_SUCCEEDED,
+ RXRPC_CALL_REMOTELY_ABORTED,
+ RXRPC_CALL_LOCALLY_ABORTED,
+ RXRPC_CALL_LOCAL_ERROR,
+ RXRPC_CALL_NETWORK_ERROR,
+ };
+
+ int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call,
+ enum rxrpc_call_completion *_compl,
+ u32 *_abort_code);
+
+ On return, -EINPROGRESS will be returned if the call is still ongoing; if
+     it is finished, *_compl will be set to indicate the manner of completion
+     and *_abort_code will be set to any abort code that occurred. 0 will be
+     returned on a successful completion; -ECONNABORTED will be returned if the
+     client failed due to a remote abort; and anything else will return an
+ appropriate error code.
+
+ The caller should look at this information to decide if it's worth
+ retrying the call.
+
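+     A hedged usage sketch (error handling abbreviated; the retry flag is
+     illustrative):
+
+	enum rxrpc_call_completion compl;
+	u32 abort_code = 0;
+	int ret, retry = 0;
+
+	ret = rxrpc_kernel_check_call(sock, call, &compl, &abort_code);
+	if (ret == -EINPROGRESS)
+		return;			/* call still in progress */
+	if (ret == -ECONNABORTED && compl == RXRPC_CALL_REMOTELY_ABORTED)
+		retry = 1;		/* consider rxrpc_kernel_retry_call() */
+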
+ (*) Retry a client call.
+
+ int rxrpc_kernel_retry_call(struct socket *sock,
+ struct rxrpc_call *call,
+ struct sockaddr_rxrpc *srx,
+ struct key *key);
+
+ This attempts to partially reinitialise a call and submit it again whilst
+ reusing the original call's Tx queue to avoid the need to repackage and
+ re-encrypt the data to be sent. call indicates the call to retry, srx the
+ new address to send it to and key the encryption key to use for signing or
+ encrypting the packets.
+
+ For this to work, the first Tx data packet must still be in the transmit
+ queue, and currently this is only permitted for local and network errors
+ and the call must not have been aborted. Any partially constructed Tx
+ packet is left as is and can continue being filled afterwards.
+
+ It returns 0 if the call was requeued and an error otherwise.
+
=======================
CONFIGURABLE PARAMETERS
diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt
index a0bf573..13081b3 100644
--- a/Documentation/networking/strparser.txt
+++ b/Documentation/networking/strparser.txt
@@ -1,45 +1,107 @@
-Stream Parser
--------------
+Stream Parser (strparser)
+
+Introduction
+============
The stream parser (strparser) is a utility that parses messages of an
-application layer protocol running over a TCP connection. The stream
+application layer protocol running over a data stream. The stream
parser works in conjunction with an upper layer in the kernel to provide
kernel support for application layer messages. For instance, Kernel
Connection Multiplexor (KCM) uses the Stream Parser to parse messages
using a BPF program.
+The strparser works in one of two modes: receive callback or general
+mode.
+
+In receive callback mode, the strparser is called from the data_ready
+callback of a TCP socket. Messages are parsed and delivered as they are
+received on the socket.
+
+In general mode, a sequence of skbs is fed to strparser from an
+outside source. Messages are parsed and delivered as the sequence is
+processed. This mode allows strparser to be applied to arbitrary
+streams of data.
+
Interface
----------
+=========
The API includes a context structure, a set of callbacks, utility
-functions, and a data_ready function. The callbacks include
-a parse_msg function that is called to perform parsing (e.g.
-BPF parsing in case of KCM), and a rcv_msg function that is called
-when a full message has been completed.
+functions, and a data_ready function for receive callback mode. The
+callbacks include a parse_msg function that is called to perform
+parsing (e.g. BPF parsing in case of KCM), and a rcv_msg function
+that is called when a full message has been completed.
+
+Functions
+=========
+
+strp_init(struct strparser *strp, struct sock *sk,
+ const struct strp_callbacks *cb)
+
+ Called to initialize a stream parser. strp is a struct of type
+ strparser that is allocated by the upper layer. sk is the TCP
+ socket associated with the stream parser for use with receive
+ callback mode; in general mode this is set to NULL. Callbacks
+ are called by the stream parser (the callbacks are listed below).
+
+void strp_pause(struct strparser *strp)
+
+ Temporarily pause a stream parser. Message parsing is suspended
+ and no new messages are delivered to the upper layer.
+
+void strp_unpause(struct strparser *strp)
+
+ Unpause a paused stream parser.
+
+void strp_stop(struct strparser *strp);
+
+ strp_stop is called to completely stop stream parser operations.
+ This is called internally when the stream parser encounters an
+ error, and it is called from the upper layer to stop parsing
+ operations.
+
+void strp_done(struct strparser *strp);
+
+ strp_done is called to release any resources held by the stream
+ parser instance. This must be called after the stream processor
+ has been stopped.
-A stream parser can be instantiated for a TCP connection. This is done
-by:
+int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
+ unsigned int orig_offset, size_t orig_len,
+ size_t max_msg_size, long timeo)
-strp_init(struct strparser *strp, struct sock *csk,
- struct strp_callbacks *cb)
+ strp_process is called in general mode for a stream parser to
+ parse an sk_buff. The number of bytes processed or a negative
+ error number is returned. Note that strp_process does not
+     consume the sk_buff. max_msg_size is the maximum message size the
+     stream parser will parse. timeo is the timeout for completing a message.
-strp is a struct of type strparser that is allocated by the upper layer.
-csk is the TCP socket associated with the stream parser. Callbacks are
-called by the stream parser.
+void strp_data_ready(struct strparser *strp);
+
+     The upper layer calls strp_data_ready when data is ready on
+     the lower socket for strparser to process. This should be called
+     from a data_ready callback that is set on the socket. Note that
+     the maximum message size is the limit of the receive socket
+     buffer and the message timeout is the receive timeout for the socket.
+
+void strp_check_rcv(struct strparser *strp);
+
+ strp_check_rcv is called to check for new messages on the socket.
+ This is normally called at initialization of a stream parser
+ instance or after strp_unpause.
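+
+A minimal receive-callback-mode sketch, assuming a hypothetical protocol
+whose messages begin with a 4-byte big-endian total-length field (the my_
+names are illustrative):
+
+  static int my_parse_msg(struct strparser *strp, struct sk_buff *skb)
+  {
+	struct strp_msg *stm = strp_msg(skb);
+	__be32 len;
+
+	if (skb->len - stm->offset < sizeof(len))
+		return 0;	/* need more data to determine the length */
+	if (skb_copy_bits(skb, stm->offset, &len, sizeof(len)))
+		return -EINVAL;
+	return sizeof(len) + ntohl(len);	/* total message length */
+  }
+
+  static void my_rcv_msg(struct strparser *strp, struct sk_buff *skb)
+  {
+	/* the callee must consume the skb */
+	kfree_skb(skb);
+  }
+
+  static const struct strp_callbacks my_cb = {
+	.parse_msg = my_parse_msg,
+	.rcv_msg = my_rcv_msg,
+  };
+
+  struct strparser my_strp;
+  int err;
+
+  /* during socket setup, for TCP socket sk: */
+  err = strp_init(&my_strp, sk, &my_cb);
+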
Callbacks
----------
+=========
-There are four callbacks:
+There are six callbacks:
int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
parse_msg is called to determine the length of the next message
in the stream. The upper layer must implement this function. It
should parse the sk_buff as containing the headers for the
- next application layer messages in the stream.
+ next application layer message in the stream.
- The skb->cb in the input skb is a struct strp_rx_msg. Only
+ The skb->cb in the input skb is a struct strp_msg. Only
the offset field is relevant in parse_msg and gives the offset
where the message starts in the skb.
@@ -50,26 +112,41 @@ int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
-ESTRPIPE : current message should not be processed by the
kernel, return control of the socket to userspace which
can proceed to read the messages itself
- other < 0 : Error is parsing, give control back to userspace
+ other < 0 : Error in parsing, give control back to userspace
assuming that synchronization is lost and the stream
is unrecoverable (application expected to close TCP socket)
In the case that an error is returned (return value is less than
- zero) the stream parser will set the error on TCP socket and wake
- it up. If parse_msg returned -ESTRPIPE and the stream parser had
- previously read some bytes for the current message, then the error
- set on the attached socket is ENODATA since the stream is
- unrecoverable in that case.
+ zero) and the parser is in receive callback mode, then it will set
+ the error on TCP socket and wake it up. If parse_msg returned
+ -ESTRPIPE and the stream parser had previously read some bytes for
+ the current message, then the error set on the attached socket is
+ ENODATA since the stream is unrecoverable in that case.
+
+void (*lock)(struct strparser *strp)
+
+ The lock callback is called to lock the strp structure when
+ the strparser is performing an asynchronous operation (such as
+ processing a timeout). In receive callback mode the default
+ function is to lock_sock for the associated socket. In general
+ mode the callback must be set appropriately.
+
+void (*unlock)(struct strparser *strp)
+
+ The unlock callback is called to release the lock obtained
+ by the lock callback. In receive callback mode the default
+ function is release_sock for the associated socket. In general
+ mode the callback must be set appropriately.
void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
rcv_msg is called when a full message has been received and
is queued. The callee must consume the sk_buff; it can
call strp_pause to prevent any further messages from being
- received in rcv_msg (see strp_pause below). This callback
+ received in rcv_msg (see strp_pause above). This callback
must be set.
- The skb->cb in the input skb is a struct strp_rx_msg. This
+ The skb->cb in the input skb is a struct strp_msg. This
struct contains two fields: offset and full_len. Offset is
    where the message starts in the skb, and full_len is the
    length of the message. skb->len - offset may be greater
@@ -78,59 +155,53 @@ void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
int (*read_sock_done)(struct strparser *strp, int err);
read_sock_done is called when the stream parser is done reading
- the TCP socket. The stream parser may read multiple messages
- in a loop and this function allows cleanup to occur when existing
- the loop. If the callback is not set (NULL in strp_init) a
- default function is used.
+ the TCP socket in receive callback mode. The stream parser may
+ read multiple messages in a loop and this function allows cleanup
+ to occur when exiting the loop. If the callback is not set (NULL
+ in strp_init) a default function is used.
void (*abort_parser)(struct strparser *strp, int err);
This function is called when stream parser encounters an error
- in parsing. The default function stops the stream parser for the
- TCP socket and sets the error in the socket. The default function
- can be changed by setting the callback to non-NULL in strp_init.
+ in parsing. The default function stops the stream parser and
+ sets the error in the socket if the parser is in receive callback
+ mode. The default function can be changed by setting the callback
+ to non-NULL in strp_init.
-Functions
----------
+Statistics
+==========
-The upper layer calls strp_tcp_data_ready when data is ready on the lower
-socket for strparser to process. This should be called from a data_ready
-callback that is set on the socket.
+Various counters are kept for each stream parser instance. These are in
+the strp_stats structure. strp_aggr_stats is a convenience structure for
+accumulating statistics for multiple stream parser instances.
+save_strp_stats and aggregate_strp_stats are helper functions to save
+and aggregate statistics.
-strp_stop is called to completely stop stream parser operations. This
-is called internally when the stream parser encounters an error, and
-it is called from the upper layer when unattaching a TCP socket.
+Message assembly limits
+=======================
-strp_done is called to unattach the stream parser from the TCP socket.
-This must be called after the stream processor has be stopped.
+The stream parser provide mechanisms to limit the resources consumed by
+message assembly.
-strp_check_rcv is called to check for new messages on the socket. This
-is normally called at initialization of the a stream parser instance
-of after strp_unpause.
+A timer is set when assembly starts for a new message. In receive
+callback mode the message timeout is taken from rcvtime for the
+associated TCP socket. In general mode, the timeout is passed as an
+argument in strp_process. If the timer fires before assembly completes
+the stream parser is aborted and the ETIMEDOUT error is set on the TCP
+socket if in receive callback mode.
-Statistics
-----------
+In receive callback mode, message length is limited to the receive
+buffer size of the associated TCP socket. If the length returned by
+parse_msg is greater than the socket buffer size then the stream parser
+is aborted with EMSGSIZE error set on the TCP socket. Note that this
+makes the maximum size of receive skbuffs for a socket with a stream
+parser to be 2*sk_rcvbuf of the TCP socket.
-Various counters are kept for each stream parser for a TCP socket.
-These are in the strp_stats structure. strp_aggr_stats is a convenience
-structure for accumulating statistics for multiple stream parser
-instances. save_strp_stats and aggregate_strp_stats are helper functions
-to save and aggregate statistics.
+In general mode the message length limit is passed in as an argument
+to strp_process.
-Message assembly limits
------------------------
+Author
+======
-The stream parser provide mechanisms to limit the resources consumed by
-message assembly.
+Tom Herbert (tom@quantonium.net)
-A timer is set when assembly starts for a new message. The message
-timeout is taken from rcvtime for the associated TCP socket. If the
-timer fires before assembly completes the stream parser is aborted
-and the ETIMEDOUT error is set on the TCP socket.
-
-Message length is limited to the receive buffer size of the associated
-TCP socket. If the length returned by parse_msg is greater than
-the socket buffer size then the stream parser is aborted with
-EMSGSIZE error set on the TCP socket. Note that this makes the
-maximum size of receive skbuffs for a socket with a stream parser
-to be 2*sk_rcvbuf of the TCP socket.
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 28596e0..b67044a 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -46,13 +46,13 @@ translate these BPF proglets into native CPU instructions. There are
two flavors of JITs, the newer eBPF JIT currently supported on:
- x86_64
- arm64
+ - arm32
- ppc64
- sparc64
- mips64
- s390x
And the older cBPF JIT supported on the following archs:
- - arm
- mips
- ppc
- sparc
diff --git a/MAINTAINERS b/MAINTAINERS
index dc6758f..961423b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2492,7 +2492,7 @@ Q: https://patchwork.open-mesh.org/project/batman/list/
S: Maintained
F: Documentation/ABI/testing/sysfs-class-net-batman-adv
F: Documentation/ABI/testing/sysfs-class-net-mesh
-F: Documentation/networking/batman-adv.txt
+F: Documentation/networking/batman-adv.rst
F: include/uapi/linux/batman_adv.h
F: net/batman-adv/
@@ -5136,6 +5136,7 @@ F: include/linux/of_net.h
F: include/linux/phy.h
F: include/linux/phy_fixed.h
F: include/linux/platform_data/mdio-gpio.h
+F: include/linux/platform_data/mdio-bcm-unimac.h
F: include/trace/events/mdio.h
F: include/uapi/linux/mdio.h
F: include/uapi/linux/mii.h
@@ -6183,6 +6184,14 @@ S: Maintained
F: drivers/net/ethernet/hisilicon/
F: Documentation/devicetree/bindings/net/hisilicon*.txt
+HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3)
+M: Yisen Zhuang <yisen.zhuang@huawei.com>
+M: Salil Mehta <salil.mehta@huawei.com>
+L: netdev@vger.kernel.org
+W: http://www.hisilicon.com
+S: Maintained
+F: drivers/net/ethernet/hisilicon/hns3/
+
HISILICON ROCE DRIVER
M: Lijun Ou <oulijun@huawei.com>
M: Wei Hu(Xavier) <xavier.huwei@huawei.com>
@@ -6267,6 +6276,13 @@ L: linux-input@vger.kernel.org
S: Maintained
F: drivers/input/touchscreen/htcpen.c
+HUAWEI ETHERNET DRIVER
+M: Aviad Krawczyk <aviad.krawczyk@huawei.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: Documentation/networking/hinic.txt
+F: drivers/net/ethernet/huawei/hinic/
+
HUGETLB FILESYSTEM
M: Nadia Yvette Chambers <nyc@holomorphy.com>
S: Maintained
@@ -6293,6 +6309,7 @@ M: Haiyang Zhang <haiyangz@microsoft.com>
M: Stephen Hemminger <sthemmin@microsoft.com>
L: devel@linuxdriverproject.org
S: Maintained
+F: Documentation/networking/netvsc.txt
F: arch/x86/include/asm/mshyperv.h
F: arch/x86/include/uapi/asm/hyperv.h
F: arch/x86/kernel/cpu/mshyperv.c
@@ -6305,6 +6322,7 @@ F: drivers/net/hyperv/
F: drivers/scsi/storvsc_drv.c
F: drivers/uio/uio_hv_generic.c
F: drivers/video/fbdev/hyperv_fb.c
+F: net/vmw_vsock/hyperv_transport.c
F: include/linux/hyperv.h
F: tools/hv/
F: Documentation/ABI/stable/sysfs-bus-vmbus
@@ -7120,9 +7138,7 @@ W: http://irda.sourceforge.net/
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/sameo/irda-2.6.git
F: Documentation/networking/irda.txt
-F: drivers/net/irda/
-F: include/net/irda/
-F: net/irda/
+F: drivers/staging/irda/
IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
M: Marc Zyngier <marc.zyngier@arm.com>
@@ -8449,7 +8465,9 @@ F: include/uapi/linux/uvcvideo.h
MEDIATEK ETHERNET DRIVER
M: Felix Fietkau <nbd@openwrt.org>
-M: John Crispin <blogic@openwrt.org>
+M: John Crispin <john@phrozen.org>
+M: Sean Wang <sean.wang@mediatek.com>
+M: Nelson Chang <nelson.chang@mediatek.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/mediatek/
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 7b285dd..c6133a0 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -109,4 +109,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 61a0cb1..f1b3f1d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -50,7 +50,7 @@ config ARM
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
select HAVE_ARCH_TRACEHOOK
select HAVE_ARM_SMCCC if CPU_V7
- select HAVE_CBPF_JIT
+ select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
select HAVE_CC_STACKPROTECTOR
select HAVE_CONTEXT_TRACKING
select HAVE_C_RECORDMCOUNT
diff --git a/arch/arm/boot/dts/rk3228-evb.dts b/arch/arm/boot/dts/rk3228-evb.dts
index 5883433..1be9daa 100644
--- a/arch/arm/boot/dts/rk3228-evb.dts
+++ b/arch/arm/boot/dts/rk3228-evb.dts
@@ -50,6 +50,16 @@
device_type = "memory";
reg = <0x60000000 0x40000000>;
};
+
+ vcc_phy: vcc-phy-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ regulator-name = "vcc_phy";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
};
&emmc {
@@ -60,6 +70,30 @@
status = "okay";
};
+&gmac {
+ assigned-clocks = <&cru SCLK_MAC_SRC>;
+ assigned-clock-rates = <50000000>;
+ clock_in_out = "output";
+ phy-supply = <&vcc_phy>;
+ phy-mode = "rmii";
+ phy-handle = <&phy>;
+ status = "okay";
+
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy: phy@0 {
+ compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ clocks = <&cru SCLK_MAC_PHY>;
+ resets = <&cru SRST_MACPHY>;
+ phy-is-integrated;
+ };
+ };
+};
+
&tsadc {
status = "okay";
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 4d19c1b..94d7e71 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -270,6 +270,7 @@ CONFIG_ICPLUS_PHY=y
CONFIG_REALTEK_PHY=y
CONFIG_MICREL_PHY=y
CONFIG_FIXED_PHY=y
+CONFIG_ROCKCHIP_PHY=y
CONFIG_USB_PEGASUS=y
CONFIG_USB_RTL8152=m
CONFIG_USB_USBNET=y
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index d5b9fa1..c199990 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1,6 +1,7 @@
/*
- * Just-In-Time compiler for BPF filters on 32bit ARM
+ * Just-In-Time compiler for eBPF filters on 32bit ARM
*
+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
* Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify it
@@ -8,6 +9,7 @@
* Free Software Foundation; version 2 of the License.
*/
+#include <linux/bpf.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
@@ -18,54 +20,101 @@
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
-#include <asm/set_memory.h>
#include <asm/hwcap.h>
#include <asm/opcodes.h>
#include "bpf_jit_32.h"
+int bpf_jit_enable __read_mostly;
+
+#define STACK_OFFSET(k) (k)
+#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */
+#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */
+#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */
+
+/* Flags used for JIT optimization */
+#define SEEN_CALL (1 << 0)
+
+#define FLAG_IMM_OVERFLOW (1 << 0)
+
/*
- * ABI:
+ * Map eBPF registers to ARM 32bit registers or stack scratch space.
+ *
+ * 1. The first argument is passed using the arm 32bit registers and the
+ * rest of the arguments are passed on stack scratch space.
+ * 2. The first callee-saved argument is mapped to arm 32 bit registers
+ * and the rest are mapped to scratch space on the stack.
+ * 3. We need two 64 bit temp registers to do complex operations on eBPF
+ * registers.
+ *
+ * As the eBPF registers are all 64 bit registers and arm has only 32 bit
+ * registers, we have to map each eBPF register to two arm 32 bit regs or
+ * scratch memory space and build each eBPF 64 bit register from those.
*
- * r0 scratch register
- * r4 BPF register A
- * r5 BPF register X
- * r6 pointer to the skb
- * r7 skb->data
- * r8 skb_headlen(skb)
*/
+static const u8 bpf2a32[][2] = {
+ /* return value from in-kernel function, and exit value from eBPF */
+ [BPF_REG_0] = {ARM_R1, ARM_R0},
+ /* arguments from eBPF program to in-kernel function */
+ [BPF_REG_1] = {ARM_R3, ARM_R2},
+ /* Stored on stack scratch space */
+ [BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)},
+ [BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)},
+ [BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)},
+ [BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)},
+ /* callee saved registers that in-kernel function will preserve */
+ [BPF_REG_6] = {ARM_R5, ARM_R4},
+ /* Stored on stack scratch space */
+ [BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)},
+ [BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)},
+ [BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)},
+ /* Read only Frame Pointer to access Stack */
+ [BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)},
+ /* Temporary registers for internal BPF JIT; can be used
+ * for constant blinding, among other things.
+ */
+ [TMP_REG_1] = {ARM_R7, ARM_R6},
+ [TMP_REG_2] = {ARM_R10, ARM_R8},
+ /* Tail call count. Stored on stack scratch space. */
+ [TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)},
+ /* temporary register for blinding constants.
+ * Stored on stack scratch space.
+ */
+ [BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)},
+};
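
Each table entry above backs one 64-bit eBPF register with a pair of
32-bit locations: element [0] holds the high word and element [1] the
low word, matching the dst_hi/dst_lo macros defined just below. A
minimal sketch (editorial, not part of the patch) of how the emit
helpers consume it:

	/* Zero out BPF_REG_6, which lives entirely in core registers:
	 * per the table, low word in ARM_R4 and high word in ARM_R5. */
	const u8 *r6 = bpf2a32[BPF_REG_6];

	emit(ARM_MOV_I(r6[1], 0), ctx);	/* low 32 bits (ARM_R4) */
	emit(ARM_MOV_I(r6[0], 0), ctx);	/* high 32 bits (ARM_R5) */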
-#define r_scratch ARM_R0
-/* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */
-#define r_off ARM_R1
-#define r_A ARM_R4
-#define r_X ARM_R5
-#define r_skb ARM_R6
-#define r_skb_data ARM_R7
-#define r_skb_hl ARM_R8
-
-#define SCRATCH_SP_OFFSET 0
-#define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + 4 * (k))
-
-#define SEEN_MEM ((1 << BPF_MEMWORDS) - 1)
-#define SEEN_MEM_WORD(k) (1 << (k))
-#define SEEN_X (1 << BPF_MEMWORDS)
-#define SEEN_CALL (1 << (BPF_MEMWORDS + 1))
-#define SEEN_SKB (1 << (BPF_MEMWORDS + 2))
-#define SEEN_DATA (1 << (BPF_MEMWORDS + 3))
+#define dst_lo dst[1]
+#define dst_hi dst[0]
+#define src_lo src[1]
+#define src_hi src[0]
-#define FLAG_NEED_X_RESET (1 << 0)
-#define FLAG_IMM_OVERFLOW (1 << 1)
+/*
+ * JIT Context:
+ *
+ * prog : bpf_prog
+ * idx : index of current last JITed instruction.
+ * prologue_bytes : bytes used in prologue.
+ * epilogue_offset : offset of epilogue starting.
+ * seen : bit mask used for JIT optimization.
+ * offsets : array of eBPF instruction offsets in
+ * JITed code.
+ * target : final JITed code.
+ * epilogue_bytes : no of bytes used in epilogue.
+ * imm_count : no of immediate counts used for global
+ * variables.
+ * imms : array of global variable addresses.
+ */
struct jit_ctx {
- const struct bpf_prog *skf;
- unsigned idx;
- unsigned prologue_bytes;
- int ret0_fp_idx;
+ const struct bpf_prog *prog;
+ unsigned int idx;
+ unsigned int prologue_bytes;
+ unsigned int epilogue_offset;
u32 seen;
u32 flags;
u32 *offsets;
u32 *target;
+ u32 stack_size;
#if __LINUX_ARM_ARCH__ < 7
u16 epilogue_bytes;
u16 imm_count;
@@ -73,68 +122,16 @@ struct jit_ctx {
#endif
};
-int bpf_jit_enable __read_mostly;
-
-static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret,
- unsigned int size)
-{
- void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size);
-
- if (!ptr)
- return -EFAULT;
- memcpy(ret, ptr, size);
- return 0;
-}
-
-static u64 jit_get_skb_b(struct sk_buff *skb, int offset)
-{
- u8 ret;
- int err;
-
- if (offset < 0)
- err = call_neg_helper(skb, offset, &ret, 1);
- else
- err = skb_copy_bits(skb, offset, &ret, 1);
-
- return (u64)err << 32 | ret;
-}
-
-static u64 jit_get_skb_h(struct sk_buff *skb, int offset)
-{
- u16 ret;
- int err;
-
- if (offset < 0)
- err = call_neg_helper(skb, offset, &ret, 2);
- else
- err = skb_copy_bits(skb, offset, &ret, 2);
-
- return (u64)err << 32 | ntohs(ret);
-}
-
-static u64 jit_get_skb_w(struct sk_buff *skb, int offset)
-{
- u32 ret;
- int err;
-
- if (offset < 0)
- err = call_neg_helper(skb, offset, &ret, 4);
- else
- err = skb_copy_bits(skb, offset, &ret, 4);
-
- return (u64)err << 32 | ntohl(ret);
-}
-
/*
* Wrappers which handle both OABI and EABI and assures Thumb2 interworking
* (where the assembly routines like __aeabi_uidiv could cause problems).
*/
-static u32 jit_udiv(u32 dividend, u32 divisor)
+static u32 jit_udiv32(u32 dividend, u32 divisor)
{
return dividend / divisor;
}
-static u32 jit_mod(u32 dividend, u32 divisor)
+static u32 jit_mod32(u32 dividend, u32 divisor)
{
return dividend % divisor;
}
@@ -158,36 +155,22 @@ static inline void emit(u32 inst, struct jit_ctx *ctx)
_emit(ARM_COND_AL, inst, ctx);
}
-static u16 saved_regs(struct jit_ctx *ctx)
+/*
+ * Checks whether an immediate value can be encoded as an ARM modified
+ * immediate (imm12): an 8-bit value rotated right by an even amount.
+ */
+static int16_t imm8m(u32 x)
{
- u16 ret = 0;
-
- if ((ctx->skf->len > 1) ||
- (ctx->skf->insns[0].code == (BPF_RET | BPF_A)))
- ret |= 1 << r_A;
-
-#ifdef CONFIG_FRAME_POINTER
- ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC);
-#else
- if (ctx->seen & SEEN_CALL)
- ret |= 1 << ARM_LR;
-#endif
- if (ctx->seen & (SEEN_DATA | SEEN_SKB))
- ret |= 1 << r_skb;
- if (ctx->seen & SEEN_DATA)
- ret |= (1 << r_skb_data) | (1 << r_skb_hl);
- if (ctx->seen & SEEN_X)
- ret |= 1 << r_X;
-
- return ret;
-}
+ u32 rot;
-static inline int mem_words_used(struct jit_ctx *ctx)
-{
- /* yes, we do waste some stack space IF there are "holes" in the set" */
- return fls(ctx->seen & SEEN_MEM);
+ for (rot = 0; rot < 16; rot++)
+ if ((x & ~ror32(0xff, 2 * rot)) == 0)
+ return rol32(x, 2 * rot) | (rot << 8);
+ return -1;
}
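
A worked instance of this encoding: 0x00ff0000 is 0xff rotated right
by 16 bits, and rotations are encoded in units of 2 bits, so rot = 8
and imm8m(0x00ff0000) == (8 << 8) | 0xff == 0x8ff (the same constant
the old swap16 comment removed below refers to), while a value such as
0x00ff00ff spans two separate byte windows and makes imm8m() return -1.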
+/*
+ * Initializes the JIT space with undefined instructions.
+ */
static void jit_fill_hole(void *area, unsigned int size)
{
u32 *ptr;
@@ -196,88 +179,34 @@ static void jit_fill_hole(void *area, unsigned int size)
*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
}
-static void build_prologue(struct jit_ctx *ctx)
-{
- u16 reg_set = saved_regs(ctx);
- u16 off;
-
-#ifdef CONFIG_FRAME_POINTER
- emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
- emit(ARM_PUSH(reg_set), ctx);
- emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
-#else
- if (reg_set)
- emit(ARM_PUSH(reg_set), ctx);
-#endif
-
- if (ctx->seen & (SEEN_DATA | SEEN_SKB))
- emit(ARM_MOV_R(r_skb, ARM_R0), ctx);
-
- if (ctx->seen & SEEN_DATA) {
- off = offsetof(struct sk_buff, data);
- emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx);
- /* headlen = len - data_len */
- off = offsetof(struct sk_buff, len);
- emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx);
- off = offsetof(struct sk_buff, data_len);
- emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
- emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx);
- }
-
- if (ctx->flags & FLAG_NEED_X_RESET)
- emit(ARM_MOV_I(r_X, 0), ctx);
-
- /* do not leak kernel data to userspace */
- if (bpf_needs_clear_a(&ctx->skf->insns[0]))
- emit(ARM_MOV_I(r_A, 0), ctx);
-
- /* stack space for the BPF_MEM words */
- if (ctx->seen & SEEN_MEM)
- emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
-}
-
-static void build_epilogue(struct jit_ctx *ctx)
-{
- u16 reg_set = saved_regs(ctx);
-
- if (ctx->seen & SEEN_MEM)
- emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
-
- reg_set &= ~(1 << ARM_LR);
+/* Round the stack size up to a multiple of 4 bytes */
+#define STACK_ALIGN(sz) (((sz) + 3) & ~3)
-#ifdef CONFIG_FRAME_POINTER
- /* the first instruction of the prologue was: mov ip, sp */
- reg_set &= ~(1 << ARM_IP);
- reg_set |= (1 << ARM_SP);
- emit(ARM_LDM(ARM_SP, reg_set), ctx);
-#else
- if (reg_set) {
- if (ctx->seen & SEEN_CALL)
- reg_set |= 1 << ARM_PC;
- emit(ARM_POP(reg_set), ctx);
- }
+/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
+ * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
+ * BPF_REG_FP and Tail call counts.
+ */
+#define SCRATCH_SIZE 80
- if (!(ctx->seen & SEEN_CALL))
- emit(ARM_BX(ARM_LR), ctx);
-#endif
-}
+/* total stack size used in JITed code */
+#define _STACK_SIZE \
+ (ctx->prog->aux->stack_depth + \
+ SCRATCH_SIZE + \
+ 4 /* extra for skb_copy_bits buffer */)
-static int16_t imm8m(u32 x)
-{
- u32 rot;
+#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
- for (rot = 0; rot < 16; rot++)
- if ((x & ~ror32(0xff, 2 * rot)) == 0)
- return rol32(x, 2 * rot) | (rot << 8);
+/* Get the SP offset of an eBPF register's slot in the scratch stack space. */
+#define STACK_VAR(off) (STACK_SIZE - (off) - 4)
- return -1;
-}
+/* Offset of skb_copy_bits buffer */
+#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
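
To make the arithmetic concrete (a sketch using a hypothetical
stack_depth of 64): _STACK_SIZE = 64 + 80 + 4 = 148, which is already
a multiple of 4, so STACK_SIZE = 148; the low word of BPF_REG_FP at
STACK_OFFSET(56) is then addressed at STACK_VAR(56) = 148 - 56 - 4 = 88
bytes above ARM_SP.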
#if __LINUX_ARM_ARCH__ < 7
static u16 imm_offset(u32 k, struct jit_ctx *ctx)
{
- unsigned i = 0, offset;
+ unsigned int i = 0, offset;
u16 imm;
/* on the "fake" run we just count them (duplicates included) */
@@ -296,7 +225,7 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
ctx->imms[i] = k;
/* constants go just after the epilogue */
- offset = ctx->offsets[ctx->skf->len];
+ offset = ctx->offsets[ctx->prog->len - 1] * 4;
offset += ctx->prologue_bytes;
offset += ctx->epilogue_bytes;
offset += i * 4;
@@ -320,10 +249,22 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
#endif /* __LINUX_ARM_ARCH__ */
+static inline int bpf2a32_offset(int bpf_to, int bpf_from,
+ const struct jit_ctx *ctx) {
+ int to, from;
+
+ if (ctx->target == NULL)
+ return 0;
+ to = ctx->offsets[bpf_to];
+ from = ctx->offsets[bpf_from];
+
+ return to - from - 1;
+}
+
/*
* Move an immediate that's not an imm8m to a core register.
*/
-static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx)
+static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 7
emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
@@ -334,7 +275,7 @@ static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx)
#endif
}
-static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx)
+static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
{
int imm12 = imm8m(val);
@@ -344,676 +285,1594 @@ static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx)
emit_mov_i_no8m(rd, val, ctx);
}
-#if __LINUX_ARM_ARCH__ < 6
-
-static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
+static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
- _emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx);
- _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
- _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx);
- _emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx);
- _emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx);
- _emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx);
- _emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx);
- _emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx);
+ ctx->seen |= SEEN_CALL;
+#if __LINUX_ARM_ARCH__ < 5
+ emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+
+ if (elf_hwcap & HWCAP_THUMB)
+ emit(ARM_BX(tgt_reg), ctx);
+ else
+ emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
+#else
+ emit(ARM_BLX_R(tgt_reg), ctx);
+#endif
}
-static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
+static inline int epilogue_offset(const struct jit_ctx *ctx)
{
- _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
- _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx);
- _emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx);
+ int to, from;
+ /* Offsets are not needed during the first (dummy) pass */
+ if (ctx->target == NULL)
+ return 0;
+ to = ctx->epilogue_offset;
+ from = ctx->idx;
+
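+ /* The -2 below accounts for the ARM branch encoding: the
+ * displacement is relative to PC, which reads as the address of
+ * the branch plus 8, i.e. two instructions ahead of it. */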
+ return to - from - 2;
}
-static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx)
+static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
{
- /* r_dst = (r_src << 8) | (r_src >> 8) */
- emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx);
- emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx);
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ s32 jmp_offset;
+
+ /* Check whether the divisor is zero; if it is, set the
+ * return value to 0 and branch directly to the epilogue.
+ */
+ emit(ARM_CMP_I(rn, 0), ctx);
+ _emit(ARM_COND_EQ, ARM_MOV_I(ARM_R0, 0), ctx);
+ jmp_offset = epilogue_offset(ctx);
+ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
+#if __LINUX_ARM_ARCH__ == 7
+ if (elf_hwcap & HWCAP_IDIVA) {
+ if (op == BPF_DIV)
+ emit(ARM_UDIV(rd, rm, rn), ctx);
+ else {
+ emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
+ emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
+ }
+ return;
+ }
+#endif
/*
- * we need to mask out the bits set in r_dst[23:16] due to
- * the first shift instruction.
- *
- * note that 0x8ff is the encoded immediate 0x00ff0000.
+ * For BPF_ALU | BPF_DIV | BPF_K instructions, ARM_R0 and
+ * ARM_R1 hold the first argument of the BPF function, so we
+ * save them on the caller side to keep them from being
+ * clobbered in the callee, and restore ARM_R0 and ARM_R1
+ * after the callee returns.
*/
- emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx);
-}
+ if (rn != ARM_R1) {
+ emit(ARM_MOV_R(tmp[0], ARM_R1), ctx);
+ emit(ARM_MOV_R(ARM_R1, rn), ctx);
+ }
+ if (rm != ARM_R0) {
+ emit(ARM_MOV_R(tmp[1], ARM_R0), ctx);
+ emit(ARM_MOV_R(ARM_R0, rm), ctx);
+ }
-#else /* ARMv6+ */
+ /* Call appropriate function */
+ ctx->seen |= SEEN_CALL;
+ emit_mov_i(ARM_IP, op == BPF_DIV ?
+ (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+ emit_blx_r(ARM_IP, ctx);
-static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
-{
- _emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx);
-#ifdef __LITTLE_ENDIAN
- _emit(cond, ARM_REV(r_res, r_res), ctx);
-#endif
+ /* Save return value */
+ if (rd != ARM_R0)
+ emit(ARM_MOV_R(rd, ARM_R0), ctx);
+
+ /* Restore ARM_R0 and ARM_R1 */
+ if (rn != ARM_R1)
+ emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx);
+ if (rm != ARM_R0)
+ emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}
-static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
+/* Checks whether the BPF register is stored in the scratch stack space. */
+static inline bool is_on_stack(u8 bpf_reg)
{
- _emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx);
-#ifdef __LITTLE_ENDIAN
- _emit(cond, ARM_REV16(r_res, r_res), ctx);
-#endif
+ static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5,
+ BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT,
+ BPF_REG_2, BPF_REG_FP};
+ int i, reg_len = sizeof(stack_regs);
+
+ for (i = 0; i < reg_len; i++) {
+ if (bpf_reg == stack_regs[i])
+ return true;
+ }
+ return false;
}
-static inline void emit_swap16(u8 r_dst __maybe_unused,
- u8 r_src __maybe_unused,
- struct jit_ctx *ctx __maybe_unused)
+static inline void emit_a32_mov_i(const u8 dst, const u32 val,
+ bool dstk, struct jit_ctx *ctx)
{
-#ifdef __LITTLE_ENDIAN
- emit(ARM_REV16(r_dst, r_src), ctx);
-#endif
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+
+ if (dstk) {
+ emit_mov_i(tmp[1], val, ctx);
+ emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx);
+ } else {
+ emit_mov_i(dst, val, ctx);
+ }
}
-#endif /* __LINUX_ARM_ARCH__ < 6 */
+/* Sign-extended immediate move */
+static inline void emit_a32_mov_i64(const bool is64, const u8 dst[],
+ const u32 val, bool dstk,
+ struct jit_ctx *ctx) {
+ u32 hi = 0;
+ if (is64 && (val & (1<<31)))
+ hi = (u32)~0;
+ emit_a32_mov_i(dst_lo, val, dstk, ctx);
+ emit_a32_mov_i(dst_hi, hi, dstk, ctx);
+}
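
The hi-word computation above is ordinary C sign extension; as a
host-side reference (editorial sketch, not part of the patch):

	#include <stdint.h>

	/* BPF_ALU64 MOV with a 32-bit immediate sign-extends it, e.g.
	 * imm = -1 yields hi = lo = 0xffffffff; imm = 1 yields hi = 0. */
	static inline uint64_t ref_mov_i64(int32_t imm)
	{
		return (uint64_t)(int64_t)imm;
	}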
-/* Compute the immediate value for a PC-relative branch. */
-static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx)
-{
- u32 imm;
+static inline void emit_a32_add_r(const u8 dst, const u8 src,
+ const bool is64, const bool hi,
+ struct jit_ctx *ctx) {
+ /* 64 bit :
+ * adds dst_lo, dst_lo, src_lo
+ * adc dst_hi, dst_hi, src_hi
+ * 32 bit :
+ * add dst_lo, dst_lo, src_lo
+ */
+ if (!hi && is64)
+ emit(ARM_ADDS_R(dst, dst, src), ctx);
+ else if (hi && is64)
+ emit(ARM_ADC_R(dst, dst, src), ctx);
+ else
+ emit(ARM_ADD_R(dst, dst, src), ctx);
+}
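
The ADDS/ADC pair implements the usual carry chain across the two
32-bit halves; an equivalent host-side reference (editorial sketch):

	#include <stdint.h>

	static void ref_add64(uint32_t *dlo, uint32_t *dhi,
			      uint32_t slo, uint32_t shi)
	{
		uint32_t old_lo = *dlo;

		*dlo += slo;			/* ADDS: sets the carry flag */
		*dhi += shi + (*dlo < old_lo);	/* ADC: consumes the carry */
	}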
- if (ctx->target == NULL)
- return 0;
- /*
- * BPF allows only forward jumps and the offset of the target is
- * still the one computed during the first pass.
+static inline void emit_a32_sub_r(const u8 dst, const u8 src,
+ const bool is64, const bool hi,
+ struct jit_ctx *ctx) {
+ /* 64 bit :
+ * subs dst_lo, dst_lo, src_lo
+ * sbc dst_hi, dst_hi, src_hi
+ * 32 bit :
+ * sub dst_lo, dst_lo, src_lo
*/
- imm = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8);
+ if (!hi && is64)
+ emit(ARM_SUBS_R(dst, dst, src), ctx);
+ else if (hi && is64)
+ emit(ARM_SBC_R(dst, dst, src), ctx);
+ else
+ emit(ARM_SUB_R(dst, dst, src), ctx);
+}
- return imm >> 2;
+static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
+ const bool hi, const u8 op, struct jit_ctx *ctx){
+ switch (BPF_OP(op)) {
+ /* dst = dst + src */
+ case BPF_ADD:
+ emit_a32_add_r(dst, src, is64, hi, ctx);
+ break;
+ /* dst = dst - src */
+ case BPF_SUB:
+ emit_a32_sub_r(dst, src, is64, hi, ctx);
+ break;
+ /* dst = dst | src */
+ case BPF_OR:
+ emit(ARM_ORR_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst & src */
+ case BPF_AND:
+ emit(ARM_AND_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst ^ src */
+ case BPF_XOR:
+ emit(ARM_EOR_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst * src */
+ case BPF_MUL:
+ emit(ARM_MUL(dst, dst, src), ctx);
+ break;
+ /* dst = dst << src */
+ case BPF_LSH:
+ emit(ARM_LSL_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst >> src */
+ case BPF_RSH:
+ emit(ARM_LSR_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst >> src (signed)*/
+ case BPF_ARSH:
+ emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx);
+ break;
+ }
}
-#define OP_IMM3(op, r1, r2, imm_val, ctx) \
- do { \
- imm12 = imm8m(imm_val); \
- if (imm12 < 0) { \
- emit_mov_i_no8m(r_scratch, imm_val, ctx); \
- emit(op ## _R((r1), (r2), r_scratch), ctx); \
- } else { \
- emit(op ## _I((r1), (r2), imm12), ctx); \
- } \
- } while (0)
-
-static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx)
-{
- if (ctx->ret0_fp_idx >= 0) {
- _emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx);
- /* NOP to keep the size constant between passes */
- emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx);
+/* ALU operation (32 bit)
+ * dst = dst (op) src
+ */
+static inline void emit_a32_alu_r(const u8 dst, const u8 src,
+ bool dstk, bool sstk,
+ struct jit_ctx *ctx, const bool is64,
+ const bool hi, const u8 op) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rn = sstk ? tmp[1] : src;
+
+ if (sstk)
+ emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx);
+
+ /* ALU operation */
+ if (dstk) {
+ emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
+ emit_alu_r(tmp[0], rn, is64, hi, op, ctx);
+ emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
} else {
- _emit(cond, ARM_MOV_I(ARM_R0, 0), ctx);
- _emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx);
+ emit_alu_r(dst, rn, is64, hi, op, ctx);
}
}
-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
-{
-#if __LINUX_ARM_ARCH__ < 5
- emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+/* ALU operation (64 bit) */
+static inline void emit_a32_alu_r64(const bool is64, const u8 dst[],
+ const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx,
+ const u8 op) {
+ emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op);
+ if (is64)
+ emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op);
+ else
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+}
- if (elf_hwcap & HWCAP_THUMB)
- emit(ARM_BX(tgt_reg), ctx);
+/* dst = src (4 bytes) */
+static inline void emit_a32_mov_r(const u8 dst, const u8 src,
+ bool dstk, bool sstk,
+ struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rt = sstk ? tmp[0] : src;
+
+ if (sstk)
+ emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx);
+ if (dstk)
+ emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx);
else
- emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
-#else
- emit(ARM_BLX_R(tgt_reg), ctx);
-#endif
+ emit(ARM_MOV_R(dst, rt), ctx);
}
-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx,
- int bpf_op)
-{
-#if __LINUX_ARM_ARCH__ == 7
- if (elf_hwcap & HWCAP_IDIVA) {
- if (bpf_op == BPF_DIV)
- emit(ARM_UDIV(rd, rm, rn), ctx);
- else {
- emit(ARM_UDIV(ARM_R3, rm, rn), ctx);
- emit(ARM_MLS(rd, rn, ARM_R3, rm), ctx);
- }
- return;
+/* dst = src */
+static inline void emit_a32_mov_r64(const bool is64, const u8 dst[],
+ const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx);
+ if (is64) {
+ /* complete 8 byte move */
+ emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx);
+ } else {
+ /* Zero out high 4 bytes */
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
}
-#endif
+}
- /*
- * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4
- * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into
- * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm
- * before using it as a source for ARM_R1.
- *
- * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is
- * ARM_R5 (r_X) so there is no particular register overlap
- * issues.
- */
- if (rn != ARM_R1)
- emit(ARM_MOV_R(ARM_R1, rn), ctx);
- if (rm != ARM_R0)
- emit(ARM_MOV_R(ARM_R0, rm), ctx);
+/* ALU operations with an immediate (32 bit): shifts and negate */
+static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk,
+ struct jit_ctx *ctx, const u8 op) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rd = dstk ? tmp[0] : dst;
+
+ if (dstk)
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+
+ /* Do shift operation */
+ switch (op) {
+ case BPF_LSH:
+ emit(ARM_LSL_I(rd, rd, val), ctx);
+ break;
+ case BPF_RSH:
+ emit(ARM_LSR_I(rd, rd, val), ctx);
+ break;
+ case BPF_NEG:
+ emit(ARM_RSB_I(rd, rd, val), ctx);
+ break;
+ }
+ if (dstk)
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+}
+
+/* dst = -dst (64 bit) */
+static inline void emit_a32_neg64(const u8 dst[], bool dstk,
+ struct jit_ctx *ctx){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rd = dstk ? tmp[1] : dst[1];
+ u8 rm = dstk ? tmp[0] : dst[0];
+
+ /* Setup Operand */
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do Negate Operation */
+ emit(ARM_RSBS_I(rd, rd, 0), ctx);
+ emit(ARM_RSC_I(rm, rm, 0), ctx);
+
+ if (dstk) {
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+}
+
+/* dst = dst << src */
+static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+
+ /* Setup Operands */
+ u8 rt = sstk ? tmp2[1] : src_lo;
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (sstk)
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do LSH operation */
+ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
+ emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
+ /* As we are using ARM_LR */
ctx->seen |= SEEN_CALL;
- emit_mov_i(ARM_R3, bpf_op == BPF_DIV ? (u32)jit_udiv : (u32)jit_mod,
- ctx);
- emit_blx_r(ARM_R3, ctx);
+ emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
+ emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
+ emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx);
+
+ if (dstk) {
+ emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ } else {
+ emit(ARM_MOV_R(rd, ARM_LR), ctx);
+ emit(ARM_MOV_R(rm, ARM_IP), ctx);
+ }
+}
- if (rd != ARM_R0)
- emit(ARM_MOV_R(rd, ARM_R0), ctx);
+/* dst = dst >> src (signed) */
+static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup Operands */
+ u8 rt = sstk ? tmp2[1] : src_lo;
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (sstk)
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do the ARSH operation */
+ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
+ emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
+ /* As we are using ARM_LR */
+ ctx->seen |= SEEN_CALL;
+ emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+ _emit(ARM_COND_MI, ARM_B(0), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx);
+ emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx);
+ if (dstk) {
+ emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ } else {
+ emit(ARM_MOV_R(rd, ARM_LR), ctx);
+ emit(ARM_MOV_R(rm, ARM_IP), ctx);
+ }
+}
+
+/* dst = dst >> src */
+static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup Operands */
+ u8 rt = sstk ? tmp2[1] : src_lo;
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (sstk)
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do LSR operation */
+ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
+ emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
+ /* As we are using ARM_LR */
+ ctx->seen |= SEEN_CALL;
+ emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
+ emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx);
+ if (dstk) {
+ emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ } else {
+ emit(ARM_MOV_R(rd, ARM_LR), ctx);
+ emit(ARM_MOV_R(rm, ARM_IP), ctx);
+ }
}
-static inline void update_on_xread(struct jit_ctx *ctx)
+/* dst = dst << val */
+static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk,
+ const u32 val, struct jit_ctx *ctx){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup operands */
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do LSH operation */
+ if (val < 32) {
+ emit(ARM_MOV_SI(tmp2[0], rm, SRTYPE_ASL, val), ctx);
+ emit(ARM_ORR_SI(rm, tmp2[0], rd, SRTYPE_LSR, 32 - val), ctx);
+ emit(ARM_MOV_SI(rd, rd, SRTYPE_ASL, val), ctx);
+ } else {
+ if (val == 32)
+ emit(ARM_MOV_R(rm, rd), ctx);
+ else
+ emit(ARM_MOV_SI(rm, rd, SRTYPE_ASL, val - 32), ctx);
+ emit(ARM_EOR_R(rd, rd, rd), ctx);
+ }
+
+ if (dstk) {
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+}
+
+/* dst = dst >> val */
+static inline void emit_a32_lsr_i64(const u8 dst[], bool dstk,
+ const u32 val, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup operands */
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do LSR operation */
+ if (val < 32) {
+ emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
+ emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
+ emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx);
+ } else if (val == 32) {
+ emit(ARM_MOV_R(rd, rm), ctx);
+ emit(ARM_MOV_I(rm, 0), ctx);
+ } else {
+ emit(ARM_MOV_SI(rd, rm, SRTYPE_LSR, val - 32), ctx);
+ emit(ARM_MOV_I(rm, 0), ctx);
+ }
+
+ if (dstk) {
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+}
+
+/* dst = dst >> val (signed) */
+static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk,
+ const u32 val, struct jit_ctx *ctx){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup operands */
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do ARSH operation */
+ if (val < 32) {
+ emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx);
+ emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx);
+ emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, val), ctx);
+ } else if (val == 32) {
+ emit(ARM_MOV_R(rd, rm), ctx);
+ emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
+ } else {
+ emit(ARM_MOV_SI(rd, rm, SRTYPE_ASR, val - 32), ctx);
+ emit(ARM_MOV_SI(rm, rm, SRTYPE_ASR, 31), ctx);
+ }
+
+ if (dstk) {
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+}
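
The three immediate ranges handled above (val < 32, val == 32,
val > 32) together implement a 64-bit arithmetic right shift; an
equivalent host-side reference (editorial sketch, assuming the
compiler implements >> on signed operands as an arithmetic shift, as
gcc does):

	#include <stdint.h>

	static void ref_arsh_i64(uint32_t *lo, uint32_t *hi, uint32_t val)
	{
		int64_t v = (int64_t)(((uint64_t)*hi << 32) | *lo);

		v >>= val;			/* the sign bit replicates */
		*lo = (uint32_t)v;
		*hi = (uint32_t)((uint64_t)v >> 32);
	}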
+
+static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup operands for multiplication */
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+ u8 rt = sstk ? tmp2[1] : src_lo;
+ u8 rn = sstk ? tmp2[0] : src_hi;
+
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+ if (sstk) {
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+ emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_hi)), ctx);
+ }
+
+ /* Do Multiplication */
+ emit(ARM_MUL(ARM_IP, rd, rn), ctx);
+ emit(ARM_MUL(ARM_LR, rm, rt), ctx);
+ /* As we are using ARM_LR */
+ ctx->seen |= SEEN_CALL;
+ emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
+
+ emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
+ emit(ARM_ADD_R(rm, ARM_LR, rm), ctx);
+ if (dstk) {
+ emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ } else {
+ emit(ARM_MOV_R(rd, ARM_IP), ctx);
+ }
+}
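
Only the low 64 bits of the full 128-bit product are kept, built from
three 32x32 partial products: UMULL for lo*lo and two MULs for the
cross terms, which may wrap modulo 2^32. An equivalent host-side
reference (editorial sketch):

	#include <stdint.h>

	static uint64_t ref_mul64(uint64_t a, uint64_t b)
	{
		uint32_t a_lo = (uint32_t)a, a_hi = (uint32_t)(a >> 32);
		uint32_t b_lo = (uint32_t)b, b_hi = (uint32_t)(b >> 32);
		uint64_t prod = (uint64_t)a_lo * b_lo;		/* UMULL */
		uint32_t cross = a_lo * b_hi + a_hi * b_lo;	/* two MULs */

		return prod + ((uint64_t)cross << 32);
	}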
+
+/* *(size *)(dst + off) = src */
+static inline void emit_str_r(const u8 dst, const u8 src, bool dstk,
+ const s32 off, struct jit_ctx *ctx, const u8 sz){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rd = dstk ? tmp[1] : dst;
+
+ if (dstk)
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+ if (off) {
+ emit_a32_mov_i(tmp[0], off, false, ctx);
+ emit(ARM_ADD_R(tmp[0], rd, tmp[0]), ctx);
+ rd = tmp[0];
+ }
+ switch (sz) {
+ case BPF_W:
+ /* Store a Word */
+ emit(ARM_STR_I(src, rd, 0), ctx);
+ break;
+ case BPF_H:
+ /* Store a HalfWord */
+ emit(ARM_STRH_I(src, rd, 0), ctx);
+ break;
+ case BPF_B:
+ /* Store a Byte */
+ emit(ARM_STRB_I(src, rd, 0), ctx);
+ break;
+ }
+}
+
+/* dst = *(size*)(src + off) */
+static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk,
+ const s32 off, struct jit_ctx *ctx, const u8 sz){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rd = dstk ? tmp[1] : dst;
+ u8 rm = src;
+
+ if (off) {
+ emit_a32_mov_i(tmp[0], off, false, ctx);
+ emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
+ rm = tmp[0];
+ }
+ switch (sz) {
+ case BPF_W:
+ /* Load a Word */
+ emit(ARM_LDR_I(rd, rm, 0), ctx);
+ break;
+ case BPF_H:
+ /* Load a HalfWord */
+ emit(ARM_LDRH_I(rd, rm, 0), ctx);
+ break;
+ case BPF_B:
+ /* Load a Byte */
+ emit(ARM_LDRB_I(rd, rm, 0), ctx);
+ break;
+ }
+ if (dstk)
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+}
+
+/* Arithmetic operation */
+static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
+ const u8 rn, struct jit_ctx *ctx, u8 op) {
+ switch (op) {
+ case BPF_JSET:
+ ctx->seen |= SEEN_CALL;
+ emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
+ emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
+ emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
+ break;
+ case BPF_JEQ:
+ case BPF_JNE:
+ case BPF_JGT:
+ case BPF_JGE:
+ case BPF_JLE:
+ case BPF_JLT:
+ emit(ARM_CMP_R(rd, rm), ctx);
+ _emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx);
+ break;
+ case BPF_JSLE:
+ case BPF_JSGT:
+ emit(ARM_CMP_R(rn, rt), ctx);
+ emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx);
+ break;
+ case BPF_JSLT:
+ case BPF_JSGE:
+ emit(ARM_CMP_R(rt, rn), ctx);
+ emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx);
+ break;
+ }
+}
+
+static int out_offset = -1; /* initialized on the first pass of build_body() */
+static int emit_bpf_tail_call(struct jit_ctx *ctx)
{
- if (!(ctx->seen & SEEN_X))
- ctx->flags |= FLAG_NEED_X_RESET;
- ctx->seen |= SEEN_X;
+ /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
+ const u8 *r2 = bpf2a32[BPF_REG_2];
+ const u8 *r3 = bpf2a32[BPF_REG_3];
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ const u8 *tcc = bpf2a32[TCALL_CNT];
+ const int idx0 = ctx->idx;
+#define cur_offset (ctx->idx - idx0)
+#define jmp_offset (out_offset - (cur_offset))
+ u32 off, lo, hi;
+
+ /* if (index >= array->map.max_entries)
+ * goto out;
+ */
+ off = offsetof(struct bpf_array, map.max_entries);
+ /* array->map.max_entries */
+ emit_a32_mov_i(tmp[1], off, false, ctx);
+ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
+ emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
+ /* index (64 bit) */
+ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
+ /* index >= array->map.max_entries */
+ emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
+ _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
+
+ /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ * tail_call_cnt++;
+ */
+ lo = (u32)MAX_TAIL_CALL_CNT;
+ hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
+ emit(ARM_LDR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
+ emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);
+ emit(ARM_CMP_I(tmp[0], hi), ctx);
+ _emit(ARM_COND_EQ, ARM_CMP_I(tmp[1], lo), ctx);
+ _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
+ emit(ARM_ADDS_I(tmp[1], tmp[1], 1), ctx);
+ emit(ARM_ADC_I(tmp[0], tmp[0], 0), ctx);
+ emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(tcc[1])), ctx);
+ emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(tcc[0])), ctx);
+
+ /* prog = array->ptrs[index]
+ * if (prog == NULL)
+ * goto out;
+ */
+ off = offsetof(struct bpf_array, ptrs);
+ emit_a32_mov_i(tmp[1], off, false, ctx);
+ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
+ emit(ARM_ADD_R(tmp[1], tmp2[1], tmp[1]), ctx);
+ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
+ emit(ARM_MOV_SI(tmp[0], tmp2[1], SRTYPE_ASL, 2), ctx);
+ emit(ARM_LDR_R(tmp[1], tmp[1], tmp[0]), ctx);
+ emit(ARM_CMP_I(tmp[1], 0), ctx);
+ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
+
+ /* goto *(prog->bpf_func + prologue_size); */
+ off = offsetof(struct bpf_prog, bpf_func);
+ emit_a32_mov_i(tmp2[1], off, false, ctx);
+ emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
+ emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
+ emit(ARM_BX(tmp[1]), ctx);
+
+ /* out: */
+ if (out_offset == -1)
+ out_offset = cur_offset;
+ if (cur_offset != out_offset) {
+ pr_err_once("tail_call out_offset = %d, expected %d!\n",
+ cur_offset, out_offset);
+ return -1;
+ }
+ return 0;
+#undef cur_offset
+#undef jmp_offset
}
-static int build_body(struct jit_ctx *ctx)
+/* 0xabcd => 0xcdab */
+static inline void emit_rev16(const u8 rd, const u8 rn, struct jit_ctx *ctx)
{
- void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
- const struct bpf_prog *prog = ctx->skf;
- const struct sock_filter *inst;
- unsigned i, load_order, off, condt;
- int imm12;
- u32 k;
+#if __LINUX_ARM_ARCH__ < 6
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+
+ emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
+ emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 8), ctx);
+ emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
+ emit(ARM_ORR_SI(rd, tmp2[0], tmp2[1], SRTYPE_LSL, 8), ctx);
+#else /* ARMv6+ */
+ emit(ARM_REV16(rd, rn), ctx);
+#endif
+}
- for (i = 0; i < prog->len; i++) {
- u16 code;
+/* 0xabcdefgh => 0xghefcdab */
+static inline void emit_rev32(const u8 rd, const u8 rn, struct jit_ctx *ctx)
+{
+#if __LINUX_ARM_ARCH__ < 6
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+
+ emit(ARM_AND_I(tmp2[1], rn, 0xff), ctx);
+ emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 24), ctx);
+ emit(ARM_ORR_SI(ARM_IP, tmp2[0], tmp2[1], SRTYPE_LSL, 24), ctx);
+
+ emit(ARM_MOV_SI(tmp2[1], rn, SRTYPE_LSR, 8), ctx);
+ emit(ARM_AND_I(tmp2[1], tmp2[1], 0xff), ctx);
+ emit(ARM_MOV_SI(tmp2[0], rn, SRTYPE_LSR, 16), ctx);
+ emit(ARM_AND_I(tmp2[0], tmp2[0], 0xff), ctx);
+ emit(ARM_MOV_SI(tmp2[0], tmp2[0], SRTYPE_LSL, 8), ctx);
+ emit(ARM_ORR_SI(tmp2[0], tmp2[0], tmp2[1], SRTYPE_LSL, 16), ctx);
+ emit(ARM_ORR_R(rd, ARM_IP, tmp2[0]), ctx);
+
+#else /* ARMv6+ */
+ emit(ARM_REV(rd, rn), ctx);
+#endif
+}
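
The pre-ARMv6 sequence above performs a full 32-bit byte reversal,
equivalent to the single REV used on ARMv6+; a host-side reference
(editorial sketch):

	#include <stdint.h>

	static uint32_t ref_rev32(uint32_t x)
	{
		return (x >> 24) | ((x >> 8) & 0x0000ff00) |
		       ((x << 8) & 0x00ff0000) | (x << 24);
	}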
- inst = &(prog->insns[i]);
- /* K as an immediate value operand */
- k = inst->k;
- code = bpf_anc_helper(inst);
+/* Load a 64-bit BPF register from scratch stack space and push it onto the stack */
+static inline void emit_push_r64(const u8 src[], const u8 shift,
+ struct jit_ctx *ctx)
+{
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ u16 reg_set = 0;
- /* compute offsets only in the fake pass */
- if (ctx->target == NULL)
- ctx->offsets[i] = ctx->idx * 4;
+ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(src[1]+shift)), ctx);
+ emit(ARM_LDR_I(tmp2[0], ARM_SP, STACK_VAR(src[0]+shift)), ctx);
+
+ reg_set = (1 << tmp2[1]) | (1 << tmp2[0]);
+ emit(ARM_PUSH(reg_set), ctx);
+}
+
+static void build_prologue(struct jit_ctx *ctx)
+{
+ const u8 r0 = bpf2a32[BPF_REG_0][1];
+ const u8 r2 = bpf2a32[BPF_REG_1][1];
+ const u8 r3 = bpf2a32[BPF_REG_1][0];
+ const u8 r4 = bpf2a32[BPF_REG_6][1];
+ const u8 r5 = bpf2a32[BPF_REG_6][0];
+ const u8 r6 = bpf2a32[TMP_REG_1][1];
+ const u8 r7 = bpf2a32[TMP_REG_1][0];
+ const u8 r8 = bpf2a32[TMP_REG_2][1];
+ const u8 r10 = bpf2a32[TMP_REG_2][0];
+ const u8 fplo = bpf2a32[BPF_REG_FP][1];
+ const u8 fphi = bpf2a32[BPF_REG_FP][0];
+ const u8 sp = ARM_SP;
+ const u8 *tcc = bpf2a32[TCALL_CNT];
+
+ u16 reg_set = 0;
+
+ /*
+ * eBPF prog stack layout
+ *
+ * high
+ * original ARM_SP => +-----+ eBPF prologue
+ * |FP/LR|
+ * current ARM_FP => +-----+
+ * | ... | callee saved registers
+ * eBPF fp register => +-----+ <= (BPF_FP)
+ * | ... | eBPF JIT scratch space
+ * | | eBPF prog stack
+ * +-----+
+ * |RSVD | JIT scratchpad
+ * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE)
+ * | |
+ * | ... | Function call stack
+ * | |
+ * +-----+
+ * low
+ */
+
+ /* Save callee saved registers. */
+ reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
+#ifdef CONFIG_FRAME_POINTER
+ reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC);
+ emit(ARM_MOV_R(ARM_IP, sp), ctx);
+ emit(ARM_PUSH(reg_set), ctx);
+ emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
+#else
+ /* Check if call instruction exists in BPF body */
+ if (ctx->seen & SEEN_CALL)
+ reg_set |= (1<<ARM_LR);
+ emit(ARM_PUSH(reg_set), ctx);
+#endif
+ /* Save frame pointer for later */
+ emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx);
+
+ ctx->stack_size = imm8m(STACK_SIZE);
+
+ /* Set up function call stack */
+ emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
- switch (code) {
- case BPF_LD | BPF_IMM:
- emit_mov_i(r_A, k, ctx);
+ /* Set up BPF prog stack base register */
+ emit_a32_mov_r(fplo, ARM_IP, true, false, ctx);
+ emit_a32_mov_i(fphi, 0, true, ctx);
+
+ /* mov r4, 0 */
+ emit(ARM_MOV_I(r4, 0), ctx);
+
+ /* Move BPF_CTX to BPF_R1 */
+ emit(ARM_MOV_R(r3, r4), ctx);
+ emit(ARM_MOV_R(r2, r0), ctx);
+ /* Initialize tail call count */
+ emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[0])), ctx);
+ emit(ARM_STR_I(r4, ARM_SP, STACK_VAR(tcc[1])), ctx);
+ /* end of prologue */
+}
+
+static void build_epilogue(struct jit_ctx *ctx)
+{
+ const u8 r4 = bpf2a32[BPF_REG_6][1];
+ const u8 r5 = bpf2a32[BPF_REG_6][0];
+ const u8 r6 = bpf2a32[TMP_REG_1][1];
+ const u8 r7 = bpf2a32[TMP_REG_1][0];
+ const u8 r8 = bpf2a32[TMP_REG_2][1];
+ const u8 r10 = bpf2a32[TMP_REG_2][0];
+ u16 reg_set = 0;
+
+ /* unwind function call stack */
+ emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
+
+ /* restore callee saved registers. */
+ reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
+#ifdef CONFIG_FRAME_POINTER
+ /* the first instruction of the prologue was: mov ip, sp */
+ reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC);
+ emit(ARM_LDM(ARM_SP, reg_set), ctx);
+#else
+ if (ctx->seen & SEEN_CALL)
+ reg_set |= (1<<ARM_PC);
+ /* Restore callee saved registers. */
+ emit(ARM_POP(reg_set), ctx);
+ /* Return back to the callee function */
+ if (!(ctx->seen & SEEN_CALL))
+ emit(ARM_BX(ARM_LR), ctx);
+#endif
+}
+
+/*
+ * Convert an eBPF instruction to a native instruction, i.e.,
+ * JIT an eBPF instruction.
+ * Returns:
+ * 0 - Successfully JITed an 8-byte eBPF instruction
+ * >0 - Successfully JITed a 16-byte eBPF instruction
+ * <0 - Failed to JIT.
+ */
+static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+ const u8 code = insn->code;
+ const u8 *dst = bpf2a32[insn->dst_reg];
+ const u8 *src = bpf2a32[insn->src_reg];
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ const s16 off = insn->off;
+ const s32 imm = insn->imm;
+ const int i = insn - ctx->prog->insnsi;
+ const bool is64 = BPF_CLASS(code) == BPF_ALU64;
+ const bool dstk = is_on_stack(insn->dst_reg);
+ const bool sstk = is_on_stack(insn->src_reg);
+ u8 rd, rt, rm, rn;
+ s32 jmp_offset;
+
+#define check_imm(bits, imm) do { \
+ if ((((imm) > 0) && ((imm) >> (bits))) || \
+ (((imm) < 0) && (~(imm) >> (bits)))) { \
+ pr_info("[%2d] imm=%d(0x%x) out of range\n", \
+ i, imm, imm); \
+ return -EINVAL; \
+ } \
+} while (0)
+#define check_imm24(imm) check_imm(24, imm)
+
+ switch (code) {
+ /* ALU operations */
+
+ /* dst = src */
+ case BPF_ALU | BPF_MOV | BPF_K:
+ case BPF_ALU | BPF_MOV | BPF_X:
+ case BPF_ALU64 | BPF_MOV | BPF_K:
+ case BPF_ALU64 | BPF_MOV | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_a32_mov_r64(is64, dst, src, dstk, sstk, ctx);
break;
- case BPF_LD | BPF_W | BPF_LEN:
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
- emit(ARM_LDR_I(r_A, r_skb,
- offsetof(struct sk_buff, len)), ctx);
+ case BPF_K:
+ /* Sign-extend immediate value to destination reg */
+ emit_a32_mov_i64(is64, dst, imm, dstk, ctx);
break;
- case BPF_LD | BPF_MEM:
- /* A = scratch[k] */
- ctx->seen |= SEEN_MEM_WORD(k);
- emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
+ }
+ break;
+ /* dst = dst + src/imm */
+ /* dst = dst - src/imm */
+ /* dst = dst | src/imm */
+ /* dst = dst & src/imm */
+ /* dst = dst ^ src/imm */
+ /* dst = dst * src/imm */
+ /* dst = dst << src */
+ /* dst = dst >> src */
+ case BPF_ALU | BPF_ADD | BPF_K:
+ case BPF_ALU | BPF_ADD | BPF_X:
+ case BPF_ALU | BPF_SUB | BPF_K:
+ case BPF_ALU | BPF_SUB | BPF_X:
+ case BPF_ALU | BPF_OR | BPF_K:
+ case BPF_ALU | BPF_OR | BPF_X:
+ case BPF_ALU | BPF_AND | BPF_K:
+ case BPF_ALU | BPF_AND | BPF_X:
+ case BPF_ALU | BPF_XOR | BPF_K:
+ case BPF_ALU | BPF_XOR | BPF_X:
+ case BPF_ALU | BPF_MUL | BPF_K:
+ case BPF_ALU | BPF_MUL | BPF_X:
+ case BPF_ALU | BPF_LSH | BPF_X:
+ case BPF_ALU | BPF_RSH | BPF_X:
+ case BPF_ALU | BPF_ARSH | BPF_K:
+ case BPF_ALU | BPF_ARSH | BPF_X:
+ case BPF_ALU64 | BPF_ADD | BPF_K:
+ case BPF_ALU64 | BPF_ADD | BPF_X:
+ case BPF_ALU64 | BPF_SUB | BPF_K:
+ case BPF_ALU64 | BPF_SUB | BPF_X:
+ case BPF_ALU64 | BPF_OR | BPF_K:
+ case BPF_ALU64 | BPF_OR | BPF_X:
+ case BPF_ALU64 | BPF_AND | BPF_K:
+ case BPF_ALU64 | BPF_AND | BPF_X:
+ case BPF_ALU64 | BPF_XOR | BPF_K:
+ case BPF_ALU64 | BPF_XOR | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_a32_alu_r64(is64, dst, src, dstk, sstk,
+ ctx, BPF_OP(code));
break;
- case BPF_LD | BPF_W | BPF_ABS:
- load_order = 2;
- goto load;
- case BPF_LD | BPF_H | BPF_ABS:
- load_order = 1;
- goto load;
- case BPF_LD | BPF_B | BPF_ABS:
- load_order = 0;
-load:
- emit_mov_i(r_off, k, ctx);
-load_common:
- ctx->seen |= SEEN_DATA | SEEN_CALL;
-
- if (load_order > 0) {
- emit(ARM_SUB_I(r_scratch, r_skb_hl,
- 1 << load_order), ctx);
- emit(ARM_CMP_R(r_scratch, r_off), ctx);
- condt = ARM_COND_GE;
- } else {
- emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
- condt = ARM_COND_HI;
- }
-
- /*
- * test for negative offset, only if we are
- * currently scheduled to take the fast
- * path. this will update the flags so that
- * the slowpath instruction are ignored if the
- * offset is negative.
- *
- * for loard_order == 0 the HI condition will
- * make loads at offset 0 take the slow path too.
+ case BPF_K:
+ /* Move the immediate value into the temporary register
+ * first; this sign-extends it to 64 bits, after which
+ * the ALU operation can safely be performed on the
+ * temporary register.
*/
- _emit(condt, ARM_CMP_I(r_off, 0), ctx);
-
- _emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data),
- ctx);
-
- if (load_order == 0)
- _emit(condt, ARM_LDRB_I(r_A, r_scratch, 0),
- ctx);
- else if (load_order == 1)
- emit_load_be16(condt, r_A, r_scratch, ctx);
- else if (load_order == 2)
- emit_load_be32(condt, r_A, r_scratch, ctx);
-
- _emit(condt, ARM_B(b_imm(i + 1, ctx)), ctx);
-
- /* the slowpath */
- emit_mov_i(ARM_R3, (u32)load_func[load_order], ctx);
- emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
- /* the offset is already in R1 */
- emit_blx_r(ARM_R3, ctx);
- /* check the result of skb_copy_bits */
- emit(ARM_CMP_I(ARM_R1, 0), ctx);
- emit_err_ret(ARM_COND_NE, ctx);
- emit(ARM_MOV_R(r_A, ARM_R0), ctx);
+ emit_a32_mov_i64(is64, tmp2, imm, false, ctx);
+ emit_a32_alu_r64(is64, dst, tmp2, dstk, false,
+ ctx, BPF_OP(code));
break;
- case BPF_LD | BPF_W | BPF_IND:
- load_order = 2;
- goto load_ind;
- case BPF_LD | BPF_H | BPF_IND:
- load_order = 1;
- goto load_ind;
- case BPF_LD | BPF_B | BPF_IND:
- load_order = 0;
-load_ind:
- update_on_xread(ctx);
- OP_IMM3(ARM_ADD, r_off, r_X, k, ctx);
- goto load_common;
- case BPF_LDX | BPF_IMM:
- ctx->seen |= SEEN_X;
- emit_mov_i(r_X, k, ctx);
+ }
+ break;
+ /* dst = dst / src(imm) */
+ /* dst = dst % src(imm) */
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_DIV | BPF_X:
+ case BPF_ALU | BPF_MOD | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_X:
+ rt = src_lo;
+ rd = dstk ? tmp2[1] : dst_lo;
+ if (dstk)
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ rt = sstk ? tmp2[0] : rt;
+ if (sstk)
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)),
+ ctx);
break;
- case BPF_LDX | BPF_W | BPF_LEN:
- ctx->seen |= SEEN_X | SEEN_SKB;
- emit(ARM_LDR_I(r_X, r_skb,
- offsetof(struct sk_buff, len)), ctx);
+ case BPF_K:
+ rt = tmp2[0];
+ emit_a32_mov_i(rt, imm, false, ctx);
break;
- case BPF_LDX | BPF_MEM:
- ctx->seen |= SEEN_X | SEEN_MEM_WORD(k);
- emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
+ }
+ emit_udivmod(rd, rd, rt, ctx, BPF_OP(code));
+ if (dstk)
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+ break;
+ case BPF_ALU64 | BPF_DIV | BPF_K:
+ case BPF_ALU64 | BPF_DIV | BPF_X:
+ case BPF_ALU64 | BPF_MOD | BPF_K:
+ case BPF_ALU64 | BPF_MOD | BPF_X:
+ goto notyet;
+ /* dst = dst >> imm */
+ /* dst = dst << imm */
+ case BPF_ALU | BPF_RSH | BPF_K:
+ case BPF_ALU | BPF_LSH | BPF_K:
+ if (unlikely(imm > 31))
+ return -EINVAL;
+ if (imm)
+ emit_a32_alu_i(dst_lo, imm, dstk, ctx, BPF_OP(code));
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+ break;
+ /* dst = dst << imm */
+ case BPF_ALU64 | BPF_LSH | BPF_K:
+ if (unlikely(imm > 63))
+ return -EINVAL;
+ emit_a32_lsh_i64(dst, dstk, imm, ctx);
+ break;
+ /* dst = dst >> imm */
+ case BPF_ALU64 | BPF_RSH | BPF_K:
+ if (unlikely(imm > 63))
+ return -EINVAL;
+ emit_a32_lsr_i64(dst, dstk, imm, ctx);
+ break;
+ /* dst = dst << src */
+ case BPF_ALU64 | BPF_LSH | BPF_X:
+ emit_a32_lsh_r64(dst, src, dstk, sstk, ctx);
+ break;
+ /* dst = dst >> src */
+ case BPF_ALU64 | BPF_RSH | BPF_X:
+ emit_a32_lsr_r64(dst, src, dstk, sstk, ctx);
+ break;
+ /* dst = dst >> src (signed) */
+ case BPF_ALU64 | BPF_ARSH | BPF_X:
+ emit_a32_arsh_r64(dst, src, dstk, sstk, ctx);
+ break;
+ /* dst = dst >> imm (signed) */
+ case BPF_ALU64 | BPF_ARSH | BPF_K:
+ if (unlikely(imm > 63))
+ return -EINVAL;
+ emit_a32_arsh_i64(dst, dstk, imm, ctx);
+ break;
+ /* dst = -dst */
+ case BPF_ALU | BPF_NEG:
+ emit_a32_alu_i(dst_lo, 0, dstk, ctx, BPF_OP(code));
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+ break;
+ /* dst = -dst (64 bit) */
+ case BPF_ALU64 | BPF_NEG:
+ emit_a32_neg64(dst, dstk, ctx);
+ break;
+ /* dst = dst * src/imm */
+ case BPF_ALU64 | BPF_MUL | BPF_X:
+ case BPF_ALU64 | BPF_MUL | BPF_K:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_a32_mul_r64(dst, src, dstk, sstk, ctx);
break;
- case BPF_LDX | BPF_B | BPF_MSH:
- /* x = ((*(frame + k)) & 0xf) << 2; */
- ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL;
- /* the interpreter should deal with the negative K */
- if ((int)k < 0)
- return -1;
- /* offset in r1: we might have to take the slow path */
- emit_mov_i(r_off, k, ctx);
- emit(ARM_CMP_R(r_skb_hl, r_off), ctx);
-
- /* load in r0: common with the slowpath */
- _emit(ARM_COND_HI, ARM_LDRB_R(ARM_R0, r_skb_data,
- ARM_R1), ctx);
- /*
- * emit_mov_i() might generate one or two instructions,
- * the same holds for emit_blx_r()
+ case BPF_K:
+ /* Move the immediate value into the temporary register
+ * first; this sign-extends it to 64 bits, after which
+ * the multiplication can safely be performed on the
+ * temporary register.
*/
- _emit(ARM_COND_HI, ARM_B(b_imm(i + 1, ctx) - 2), ctx);
-
- emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
- /* r_off is r1 */
- emit_mov_i(ARM_R3, (u32)jit_get_skb_b, ctx);
- emit_blx_r(ARM_R3, ctx);
- /* check the return value of skb_copy_bits */
- emit(ARM_CMP_I(ARM_R1, 0), ctx);
- emit_err_ret(ARM_COND_NE, ctx);
-
- emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx);
- emit(ARM_LSL_I(r_X, r_X, 2), ctx);
- break;
- case BPF_ST:
- ctx->seen |= SEEN_MEM_WORD(k);
- emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
- break;
- case BPF_STX:
- update_on_xread(ctx);
- ctx->seen |= SEEN_MEM_WORD(k);
- emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
- break;
- case BPF_ALU | BPF_ADD | BPF_K:
- /* A += K */
- OP_IMM3(ARM_ADD, r_A, r_A, k, ctx);
- break;
- case BPF_ALU | BPF_ADD | BPF_X:
- update_on_xread(ctx);
- emit(ARM_ADD_R(r_A, r_A, r_X), ctx);
- break;
- case BPF_ALU | BPF_SUB | BPF_K:
- /* A -= K */
- OP_IMM3(ARM_SUB, r_A, r_A, k, ctx);
- break;
- case BPF_ALU | BPF_SUB | BPF_X:
- update_on_xread(ctx);
- emit(ARM_SUB_R(r_A, r_A, r_X), ctx);
- break;
- case BPF_ALU | BPF_MUL | BPF_K:
- /* A *= K */
- emit_mov_i(r_scratch, k, ctx);
- emit(ARM_MUL(r_A, r_A, r_scratch), ctx);
- break;
- case BPF_ALU | BPF_MUL | BPF_X:
- update_on_xread(ctx);
- emit(ARM_MUL(r_A, r_A, r_X), ctx);
+ emit_a32_mov_i64(is64, tmp2, imm, false, ctx);
+ emit_a32_mul_r64(dst, tmp2, dstk, false, ctx);
break;
- case BPF_ALU | BPF_DIV | BPF_K:
- if (k == 1)
- break;
- emit_mov_i(r_scratch, k, ctx);
- emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_DIV);
- break;
- case BPF_ALU | BPF_DIV | BPF_X:
- update_on_xread(ctx);
- emit(ARM_CMP_I(r_X, 0), ctx);
- emit_err_ret(ARM_COND_EQ, ctx);
- emit_udivmod(r_A, r_A, r_X, ctx, BPF_DIV);
+ }
+ break;
+ /* dst = htole(dst) */
+ /* dst = htobe(dst) */
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ rd = dstk ? tmp[0] : dst_hi;
+ rt = dstk ? tmp[1] : dst_lo;
+ if (dstk) {
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+ if (BPF_SRC(code) == BPF_FROM_LE)
+ goto emit_bswap_uxt;
+ switch (imm) {
+ case 16:
+ emit_rev16(rt, rt, ctx);
+ goto emit_bswap_uxt;
+ case 32:
+ emit_rev32(rt, rt, ctx);
+ goto emit_bswap_uxt;
+ case 64:
+ /* Because of the usage of ARM_LR */
+ ctx->seen |= SEEN_CALL;
+ emit_rev32(ARM_LR, rt, ctx);
+ emit_rev32(rt, rd, ctx);
+ emit(ARM_MOV_R(rd, ARM_LR), ctx);
break;
- case BPF_ALU | BPF_MOD | BPF_K:
- if (k == 1) {
- emit_mov_i(r_A, 0, ctx);
- break;
- }
- emit_mov_i(r_scratch, k, ctx);
- emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_MOD);
+ }
+ goto exit;
+emit_bswap_uxt:
+ switch (imm) {
+ case 16:
+ /* zero-extend 16 bits into 64 bits */
+#if __LINUX_ARM_ARCH__ < 6
+ emit_a32_mov_i(tmp2[1], 0xffff, false, ctx);
+ emit(ARM_AND_R(rt, rt, tmp2[1]), ctx);
+#else /* ARMv6+ */
+ emit(ARM_UXTH(rt, rt), ctx);
+#endif
+ emit(ARM_EOR_R(rd, rd, rd), ctx);
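+		/* e.g. the 16-bit value 0xd00f must become
+		 * 0x000000000000d00f in the 64-bit pair, hence
+		 * clearing rt's upper half and all of rd.
+		 */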
break;
- case BPF_ALU | BPF_MOD | BPF_X:
- update_on_xread(ctx);
- emit(ARM_CMP_I(r_X, 0), ctx);
- emit_err_ret(ARM_COND_EQ, ctx);
- emit_udivmod(r_A, r_A, r_X, ctx, BPF_MOD);
+ case 32:
+ /* zero-extend 32 bits into 64 bits */
+ emit(ARM_EOR_R(rd, rd, rd), ctx);
break;
- case BPF_ALU | BPF_OR | BPF_K:
- /* A |= K */
- OP_IMM3(ARM_ORR, r_A, r_A, k, ctx);
+ case 64:
+ /* nop */
break;
- case BPF_ALU | BPF_OR | BPF_X:
- update_on_xread(ctx);
- emit(ARM_ORR_R(r_A, r_A, r_X), ctx);
+ }
+exit:
+ if (dstk) {
+ emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+ break;
+ /* dst = imm64 */
+ case BPF_LD | BPF_IMM | BPF_DW:
+ {
+ const struct bpf_insn insn1 = insn[1];
+ u32 hi, lo = imm;
+
+ hi = insn1.imm;
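+		/* The 64-bit immediate spans two BPF instructions:
+		 * imm holds the low 32 bits, insn[1].imm the high 32;
+		 * e.g. dst = 0x1122334455667788 gives lo = 0x55667788,
+		 * hi = 0x11223344. Returning 1 tells build_body() to
+		 * skip the second slot.
+		 */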
+ emit_a32_mov_i(dst_lo, lo, dstk, ctx);
+ emit_a32_mov_i(dst_hi, hi, dstk, ctx);
+
+ return 1;
+ }
+ /* LDX: dst = *(size *)(src + off) */
+ case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_MEM | BPF_B:
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ rn = sstk ? tmp2[1] : src_lo;
+ if (sstk)
+ emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
+ switch (BPF_SIZE(code)) {
+ case BPF_W:
+ /* Load a Word */
+ case BPF_H:
+ /* Load a Half-Word */
+ case BPF_B:
+ /* Load a Byte */
+ emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code));
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
break;
- case BPF_ALU | BPF_XOR | BPF_K:
- /* A ^= K; */
- OP_IMM3(ARM_EOR, r_A, r_A, k, ctx);
+ case BPF_DW:
+ /* Load a double word */
+ emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W);
+ emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W);
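+		/* The 64-bit load is split into two 32-bit loads: on
+		 * this little-endian target the low word sits at off
+		 * and the high word at off+4.
+		 */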
break;
- case BPF_ANC | SKF_AD_ALU_XOR_X:
- case BPF_ALU | BPF_XOR | BPF_X:
- /* A ^= X */
- update_on_xread(ctx);
- emit(ARM_EOR_R(r_A, r_A, r_X), ctx);
+ }
+ break;
+ /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
+ case BPF_LD | BPF_ABS | BPF_W:
+ case BPF_LD | BPF_ABS | BPF_H:
+ case BPF_LD | BPF_ABS | BPF_B:
+ /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
+ case BPF_LD | BPF_IND | BPF_W:
+ case BPF_LD | BPF_IND | BPF_H:
+ case BPF_LD | BPF_IND | BPF_B:
+ {
+ const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */
+		const u8 r0 = bpf2a32[BPF_REG_0][1]; /* r0: skb ptr / return value */
+ const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */
+ const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */
+ const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */
+ const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */
+ int size;
+
+ /* Setting up first argument */
+ emit(ARM_MOV_R(r0, r4), ctx);
+
+ /* Setting up second argument */
+ emit_a32_mov_i(r1, imm, false, ctx);
+ if (BPF_MODE(code) == BPF_IND)
+ emit_a32_alu_r(r1, src_lo, false, sstk, ctx,
+ false, false, BPF_ADD);
+
+ /* Setting up third argument */
+ switch (BPF_SIZE(code)) {
+ case BPF_W:
+ size = 4;
break;
- case BPF_ALU | BPF_AND | BPF_K:
- /* A &= K */
- OP_IMM3(ARM_AND, r_A, r_A, k, ctx);
+ case BPF_H:
+ size = 2;
break;
- case BPF_ALU | BPF_AND | BPF_X:
- update_on_xread(ctx);
- emit(ARM_AND_R(r_A, r_A, r_X), ctx);
+ case BPF_B:
+ size = 1;
break;
- case BPF_ALU | BPF_LSH | BPF_K:
- if (unlikely(k > 31))
- return -1;
- emit(ARM_LSL_I(r_A, r_A, k), ctx);
+ default:
+ return -EINVAL;
+ }
+ emit_a32_mov_i(r2, size, false, ctx);
+
+ /* Setting up fourth argument */
+ emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx);
+
+ /* Setting up function pointer to call */
+ emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx);
+ emit_blx_r(r6, ctx);
+
+ emit(ARM_EOR_R(r1, r1, r1), ctx);
+	/* Check whether the returned pointer is NULL:
+	 * if NULL, jump to the epilogue;
+	 * else continue and load the value from that address.
+	 */
+ emit(ARM_CMP_I(r0, 0), ctx);
+ jmp_offset = epilogue_offset(ctx);
+ check_imm24(jmp_offset);
+ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
+
+ /* Load value from the address */
+ switch (BPF_SIZE(code)) {
+ case BPF_W:
+ emit(ARM_LDR_I(r0, r0, 0), ctx);
+ emit_rev32(r0, r0, ctx);
break;
- case BPF_ALU | BPF_LSH | BPF_X:
- update_on_xread(ctx);
- emit(ARM_LSL_R(r_A, r_A, r_X), ctx);
+ case BPF_H:
+ emit(ARM_LDRH_I(r0, r0, 0), ctx);
+ emit_rev16(r0, r0, ctx);
break;
- case BPF_ALU | BPF_RSH | BPF_K:
- if (unlikely(k > 31))
- return -1;
- if (k)
- emit(ARM_LSR_I(r_A, r_A, k), ctx);
+ case BPF_B:
+ emit(ARM_LDRB_I(r0, r0, 0), ctx);
+ /* No need to reverse */
break;
- case BPF_ALU | BPF_RSH | BPF_X:
- update_on_xread(ctx);
- emit(ARM_LSR_R(r_A, r_A, r_X), ctx);
+ }
+ break;
+ }
+ /* ST: *(size *)(dst + off) = imm */
+ case BPF_ST | BPF_MEM | BPF_W:
+ case BPF_ST | BPF_MEM | BPF_H:
+ case BPF_ST | BPF_MEM | BPF_B:
+ case BPF_ST | BPF_MEM | BPF_DW:
+ switch (BPF_SIZE(code)) {
+ case BPF_DW:
+ /* Sign-extend immediate value into temp reg */
+ emit_a32_mov_i64(true, tmp2, imm, false, ctx);
+ emit_str_r(dst_lo, tmp2[1], dstk, off, ctx, BPF_W);
+ emit_str_r(dst_lo, tmp2[0], dstk, off+4, ctx, BPF_W);
break;
- case BPF_ALU | BPF_NEG:
- /* A = -A */
- emit(ARM_RSB_I(r_A, r_A, 0), ctx);
+ case BPF_W:
+ case BPF_H:
+ case BPF_B:
+ emit_a32_mov_i(tmp2[1], imm, false, ctx);
+ emit_str_r(dst_lo, tmp2[1], dstk, off, ctx,
+ BPF_SIZE(code));
break;
- case BPF_JMP | BPF_JA:
- /* pc += K */
- emit(ARM_B(b_imm(i + k + 1, ctx)), ctx);
+ }
+ break;
+ /* STX XADD: lock *(u32 *)(dst + off) += src */
+ case BPF_STX | BPF_XADD | BPF_W:
+ /* STX XADD: lock *(u64 *)(dst + off) += src */
+ case BPF_STX | BPF_XADD | BPF_DW:
+ goto notyet;
+ /* STX: *(size *)(dst + off) = src */
+ case BPF_STX | BPF_MEM | BPF_W:
+ case BPF_STX | BPF_MEM | BPF_H:
+ case BPF_STX | BPF_MEM | BPF_B:
+ case BPF_STX | BPF_MEM | BPF_DW:
+ {
+ u8 sz = BPF_SIZE(code);
+
+ rn = sstk ? tmp2[1] : src_lo;
+ rm = sstk ? tmp2[0] : src_hi;
+ if (sstk) {
+ emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx);
+ }
+
+ /* Store the value */
+ if (BPF_SIZE(code) == BPF_DW) {
+ emit_str_r(dst_lo, rn, dstk, off, ctx, BPF_W);
+ emit_str_r(dst_lo, rm, dstk, off+4, ctx, BPF_W);
+ } else {
+ emit_str_r(dst_lo, rn, dstk, off, ctx, sz);
+ }
+ break;
+ }
+ /* PC += off if dst == src */
+ /* PC += off if dst > src */
+ /* PC += off if dst >= src */
+ /* PC += off if dst < src */
+ /* PC += off if dst <= src */
+ /* PC += off if dst != src */
+ /* PC += off if dst > src (signed) */
+ /* PC += off if dst >= src (signed) */
+ /* PC += off if dst < src (signed) */
+ /* PC += off if dst <= src (signed) */
+ /* PC += off if dst & src */
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ /* Setup source registers */
+ rm = sstk ? tmp2[0] : src_hi;
+ rn = sstk ? tmp2[1] : src_lo;
+ if (sstk) {
+ emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(src_hi)), ctx);
+ }
+ goto go_jmp;
+ /* PC += off if dst == imm */
+ /* PC += off if dst > imm */
+ /* PC += off if dst >= imm */
+ /* PC += off if dst < imm */
+ /* PC += off if dst <= imm */
+ /* PC += off if dst != imm */
+ /* PC += off if dst > imm (signed) */
+ /* PC += off if dst >= imm (signed) */
+ /* PC += off if dst < imm (signed) */
+ /* PC += off if dst <= imm (signed) */
+ /* PC += off if dst & imm */
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ if (off == 0)
break;
- case BPF_JMP | BPF_JEQ | BPF_K:
- /* pc += (A == K) ? pc->jt : pc->jf */
- condt = ARM_COND_EQ;
- goto cmp_imm;
- case BPF_JMP | BPF_JGT | BPF_K:
- /* pc += (A > K) ? pc->jt : pc->jf */
- condt = ARM_COND_HI;
- goto cmp_imm;
- case BPF_JMP | BPF_JGE | BPF_K:
- /* pc += (A >= K) ? pc->jt : pc->jf */
- condt = ARM_COND_HS;
-cmp_imm:
- imm12 = imm8m(k);
- if (imm12 < 0) {
- emit_mov_i_no8m(r_scratch, k, ctx);
- emit(ARM_CMP_R(r_A, r_scratch), ctx);
- } else {
- emit(ARM_CMP_I(r_A, imm12), ctx);
- }
-cond_jump:
- if (inst->jt)
- _emit(condt, ARM_B(b_imm(i + inst->jt + 1,
- ctx)), ctx);
- if (inst->jf)
- _emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1,
- ctx)), ctx);
+ rm = tmp2[0];
+ rn = tmp2[1];
+ /* Sign-extend immediate value */
+ emit_a32_mov_i64(true, tmp2, imm, false, ctx);
+go_jmp:
+ /* Setup destination register */
+ rd = dstk ? tmp[0] : dst_hi;
+ rt = dstk ? tmp[1] : dst_lo;
+ if (dstk) {
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Check for the condition */
+ emit_ar_r(rd, rt, rm, rn, ctx, BPF_OP(code));
+
+ /* Setup JUMP instruction */
+ jmp_offset = bpf2a32_offset(i+off, i, ctx);
+ switch (BPF_OP(code)) {
+ case BPF_JNE:
+ case BPF_JSET:
+ _emit(ARM_COND_NE, ARM_B(jmp_offset), ctx);
break;
- case BPF_JMP | BPF_JEQ | BPF_X:
- /* pc += (A == X) ? pc->jt : pc->jf */
- condt = ARM_COND_EQ;
- goto cmp_x;
- case BPF_JMP | BPF_JGT | BPF_X:
- /* pc += (A > X) ? pc->jt : pc->jf */
- condt = ARM_COND_HI;
- goto cmp_x;
- case BPF_JMP | BPF_JGE | BPF_X:
- /* pc += (A >= X) ? pc->jt : pc->jf */
- condt = ARM_COND_CS;
-cmp_x:
- update_on_xread(ctx);
- emit(ARM_CMP_R(r_A, r_X), ctx);
- goto cond_jump;
- case BPF_JMP | BPF_JSET | BPF_K:
- /* pc += (A & K) ? pc->jt : pc->jf */
- condt = ARM_COND_NE;
- /* not set iff all zeroes iff Z==1 iff EQ */
-
- imm12 = imm8m(k);
- if (imm12 < 0) {
- emit_mov_i_no8m(r_scratch, k, ctx);
- emit(ARM_TST_R(r_A, r_scratch), ctx);
- } else {
- emit(ARM_TST_I(r_A, imm12), ctx);
- }
- goto cond_jump;
- case BPF_JMP | BPF_JSET | BPF_X:
- /* pc += (A & X) ? pc->jt : pc->jf */
- update_on_xread(ctx);
- condt = ARM_COND_NE;
- emit(ARM_TST_R(r_A, r_X), ctx);
- goto cond_jump;
- case BPF_RET | BPF_A:
- emit(ARM_MOV_R(ARM_R0, r_A), ctx);
- goto b_epilogue;
- case BPF_RET | BPF_K:
- if ((k == 0) && (ctx->ret0_fp_idx < 0))
- ctx->ret0_fp_idx = i;
- emit_mov_i(ARM_R0, k, ctx);
-b_epilogue:
- if (i != ctx->skf->len - 1)
- emit(ARM_B(b_imm(prog->len, ctx)), ctx);
+ case BPF_JEQ:
+ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
break;
- case BPF_MISC | BPF_TAX:
- /* X = A */
- ctx->seen |= SEEN_X;
- emit(ARM_MOV_R(r_X, r_A), ctx);
+ case BPF_JGT:
+ _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
break;
- case BPF_MISC | BPF_TXA:
- /* A = X */
- update_on_xread(ctx);
- emit(ARM_MOV_R(r_A, r_X), ctx);
+ case BPF_JGE:
+ _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_PROTOCOL:
- /* A = ntohs(skb->protocol) */
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- protocol) != 2);
- off = offsetof(struct sk_buff, protocol);
- emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx);
- emit_swap16(r_A, r_scratch, ctx);
+ case BPF_JSGT:
+ _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_CPU:
- /* r_scratch = current_thread_info() */
- OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx);
- /* A = current_thread_info()->cpu */
- BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4);
- off = offsetof(struct thread_info, cpu);
- emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
+ case BPF_JSGE:
+ _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_IFINDEX:
- case BPF_ANC | SKF_AD_HATYPE:
- /* A = skb->dev->ifindex */
- /* A = skb->dev->type */
- ctx->seen |= SEEN_SKB;
- off = offsetof(struct sk_buff, dev);
- emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
-
- emit(ARM_CMP_I(r_scratch, 0), ctx);
- emit_err_ret(ARM_COND_EQ, ctx);
-
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
- ifindex) != 4);
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
- type) != 2);
-
- if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
- off = offsetof(struct net_device, ifindex);
- emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
- } else {
- /*
- * offset of field "type" in "struct
- * net_device" is above what can be
- * used in the ldrh rd, [rn, #imm]
- * instruction, so load the offset in
- * a register and use ldrh rd, [rn, rm]
- */
- off = offsetof(struct net_device, type);
- emit_mov_i(ARM_R3, off, ctx);
- emit(ARM_LDRH_R(r_A, r_scratch, ARM_R3), ctx);
- }
+ case BPF_JLE:
+ _emit(ARM_COND_LS, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_MARK:
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
- off = offsetof(struct sk_buff, mark);
- emit(ARM_LDR_I(r_A, r_skb, off), ctx);
+ case BPF_JLT:
+ _emit(ARM_COND_CC, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_RXHASH:
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
- off = offsetof(struct sk_buff, hash);
- emit(ARM_LDR_I(r_A, r_skb, off), ctx);
+ case BPF_JSLT:
+ _emit(ARM_COND_LT, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_VLAN_TAG:
- case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
- off = offsetof(struct sk_buff, vlan_tci);
- emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
- if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
- OP_IMM3(ARM_AND, r_A, r_A, ~VLAN_TAG_PRESENT, ctx);
- else {
- OP_IMM3(ARM_LSR, r_A, r_A, 12, ctx);
- OP_IMM3(ARM_AND, r_A, r_A, 0x1, ctx);
- }
+ case BPF_JSLE:
+ _emit(ARM_COND_GE, ARM_B(jmp_offset), ctx);
break;
- case BPF_ANC | SKF_AD_PKTTYPE:
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- __pkt_type_offset[0]) != 1);
- off = PKT_TYPE_OFFSET();
- emit(ARM_LDRB_I(r_A, r_skb, off), ctx);
- emit(ARM_AND_I(r_A, r_A, PKT_TYPE_MAX), ctx);
-#ifdef __BIG_ENDIAN_BITFIELD
- emit(ARM_LSR_I(r_A, r_A, 5), ctx);
-#endif
+ }
+ break;
+ /* JMP OFF */
+ case BPF_JMP | BPF_JA:
+ {
+ if (off == 0)
break;
- case BPF_ANC | SKF_AD_QUEUE:
- ctx->seen |= SEEN_SKB;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- queue_mapping) != 2);
- BUILD_BUG_ON(offsetof(struct sk_buff,
- queue_mapping) > 0xff);
- off = offsetof(struct sk_buff, queue_mapping);
- emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
+ jmp_offset = bpf2a32_offset(i+off, i, ctx);
+ check_imm24(jmp_offset);
+ emit(ARM_B(jmp_offset), ctx);
+ break;
+ }
+ /* tail call */
+ case BPF_JMP | BPF_TAIL_CALL:
+ if (emit_bpf_tail_call(ctx))
+ return -EFAULT;
+ break;
+ /* function call */
+ case BPF_JMP | BPF_CALL:
+ {
+ const u8 *r0 = bpf2a32[BPF_REG_0];
+ const u8 *r1 = bpf2a32[BPF_REG_1];
+ const u8 *r2 = bpf2a32[BPF_REG_2];
+ const u8 *r3 = bpf2a32[BPF_REG_3];
+ const u8 *r4 = bpf2a32[BPF_REG_4];
+ const u8 *r5 = bpf2a32[BPF_REG_5];
+ const u32 func = (u32)__bpf_call_base + (u32)imm;
+
+ emit_a32_mov_r64(true, r0, r1, false, false, ctx);
+ emit_a32_mov_r64(true, r1, r2, false, true, ctx);
+ emit_push_r64(r5, 0, ctx);
+ emit_push_r64(r4, 8, ctx);
+ emit_push_r64(r3, 16, ctx);
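+		/* AAPCS passes only four arguments in registers, so
+		 * BPF args 1-2 travel in r0:r1 and r2:r3 while args
+		 * 3-5 are spilled to a 24-byte stack area, popped
+		 * again right after the call returns.
+		 */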
+
+ emit_a32_mov_i(tmp[1], func, false, ctx);
+ emit_blx_r(tmp[1], ctx);
+
+		emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); /* pop stacked args */
+ break;
+ }
+ /* function return */
+ case BPF_JMP | BPF_EXIT:
+		/* Optimization: when the last instruction is EXIT,
+		 * simply fall through to the epilogue.
+		 */
+ if (i == ctx->prog->len - 1)
break;
- case BPF_ANC | SKF_AD_PAY_OFFSET:
- ctx->seen |= SEEN_SKB | SEEN_CALL;
+ jmp_offset = epilogue_offset(ctx);
+ check_imm24(jmp_offset);
+ emit(ARM_B(jmp_offset), ctx);
+ break;
+notyet:
+ pr_info_once("*** NOT YET: opcode %02x ***\n", code);
+ return -EFAULT;
+ default:
+ pr_err_once("unknown opcode %02x\n", code);
+ return -EINVAL;
+ }
- emit(ARM_MOV_R(ARM_R0, r_skb), ctx);
- emit_mov_i(ARM_R3, (unsigned int)skb_get_poff, ctx);
- emit_blx_r(ARM_R3, ctx);
- emit(ARM_MOV_R(r_A, ARM_R0), ctx);
- break;
- case BPF_LDX | BPF_W | BPF_ABS:
- /*
- * load a 32bit word from struct seccomp_data.
- * seccomp_check_filter() will already have checked
- * that k is 32bit aligned and lies within the
- * struct seccomp_data.
- */
- ctx->seen |= SEEN_SKB;
- emit(ARM_LDR_I(r_A, r_skb, k), ctx);
- break;
- default:
- return -1;
+ if (ctx->flags & FLAG_IMM_OVERFLOW)
+ /*
+ * this instruction generated an overflow when
+ * trying to access the literal pool, so
+ * delegate this filter to the kernel interpreter.
+ */
+ return -1;
+ return 0;
+}
+
+static int build_body(struct jit_ctx *ctx)
+{
+ const struct bpf_prog *prog = ctx->prog;
+ unsigned int i;
+
+ for (i = 0; i < prog->len; i++) {
+ const struct bpf_insn *insn = &(prog->insnsi[i]);
+ int ret;
+
+ ret = build_insn(insn, ctx);
+
+		/* ret > 0 means the 64-bit immediate load consumed
+		 * the following instruction slot as well.
+		 */
+ if (ret > 0) {
+ i++;
+ if (ctx->target == NULL)
+ ctx->offsets[i] = ctx->idx;
+ continue;
}
- if (ctx->flags & FLAG_IMM_OVERFLOW)
- /*
- * this instruction generated an overflow when
- * trying to access the literal pool, so
- * delegate this filter to the kernel interpreter.
- */
- return -1;
+ if (ctx->target == NULL)
+ ctx->offsets[i] = ctx->idx;
+
+		/* If unsuccessful, return with the error code */
+ if (ret)
+ return ret;
}
+ return 0;
+}
- /* compute offsets only during the first pass */
- if (ctx->target == NULL)
- ctx->offsets[i] = ctx->idx * 4;
+static int validate_code(struct jit_ctx *ctx)
+{
+ int i;
+
+ for (i = 0; i < ctx->idx; i++) {
+ if (ctx->target[i] == __opcode_to_mem_arm(ARM_INST_UDF))
+ return -1;
+ }
return 0;
}
+void bpf_jit_compile(struct bpf_prog *prog)
+{
+ /* Nothing to do here. We support Internal BPF. */
+}
-void bpf_jit_compile(struct bpf_prog *fp)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
+ struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
+ bool tmp_blinded = false;
struct jit_ctx ctx;
- unsigned tmp_idx;
- unsigned alloc_size;
- u8 *target_ptr;
+ unsigned int tmp_idx;
+ unsigned int image_size;
+ u8 *image_ptr;
+	/* If the BPF JIT is not enabled, we must fall back to
+ * the interpreter.
+ */
if (!bpf_jit_enable)
- return;
+ return orig_prog;
- memset(&ctx, 0, sizeof(ctx));
- ctx.skf = fp;
- ctx.ret0_fp_idx = -1;
+	/* If constant blinding was enabled and blinding failed, we must
+	 * fall back to the interpreter. Otherwise, we continue with the
+	 * (possibly) blinded program.
+ */
+ tmp = bpf_jit_blind_constants(prog);
- ctx.offsets = kzalloc(4 * (ctx.skf->len + 1), GFP_KERNEL);
- if (ctx.offsets == NULL)
- return;
+ if (IS_ERR(tmp))
+ return orig_prog;
+ if (tmp != prog) {
+ tmp_blinded = true;
+ prog = tmp;
+ }
- /* fake pass to fill in the ctx->seen */
- if (unlikely(build_body(&ctx)))
+ memset(&ctx, 0, sizeof(ctx));
+ ctx.prog = prog;
+
+	/* If we cannot allocate memory for offsets[], we must
+	 * fall back to the interpreter.
+ */
+ ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
+ if (ctx.offsets == NULL) {
+ prog = orig_prog;
goto out;
+ }
+
+	/* 1) fake pass to find the length of the JITed code,
+	 * to compute ctx->offsets and the other context variables
+	 * needed to generate the final JITed code.
+	 * The allocator also randomizes the start of the image,
+	 * prefixing it with a random number of faulting instructions.
+ *
+ * If the first pass fails then there is no chance of it
+ * being successful in the second pass, so just fall back
+ * to the interpreter.
+ */
+ if (build_body(&ctx)) {
+ prog = orig_prog;
+ goto out_off;
+ }
tmp_idx = ctx.idx;
build_prologue(&ctx);
ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
+ ctx.epilogue_offset = ctx.idx;
+
#if __LINUX_ARM_ARCH__ < 7
tmp_idx = ctx.idx;
build_epilogue(&ctx);
@@ -1021,64 +1880,83 @@ void bpf_jit_compile(struct bpf_prog *fp)
ctx.idx += ctx.imm_count;
if (ctx.imm_count) {
- ctx.imms = kzalloc(4 * ctx.imm_count, GFP_KERNEL);
- if (ctx.imms == NULL)
- goto out;
+ ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL);
+ if (ctx.imms == NULL) {
+ prog = orig_prog;
+ goto out_off;
+ }
}
#else
- /* there's nothing after the epilogue on ARMv7 */
+	/* there's nothing after the epilogue to emit on ARMv7 */
build_epilogue(&ctx);
#endif
- alloc_size = 4 * ctx.idx;
- header = bpf_jit_binary_alloc(alloc_size, &target_ptr,
- 4, jit_fill_hole);
- if (header == NULL)
- goto out;
+	/* Now we can get the actual image size of the JITed arm code.
+	 * Currently, we are not considering the THUMB-2 instructions
+	 * for the JIT, although that could decrease the size of the
+	 * image.
+	 *
+	 * As each arm instruction is 32 bits wide, we translate the
+	 * number of JITed instructions into the byte size required
+	 * to store the generated code.
+	 */
+ image_size = sizeof(u32) * ctx.idx;
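+	/* e.g. 1500 JITed instructions take a 6000-byte image */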
- ctx.target = (u32 *) target_ptr;
+	/* Now we know the size of the image to allocate */
+ header = bpf_jit_binary_alloc(image_size, &image_ptr,
+ sizeof(u32), jit_fill_hole);
+	/* If we cannot allocate memory for the image, we must
+	 * fall back to the interpreter.
+ */
+ if (header == NULL) {
+ prog = orig_prog;
+ goto out_imms;
+ }
+
+	/* 2) actual pass to generate the final JITed code */
+ ctx.target = (u32 *) image_ptr;
ctx.idx = 0;
build_prologue(&ctx);
+
+ /* If building the body of the JITed code fails somehow,
+	 * we fall back to the interpreter.
+ */
if (build_body(&ctx) < 0) {
-#if __LINUX_ARM_ARCH__ < 7
- if (ctx.imm_count)
- kfree(ctx.imms);
-#endif
+ image_ptr = NULL;
bpf_jit_binary_free(header);
- goto out;
+ prog = orig_prog;
+ goto out_imms;
}
build_epilogue(&ctx);
+	/* 3) extra pass to validate the JITed code */
+ if (validate_code(&ctx)) {
+ image_ptr = NULL;
+ bpf_jit_binary_free(header);
+ prog = orig_prog;
+ goto out_imms;
+ }
flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));
-#if __LINUX_ARM_ARCH__ < 7
- if (ctx.imm_count)
- kfree(ctx.imms);
-#endif
-
if (bpf_jit_enable > 1)
/* there are 2 passes here */
- bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
+ bpf_jit_dump(prog->len, image_size, 2, ctx.target);
set_memory_ro((unsigned long)header, header->pages);
- fp->bpf_func = (void *)ctx.target;
- fp->jited = 1;
-out:
+ prog->bpf_func = (void *)ctx.target;
+ prog->jited = 1;
+ prog->jited_len = image_size;
+
+out_imms:
+#if __LINUX_ARM_ARCH__ < 7
+ if (ctx.imm_count)
+ kfree(ctx.imms);
+#endif
+out_off:
kfree(ctx.offsets);
- return;
+out:
+ if (tmp_blinded)
+ bpf_jit_prog_release_other(prog, prog == orig_prog ?
+ tmp : orig_prog);
+ return prog;
}
-void bpf_jit_free(struct bpf_prog *fp)
-{
- unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
- struct bpf_binary_header *header = (void *)addr;
-
- if (!fp->jited)
- goto free_filter;
-
- set_memory_rw(addr, header->pages);
- bpf_jit_binary_free(header);
-
-free_filter:
- bpf_prog_unlock_free(fp);
-}
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index c46fca2..d5cf5f6 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -11,6 +11,7 @@
#ifndef PFILTER_OPCODES_ARM_H
#define PFILTER_OPCODES_ARM_H
+/* ARM 32bit Registers */
#define ARM_R0 0
#define ARM_R1 1
#define ARM_R2 2
@@ -22,38 +23,43 @@
#define ARM_R8 8
#define ARM_R9 9
#define ARM_R10 10
-#define ARM_FP 11
-#define ARM_IP 12
-#define ARM_SP 13
-#define ARM_LR 14
-#define ARM_PC 15
-
-#define ARM_COND_EQ 0x0
-#define ARM_COND_NE 0x1
-#define ARM_COND_CS 0x2
+#define ARM_FP 11 /* Frame Pointer */
+#define ARM_IP 12 /* Intra-procedure scratch register */
+#define ARM_SP 13 /* Stack pointer: as load/store base reg */
+#define ARM_LR 14 /* Link Register */
+#define ARM_PC 15 /* Program counter */
+
+#define ARM_COND_EQ 0x0 /* == */
+#define ARM_COND_NE 0x1 /* != */
+#define ARM_COND_CS 0x2 /* unsigned >= */
#define ARM_COND_HS ARM_COND_CS
-#define ARM_COND_CC 0x3
+#define ARM_COND_CC 0x3 /* unsigned < */
#define ARM_COND_LO ARM_COND_CC
-#define ARM_COND_MI 0x4
-#define ARM_COND_PL 0x5
-#define ARM_COND_VS 0x6
-#define ARM_COND_VC 0x7
-#define ARM_COND_HI 0x8
-#define ARM_COND_LS 0x9
-#define ARM_COND_GE 0xa
-#define ARM_COND_LT 0xb
-#define ARM_COND_GT 0xc
-#define ARM_COND_LE 0xd
-#define ARM_COND_AL 0xe
+#define ARM_COND_MI 0x4 /* < 0 */
+#define ARM_COND_PL 0x5 /* >= 0 */
+#define ARM_COND_VS 0x6 /* Signed Overflow */
+#define ARM_COND_VC 0x7 /* No Signed Overflow */
+#define ARM_COND_HI 0x8 /* unsigned > */
+#define ARM_COND_LS 0x9 /* unsigned <= */
+#define ARM_COND_GE 0xa /* Signed >= */
+#define ARM_COND_LT 0xb /* Signed < */
+#define ARM_COND_GT 0xc /* Signed > */
+#define ARM_COND_LE 0xd /* Signed <= */
+#define ARM_COND_AL 0xe /* None */
/* register shift types */
#define SRTYPE_LSL 0
#define SRTYPE_LSR 1
#define SRTYPE_ASR 2
#define SRTYPE_ROR 3
+#define SRTYPE_ASL (SRTYPE_LSL)
#define ARM_INST_ADD_R 0x00800000
+#define ARM_INST_ADDS_R 0x00900000
+#define ARM_INST_ADC_R 0x00a00000
+#define ARM_INST_ADC_I 0x02a00000
#define ARM_INST_ADD_I 0x02800000
+#define ARM_INST_ADDS_I 0x02900000
#define ARM_INST_AND_R 0x00000000
#define ARM_INST_AND_I 0x02000000
@@ -76,8 +82,10 @@
#define ARM_INST_LDRH_I 0x01d000b0
#define ARM_INST_LDRH_R 0x019000b0
#define ARM_INST_LDR_I 0x05900000
+#define ARM_INST_LDR_R 0x07900000
#define ARM_INST_LDM 0x08900000
+#define ARM_INST_LDM_IA 0x08b00000
#define ARM_INST_LSL_I 0x01a00000
#define ARM_INST_LSL_R 0x01a00010
@@ -86,6 +94,7 @@
#define ARM_INST_LSR_R 0x01a00030
#define ARM_INST_MOV_R 0x01a00000
+#define ARM_INST_MOVS_R 0x01b00000
#define ARM_INST_MOV_I 0x03a00000
#define ARM_INST_MOVW 0x03000000
#define ARM_INST_MOVT 0x03400000
@@ -96,17 +105,28 @@
#define ARM_INST_PUSH 0x092d0000
#define ARM_INST_ORR_R 0x01800000
+#define ARM_INST_ORRS_R 0x01900000
#define ARM_INST_ORR_I 0x03800000
#define ARM_INST_REV 0x06bf0f30
#define ARM_INST_REV16 0x06bf0fb0
#define ARM_INST_RSB_I 0x02600000
+#define ARM_INST_RSBS_I 0x02700000
+#define ARM_INST_RSC_I 0x02e00000
#define ARM_INST_SUB_R 0x00400000
+#define ARM_INST_SUBS_R 0x00500000
+#define ARM_INST_RSB_R 0x00600000
#define ARM_INST_SUB_I 0x02400000
+#define ARM_INST_SUBS_I 0x02500000
+#define ARM_INST_SBC_I 0x02c00000
+#define ARM_INST_SBC_R 0x00c00000
+#define ARM_INST_SBCS_R 0x00d00000
#define ARM_INST_STR_I 0x05800000
+#define ARM_INST_STRB_I 0x05c00000
+#define ARM_INST_STRH_I 0x01c000b0
#define ARM_INST_TST_R 0x01100000
#define ARM_INST_TST_I 0x03100000
@@ -117,6 +137,8 @@
#define ARM_INST_MLS 0x00600090
+#define ARM_INST_UXTH 0x06ff0070
+
/*
* Use a suitable undefined instruction to use for ARM/Thumb2 faulting.
* We need to be careful not to conflict with those used by other modules
@@ -135,9 +157,15 @@
#define _AL3_R(op, rd, rn, rm) ((op ## _R) | (rd) << 12 | (rn) << 16 | (rm))
/* immediate */
#define _AL3_I(op, rd, rn, imm) ((op ## _I) | (rd) << 12 | (rn) << 16 | (imm))
+/* register with register-shift */
+#define _AL3_SR(inst) (inst | (1 << 4))
#define ARM_ADD_R(rd, rn, rm) _AL3_R(ARM_INST_ADD, rd, rn, rm)
+#define ARM_ADDS_R(rd, rn, rm) _AL3_R(ARM_INST_ADDS, rd, rn, rm)
#define ARM_ADD_I(rd, rn, imm) _AL3_I(ARM_INST_ADD, rd, rn, imm)
+#define ARM_ADDS_I(rd, rn, imm) _AL3_I(ARM_INST_ADDS, rd, rn, imm)
+#define ARM_ADC_R(rd, rn, rm) _AL3_R(ARM_INST_ADC, rd, rn, rm)
+#define ARM_ADC_I(rd, rn, imm) _AL3_I(ARM_INST_ADC, rd, rn, imm)
#define ARM_AND_R(rd, rn, rm) _AL3_R(ARM_INST_AND, rd, rn, rm)
#define ARM_AND_I(rd, rn, imm) _AL3_I(ARM_INST_AND, rd, rn, imm)
@@ -156,7 +184,9 @@
#define ARM_EOR_I(rd, rn, imm) _AL3_I(ARM_INST_EOR, rd, rn, imm)
#define ARM_LDR_I(rt, rn, off) (ARM_INST_LDR_I | (rt) << 12 | (rn) << 16 \
- | (off))
+ | ((off) & 0xfff))
+#define ARM_LDR_R(rt, rn, rm) (ARM_INST_LDR_R | (rt) << 12 | (rn) << 16 \
+ | (rm))
#define ARM_LDRB_I(rt, rn, off) (ARM_INST_LDRB_I | (rt) << 12 | (rn) << 16 \
| (off))
#define ARM_LDRB_R(rt, rn, rm) (ARM_INST_LDRB_R | (rt) << 12 | (rn) << 16 \
@@ -167,15 +197,23 @@
| (rm))
#define ARM_LDM(rn, regs) (ARM_INST_LDM | (rn) << 16 | (regs))
+#define ARM_LDM_IA(rn, regs) (ARM_INST_LDM_IA | (rn) << 16 | (regs))
#define ARM_LSL_R(rd, rn, rm) (_AL3_R(ARM_INST_LSL, rd, 0, rn) | (rm) << 8)
#define ARM_LSL_I(rd, rn, imm) (_AL3_I(ARM_INST_LSL, rd, 0, rn) | (imm) << 7)
#define ARM_LSR_R(rd, rn, rm) (_AL3_R(ARM_INST_LSR, rd, 0, rn) | (rm) << 8)
#define ARM_LSR_I(rd, rn, imm) (_AL3_I(ARM_INST_LSR, rd, 0, rn) | (imm) << 7)
+#define ARM_ASR_R(rd, rn, rm) (_AL3_R(ARM_INST_ASR, rd, 0, rn) | (rm) << 8)
+#define ARM_ASR_I(rd, rn, imm) (_AL3_I(ARM_INST_ASR, rd, 0, rn) | (imm) << 7)
#define ARM_MOV_R(rd, rm) _AL3_R(ARM_INST_MOV, rd, 0, rm)
+#define ARM_MOVS_R(rd, rm) _AL3_R(ARM_INST_MOVS, rd, 0, rm)
#define ARM_MOV_I(rd, imm) _AL3_I(ARM_INST_MOV, rd, 0, imm)
+#define ARM_MOV_SR(rd, rm, type, rs) \
+ (_AL3_SR(ARM_MOV_R(rd, rm)) | (type) << 5 | (rs) << 8)
+#define ARM_MOV_SI(rd, rm, type, imm6) \
+ (ARM_MOV_R(rd, rm) | (type) << 5 | (imm6) << 7)
#define ARM_MOVW(rd, imm) \
(ARM_INST_MOVW | ((imm) >> 12) << 16 | (rd) << 12 | ((imm) & 0x0fff))
@@ -190,19 +228,38 @@
#define ARM_ORR_R(rd, rn, rm) _AL3_R(ARM_INST_ORR, rd, rn, rm)
#define ARM_ORR_I(rd, rn, imm) _AL3_I(ARM_INST_ORR, rd, rn, imm)
-#define ARM_ORR_S(rd, rn, rm, type, rs) \
- (ARM_ORR_R(rd, rn, rm) | (type) << 5 | (rs) << 7)
+#define ARM_ORR_SR(rd, rn, rm, type, rs) \
+ (_AL3_SR(ARM_ORR_R(rd, rn, rm)) | (type) << 5 | (rs) << 8)
+#define ARM_ORRS_R(rd, rn, rm) _AL3_R(ARM_INST_ORRS, rd, rn, rm)
+#define ARM_ORRS_SR(rd, rn, rm, type, rs) \
+ (_AL3_SR(ARM_ORRS_R(rd, rn, rm)) | (type) << 5 | (rs) << 8)
+#define ARM_ORR_SI(rd, rn, rm, type, imm6) \
+ (ARM_ORR_R(rd, rn, rm) | (type) << 5 | (imm6) << 7)
+#define ARM_ORRS_SI(rd, rn, rm, type, imm6) \
+ (ARM_ORRS_R(rd, rn, rm) | (type) << 5 | (imm6) << 7)
#define ARM_REV(rd, rm) (ARM_INST_REV | (rd) << 12 | (rm))
#define ARM_REV16(rd, rm) (ARM_INST_REV16 | (rd) << 12 | (rm))
#define ARM_RSB_I(rd, rn, imm) _AL3_I(ARM_INST_RSB, rd, rn, imm)
+#define ARM_RSBS_I(rd, rn, imm) _AL3_I(ARM_INST_RSBS, rd, rn, imm)
+#define ARM_RSC_I(rd, rn, imm) _AL3_I(ARM_INST_RSC, rd, rn, imm)
#define ARM_SUB_R(rd, rn, rm) _AL3_R(ARM_INST_SUB, rd, rn, rm)
+#define ARM_SUBS_R(rd, rn, rm) _AL3_R(ARM_INST_SUBS, rd, rn, rm)
+#define ARM_RSB_R(rd, rn, rm) _AL3_R(ARM_INST_RSB, rd, rn, rm)
+#define ARM_SBC_R(rd, rn, rm) _AL3_R(ARM_INST_SBC, rd, rn, rm)
+#define ARM_SBCS_R(rd, rn, rm) _AL3_R(ARM_INST_SBCS, rd, rn, rm)
#define ARM_SUB_I(rd, rn, imm) _AL3_I(ARM_INST_SUB, rd, rn, imm)
+#define ARM_SUBS_I(rd, rn, imm) _AL3_I(ARM_INST_SUBS, rd, rn, imm)
+#define ARM_SBC_I(rd, rn, imm) _AL3_I(ARM_INST_SBC, rd, rn, imm)
#define ARM_STR_I(rt, rn, off) (ARM_INST_STR_I | (rt) << 12 | (rn) << 16 \
- | (off))
+ | ((off) & 0xfff))
+#define ARM_STRH_I(rt, rn, off) (ARM_INST_STRH_I | (rt) << 12 | (rn) << 16 \
+ | (((off) & 0xf0) << 4) | ((off) & 0xf))
+#define ARM_STRB_I(rt, rn, off) (ARM_INST_STRB_I | (rt) << 12 | (rn) << 16 \
+ | (((off) & 0xf0) << 4) | ((off) & 0xf))
#define ARM_TST_R(rn, rm) _AL3_R(ARM_INST_TST, 0, rn, rm)
#define ARM_TST_I(rn, imm) _AL3_I(ARM_INST_TST, 0, rn, imm)
@@ -214,5 +271,6 @@
#define ARM_MLS(rd, rn, rm, ra) (ARM_INST_MLS | (rd) << 16 | (rn) | (rm) << 8 \
| (ra) << 12)
+#define ARM_UXTH(rd, rm) (ARM_INST_UXTH | (rd) << 12 | (rm))
#endif /* PFILTER_OPCODES_ARM_H */
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-evb.dts b/arch/arm64/boot/dts/rockchip/rk3328-evb.dts
index cf27239..b9f36da 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-evb.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-evb.dts
@@ -50,6 +50,23 @@
chosen {
stdout-path = "serial2:1500000n8";
};
+
+ vcc_phy: vcc-phy-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_phy";
+ regulator-always-on;
+ regulator-boot-on;
+ };
+};
+
+&gmac2phy {
+ phy-supply = <&vcc_phy>;
+ clock_in_out = "output";
+	assigned-clocks = <&cru SCLK_MAC2PHY_SRC>, <&cru SCLK_MAC2PHY>;
+	assigned-clock-rates = <50000000>, <0>;
+	assigned-clock-parents = <0>, <&cru SCLK_MAC2PHY_SRC>;
+ status = "okay";
};
&uart2 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 0be96ce..d48bf5d 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -63,6 +63,8 @@
i2c1 = &i2c1;
i2c2 = &i2c2;
i2c3 = &i2c3;
+ ethernet0 = &gmac2io;
+ ethernet1 = &gmac2phy;
};
cpus {
@@ -424,6 +426,43 @@
status = "disabled";
};
+ gmac2phy: ethernet@ff550000 {
+ compatible = "rockchip,rk3328-gmac";
+ reg = <0x0 0xff550000 0x0 0x10000>;
+ rockchip,grf = <&grf>;
+ interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq";
+ clocks = <&cru SCLK_MAC2PHY_SRC>, <&cru SCLK_MAC2PHY_RXTX>,
+ <&cru SCLK_MAC2PHY_RXTX>, <&cru SCLK_MAC2PHY_REF>,
+ <&cru ACLK_MAC2PHY>, <&cru PCLK_MAC2PHY>,
+ <&cru SCLK_MAC2PHY_OUT>;
+ clock-names = "stmmaceth", "mac_clk_rx",
+ "mac_clk_tx", "clk_mac_ref",
+ "aclk_mac", "pclk_mac",
+ "clk_macphy";
+ resets = <&cru SRST_GMAC2PHY_A>, <&cru SRST_MACPHY>;
+ reset-names = "stmmaceth", "mac-phy";
+ phy-mode = "rmii";
+ phy-handle = <&phy>;
+ status = "disabled";
+
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy: phy@0 {
+ compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ clocks = <&cru SCLK_MAC2PHY_OUT>;
+ resets = <&cru SRST_MACPHY>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&fephyled_rxm1 &fephyled_linkm1>;
+ phy-is-integrated;
+ };
+ };
+ };
+
gic: interrupt-controller@ff811000 {
compatible = "arm,gic-400";
#interrupt-cells = <3>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index b4ca115..cdde4f5 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -203,6 +203,7 @@ CONFIG_MARVELL_PHY=m
CONFIG_MESON_GXL_PHY=m
CONFIG_MICREL_PHY=y
CONFIG_REALTEK_PHY=m
+CONFIG_ROCKCHIP_PHY=y
CONFIG_USB_PEGASUS=m
CONFIG_USB_RTL8150=m
CONFIG_USB_RTL8152=m
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index b02a926..783de51 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -44,8 +44,12 @@
#define A64_COND_NE AARCH64_INSN_COND_NE /* != */
#define A64_COND_CS AARCH64_INSN_COND_CS /* unsigned >= */
#define A64_COND_HI AARCH64_INSN_COND_HI /* unsigned > */
+#define A64_COND_LS AARCH64_INSN_COND_LS /* unsigned <= */
+#define A64_COND_CC AARCH64_INSN_COND_CC /* unsigned < */
#define A64_COND_GE AARCH64_INSN_COND_GE /* signed >= */
#define A64_COND_GT AARCH64_INSN_COND_GT /* signed > */
+#define A64_COND_LE AARCH64_INSN_COND_LE /* signed <= */
+#define A64_COND_LT AARCH64_INSN_COND_LT /* signed < */
#define A64_B_(cond, imm19) A64_COND_BRANCH(cond, (imm19) << 2)
/* Unconditional branch (immediate) */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f32144b..ba38d40 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -527,10 +527,14 @@ emit_bswap_uxt:
/* IF (dst COND src) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JNE | BPF_X:
case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
emit(A64_CMP(1, dst, src), ctx);
emit_cond_jmp:
jmp_offset = bpf2a64_offset(i + off, i, ctx);
@@ -542,9 +546,15 @@ emit_cond_jmp:
case BPF_JGT:
jmp_cond = A64_COND_HI;
break;
+ case BPF_JLT:
+ jmp_cond = A64_COND_CC;
+ break;
case BPF_JGE:
jmp_cond = A64_COND_CS;
break;
+ case BPF_JLE:
+ jmp_cond = A64_COND_LS;
+ break;
case BPF_JSET:
case BPF_JNE:
jmp_cond = A64_COND_NE;
@@ -552,9 +562,15 @@ emit_cond_jmp:
case BPF_JSGT:
jmp_cond = A64_COND_GT;
break;
+ case BPF_JSLT:
+ jmp_cond = A64_COND_LT;
+ break;
case BPF_JSGE:
jmp_cond = A64_COND_GE;
break;
+ case BPF_JSLE:
+ jmp_cond = A64_COND_LE;
+ break;
default:
return -EFAULT;
}
@@ -566,10 +582,14 @@ emit_cond_jmp:
/* IF (dst COND imm) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
emit_a64_mov_i(1, tmp, imm, ctx);
emit(A64_CMP(1, dst, tmp), ctx);
goto emit_cond_jmp;
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index f1e3b20..9abf02d 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -102,5 +102,7 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 5dd5c5d..002eb85 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -111,4 +111,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index f8f7b47..e268e51 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -102,4 +102,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 882823b..6c755bc 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -120,4 +120,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 3f87b96..7646891 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -113,6 +113,7 @@ struct jit_ctx {
u64 *reg_val_types;
unsigned int long_b_conversion:1;
unsigned int gen_b_offsets:1;
+ unsigned int use_bbit_insns:1;
};
static void set_reg_val_type(u64 *rvt, int reg, enum reg_val_type type)
@@ -655,19 +656,6 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx)
return build_int_epilogue(ctx, MIPS_R_T9);
}
-static bool use_bbit_insns(void)
-{
- switch (current_cpu_type()) {
- case CPU_CAVIUM_OCTEON:
- case CPU_CAVIUM_OCTEON_PLUS:
- case CPU_CAVIUM_OCTEON2:
- case CPU_CAVIUM_OCTEON3:
- return true;
- default:
- return false;
- }
-}
-
static bool is_bad_offset(int b_off)
{
return b_off > 0x1ffff || b_off < -0x20000;
@@ -682,6 +670,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
unsigned int target;
u64 t64;
s64 t64s;
+ int bpf_op = BPF_OP(insn->code);
switch (insn->code) {
case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */
@@ -770,13 +759,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit_instr(ctx, sll, dst, dst, 0);
if (insn->imm == 1) {
/* div by 1 is a nop, mod by 1 is zero */
- if (BPF_OP(insn->code) == BPF_MOD)
+ if (bpf_op == BPF_MOD)
emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO);
break;
}
gen_imm_to_reg(insn, MIPS_R_AT, ctx);
emit_instr(ctx, divu, dst, MIPS_R_AT);
- if (BPF_OP(insn->code) == BPF_DIV)
+ if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
else
emit_instr(ctx, mfhi, dst);
@@ -798,13 +787,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
if (insn->imm == 1) {
/* div by 1 is a nop, mod by 1 is zero */
- if (BPF_OP(insn->code) == BPF_MOD)
+ if (bpf_op == BPF_MOD)
emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO);
break;
}
gen_imm_to_reg(insn, MIPS_R_AT, ctx);
emit_instr(ctx, ddivu, dst, MIPS_R_AT);
- if (BPF_OP(insn->code) == BPF_DIV)
+ if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
else
emit_instr(ctx, mfhi, dst);
@@ -829,7 +818,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
did_move = false;
if (insn->src_reg == BPF_REG_10) {
- if (BPF_OP(insn->code) == BPF_MOV) {
+ if (bpf_op == BPF_MOV) {
emit_instr(ctx, daddiu, dst, MIPS_R_SP, MAX_BPF_STACK);
did_move = true;
} else {
@@ -839,7 +828,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
} else if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
int tmp_reg = MIPS_R_AT;
- if (BPF_OP(insn->code) == BPF_MOV) {
+ if (bpf_op == BPF_MOV) {
tmp_reg = dst;
did_move = true;
}
@@ -847,7 +836,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit_instr(ctx, dinsu, tmp_reg, MIPS_R_ZERO, 32, 32);
src = MIPS_R_AT;
}
- switch (BPF_OP(insn->code)) {
+ switch (bpf_op) {
case BPF_MOV:
if (!did_move)
emit_instr(ctx, daddu, dst, src, MIPS_R_ZERO);
@@ -879,7 +868,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
emit_instr(ctx, ddivu, dst, src);
- if (BPF_OP(insn->code) == BPF_DIV)
+ if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
else
emit_instr(ctx, mfhi, dst);
@@ -923,7 +912,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
if (ts == REG_64BIT || ts == REG_32BIT_ZERO_EX) {
int tmp_reg = MIPS_R_AT;
- if (BPF_OP(insn->code) == BPF_MOV) {
+ if (bpf_op == BPF_MOV) {
tmp_reg = dst;
did_move = true;
}
@@ -931,7 +920,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit_instr(ctx, sll, tmp_reg, src, 0);
src = MIPS_R_AT;
}
- switch (BPF_OP(insn->code)) {
+ switch (bpf_op) {
case BPF_MOV:
if (!did_move)
emit_instr(ctx, addu, dst, src, MIPS_R_ZERO);
@@ -962,7 +951,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
emit_instr(ctx, divu, dst, src);
- if (BPF_OP(insn->code) == BPF_DIV)
+ if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
else
emit_instr(ctx, mfhi, dst);
@@ -989,7 +978,7 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_JMP | BPF_JEQ | BPF_K: /* JMP_IMM */
case BPF_JMP | BPF_JNE | BPF_K: /* JMP_IMM */
- cmp_eq = (BPF_OP(insn->code) == BPF_JEQ);
+ cmp_eq = (bpf_op == BPF_JEQ);
dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
if (dst < 0)
return dst;
@@ -1002,8 +991,12 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
goto jeq_common;
case BPF_JMP | BPF_JEQ | BPF_X: /* JMP_REG */
case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
case BPF_JMP | BPF_JSGT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JGT | BPF_X:
case BPF_JMP | BPF_JGE | BPF_X:
case BPF_JMP | BPF_JSET | BPF_X:
@@ -1020,33 +1013,39 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit_instr(ctx, sll, MIPS_R_AT, dst, 0);
dst = MIPS_R_AT;
}
- if (BPF_OP(insn->code) == BPF_JSET) {
+ if (bpf_op == BPF_JSET) {
emit_instr(ctx, and, MIPS_R_AT, dst, src);
cmp_eq = false;
dst = MIPS_R_AT;
src = MIPS_R_ZERO;
- } else if (BPF_OP(insn->code) == BPF_JSGT) {
+ } else if (bpf_op == BPF_JSGT || bpf_op == BPF_JSLE) {
emit_instr(ctx, dsubu, MIPS_R_AT, dst, src);
if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
b_off = b_imm(exit_idx, ctx);
if (is_bad_offset(b_off))
return -E2BIG;
- emit_instr(ctx, blez, MIPS_R_AT, b_off);
+ if (bpf_op == BPF_JSGT)
+ emit_instr(ctx, blez, MIPS_R_AT, b_off);
+ else
+ emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
emit_instr(ctx, nop);
return 2; /* We consumed the exit. */
}
b_off = b_imm(this_idx + insn->off + 1, ctx);
if (is_bad_offset(b_off))
return -E2BIG;
- emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
+ if (bpf_op == BPF_JSGT)
+ emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
+ else
+ emit_instr(ctx, blez, MIPS_R_AT, b_off);
emit_instr(ctx, nop);
break;
- } else if (BPF_OP(insn->code) == BPF_JSGE) {
+ } else if (bpf_op == BPF_JSGE || bpf_op == BPF_JSLT) {
emit_instr(ctx, slt, MIPS_R_AT, dst, src);
- cmp_eq = true;
+ cmp_eq = bpf_op == BPF_JSGE;
dst = MIPS_R_AT;
src = MIPS_R_ZERO;
- } else if (BPF_OP(insn->code) == BPF_JGT) {
+ } else if (bpf_op == BPF_JGT || bpf_op == BPF_JLE) {
/* dst or src could be AT */
emit_instr(ctx, dsubu, MIPS_R_T8, dst, src);
emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
@@ -1054,16 +1053,16 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit_instr(ctx, movz, MIPS_R_T9, MIPS_R_SP, MIPS_R_T8);
emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_ZERO, MIPS_R_T8);
emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT);
- cmp_eq = true;
+ cmp_eq = bpf_op == BPF_JGT;
dst = MIPS_R_AT;
src = MIPS_R_ZERO;
- } else if (BPF_OP(insn->code) == BPF_JGE) {
+ } else if (bpf_op == BPF_JGE || bpf_op == BPF_JLT) {
emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
- cmp_eq = true;
+ cmp_eq = bpf_op == BPF_JGE;
dst = MIPS_R_AT;
src = MIPS_R_ZERO;
} else { /* JNE/JEQ case */
- cmp_eq = (BPF_OP(insn->code) == BPF_JEQ);
+ cmp_eq = (bpf_op == BPF_JEQ);
}
jeq_common:
/*
@@ -1122,7 +1121,9 @@ jeq_common:
break;
case BPF_JMP | BPF_JSGT | BPF_K: /* JMP_IMM */
case BPF_JMP | BPF_JSGE | BPF_K: /* JMP_IMM */
- cmp_eq = (BPF_OP(insn->code) == BPF_JSGE);
+ case BPF_JMP | BPF_JSLT | BPF_K: /* JMP_IMM */
+ case BPF_JMP | BPF_JSLE | BPF_K: /* JMP_IMM */
+ cmp_eq = (bpf_op == BPF_JSGE);
dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
if (dst < 0)
return dst;
@@ -1132,65 +1133,92 @@ jeq_common:
b_off = b_imm(exit_idx, ctx);
if (is_bad_offset(b_off))
return -E2BIG;
- if (cmp_eq)
- emit_instr(ctx, bltz, dst, b_off);
- else
+ switch (bpf_op) {
+ case BPF_JSGT:
emit_instr(ctx, blez, dst, b_off);
+ break;
+ case BPF_JSGE:
+ emit_instr(ctx, bltz, dst, b_off);
+ break;
+ case BPF_JSLT:
+ emit_instr(ctx, bgez, dst, b_off);
+ break;
+ case BPF_JSLE:
+ emit_instr(ctx, bgtz, dst, b_off);
+ break;
+ }
emit_instr(ctx, nop);
return 2; /* We consumed the exit. */
}
b_off = b_imm(this_idx + insn->off + 1, ctx);
if (is_bad_offset(b_off))
return -E2BIG;
- if (cmp_eq)
- emit_instr(ctx, bgez, dst, b_off);
- else
+ switch (bpf_op) {
+ case BPF_JSGT:
emit_instr(ctx, bgtz, dst, b_off);
+ break;
+ case BPF_JSGE:
+ emit_instr(ctx, bgez, dst, b_off);
+ break;
+ case BPF_JSLT:
+ emit_instr(ctx, bltz, dst, b_off);
+ break;
+ case BPF_JSLE:
+ emit_instr(ctx, blez, dst, b_off);
+ break;
+ }
emit_instr(ctx, nop);
break;
}
/*
* only "LT" compare available, so we must use imm + 1
- * to generate "GT"
+	 * to generate "GT" and also imm + 1 to generate "LE"
*/
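+	/* e.g. dst > 5 is tested as !(dst < 6), dst <= 5 as (dst < 6) */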
- t64s = insn->imm + (cmp_eq ? 0 : 1);
+		if (bpf_op == BPF_JSGT || bpf_op == BPF_JSLE)
+			t64s = insn->imm + 1;
+		else
+			t64s = insn->imm;
+
+ cmp_eq = bpf_op == BPF_JSGT || bpf_op == BPF_JSGE;
if (t64s >= S16_MIN && t64s <= S16_MAX) {
emit_instr(ctx, slti, MIPS_R_AT, dst, (int)t64s);
src = MIPS_R_AT;
dst = MIPS_R_ZERO;
- cmp_eq = true;
goto jeq_common;
}
emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s);
emit_instr(ctx, slt, MIPS_R_AT, dst, MIPS_R_AT);
src = MIPS_R_AT;
dst = MIPS_R_ZERO;
- cmp_eq = true;
goto jeq_common;
case BPF_JMP | BPF_JGT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
- cmp_eq = (BPF_OP(insn->code) == BPF_JGE);
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
+ cmp_eq = (bpf_op == BPF_JGE);
dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok);
if (dst < 0)
return dst;
/*
* only "LT" compare available, so we must use imm + 1
- * to generate "GT"
+	 * to generate "GT" and also imm + 1 to generate "LE"
*/
- t64s = (u64)(u32)(insn->imm) + (cmp_eq ? 0 : 1);
- if (t64s >= 0 && t64s <= S16_MAX) {
- emit_instr(ctx, sltiu, MIPS_R_AT, dst, (int)t64s);
- src = MIPS_R_AT;
- dst = MIPS_R_ZERO;
- cmp_eq = true;
- goto jeq_common;
- }
+		if (bpf_op == BPF_JGT || bpf_op == BPF_JLE)
+			t64s = (u64)(u32)(insn->imm) + 1;
+		else
+			t64s = (u64)(u32)(insn->imm);
+
+ cmp_eq = bpf_op == BPF_JGT || bpf_op == BPF_JGE;
+
emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s);
emit_instr(ctx, sltu, MIPS_R_AT, dst, MIPS_R_AT);
src = MIPS_R_AT;
dst = MIPS_R_ZERO;
- cmp_eq = true;
goto jeq_common;
case BPF_JMP | BPF_JSET | BPF_K: /* JMP_IMM */
@@ -1198,7 +1226,7 @@ jeq_common:
if (dst < 0)
return dst;
- if (use_bbit_insns() && hweight32((u32)insn->imm) == 1) {
+ if (ctx->use_bbit_insns && hweight32((u32)insn->imm) == 1) {
if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
b_off = b_imm(exit_idx, ctx);
if (is_bad_offset(b_off))
@@ -1724,10 +1752,14 @@ static int reg_val_propagate_range(struct jit_ctx *ctx, u64 initial_rvt,
case BPF_JEQ:
case BPF_JGT:
case BPF_JGE:
+ case BPF_JLT:
+ case BPF_JLE:
case BPF_JSET:
case BPF_JNE:
case BPF_JSGT:
case BPF_JSGE:
+ case BPF_JSLT:
+ case BPF_JSLE:
if (follow_taken) {
rvt[idx] |= RVT_BRANCH_TAKEN;
idx += insn->off;
@@ -1853,6 +1885,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
memset(&ctx, 0, sizeof(ctx));
+ preempt_disable();
+ switch (current_cpu_type()) {
+ case CPU_CAVIUM_OCTEON:
+ case CPU_CAVIUM_OCTEON_PLUS:
+ case CPU_CAVIUM_OCTEON2:
+ case CPU_CAVIUM_OCTEON3:
+ ctx.use_bbit_insns = 1;
+ break;
+ default:
+ ctx.use_bbit_insns = 0;
+ }
+ preempt_enable();
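+	/* The CPU type is sampled once here, with preemption
+	 * disabled, so the choice of bbit0/bbit1 instructions
+	 * cannot change while the program is being JITed.
+	 */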
+
ctx.offsets = kcalloc(prog->len + 1, sizeof(*ctx.offsets), GFP_KERNEL);
if (ctx.offsets == NULL)
goto out_err;
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index c710db3..ac82a3f 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -102,4 +102,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig
index 0764d39..8d41a73 100644
--- a/arch/parisc/configs/c3000_defconfig
+++ b/arch/parisc/configs/c3000_defconfig
@@ -31,7 +31,6 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_TUNNEL=m
CONFIG_NETFILTER=y
-CONFIG_NETFILTER_DEBUG=y
CONFIG_NET_PKTGEN=m
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index a0d4dc9..3b2bf7a 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -101,4 +101,6 @@
#define SO_PEERGROUPS 0x4034
+#define SO_ZEROCOPY 0x4035
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 30cf03f..47fc666 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -263,6 +263,7 @@ static inline bool is_nearbranch(int offset)
#define COND_EQ (CR0_EQ | COND_CMP_TRUE)
#define COND_NE (CR0_EQ | COND_CMP_FALSE)
#define COND_LT (CR0_LT | COND_CMP_TRUE)
+#define COND_LE (CR0_GT | COND_CMP_FALSE)
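+/* "<=" is the negation of ">": take the branch when CR0[GT] is false */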
#endif
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 861c5af..faf2016 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -795,12 +795,24 @@ emit_clear:
case BPF_JMP | BPF_JSGT | BPF_X:
true_cond = COND_GT;
goto cond_branch;
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ true_cond = COND_LT;
+ goto cond_branch;
case BPF_JMP | BPF_JGE | BPF_K:
case BPF_JMP | BPF_JGE | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_X:
true_cond = COND_GE;
goto cond_branch;
+ case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ true_cond = COND_LE;
+ goto cond_branch;
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JEQ | BPF_X:
true_cond = COND_EQ;
@@ -817,14 +829,18 @@ emit_clear:
cond_branch:
switch (code) {
case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JNE | BPF_X:
/* unsigned comparison */
PPC_CMPLD(dst_reg, src_reg);
break;
case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
/* signed comparison */
PPC_CMPD(dst_reg, src_reg);
break;
@@ -834,7 +850,9 @@ cond_branch:
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
/*
* Need sign-extended load, so only positive
* values can be used as imm in cmpldi
@@ -849,7 +867,9 @@ cond_branch:
}
break;
case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
/*
* signed comparison, so any 16-bit value
* can be used in cmpdi
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 52a63f4..a56916c 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -108,4 +108,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 1803797..8ec8849 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1093,15 +1093,27 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
mask = 0x2000; /* jh */
goto branch_ks;
+ case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
+ mask = 0x4000; /* jl */
+ goto branch_ks;
case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
mask = 0xa000; /* jhe */
goto branch_ks;
+ case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
+ mask = 0xc000; /* jle */
+ goto branch_ks;
case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
mask = 0x2000; /* jh */
goto branch_ku;
+ case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
+ mask = 0x4000; /* jl */
+ goto branch_ku;
case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
mask = 0xa000; /* jhe */
goto branch_ku;
+ case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
+ mask = 0xc000; /* jle */
+ goto branch_ku;
case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
mask = 0x7000; /* jne */
goto branch_ku;
@@ -1119,15 +1131,27 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
mask = 0x2000; /* jh */
goto branch_xs;
+ case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
+ mask = 0x4000; /* jl */
+ goto branch_xs;
case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
mask = 0xa000; /* jhe */
goto branch_xs;
+ case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
+ mask = 0xc000; /* jle */
+ goto branch_xs;
case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
mask = 0x2000; /* jh */
goto branch_xu;
+ case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
+ mask = 0x4000; /* jl */
+ goto branch_xu;
case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
mask = 0xa000; /* jhe */
goto branch_xu;
+ case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
+ mask = 0xc000; /* jle */
+ goto branch_xu;
case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
mask = 0x7000; /* jne */
goto branch_xu;
diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig
index 75c92fc..56b5e4c 100644
--- a/arch/sh/configs/se7751_defconfig
+++ b/arch/sh/configs/se7751_defconfig
@@ -28,7 +28,6 @@ CONFIG_IP_PNP_RARP=y
# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
-CONFIG_NETFILTER_DEBUG=y
CONFIG_IP_NF_QUEUE=y
CONFIG_MTD=y
CONFIG_MTD_PARTITIONS=y
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 186fd81..b2f5c50 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -98,6 +98,8 @@
#define SO_PEERGROUPS 0x003d
+#define SO_ZEROCOPY 0x003e
+
/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 8799ae9..c340af7 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -128,6 +128,8 @@ static u32 WDISP10(u32 off)
#define BA (BRANCH | CONDA)
#define BG (BRANCH | CONDG)
+#define BL (BRANCH | CONDL)
+#define BLE (BRANCH | CONDLE)
#define BGU (BRANCH | CONDGU)
#define BLEU (BRANCH | CONDLEU)
#define BGE (BRANCH | CONDGE)
@@ -715,9 +717,15 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
case BPF_JGT:
br_opcode = BGU;
break;
+ case BPF_JLT:
+ br_opcode = BLU;
+ break;
case BPF_JGE:
br_opcode = BGEU;
break;
+ case BPF_JLE:
+ br_opcode = BLEU;
+ break;
case BPF_JSET:
case BPF_JNE:
br_opcode = BNE;
@@ -725,9 +733,15 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
case BPF_JSGT:
br_opcode = BG;
break;
+ case BPF_JSLT:
+ br_opcode = BL;
+ break;
case BPF_JSGE:
br_opcode = BGE;
break;
+ case BPF_JSLE:
+ br_opcode = BLE;
+ break;
default:
/* Make sure we dont leak kernel information to the
* user.
@@ -746,18 +760,30 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
case BPF_JGT:
cbcond_opcode = CBCONDGU;
break;
+ case BPF_JLT:
+ cbcond_opcode = CBCONDLU;
+ break;
case BPF_JGE:
cbcond_opcode = CBCONDGEU;
break;
+ case BPF_JLE:
+ cbcond_opcode = CBCONDLEU;
+ break;
case BPF_JNE:
cbcond_opcode = CBCONDNE;
break;
case BPF_JSGT:
cbcond_opcode = CBCONDG;
break;
+ case BPF_JSLT:
+ cbcond_opcode = CBCONDL;
+ break;
case BPF_JSGE:
cbcond_opcode = CBCONDGE;
break;
+ case BPF_JSLE:
+ cbcond_opcode = CBCONDLE;
+ break;
default:
/* Make sure we don't leak kernel information to the
* user.
@@ -1176,10 +1202,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
/* IF (dst COND src) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JNE | BPF_X:
case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
case BPF_JMP | BPF_JSET | BPF_X: {
int err;
@@ -1191,10 +1221,14 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
/* IF (dst COND imm) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP | BPF_JSET | BPF_K: {
int err;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e1324f2..8c95736 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -94,7 +94,9 @@ static int bpf_size_to_x86_bytes(int bpf_size)
#define X86_JNE 0x75
#define X86_JBE 0x76
#define X86_JA 0x77
+#define X86_JL 0x7C
#define X86_JGE 0x7D
+#define X86_JLE 0x7E
#define X86_JG 0x7F
static void bpf_flush_icache(void *start, void *end)
@@ -285,7 +287,7 @@ static void emit_bpf_tail_call(u8 **pprog)
EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */
offsetof(struct bpf_array, map.max_entries));
EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */
-#define OFFSET1 47 /* number of bytes to jump */
+#define OFFSET1 43 /* number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
@@ -294,21 +296,20 @@ static void emit_bpf_tail_call(u8 **pprog)
*/
EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 36
+#define OFFSET2 32
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, 36); /* mov dword ptr [rbp + 36], eax */
/* prog = array->ptrs[index]; */
- EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
+ EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
offsetof(struct bpf_array, ptrs));
- EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */
/* if (prog == NULL)
* goto out;
*/
- EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */
+ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
#define OFFSET3 10
EMIT2(X86_JE, OFFSET3); /* je out */
label3 = cnt;
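
(The offset changes follow from the shorter sequence, assuming the standard x86-64 encodings emitted here: the old lea r64, [r+r*8+off32] plus mov r64, [r64] pair is 8 + 3 = 11 bytes and is replaced by a single 8-byte mov r64, [r+r*8+off32], while the 4-byte cmp rax, 0 becomes the 3-byte test rax, rax. The body between the conditional jumps and the out label therefore shrinks by 4 bytes, so OFFSET1/OFFSET2 drop from 47/36 to 43/32.)
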
@@ -888,9 +889,13 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP | BPF_JNE | BPF_X:
case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
/* cmp dst_reg, src_reg */
EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
add_2reg(0xC0, dst_reg, src_reg));
@@ -911,9 +916,13 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
/* cmp dst_reg, imm8/32 */
EMIT1(add_1mod(0x48, dst_reg));
@@ -935,18 +944,34 @@ emit_cond_jmp: /* convert BPF opcode to x86 */
/* GT is unsigned '>', JA in x86 */
jmp_cond = X86_JA;
break;
+ case BPF_JLT:
+ /* LT is unsigned '<', JB in x86 */
+ jmp_cond = X86_JB;
+ break;
case BPF_JGE:
/* GE is unsigned '>=', JAE in x86 */
jmp_cond = X86_JAE;
break;
+ case BPF_JLE:
+ /* LE is unsigned '<=', JBE in x86 */
+ jmp_cond = X86_JBE;
+ break;
case BPF_JSGT:
/* signed '>', GT in x86 */
jmp_cond = X86_JG;
break;
+ case BPF_JSLT:
+ /* signed '<', LT in x86 */
+ jmp_cond = X86_JL;
+ break;
case BPF_JSGE:
/* signed '>=', GE in x86 */
jmp_cond = X86_JGE;
break;
+ case BPF_JSLE:
+ /* signed '<=', LE in x86 */
+ jmp_cond = X86_JLE;
+ break;
default: /* to silence gcc warning */
return -EFAULT;
}
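
The s390, sparc64 and x86 hunks above all wire up the same four new BPF jump conditions (BPF_JLT, BPF_JLE, BPF_JSLT, BPF_JSLE). As a minimal sketch of what one of them looks like from the instruction side — in the style of lib/test_bpf.c, assuming the insn-building macros from include/linux/filter.h — a program that returns 1 iff R1 < 7, unsigned:

	struct bpf_insn prog[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),		/* r0 = 0 */
		BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 7, 1),	/* if (r1 < 7) skip next insn */
		BPF_EXIT_INSN(),			/* r1 >= 7: return 0 */
		BPF_MOV64_IMM(BPF_REG_0, 1),		/* r0 = 1 */
		BPF_EXIT_INSN(),			/* r1 < 7: return 1 */
	};
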
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 3eed276..2200599 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -113,4 +113,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* _XTENSA_SOCKET_H */
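
The per-arch SO_ZEROCOPY values added above only reserve the option number; a hedged user-space sketch of how the option pairs with the MSG_ZEROCOPY send flag (assuming libc headers new enough to define both, error handling abbreviated):

	#include <sys/socket.h>

	static int send_zerocopy(int fd, const void *buf, size_t len)
	{
		char control[128];
		struct msghdr msg = {
			.msg_control = control,
			.msg_controllen = sizeof(control),
		};
		int one = 1;

		if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)) < 0)
			return -1;

		if (send(fd, buf, len, MSG_ZEROCOPY) < 0)
			return -1;

		/* pages backing buf must stay untouched until a completion
		 * notification is read from the socket error queue
		 */
		return recvmsg(fd, &msg, MSG_ERRQUEUE);
	}
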
diff --git a/drivers/atm/adummy.c b/drivers/atm/adummy.c
index 1fd25e8..8d98130 100644
--- a/drivers/atm/adummy.c
+++ b/drivers/atm/adummy.c
@@ -71,7 +71,7 @@ static struct attribute *adummy_attrs[] = {
NULL
};
-static struct attribute_group adummy_group_attrs = {
+static const struct attribute_group adummy_group_attrs = {
.name = NULL, /* We want them in dev's root folder */
.attrs = adummy_attrs
};
@@ -130,7 +130,7 @@ adummy_proc_read(struct atm_dev *dev, loff_t *pos, char *page)
return 0;
}
-static struct atmdev_ops adummy_ops =
+static const struct atmdev_ops adummy_ops =
{
.open = adummy_open,
.close = adummy_close,
diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c
index 906705e..acf16c3 100644
--- a/drivers/atm/ambassador.c
+++ b/drivers/atm/ambassador.c
@@ -2374,7 +2374,7 @@ MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
/********** module entry **********/
-static struct pci_device_id amb_pci_tbl[] = {
+static const struct pci_device_id amb_pci_tbl[] = {
{ PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR), 0 },
{ PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD), 0 },
{ 0, }
diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index 56fa16c..afebeb1c 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -342,7 +342,7 @@ static struct atmdev_ops atmtcp_v_dev_ops = {
*/
-static struct atmdev_ops atmtcp_c_dev_ops = {
+static const struct atmdev_ops atmtcp_c_dev_ops = {
.close = atmtcp_c_close,
.send = atmtcp_c_send
};
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index b042ec4..ce47eb1 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -2292,7 +2292,7 @@ err_disable:
}
-static struct pci_device_id eni_pci_tbl[] = {
+static const struct pci_device_id eni_pci_tbl[] = {
{ PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_FPGA), 0 /* FPGA */ },
{ PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_ASIC), 1 /* ASIC */ },
{ 0, }
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c
index 22dcab9..6b6368a 100644
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -2030,7 +2030,7 @@ static void firestream_remove_one(struct pci_dev *pdev)
func_exit ();
}
-static struct pci_device_id firestream_pci_tbl[] = {
+static const struct pci_device_id firestream_pci_tbl[] = {
{ PCI_VDEVICE(FUJITSU_ME, PCI_DEVICE_ID_FUJITSU_FS50), FS_IS50},
{ PCI_VDEVICE(FUJITSU_ME, PCI_DEVICE_ID_FUJITSU_FS155), FS_IS155},
{ 0, }
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index f0433ad..f8b7e86 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -2757,7 +2757,7 @@ static void fore200e_pca_remove_one(struct pci_dev *pci_dev)
}
-static struct pci_device_id fore200e_pca_tbl[] = {
+static const struct pci_device_id fore200e_pca_tbl[] = {
{ PCI_VENDOR_ID_FORE, PCI_DEVICE_ID_FORE_PCA200E, PCI_ANY_ID, PCI_ANY_ID,
0, 0, (unsigned long) &fore200e_bus[0] },
{ 0, }
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 37ee21c..e58538c 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -161,7 +161,7 @@ static unsigned int clocktab[] = {
CLK_LOW
};
-static struct atmdev_ops he_ops =
+static const struct atmdev_ops he_ops =
{
.open = he_open,
.close = he_close,
@@ -2851,7 +2851,7 @@ MODULE_PARM_DESC(irq_coalesce, "use interrupt coalescing (default 1)");
module_param(sdh, bool, 0);
MODULE_PARM_DESC(sdh, "use SDH framing (default 0)");
-static struct pci_device_id he_pci_tbl[] = {
+static const struct pci_device_id he_pci_tbl[] = {
{ PCI_VDEVICE(FORE, PCI_DEVICE_ID_FORE_HE), 0 },
{ 0, }
};
diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c
index 0f18480..7e76b35 100644
--- a/drivers/atm/horizon.c
+++ b/drivers/atm/horizon.c
@@ -2867,7 +2867,7 @@ MODULE_PARM_DESC(max_tx_size, "maximum size of TX AAL5 frames");
MODULE_PARM_DESC(max_rx_size, "maximum size of RX AAL5 frames");
MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
-static struct pci_device_id hrz_pci_tbl[] = {
+static const struct pci_device_id hrz_pci_tbl[] = {
{ PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_HORIZON, PCI_ANY_ID, PCI_ANY_ID,
0, 0, 0 },
{ 0, }
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 60bacba..47f3c4a 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -134,7 +134,7 @@ static int idt77252_proc_read(struct atm_dev *dev, loff_t * pos,
static void idt77252_softint(struct work_struct *work);
-static struct atmdev_ops idt77252_ops =
+static const struct atmdev_ops idt77252_ops =
{
.dev_close = idt77252_dev_close,
.open = idt77252_open,
@@ -3725,7 +3725,7 @@ err_out_disable_pdev:
return err;
}
-static struct pci_device_id idt77252_pci_tbl[] =
+static const struct pci_device_id idt77252_pci_tbl[] =
{
{ PCI_VDEVICE(IDT, PCI_DEVICE_ID_IDT_IDT77252), 0 },
{ 0, }
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index a4fa6c8..fc72b763 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -3266,7 +3266,7 @@ static void ia_remove_one(struct pci_dev *pdev)
kfree(iadev);
}
-static struct pci_device_id ia_pci_tbl[] = {
+static const struct pci_device_id ia_pci_tbl[] = {
{ PCI_VENDOR_ID_IPHASE, 0x0008, PCI_ANY_ID, PCI_ANY_ID, },
{ PCI_VENDOR_ID_IPHASE, 0x0009, PCI_ANY_ID, PCI_ANY_ID, },
{ 0,}
diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c
index 1a9bc51..2351dad 100644
--- a/drivers/atm/lanai.c
+++ b/drivers/atm/lanai.c
@@ -2589,7 +2589,7 @@ static int lanai_init_one(struct pci_dev *pci,
return result;
}
-static struct pci_device_id lanai_pci_tbl[] = {
+static const struct pci_device_id lanai_pci_tbl[] = {
{ PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_LANAI2) },
{ PCI_VDEVICE(EF, PCI_DEVICE_ID_EF_ATM_LANAIHB) },
{ 0, } /* terminal entry */
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index d879f3b..a970283 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -154,7 +154,7 @@ static unsigned char ns_phy_get(struct atm_dev *dev, unsigned long addr);
static struct ns_dev *cards[NS_MAX_CARDS];
static unsigned num_cards;
-static struct atmdev_ops atm_ops = {
+static const struct atmdev_ops atm_ops = {
.open = ns_open,
.close = ns_close,
.ioctl = ns_ioctl,
@@ -253,7 +253,7 @@ static void nicstar_remove_one(struct pci_dev *pcidev)
kfree(card);
}
-static struct pci_device_id nicstar_pci_tbl[] = {
+static const struct pci_device_id nicstar_pci_tbl[] = {
{ PCI_VDEVICE(IDT, PCI_DEVICE_ID_IDT_IDT77201), 0 },
{0,} /* terminate list */
};
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index c8f2ca6..0df1a1c 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -611,7 +611,7 @@ static struct attribute *solos_attrs[] = {
NULL
};
-static struct attribute_group solos_attr_group = {
+static const struct attribute_group solos_attr_group = {
.attrs = solos_attrs,
.name = "parameters",
};
@@ -628,7 +628,7 @@ static struct attribute *gpio_attrs[] = {
NULL
};
-static struct attribute_group gpio_attr_group = {
+static const struct attribute_group gpio_attr_group = {
.attrs = gpio_attrs,
.name = "gpio",
};
@@ -1187,7 +1187,7 @@ static int psend(struct atm_vcc *vcc, struct sk_buff *skb)
return 0;
}
-static struct atmdev_ops fpga_ops = {
+static const struct atmdev_ops fpga_ops = {
.open = popen,
.close = pclose,
.ioctl = NULL,
@@ -1476,7 +1476,7 @@ static void fpga_remove(struct pci_dev *dev)
kfree(card);
}
-static struct pci_device_id fpga_pci_tbl[] = {
+static const struct pci_device_id fpga_pci_tbl[] = {
{ 0x10ee, 0x0300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
{ 0, }
};
diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index 07bdd51..1ef67db 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -1642,7 +1642,7 @@ out_free:
MODULE_LICENSE("GPL");
-static struct pci_device_id zatm_pci_tbl[] = {
+static const struct pci_device_id zatm_pci_tbl[] = {
{ PCI_VDEVICE(ZEITNET, PCI_DEVICE_ID_ZEITNET_1221), ZATM_COPPER },
{ PCI_VDEVICE(ZEITNET, PCI_DEVICE_ID_ZEITNET_1225), 0 },
{ 0, }
diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig
index b5c48a8..54f81c5 100644
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -3,11 +3,8 @@ config BCMA_POSSIBLE
depends on HAS_IOMEM && HAS_DMA
default y
-menu "Broadcom specific AMBA"
- depends on BCMA_POSSIBLE
-
-config BCMA
- tristate "BCMA support"
+menuconfig BCMA
+ tristate "Broadcom specific AMBA"
depends on BCMA_POSSIBLE
help
Bus driver for Broadcom specific Advanced Microcontroller Bus
@@ -117,5 +114,3 @@ config BCMA_DEBUG
This turns on additional debugging messages.
If unsure, say N
-
-endmenu
diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c
index 7bde8d7..982d578 100644
--- a/drivers/bcma/driver_gpio.c
+++ b/drivers/bcma/driver_gpio.c
@@ -191,6 +191,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc)
case BCMA_CHIP_ID_BCM4707:
case BCMA_CHIP_ID_BCM5357:
case BCMA_CHIP_ID_BCM53572:
+ case BCMA_CHIP_ID_BCM53573:
case BCMA_CHIP_ID_BCM47094:
chip->ngpio = 32;
break;
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index 35952a9..fae5a74 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -98,6 +98,7 @@ config BT_HCIUART_NOKIA
depends on BT_HCIUART_SERDEV
depends on PM
select BT_HCIUART_H4
+ select BT_BCM
help
Nokia H4+ is a serial protocol for communication between the Bluetooth
device and the host. This protocol is required for Bluetooth devices
@@ -167,6 +168,7 @@ config BT_HCIUART_INTEL
config BT_HCIUART_BCM
bool "Broadcom protocol support"
depends on BT_HCIUART
+ depends on BT_HCIUART_SERDEV
select BT_HCIUART_H4
select BT_BCM
help
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index b793853..204afe6 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -140,7 +140,8 @@ MODULE_DEVICE_TABLE(usb, ath3k_table);
#define BTUSB_ATH3012 0x80
/* This table is to load patch and sysconfig files
- * for AR3012 */
+ * for AR3012
+ */
static const struct usb_device_id ath3k_blist_tbl[] = {
/* Atheros AR3012 with sflash firmware*/
diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index d4b0b65..b07ca95 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -93,6 +93,7 @@ static void bluecard_detach(struct pcmcia_device *p_dev);
/* Hardware states */
#define CARD_READY 1
+#define CARD_ACTIVITY 2
#define CARD_HAS_PCCARD_ID 4
#define CARD_HAS_POWER_LED 5
#define CARD_HAS_ACTIVITY_LED 6
@@ -160,16 +161,14 @@ static void bluecard_activity_led_timeout(u_long arg)
struct bluecard_info *info = (struct bluecard_info *)arg;
unsigned int iobase = info->p_dev->resource[0]->start;
- if (!test_bit(CARD_HAS_PCCARD_ID, &(info->hw_state)))
- return;
-
- if (test_bit(CARD_HAS_ACTIVITY_LED, &(info->hw_state))) {
- /* Disable activity LED */
- outb(0x08 | 0x20, iobase + 0x30);
- } else {
- /* Disable power LED */
- outb(0x00, iobase + 0x30);
+ if (test_bit(CARD_ACTIVITY, &(info->hw_state))) {
+ /* leave LED in inactive state for HZ/10 for blink effect */
+ clear_bit(CARD_ACTIVITY, &(info->hw_state));
+ mod_timer(&(info->timer), jiffies + HZ / 10);
}
+
+ /* Disable activity LED, enable power LED */
+ outb(0x08 | 0x20, iobase + 0x30);
}
@@ -177,22 +176,22 @@ static void bluecard_enable_activity_led(struct bluecard_info *info)
{
unsigned int iobase = info->p_dev->resource[0]->start;
- if (!test_bit(CARD_HAS_PCCARD_ID, &(info->hw_state)))
+ /* don't disturb running blink timer */
+ if (timer_pending(&(info->timer)))
return;
- if (test_bit(CARD_HAS_ACTIVITY_LED, &(info->hw_state))) {
- /* Enable activity LED */
- outb(0x10 | 0x40, iobase + 0x30);
+ set_bit(CARD_ACTIVITY, &(info->hw_state));
- /* Stop the LED after HZ/4 */
- mod_timer(&(info->timer), jiffies + HZ / 4);
+ if (test_bit(CARD_HAS_ACTIVITY_LED, &(info->hw_state))) {
+ /* Enable activity LED, keep power LED enabled */
+ outb(0x18 | 0x60, iobase + 0x30);
} else {
- /* Enable power LED */
- outb(0x08 | 0x20, iobase + 0x30);
-
- /* Stop the LED after HZ/2 */
- mod_timer(&(info->timer), jiffies + HZ / 2);
+ /* Disable power LED */
+ outb(0x00, iobase + 0x30);
}
+
+ /* Stop the LED after HZ/10 */
+ mod_timer(&(info->timer), jiffies + HZ / 10);
}
@@ -625,16 +624,13 @@ static int bluecard_hci_flush(struct hci_dev *hdev)
static int bluecard_hci_open(struct hci_dev *hdev)
{
struct bluecard_info *info = hci_get_drvdata(hdev);
+ unsigned int iobase = info->p_dev->resource[0]->start;
if (test_bit(CARD_HAS_PCCARD_ID, &(info->hw_state)))
bluecard_hci_set_baud_rate(hdev, DEFAULT_BAUD_RATE);
- if (test_bit(CARD_HAS_PCCARD_ID, &(info->hw_state))) {
- unsigned int iobase = info->p_dev->resource[0]->start;
-
- /* Enable LED */
- outb(0x08 | 0x20, iobase + 0x30);
- }
+ /* Enable power LED */
+ outb(0x08 | 0x20, iobase + 0x30);
return 0;
}
@@ -643,15 +639,15 @@ static int bluecard_hci_open(struct hci_dev *hdev)
static int bluecard_hci_close(struct hci_dev *hdev)
{
struct bluecard_info *info = hci_get_drvdata(hdev);
+ unsigned int iobase = info->p_dev->resource[0]->start;
bluecard_hci_flush(hdev);
- if (test_bit(CARD_HAS_PCCARD_ID, &(info->hw_state))) {
- unsigned int iobase = info->p_dev->resource[0]->start;
+ /* Stop LED timer */
+ del_timer_sync(&(info->timer));
- /* Disable LED */
- outb(0x00, iobase + 0x30);
- }
+ /* Disable power LED */
+ outb(0x00, iobase + 0x30);
return 0;
}
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index 32dcac0..1947887 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -684,14 +684,16 @@ static int bt3c_config(struct pcmcia_device *link)
unsigned long try;
/* First pass: look for a config entry that looks normal.
- Two tries: without IO aliases, then with aliases */
+ * Two tries: without IO aliases, then with aliases
+ */
for (try = 0; try < 2; try++)
if (!pcmcia_loop_config(link, bt3c_check_config, (void *) try))
goto found_port;
/* Second pass: try to find an entry that isn't picky about
- its base address, then try to grab any standard serial port
- address, and finally try to get any free port. */
+ * its base address, then try to grab any standard serial port
+ * address, and finally try to get any free port.
+ */
if (!pcmcia_loop_config(link, bt3c_check_config_notpicky, NULL))
goto found_port;
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 9ab6cfb..cc4bdef 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -287,6 +287,37 @@ static struct sk_buff *btbcm_read_usb_product(struct hci_dev *hdev)
return skb;
}
+static int btbcm_read_info(struct hci_dev *hdev)
+{
+ struct sk_buff *skb;
+
+ /* Read Verbose Config Version Info */
+ skb = btbcm_read_verbose_config(hdev);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ BT_INFO("%s: BCM: chip id %u", hdev->name, skb->data[1]);
+ kfree_skb(skb);
+
+ /* Read Controller Features */
+ skb = btbcm_read_controller_features(hdev);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ BT_INFO("%s: BCM: features 0x%2.2x", hdev->name, skb->data[1]);
+ kfree_skb(skb);
+
+ /* Read Local Name */
+ skb = btbcm_read_local_name(hdev);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ BT_INFO("%s: %s", hdev->name, (char *)(skb->data + 1));
+ kfree_skb(skb);
+
+ return 0;
+}
+
static const struct {
u16 subver;
const char *name;
@@ -322,13 +353,10 @@ int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len)
subver = le16_to_cpu(ver->lmp_subver);
kfree_skb(skb);
- /* Read Verbose Config Version Info */
- skb = btbcm_read_verbose_config(hdev);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- BT_INFO("%s: BCM: chip id %u", hdev->name, skb->data[1]);
- kfree_skb(skb);
+ /* Read controller information */
+ err = btbcm_read_info(hdev);
+ if (err)
+ return err;
switch ((rev & 0xf000) >> 12) {
case 0:
@@ -431,29 +459,10 @@ int btbcm_setup_patchram(struct hci_dev *hdev)
subver = le16_to_cpu(ver->lmp_subver);
kfree_skb(skb);
- /* Read Verbose Config Version Info */
- skb = btbcm_read_verbose_config(hdev);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- BT_INFO("%s: BCM: chip id %u", hdev->name, skb->data[1]);
- kfree_skb(skb);
-
- /* Read Controller Features */
- skb = btbcm_read_controller_features(hdev);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- BT_INFO("%s: BCM: features 0x%2.2x", hdev->name, skb->data[1]);
- kfree_skb(skb);
-
- /* Read Local Name */
- skb = btbcm_read_local_name(hdev);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- BT_INFO("%s: %s", hdev->name, (char *)(skb->data + 1));
- kfree_skb(skb);
+ /* Read controller information */
+ err = btbcm_read_info(hdev);
+ if (err)
+ return err;
switch ((rev & 0xf000) >> 12) {
case 0:
diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index eb794f0..03341ce9 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -1455,7 +1455,8 @@ done:
fw_dump_ptr = fw_dump_data;
/* Dump all the memory data into single file, a userspace script will
- be used to split all the memory data to multiple files*/
+ * be used to split all the memory data to multiple files
+ */
BT_INFO("== btmrvl firmware dump to /sys/class/devcoredump start");
for (idx = 0; idx < dump_num; idx++) {
struct memory_type_mapping *entry = &mem_type_mapping_tbl[idx];
@@ -1482,7 +1483,8 @@ done:
}
/* fw_dump_data will be freed in the device coredump release function
- after 5 min*/
+ * after 5 min
+ */
dev_coredumpv(&card->func->dev, fw_dump_data, fw_dump_len, GFP_KERNEL);
BT_INFO("== btmrvl firmware dump to /sys/class/devcoredump end");
}
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index 28afd5d..0bbdfce 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -81,7 +81,7 @@ static int rome_patch_ver_req(struct hci_dev *hdev, u32 *rome_version)
* and lower 2 bytes from patch will be used.
*/
*rome_version = (le32_to_cpu(ver->soc_id) << 16) |
- (le16_to_cpu(ver->rome_ver) & 0x0000ffff);
+ (le16_to_cpu(ver->rome_ver) & 0x0000ffff);
out:
kfree_skb(skb);
diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index 8279094..d9a99b4 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -279,6 +279,8 @@ static int rtl_load_config(struct hci_dev *hdev, const char *name, u8 **buff)
return ret;
ret = fw->size;
*buff = kmemdup(fw->data, ret, GFP_KERNEL);
+ if (!*buff)
+ ret = -ENOMEM;
release_firmware(fw);
diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c
index 1cb958e..c8e945d 100644
--- a/drivers/bluetooth/btsdio.c
+++ b/drivers/bluetooth/btsdio.c
@@ -144,7 +144,8 @@ static int btsdio_rx_packet(struct btsdio_data *data)
if (!skb) {
/* Out of memory. Prepare a read retry and just
* return with the expectation that the next time
- * we're called we'll have more memory. */
+ * we're called we'll have more memory.
+ */
return -ENOMEM;
}
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index 7df79bb..310e9c2 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -614,14 +614,16 @@ static int btuart_config(struct pcmcia_device *link)
int try;
/* First pass: look for a config entry that looks normal.
- Two tries: without IO aliases, then with aliases */
+ * Two tries: without IO aliases, then with aliases
+ */
for (try = 0; try < 2; try++)
if (!pcmcia_loop_config(link, btuart_check_config, &try))
goto found_port;
/* Second pass: try to find an entry that isn't picky about
- its base address, then try to grab any standard serial port
- address, and finally try to get any free port. */
+ * its base address, then try to grab any standard serial port
+ * address, and finally try to get any free port.
+ */
if (!pcmcia_loop_config(link, btuart_check_config_notpicky, NULL))
goto found_port;
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index fa24d69..7a5c06a 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -66,6 +66,7 @@ static struct usb_driver btusb_driver;
#define BTUSB_BCM2045 0x40000
#define BTUSB_IFNUM_2 0x80000
#define BTUSB_CW6622 0x100000
+#define BTUSB_BCM_NO_PRODID 0x200000
static const struct usb_device_id btusb_table[] = {
/* Generic Bluetooth USB device */
@@ -131,7 +132,8 @@ static const struct usb_device_id btusb_table[] = {
{ USB_DEVICE(0x19ff, 0x0239), .driver_info = BTUSB_BCM_PATCHRAM },
/* Broadcom BCM43142A0 (Foxconn/Lenovo) */
- { USB_DEVICE(0x105b, 0xe065), .driver_info = BTUSB_BCM_PATCHRAM },
+ { USB_VENDOR_AND_INTERFACE_INFO(0x105b, 0xff, 0x01, 0x01),
+ .driver_info = BTUSB_BCM_PATCHRAM },
/* Broadcom BCM920703 (HTC Vive) */
{ USB_VENDOR_AND_INTERFACE_INFO(0x0bb4, 0xff, 0x01, 0x01),
@@ -169,6 +171,10 @@ static const struct usb_device_id btusb_table[] = {
{ USB_VENDOR_AND_INTERFACE_INFO(0x0930, 0xff, 0x01, 0x01),
.driver_info = BTUSB_BCM_PATCHRAM },
+ /* Broadcom devices with missing product id */
+ { USB_DEVICE_AND_INTERFACE_INFO(0x0000, 0x0000, 0xff, 0x01, 0x01),
+ .driver_info = BTUSB_BCM_PATCHRAM | BTUSB_BCM_NO_PRODID },
+
/* Intel Bluetooth USB Bootloader (RAM module) */
{ USB_DEVICE(0x8087, 0x0a5a),
.driver_info = BTUSB_INTEL_BOOT | BTUSB_BROKEN_ISOC },
@@ -268,6 +274,7 @@ static const struct usb_device_id blacklist_table[] = {
{ USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x0489, 0xe0a2), .driver_info = BTUSB_QCA_ROME },
{ USB_DEVICE(0x04ca, 0x3011), .driver_info = BTUSB_QCA_ROME },
+ { USB_DEVICE(0x04ca, 0x3016), .driver_info = BTUSB_QCA_ROME },
/* Broadcom BCM2035 */
{ USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 },
@@ -357,6 +364,7 @@ static const struct usb_device_id blacklist_table[] = {
{ USB_DEVICE(0x13d3, 0x3410), .driver_info = BTUSB_REALTEK },
{ USB_DEVICE(0x13d3, 0x3416), .driver_info = BTUSB_REALTEK },
{ USB_DEVICE(0x13d3, 0x3459), .driver_info = BTUSB_REALTEK },
+ { USB_DEVICE(0x13d3, 0x3494), .driver_info = BTUSB_REALTEK },
/* Additional Realtek 8821AE Bluetooth devices */
{ USB_DEVICE(0x0b05, 0x17dc), .driver_info = BTUSB_REALTEK },
@@ -656,7 +664,8 @@ static void btusb_intr_complete(struct urb *urb)
err = usb_submit_urb(urb, GFP_ATOMIC);
if (err < 0) {
/* -EPERM: urb is being killed;
- * -ENODEV: device got disconnected */
+ * -ENODEV: device got disconnected
+ */
if (err != -EPERM && err != -ENODEV)
BT_ERR("%s urb %p failed to resubmit (%d)",
hdev->name, urb, -err);
@@ -745,7 +754,8 @@ static void btusb_bulk_complete(struct urb *urb)
err = usb_submit_urb(urb, GFP_ATOMIC);
if (err < 0) {
/* -EPERM: urb is being killed;
- * -ENODEV: device got disconnected */
+ * -ENODEV: device got disconnected
+ */
if (err != -EPERM && err != -ENODEV)
BT_ERR("%s urb %p failed to resubmit (%d)",
hdev->name, urb, -err);
@@ -840,7 +850,8 @@ static void btusb_isoc_complete(struct urb *urb)
err = usb_submit_urb(urb, GFP_ATOMIC);
if (err < 0) {
/* -EPERM: urb is being killed;
- * -ENODEV: device got disconnected */
+ * -ENODEV: device got disconnected
+ */
if (err != -EPERM && err != -ENODEV)
BT_ERR("%s urb %p failed to resubmit (%d)",
hdev->name, urb, -err);
@@ -952,7 +963,8 @@ static void btusb_diag_complete(struct urb *urb)
err = usb_submit_urb(urb, GFP_ATOMIC);
if (err < 0) {
/* -EPERM: urb is being killed;
- * -ENODEV: device got disconnected */
+ * -ENODEV: device got disconnected
+ */
if (err != -EPERM && err != -ENODEV)
BT_ERR("%s urb %p failed to resubmit (%d)",
hdev->name, urb, -err);
@@ -1076,6 +1088,10 @@ static int btusb_open(struct hci_dev *hdev)
}
data->intf->needs_remote_wakeup = 1;
+	/* device-specific wakeup source enabled and required for USB
+	 * remote wakeup while the host is suspended
+ */
+ device_wakeup_enable(&data->udev->dev);
if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags))
goto done;
@@ -1139,6 +1155,7 @@ static int btusb_close(struct hci_dev *hdev)
goto failed;
data->intf->needs_remote_wakeup = 0;
+ device_wakeup_disable(&data->udev->dev);
usb_autopm_put_interface(data->intf);
failed:
@@ -2892,11 +2909,25 @@ static int btusb_probe(struct usb_interface *intf,
if (id->driver_info == BTUSB_IGNORE)
return -ENODEV;
+ if (id->driver_info & BTUSB_BCM_NO_PRODID) {
+ struct usb_device *udev = interface_to_usbdev(intf);
+
+ /* For the broken Broadcom devices that show 0000:0000
+ * as USB vendor and product information, check that the
+ * manufacturer string identifies them as Broadcom based
+ * devices.
+ */
+ if (!udev->manufacturer ||
+ strcmp(udev->manufacturer, "Broadcom Corp"))
+ return -ENODEV;
+ }
+
if (id->driver_info & BTUSB_ATH3012) {
struct usb_device *udev = interface_to_usbdev(intf);
/* Old firmware would otherwise let ath3k driver load
- * patch and sysconfig files */
+ * patch and sysconfig files
+ */
if (le16_to_cpu(udev->descriptor.bcdDevice) <= 0x0001)
return -ENODEV;
}
@@ -3067,6 +3098,12 @@ static int btusb_probe(struct usb_interface *intf,
if (id->driver_info & BTUSB_QCA_ROME) {
data->setup_on_usb = btusb_setup_qca;
hdev->set_bdaddr = btusb_set_bdaddr_ath3012;
+
+ /* QCA Rome devices lose their updated firmware over suspend,
+ * but the USB hub doesn't notice any status change.
+ * Explicitly request a device reset on resume.
+ */
+ set_bit(BTUSB_RESET_RESUME, &data->flags);
}
#ifdef CONFIG_BT_HCIBTUSB_RTL
@@ -3259,13 +3296,28 @@ static void play_deferred(struct btusb_data *data)
int err;
while ((urb = usb_get_from_anchor(&data->deferred))) {
+ usb_anchor_urb(urb, &data->tx_anchor);
+
err = usb_submit_urb(urb, GFP_ATOMIC);
- if (err < 0)
+ if (err < 0) {
+ if (err != -EPERM && err != -ENODEV)
+ BT_ERR("%s urb %p submission failed (%d)",
+ data->hdev->name, urb, -err);
+ kfree(urb->setup_packet);
+ usb_unanchor_urb(urb);
+ usb_free_urb(urb);
break;
+ }
data->tx_in_flight++;
+ usb_free_urb(urb);
+ }
+
+	/* Clean up the remaining deferred urbs. */
+ while ((urb = usb_get_from_anchor(&data->deferred))) {
+ kfree(urb->setup_packet);
+ usb_free_urb(urb);
}
- usb_scuttle_anchored_urbs(&data->deferred);
}
static int btusb_resume(struct usb_interface *intf)
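
(Two leaks are closed in the reworked loop above: usb_get_from_anchor() hands back the URB with a reference held, which the old code never dropped after a successful submit, and the old usb_scuttle_anchored_urbs() call freed the leftover URBs but not the setup_packet buffers of deferred control transfers — hence the remainder is now drained and freed by hand.)
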
diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c
index 85a3978..5ef8000 100644
--- a/drivers/bluetooth/btwilink.c
+++ b/drivers/bluetooth/btwilink.c
@@ -93,8 +93,7 @@ static void st_reg_completion_cb(void *priv_data, int data)
complete(&lhst->wait_reg_completion);
}
-/* Called by Shared Transport layer when receive data is
- * available */
+/* Called by Shared Transport layer when receive data is available */
static long st_receive(void *priv_data, struct sk_buff *skb)
{
struct ti_st *lhst = priv_data;
@@ -198,7 +197,8 @@ static int ti_st_open(struct hci_dev *hdev)
}
/* Is ST registration callback
- * called with ERROR status? */
+ * called with ERROR status?
+ */
if (hst->reg_status != 0) {
BT_ERR("ST registration completed with invalid "
"status %d", hst->reg_status);
@@ -276,7 +276,7 @@ static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
static int bt_ti_probe(struct platform_device *pdev)
{
- static struct ti_st *hst;
+ struct ti_st *hst;
struct hci_dev *hdev;
int err;
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 6a662d0..e254011 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -27,6 +27,8 @@
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/acpi.h>
+#include <linux/of.h>
+#include <linux/property.h>
#include <linux/platform_device.h>
#include <linux/clk.h>
#include <linux/gpio/consumer.h>
@@ -34,6 +36,7 @@
#include <linux/interrupt.h>
#include <linux/dmi.h>
#include <linux/pm_runtime.h>
+#include <linux/serdev.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -41,11 +44,15 @@
#include "btbcm.h"
#include "hci_uart.h"
+#define BCM_NULL_PKT 0x00
+#define BCM_NULL_SIZE 0
+
#define BCM_LM_DIAG_PKT 0x07
#define BCM_LM_DIAG_SIZE 63
#define BCM_AUTOSUSPEND_DELAY 5000 /* default autosleep delay */
+/* platform device driver resources */
struct bcm_device {
struct list_head list;
@@ -59,6 +66,7 @@ struct bcm_device {
bool clk_enabled;
u32 init_speed;
+ u32 oper_speed;
int irq;
u8 irq_polarity;
@@ -68,6 +76,12 @@ struct bcm_device {
#endif
};
+/* serdev driver resources */
+struct bcm_serdev {
+ struct hci_uart hu;
+};
+
+/* generic bcm uart resources */
struct bcm_data {
struct sk_buff *rx_skb;
struct sk_buff_head txq;
@@ -79,6 +93,14 @@ struct bcm_data {
static DEFINE_MUTEX(bcm_device_lock);
static LIST_HEAD(bcm_device_list);
+static inline void host_set_baudrate(struct hci_uart *hu, unsigned int speed)
+{
+ if (hu->serdev)
+ serdev_device_set_baudrate(hu->serdev, speed);
+ else
+ hci_uart_set_baudrate(hu, speed);
+}
+
static int bcm_set_baudrate(struct hci_uart *hu, unsigned int speed)
{
struct hci_dev *hdev = hu->hdev;
@@ -176,7 +198,7 @@ static irqreturn_t bcm_host_wake(int irq, void *data)
static int bcm_request_irq(struct bcm_data *bcm)
{
struct bcm_device *bdev = bcm->dev;
- int err = 0;
+ int err;
/* If this is not a platform device, do not enable PM functionalities */
mutex_lock(&bcm_device_lock);
@@ -185,21 +207,23 @@ static int bcm_request_irq(struct bcm_data *bcm)
goto unlock;
}
- if (bdev->irq > 0) {
- err = devm_request_irq(&bdev->pdev->dev, bdev->irq,
- bcm_host_wake, IRQF_TRIGGER_RISING,
- "host_wake", bdev);
- if (err)
- goto unlock;
+ if (bdev->irq <= 0) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ err = devm_request_irq(&bdev->pdev->dev, bdev->irq, bcm_host_wake,
+ IRQF_TRIGGER_RISING, "host_wake", bdev);
+ if (err)
+ goto unlock;
- device_init_wakeup(&bdev->pdev->dev, true);
+ device_init_wakeup(&bdev->pdev->dev, true);
- pm_runtime_set_autosuspend_delay(&bdev->pdev->dev,
- BCM_AUTOSUSPEND_DELAY);
- pm_runtime_use_autosuspend(&bdev->pdev->dev);
- pm_runtime_set_active(&bdev->pdev->dev);
- pm_runtime_enable(&bdev->pdev->dev);
- }
+ pm_runtime_set_autosuspend_delay(&bdev->pdev->dev,
+ BCM_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(&bdev->pdev->dev);
+ pm_runtime_set_active(&bdev->pdev->dev);
+ pm_runtime_enable(&bdev->pdev->dev);
unlock:
mutex_unlock(&bcm_device_lock);
@@ -287,6 +311,14 @@ static int bcm_open(struct hci_uart *hu)
hu->priv = bcm;
+ /* If this is a serdev defined device, then only use
+ * serdev open primitive and skip the rest.
+ */
+ if (hu->serdev) {
+ serdev_device_open(hu->serdev);
+ goto out;
+ }
+
if (!hu->tty->dev)
goto out;
@@ -301,6 +333,7 @@ static int bcm_open(struct hci_uart *hu)
if (hu->tty->dev->parent == dev->pdev->dev.parent) {
bcm->dev = dev;
hu->init_speed = dev->init_speed;
+ hu->oper_speed = dev->oper_speed;
#ifdef CONFIG_PM
dev->hu = hu;
#endif
@@ -321,6 +354,12 @@ static int bcm_close(struct hci_uart *hu)
bt_dev_dbg(hu->hdev, "hu %p", hu);
+ /* If this is a serdev defined device, only use serdev
+ * close primitive and then continue as usual.
+ */
+ if (hu->serdev)
+ serdev_device_close(hu->serdev);
+
/* Protect bcm->dev against removal of the device or driver */
mutex_lock(&bcm_device_lock);
if (bcm_device_exists(bdev)) {
@@ -396,7 +435,7 @@ static int bcm_setup(struct hci_uart *hu)
speed = 0;
if (speed)
- hci_uart_set_baudrate(hu, speed);
+ host_set_baudrate(hu, speed);
/* Operational speed if any */
if (hu->oper_speed)
@@ -409,7 +448,7 @@ static int bcm_setup(struct hci_uart *hu)
if (speed) {
err = bcm_set_baudrate(hu, speed);
if (!err)
- hci_uart_set_baudrate(hu, speed);
+ host_set_baudrate(hu, speed);
}
finalize:
@@ -432,11 +471,19 @@ finalize:
.lsize = 0, \
.maxlen = BCM_LM_DIAG_SIZE
+#define BCM_RECV_NULL \
+ .type = BCM_NULL_PKT, \
+ .hlen = BCM_NULL_SIZE, \
+ .loff = 0, \
+ .lsize = 0, \
+ .maxlen = BCM_NULL_SIZE
+
static const struct h4_recv_pkt bcm_recv_pkts[] = {
{ H4_RECV_ACL, .recv = hci_recv_frame },
{ H4_RECV_SCO, .recv = hci_recv_frame },
{ H4_RECV_EVENT, .recv = hci_recv_frame },
{ BCM_RECV_LM_DIAG, .recv = hci_recv_diag },
+ { BCM_RECV_NULL, .recv = hci_recv_diag },
};
static int bcm_recv(struct hci_uart *hu, const void *data, int count)
@@ -697,8 +744,10 @@ static int bcm_resource(struct acpi_resource *ares, void *data)
case ACPI_RESOURCE_TYPE_SERIAL_BUS:
sb = &ares->data.uart_serial_bus;
- if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_UART)
+ if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_UART) {
dev->init_speed = sb->default_baud_rate;
+ dev->oper_speed = 4000000;
+ }
break;
default:
@@ -851,7 +900,6 @@ static const struct hci_uart_proto bcm_proto = {
.name = "Broadcom",
.manufacturer = 15,
.init_speed = 115200,
- .oper_speed = 4000000,
.open = bcm_open,
.close = bcm_close,
.flush = bcm_flush,
@@ -901,9 +949,57 @@ static struct platform_driver bcm_driver = {
},
};
+static int bcm_serdev_probe(struct serdev_device *serdev)
+{
+ struct bcm_serdev *bcmdev;
+ u32 speed;
+ int err;
+
+ bcmdev = devm_kzalloc(&serdev->dev, sizeof(*bcmdev), GFP_KERNEL);
+ if (!bcmdev)
+ return -ENOMEM;
+
+ bcmdev->hu.serdev = serdev;
+ serdev_device_set_drvdata(serdev, bcmdev);
+
+ err = device_property_read_u32(&serdev->dev, "max-speed", &speed);
+ if (!err)
+ bcmdev->hu.oper_speed = speed;
+
+ return hci_uart_register_device(&bcmdev->hu, &bcm_proto);
+}
+
+static void bcm_serdev_remove(struct serdev_device *serdev)
+{
+ struct bcm_serdev *bcmdev = serdev_device_get_drvdata(serdev);
+
+ hci_uart_unregister_device(&bcmdev->hu);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id bcm_bluetooth_of_match[] = {
+ { .compatible = "brcm,bcm43438-bt" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, bcm_bluetooth_of_match);
+#endif
+
+static struct serdev_device_driver bcm_serdev_driver = {
+ .probe = bcm_serdev_probe,
+ .remove = bcm_serdev_remove,
+ .driver = {
+ .name = "hci_uart_bcm",
+ .of_match_table = of_match_ptr(bcm_bluetooth_of_match),
+ },
+};
+
int __init bcm_init(void)
{
+ /* For now, we need to keep both platform device
+ * driver (ACPI generated) and serdev driver (DT).
+ */
platform_driver_register(&bcm_driver);
+ serdev_device_driver_register(&bcm_serdev_driver);
return hci_uart_register_proto(&bcm_proto);
}
@@ -911,6 +1007,7 @@ int __init bcm_init(void)
int __exit bcm_deinit(void)
{
platform_driver_unregister(&bcm_driver);
+ serdev_device_driver_unregister(&bcm_serdev_driver);
return hci_uart_unregister_proto(&bcm_proto);
}
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c
index 4e328d7..3b82a87 100644
--- a/drivers/bluetooth/hci_h4.c
+++ b/drivers/bluetooth/hci_h4.c
@@ -172,7 +172,7 @@ struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb,
const struct h4_recv_pkt *pkts, int pkts_count)
{
struct hci_uart *hu = hci_get_drvdata(hdev);
- u8 alignment = hu->alignment;
+ u8 alignment = hu->alignment ? hu->alignment : 1;
while (count) {
int i, len;
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 8397b71..a746627 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -457,7 +457,8 @@ static int hci_uart_tty_open(struct tty_struct *tty)
BT_DBG("tty %p", tty);
/* Error if the tty has no write op instead of leaving an exploitable
- hole */
+ * hole
+ */
if (tty->ops->write == NULL)
return -EOPNOTSUPP;
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index c982943..424c15a 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -622,7 +622,8 @@ static int download_firmware(struct ll_device *lldev)
cmd = (struct hci_command *)action_ptr;
if (cmd->opcode == 0xff36) {
/* ignore remote change
- * baud rate HCI VS command */
+ * baud rate HCI VS command
+ */
bt_dev_warn(lldev->hu.hdev, "change remote baud rate command in firmware");
break;
}
@@ -742,14 +743,8 @@ static int hci_ti_probe(struct serdev_device *serdev)
static void hci_ti_remove(struct serdev_device *serdev)
{
struct ll_device *lldev = serdev_device_get_drvdata(serdev);
- struct hci_uart *hu = &lldev->hu;
- struct hci_dev *hdev = hu->hdev;
- cancel_work_sync(&hu->write_work);
-
- hci_unregister_dev(hdev);
- hci_free_dev(hdev);
- hu->proto->close(hu);
+ hci_uart_unregister_device(&lldev->hu);
}
static const struct of_device_id hci_ti_of_match[] = {
diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c
index 181a15b..3539fd0 100644
--- a/drivers/bluetooth/hci_nokia.c
+++ b/drivers/bluetooth/hci_nokia.c
@@ -767,16 +767,8 @@ static int nokia_bluetooth_serdev_probe(struct serdev_device *serdev)
static void nokia_bluetooth_serdev_remove(struct serdev_device *serdev)
{
struct nokia_bt_dev *btdev = serdev_device_get_drvdata(serdev);
- struct hci_uart *hu = &btdev->hu;
- struct hci_dev *hdev = hu->hdev;
- cancel_work_sync(&hu->write_work);
-
- hci_unregister_dev(hdev);
- hci_free_dev(hdev);
- hu->proto->close(hu);
-
- pm_runtime_disable(&btdev->serdev->dev);
+ hci_uart_unregister_device(&btdev->hu);
}
static int nokia_bluetooth_runtime_suspend(struct device *dev)
diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c
index aea9301..b725ac4 100644
--- a/drivers/bluetooth/hci_serdev.c
+++ b/drivers/bluetooth/hci_serdev.c
@@ -354,3 +354,16 @@ err_alloc:
return err;
}
EXPORT_SYMBOL_GPL(hci_uart_register_device);
+
+void hci_uart_unregister_device(struct hci_uart *hu)
+{
+ struct hci_dev *hdev = hu->hdev;
+
+ hci_unregister_dev(hdev);
+ hci_free_dev(hdev);
+
+ cancel_work_sync(&hu->write_work);
+
+ hu->proto->close(hu);
+}
+EXPORT_SYMBOL_GPL(hci_uart_unregister_device);
diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h
index c6e9e1c..d9cd95d 100644
--- a/drivers/bluetooth/hci_uart.h
+++ b/drivers/bluetooth/hci_uart.h
@@ -112,6 +112,7 @@ struct hci_uart {
int hci_uart_register_proto(const struct hci_uart_proto *p);
int hci_uart_unregister_proto(const struct hci_uart_proto *p);
int hci_uart_register_device(struct hci_uart *hu, const struct hci_uart_proto *p);
+void hci_uart_unregister_device(struct hci_uart *hu);
int hci_uart_tx_wakeup(struct hci_uart *hu);
int hci_uart_init_ready(struct hci_uart *hu);
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 63ac1c6..efd5db7 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -661,9 +661,23 @@ void vmbus_close(struct vmbus_channel *channel)
}
EXPORT_SYMBOL_GPL(vmbus_close);
-int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
- u32 bufferlen, u64 requestid,
- enum vmbus_packet_type type, u32 flags)
+/**
+ * vmbus_sendpacket() - Send the specified buffer on the given channel
+ * @channel: Pointer to vmbus_channel structure.
+ * @buffer: Pointer to the buffer containing the data to send
+ * @bufferlen: Maximum size of what the buffer will hold
+ * @requestid: Identifier of the request
+ * @type: Type of packet that is being sent, e.g. negotiate, time
+ * packet etc.
+ * @flags: 0 or VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED
+ *
+ * Sends data in @buffer directly to Hyper-V via the vmbus.
+ * This will send the data unparsed to Hyper-V.
+ *
+ * Mainly used by Hyper-V drivers.
+ */
+int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
+ u32 bufferlen, u64 requestid,
+ enum vmbus_packet_type type, u32 flags)
{
struct vmpacket_descriptor desc;
u32 packetlen = sizeof(struct vmpacket_descriptor) + bufferlen;
@@ -690,42 +704,19 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
return hv_ringbuffer_write(channel, bufferlist, num_vecs);
}
-EXPORT_SYMBOL(vmbus_sendpacket_ctl);
-
-/**
- * vmbus_sendpacket() - Send the specified buffer on the given channel
- * @channel: Pointer to vmbus_channel structure.
- * @buffer: Pointer to the buffer you want to receive the data into.
- * @bufferlen: Maximum size of what the the buffer will hold
- * @requestid: Identifier of the request
- * @type: Type of packet that is being send e.g. negotiate, time
- * packet etc.
- *
- * Sends data in @buffer directly to hyper-v via the vmbus
- * This will send the data unparsed to hyper-v.
- *
- * Mainly used by Hyper-V drivers.
- */
-int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer,
- u32 bufferlen, u64 requestid,
- enum vmbus_packet_type type, u32 flags)
-{
- return vmbus_sendpacket_ctl(channel, buffer, bufferlen, requestid,
- type, flags);
-}
EXPORT_SYMBOL(vmbus_sendpacket);
/*
- * vmbus_sendpacket_pagebuffer_ctl - Send a range of single-page buffer
+ * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
* packets using a GPADL Direct packet type. This interface allows you
* to control notifying the host. This will be useful for sending
* batched data. Also the sender can control the send flags
* explicitly.
*/
-int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
- struct hv_page_buffer pagebuffers[],
- u32 pagecount, void *buffer, u32 bufferlen,
- u64 requestid, u32 flags)
+int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
+ struct hv_page_buffer pagebuffers[],
+ u32 pagecount, void *buffer, u32 bufferlen,
+ u64 requestid)
{
int i;
struct vmbus_channel_packet_page_buffer desc;
@@ -750,7 +741,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
/* Setup the descriptor */
desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
- desc.flags = flags;
+ desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
desc.length8 = (u16)(packetlen_aligned >> 3);
desc.transactionid = requestid;
@@ -771,24 +762,6 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
return hv_ringbuffer_write(channel, bufferlist, 3);
}
-EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl);
-
-/*
- * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer
- * packets using a GPADL Direct packet type.
- */
-int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
- struct hv_page_buffer pagebuffers[],
- u32 pagecount, void *buffer, u32 bufferlen,
- u64 requestid)
-{
- u32 flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
-
- return vmbus_sendpacket_pagebuffer_ctl(channel, pagebuffers, pagecount,
- buffer, bufferlen,
- requestid, flags);
-
-}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
/*
@@ -828,62 +801,6 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
}
EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc);
-/*
- * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet
- * using a GPADL Direct packet type.
- */
-int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
- struct hv_multipage_buffer *multi_pagebuffer,
- void *buffer, u32 bufferlen, u64 requestid)
-{
- struct vmbus_channel_packet_multipage_buffer desc;
- u32 descsize;
- u32 packetlen;
- u32 packetlen_aligned;
- struct kvec bufferlist[3];
- u64 aligned_data = 0;
- u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset,
- multi_pagebuffer->len);
-
- if (pfncount > MAX_MULTIPAGE_BUFFER_COUNT)
- return -EINVAL;
-
- /*
- * Adjust the size down since vmbus_channel_packet_multipage_buffer is
- * the largest size we support
- */
- descsize = sizeof(struct vmbus_channel_packet_multipage_buffer) -
- ((MAX_MULTIPAGE_BUFFER_COUNT - pfncount) *
- sizeof(u64));
- packetlen = descsize + bufferlen;
- packetlen_aligned = ALIGN(packetlen, sizeof(u64));
-
-
- /* Setup the descriptor */
- desc.type = VM_PKT_DATA_USING_GPA_DIRECT;
- desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
- desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
- desc.length8 = (u16)(packetlen_aligned >> 3);
- desc.transactionid = requestid;
- desc.rangecount = 1;
-
- desc.range.len = multi_pagebuffer->len;
- desc.range.offset = multi_pagebuffer->offset;
-
- memcpy(desc.range.pfn_array, multi_pagebuffer->pfn_array,
- pfncount * sizeof(u64));
-
- bufferlist[0].iov_base = &desc;
- bufferlist[0].iov_len = descsize;
- bufferlist[1].iov_base = buffer;
- bufferlist[1].iov_len = bufferlen;
- bufferlist[2].iov_base = &aligned_data;
- bufferlist[2].iov_len = (packetlen_aligned - packetlen);
-
- return hv_ringbuffer_write(channel, bufferlist, 3);
-}
-EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer);
-
/**
* vmbus_recvpacket() - Retrieve the user packet on the specified channel
* @channel: Pointer to vmbus_channel structure.
diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
index 19982a4..18f5ed0 100644
--- a/drivers/infiniband/hw/bnxt_re/Kconfig
+++ b/drivers/infiniband/hw/bnxt_re/Kconfig
@@ -1,6 +1,7 @@
config INFINIBAND_BNXT_RE
tristate "Broadcom Netxtreme HCA support"
depends on ETHERNET && NETDEVICES && PCI && INET && DCB
+ depends on MAY_USE_DEVLINK
select NET_VENDOR_BROADCOM
select BNXT
---help---
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 2747abd..b6b33d9 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -150,8 +150,8 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
/* VF or PF -- proxy SQP */
if (mlx4_is_mfunc(dev->dev)) {
for (i = 0; i < dev->dev->caps.num_ports; i++) {
- if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
- qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
+ if (qp->mqp.qpn == dev->dev->caps.spec_qps[i].qp0_proxy ||
+ qp->mqp.qpn == dev->dev->caps.spec_qps[i].qp1_proxy) {
proxy_sqp = 1;
break;
}
@@ -178,7 +178,7 @@ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
/* VF or PF -- proxy QP0 */
if (mlx4_is_mfunc(dev->dev)) {
for (i = 0; i < dev->dev->caps.num_ports; i++) {
- if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
+ if (qp->mqp.qpn == dev->dev->caps.spec_qps[i].qp0_proxy) {
proxy_qp0 = 1;
break;
}
@@ -632,8 +632,8 @@ static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
{
int i;
for (i = 0; i < dev->caps.num_ports; i++) {
- if (qpn == dev->caps.qp0_proxy[i])
- return !!dev->caps.qp0_qkey[i];
+ if (qpn == dev->caps.spec_qps[i].qp0_proxy)
+ return !!dev->caps.spec_qps[i].qp0_qkey;
}
return 0;
}
@@ -1521,9 +1521,9 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
}
/* PF or VF -- creating proxies */
if (attr->qp_type == IB_QPT_SMI)
- return dev->dev->caps.qp0_proxy[attr->port_num - 1];
+ return dev->dev->caps.spec_qps[attr->port_num - 1].qp0_proxy;
else
- return dev->dev->caps.qp1_proxy[attr->port_num - 1];
+ return dev->dev->caps.spec_qps[attr->port_num - 1].qp1_proxy;
}
static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
@@ -2880,9 +2880,9 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
{
int i;
for (i = 0; i < dev->caps.num_ports; i++) {
- if (qpn == dev->caps.qp0_proxy[i] ||
- qpn == dev->caps.qp0_tunnel[i]) {
- *qkey = dev->caps.qp0_qkey[i];
+ if (qpn == dev->caps.spec_qps[i].qp0_proxy ||
+ qpn == dev->caps.spec_qps[i].qp0_tunnel) {
+ *qkey = dev->caps.spec_qps[i].qp0_qkey;
return 0;
}
}
@@ -2943,7 +2943,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
else
sqp->ud_header.bth.destination_qpn =
- cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
+ cpu_to_be32(mdev->dev->caps.spec_qps[sqp->qp.port - 1].qp0_tunnel);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
if (mlx4_is_master(mdev->dev)) {
@@ -3403,9 +3403,9 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
if (qpt == MLX4_IB_QPT_PROXY_GSI)
- dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.spec_qps[port - 1].qp1_tunnel);
else
- dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]);
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.spec_qps[port - 1].qp0_tunnel);
/* Use QKEY from the QP context, which is set by master */
dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
}
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index 9ca691d..46c189a 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -55,7 +55,7 @@ struct capictr_event {
/* ------------------------------------------------------------- */
-static struct capi_version driver_version = {2, 0, 1, 1 << 4};
+static const struct capi_version driver_version = {2, 0, 1, 1 << 4};
static char driver_serial[CAPI_SERIAL_LEN] = "0004711";
static char capi_manufakturer[64] = "AVM Berlin";
diff --git a/drivers/isdn/hardware/eicon/divacapi.h b/drivers/isdn/hardware/eicon/divacapi.h
index a315a29..c4868a0 100644
--- a/drivers/isdn/hardware/eicon/divacapi.h
+++ b/drivers/isdn/hardware/eicon/divacapi.h
@@ -26,15 +26,7 @@
/*#define DEBUG */
-
-
-
-
-
-
-
-
-
+#include <linux/types.h>
#define IMPLEMENT_DTMF 1
#define IMPLEMENT_LINE_INTERCONNECT2 1
@@ -82,8 +74,6 @@
#define CODEC_PERMANENT 0x02
#define ADV_VOICE 0x03
#define MAX_CIP_TYPES 5 /* kind of CIP types for group optimization */
-#define C_IND_MASK_DWORDS ((MAX_APPL + 32) >> 5)
-
#define FAX_CONNECT_INFO_BUFFER_SIZE 256
#define NCPI_BUFFER_SIZE 256
@@ -265,8 +255,8 @@ struct _PLCI {
word ncci_ring_list;
byte inc_dis_ncci_table[MAX_CHANNELS_PER_PLCI];
t_std_internal_command internal_command_queue[MAX_INTERNAL_COMMAND_LEVELS];
- dword c_ind_mask_table[C_IND_MASK_DWORDS];
- dword group_optimization_mask_table[C_IND_MASK_DWORDS];
+ DECLARE_BITMAP(c_ind_mask_table, MAX_APPL);
+ DECLARE_BITMAP(group_optimization_mask_table, MAX_APPL);
byte RBuffer[200];
dword msg_in_queue[MSG_IN_QUEUE_SIZE/sizeof(dword)];
API_SAVE saved_msg;
diff --git a/drivers/isdn/hardware/eicon/message.c b/drivers/isdn/hardware/eicon/message.c
index 3b11422..eadd1ed 100644
--- a/drivers/isdn/hardware/eicon/message.c
+++ b/drivers/isdn/hardware/eicon/message.c
@@ -23,9 +23,7 @@
*
*/
-
-
-
+#include <linux/bitmap.h>
#include "platform.h"
#include "di_defs.h"
@@ -35,19 +33,9 @@
#include "mdm_msg.h"
#include "divasync.h"
-
-
#define FILE_ "MESSAGE.C"
#define dprintf
-
-
-
-
-
-
-
-
/*------------------------------------------------------------------*/
/* These are options supported for all adapters that are served by */
/* XDI driver. Also it is not necessary to ask it from every adapter*/
@@ -72,9 +60,6 @@ static dword diva_xdi_extended_features = 0;
/*------------------------------------------------------------------*/
static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci);
-static void set_group_ind_mask(PLCI *plci);
-static void clear_group_ind_mask_bit(PLCI *plci, word b);
-static byte test_group_ind_mask_bit(PLCI *plci, word b);
void AutomaticLaw(DIVA_CAPI_ADAPTER *);
word CapiRelease(word);
word CapiRegister(word);
@@ -1087,106 +1072,6 @@ static void plci_remove(PLCI *plci)
}
/*------------------------------------------------------------------*/
-/* Application Group function helpers */
-/*------------------------------------------------------------------*/
-
-static void set_group_ind_mask(PLCI *plci)
-{
- word i;
-
- for (i = 0; i < C_IND_MASK_DWORDS; i++)
- plci->group_optimization_mask_table[i] = 0xffffffffL;
-}
-
-static void clear_group_ind_mask_bit(PLCI *plci, word b)
-{
- plci->group_optimization_mask_table[b >> 5] &= ~(1L << (b & 0x1f));
-}
-
-static byte test_group_ind_mask_bit(PLCI *plci, word b)
-{
- return ((plci->group_optimization_mask_table[b >> 5] & (1L << (b & 0x1f))) != 0);
-}
-
-/*------------------------------------------------------------------*/
-/* c_ind_mask operations for arbitrary MAX_APPL */
-/*------------------------------------------------------------------*/
-
-static void clear_c_ind_mask(PLCI *plci)
-{
- word i;
-
- for (i = 0; i < C_IND_MASK_DWORDS; i++)
- plci->c_ind_mask_table[i] = 0;
-}
-
-static byte c_ind_mask_empty(PLCI *plci)
-{
- word i;
-
- i = 0;
- while ((i < C_IND_MASK_DWORDS) && (plci->c_ind_mask_table[i] == 0))
- i++;
- return (i == C_IND_MASK_DWORDS);
-}
-
-static void set_c_ind_mask_bit(PLCI *plci, word b)
-{
- plci->c_ind_mask_table[b >> 5] |= (1L << (b & 0x1f));
-}
-
-static void clear_c_ind_mask_bit(PLCI *plci, word b)
-{
- plci->c_ind_mask_table[b >> 5] &= ~(1L << (b & 0x1f));
-}
-
-static byte test_c_ind_mask_bit(PLCI *plci, word b)
-{
- return ((plci->c_ind_mask_table[b >> 5] & (1L << (b & 0x1f))) != 0);
-}
-
-static void dump_c_ind_mask(PLCI *plci)
-{
- word i, j, k;
- dword d;
- char *p;
- char buf[40];
-
- for (i = 0; i < C_IND_MASK_DWORDS; i += 4)
- {
- p = buf + 36;
- *p = '\0';
- for (j = 0; j < 4; j++)
- {
- if (i + j < C_IND_MASK_DWORDS)
- {
- d = plci->c_ind_mask_table[i + j];
- for (k = 0; k < 8; k++)
- {
- *(--p) = hex_asc_lo(d);
- d >>= 4;
- }
- }
- else if (i != 0)
- {
- for (k = 0; k < 8; k++)
- *(--p) = ' ';
- }
- *(--p) = ' ';
- }
- dbug(1, dprintf("c_ind_mask =%s", (char *) p));
- }
-}
-
-
-
-
-
-#define dump_plcis(a)
-
-
-
-/*------------------------------------------------------------------*/
/* translation function for each message */
/*------------------------------------------------------------------*/
@@ -1457,13 +1342,13 @@ static byte connect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
return 1;
}
else if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT) {
- clear_c_ind_mask_bit(plci, (word)(appl->Id - 1));
- dump_c_ind_mask(plci);
+ __clear_bit(appl->Id - 1, plci->c_ind_mask_table);
+ dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table));
Reject = GET_WORD(parms[0].info);
dbug(1, dprintf("Reject=0x%x", Reject));
if (Reject)
{
- if (c_ind_mask_empty(plci))
+ if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
{
if ((Reject & 0xff00) == 0x3400)
{
@@ -1553,11 +1438,8 @@ static byte connect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
sig_req(plci, CALL_RES, 0);
}
- for (i = 0; i < max_appl; i++) {
- if (test_c_ind_mask_bit(plci, i)) {
- sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED);
- }
- }
+ for_each_set_bit(i, plci->c_ind_mask_table, max_appl)
+ sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED);
}
}
return 1;
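
The hand-rolled dump_c_ind_mask() hex dumper can go away because the kernel's vsprintf already knows how to print bitmaps: "%*pb" emits the hex form (comma-separated 32-bit chunks) and "%*pbl" the range-list form, with the field width giving the number of valid bits. A small illustrative sketch:

	{
		DECLARE_BITMAP(mask, 64);

		bitmap_fill(mask, 64);
		pr_info("mask =%*pb\n", 64, mask);	/* "mask =ffffffff,ffffffff" */
		pr_info("mask =%*pbl\n", 64, mask);	/* list form: "mask =0-63" */
	}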
@@ -1584,13 +1466,10 @@ static byte disconnect_req(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
{
if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT)
{
- clear_c_ind_mask_bit(plci, (word)(appl->Id - 1));
+ __clear_bit(appl->Id - 1, plci->c_ind_mask_table);
plci->appl = appl;
- for (i = 0; i < max_appl; i++)
- {
- if (test_c_ind_mask_bit(plci, i))
- sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0);
- }
+ for_each_set_bit(i, plci->c_ind_mask_table, max_appl)
+ sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0);
plci->State = OUTG_DIS_PENDING;
}
if (plci->Sig.Id && plci->appl)
@@ -1634,7 +1513,7 @@ static byte disconnect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
{
/* clear ind mask bit, just in case of collision of */
/* DISCONNECT_IND and CONNECT_RES */
- clear_c_ind_mask_bit(plci, (word)(appl->Id - 1));
+ __clear_bit(appl->Id - 1, plci->c_ind_mask_table);
ncci_free_receive_buffers(plci, 0);
if (plci_remove_check(plci))
{
@@ -1642,7 +1521,7 @@ static byte disconnect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
}
if (plci->State == INC_DIS_PENDING
|| plci->State == SUSPENDING) {
- if (c_ind_mask_empty(plci)) {
+ if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) {
if (plci->State != SUSPENDING) plci->State = IDLE;
dbug(1, dprintf("chs=%d", plci->channels));
if (!plci->channels) {
@@ -3351,13 +3230,11 @@ static byte select_b_req(dword Id, word Number, DIVA_CAPI_ADAPTER *a,
}
plci->State = INC_CON_CONNECTED_ALERT;
plci->appl = appl;
- clear_c_ind_mask_bit(plci, (word)(appl->Id - 1));
- dump_c_ind_mask(plci);
- for (i = 0; i < max_appl; i++) /* disconnect the other appls */
- { /* its quasi a connect */
- if (test_c_ind_mask_bit(plci, i))
- sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED);
- }
+ __clear_bit(appl->Id - 1, plci->c_ind_mask_table);
+ dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table));
+ /* disconnect the other appls, it's quasi a connect */
+ for_each_set_bit(i, plci->c_ind_mask_table, max_appl)
+ sendf(&application[i], _DISCONNECT_I, Id, 0, "w", _OTHER_APPL_CONNECTED);
}
api_save_msg(msg, "s", &plci->saved_msg);
@@ -5692,19 +5569,17 @@ static void sig_ind(PLCI *plci)
cip = find_cip(a, parms[4], parms[6]);
cip_mask = 1L << cip;
dbug(1, dprintf("cip=%d,cip_mask=%lx", cip, cip_mask));
- clear_c_ind_mask(plci);
+ bitmap_zero(plci->c_ind_mask_table, MAX_APPL);
if (!remove_started && !a->adapter_disabled)
{
- set_c_ind_mask_bit(plci, MAX_APPL);
group_optimization(a, plci);
- for (i = 0; i < max_appl; i++) {
+ for_each_set_bit(i, plci->group_optimization_mask_table, max_appl) {
if (application[i].Id
&& (a->CIP_Mask[i] & 1 || a->CIP_Mask[i] & cip_mask)
- && CPN_filter_ok(parms[0], a, i)
- && test_group_ind_mask_bit(plci, i)) {
+ && CPN_filter_ok(parms[0], a, i)) {
dbug(1, dprintf("storedcip_mask[%d]=0x%lx", i, a->CIP_Mask[i]));
- set_c_ind_mask_bit(plci, i);
- dump_c_ind_mask(plci);
+ __set_bit(i, plci->c_ind_mask_table);
+ dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table));
plci->State = INC_CON_PENDING;
plci->call_dir = (plci->call_dir & ~(CALL_DIR_OUT | CALL_DIR_ORIGINATE)) |
CALL_DIR_IN | CALL_DIR_ANSWER;
@@ -5750,10 +5625,9 @@ static void sig_ind(PLCI *plci)
SendMultiIE(plci, Id, multi_pi_parms, PI, 0x210, true));
}
}
- clear_c_ind_mask_bit(plci, MAX_APPL);
- dump_c_ind_mask(plci);
+ dbug(1, dprintf("c_ind_mask =%*pb", MAX_APPL, plci->c_ind_mask_table));
}
- if (c_ind_mask_empty(plci)) {
+ if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL)) {
sig_req(plci, HANGUP, 0);
send_req(plci);
plci->State = IDLE;
@@ -5994,13 +5868,13 @@ static void sig_ind(PLCI *plci)
break;
case RESUME:
- clear_c_ind_mask_bit(plci, (word)(plci->appl->Id - 1));
+ __clear_bit(plci->appl->Id - 1, plci->c_ind_mask_table);
PUT_WORD(&resume_cau[4], GOOD);
sendf(plci->appl, _FACILITY_I, Id, 0, "ws", (word)3, resume_cau);
break;
case SUSPEND:
- clear_c_ind_mask(plci);
+ bitmap_zero(plci->c_ind_mask_table, MAX_APPL);
if (plci->NL.Id && !plci->nl_remove_id) {
mixer_remove(plci);
@@ -6037,15 +5911,12 @@ static void sig_ind(PLCI *plci)
if (plci->State == INC_CON_PENDING || plci->State == INC_CON_ALERT)
{
- for (i = 0; i < max_appl; i++)
- {
- if (test_c_ind_mask_bit(plci, i))
- sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0);
- }
+ for_each_set_bit(i, plci->c_ind_mask_table, max_appl)
+ sendf(&application[i], _DISCONNECT_I, Id, 0, "w", 0);
}
else
{
- clear_c_ind_mask(plci);
+ bitmap_zero(plci->c_ind_mask_table, MAX_APPL);
}
if (!plci->appl)
{
@@ -6055,7 +5926,7 @@ static void sig_ind(PLCI *plci)
a->listen_active--;
}
plci->State = INC_DIS_PENDING;
- if (c_ind_mask_empty(plci))
+ if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
{
plci->State = IDLE;
if (plci->NL.Id && !plci->nl_remove_id)
@@ -6341,14 +6212,10 @@ static void SendInfo(PLCI *plci, dword Id, byte **parms, byte iesent)
|| Info_Number == DSP
|| Info_Number == UUI)
{
- for (j = 0; j < max_appl; j++)
- {
- if (test_c_ind_mask_bit(plci, j))
- {
- dbug(1, dprintf("Ovl_Ind"));
- iesent = true;
- sendf(&application[j], _INFO_I, Id, 0, "wS", Info_Number, Info_Element);
- }
+ for_each_set_bit(j, plci->c_ind_mask_table, max_appl) {
+ dbug(1, dprintf("Ovl_Ind"));
+ iesent = true;
+ sendf(&application[j], _INFO_I, Id, 0, "wS", Info_Number, Info_Element);
}
}
} /* all other signalling states */
@@ -6416,14 +6283,10 @@ static byte SendMultiIE(PLCI *plci, dword Id, byte **parms, byte ie_type,
}
else if (!plci->appl && Info_Number)
{ /* overlap receiving broadcast */
- for (j = 0; j < max_appl; j++)
- {
- if (test_c_ind_mask_bit(plci, j))
- {
- iesent = true;
- dbug(1, dprintf("Mlt_Ovl_Ind"));
- sendf(&application[j] , _INFO_I, Id, 0, "wS", Info_Number, Info_Element);
- }
+ for_each_set_bit(j, plci->c_ind_mask_table, max_appl) {
+ iesent = true;
+ dbug(1, dprintf("Mlt_Ovl_Ind"));
+ sendf(&application[j], _INFO_I, Id, 0, "wS", Info_Number, Info_Element);
}
} /* all other signalling states */
else if (Info_Number
@@ -7270,7 +7133,6 @@ static word get_plci(DIVA_CAPI_ADAPTER *a)
word i, j;
PLCI *plci;
- dump_plcis(a);
for (i = 0; i < a->max_plci && a->plci[i].Id; i++);
if (i == a->max_plci) {
dbug(1, dprintf("get_plci: out of PLCIs"));
@@ -7321,8 +7183,8 @@ static word get_plci(DIVA_CAPI_ADAPTER *a)
plci->ncci_ring_list = 0;
for (j = 0; j < MAX_CHANNELS_PER_PLCI; j++) plci->inc_dis_ncci_table[j] = 0;
- clear_c_ind_mask(plci);
- set_group_ind_mask(plci);
+ bitmap_zero(plci->c_ind_mask_table, MAX_APPL);
+ bitmap_fill(plci->group_optimization_mask_table, MAX_APPL);
plci->fax_connect_info_length = 0;
plci->nsf_control_bits = 0;
plci->ncpi_state = 0x00;
@@ -9373,10 +9235,10 @@ word CapiRelease(word Id)
if (plci->State == INC_CON_PENDING
|| plci->State == INC_CON_ALERT)
{
- if (test_c_ind_mask_bit(plci, (word)(Id - 1)))
+ if (test_bit(Id - 1, plci->c_ind_mask_table))
{
- clear_c_ind_mask_bit(plci, (word)(Id - 1));
- if (c_ind_mask_empty(plci))
+ __clear_bit(Id - 1, plci->c_ind_mask_table);
+ if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
{
sig_req(plci, HANGUP, 0);
send_req(plci);
@@ -9384,10 +9246,10 @@ word CapiRelease(word Id)
}
}
}
- if (test_c_ind_mask_bit(plci, (word)(Id - 1)))
+ if (test_bit(Id - 1, plci->c_ind_mask_table))
{
- clear_c_ind_mask_bit(plci, (word)(Id - 1));
- if (c_ind_mask_empty(plci))
+ __clear_bit(Id - 1, plci->c_ind_mask_table);
+ if (bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
{
if (!plci->appl)
{
@@ -9452,7 +9314,7 @@ word CapiRelease(word Id)
static word plci_remove_check(PLCI *plci)
{
if (!plci) return true;
- if (!plci->NL.Id && c_ind_mask_empty(plci))
+ if (!plci->NL.Id && bitmap_empty(plci->c_ind_mask_table, MAX_APPL))
{
if (plci->Sig.Id == 0xff)
plci->Sig.Id = 0;
@@ -14735,7 +14597,8 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci)
word appl_number_group_type[MAX_APPL];
PLCI *auxplci;
- set_group_ind_mask(plci); /* all APPLs within this inc. call are allowed to dial in */
+ /* all APPLs within this inc. call are allowed to dial in */
+ bitmap_fill(plci->group_optimization_mask_table, MAX_APPL);
if (!a->group_optimization_enabled)
{
@@ -14771,13 +14634,12 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci)
if (a->plci[k].Id)
{
auxplci = &a->plci[k];
- if (auxplci->appl == &application[i]) /* application has a busy PLCI */
- {
+ if (auxplci->appl == &application[i]) {
+ /* application has a busy PLCI */
busy = true;
dbug(1, dprintf("Appl 0x%x is busy", i + 1));
- }
- else if (test_c_ind_mask_bit(auxplci, i)) /* application has an incoming call pending */
- {
+ } else if (test_bit(i, auxplci->c_ind_mask_table)) {
+ /* application has an incoming call pending */
busy = true;
dbug(1, dprintf("Appl 0x%x has inc. call pending", i + 1));
}
@@ -14826,7 +14688,8 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci)
if (appl_number_group_type[i] == appl_number_group_type[j])
{
dbug(1, dprintf("Appl 0x%x is member of group 0x%x, no call", j + 1, appl_number_group_type[j]));
- clear_group_ind_mask_bit(plci, j); /* disable call on other group members */
+ /* disable call on other group members */
+ __clear_bit(j, plci->group_optimization_mask_table);
appl_number_group_type[j] = 0; /* remove disabled group member from group list */
}
}
@@ -14834,7 +14697,7 @@ static void group_optimization(DIVA_CAPI_ADAPTER *a, PLCI *plci)
}
else /* application should not get a call */
{
- clear_group_ind_mask_bit(plci, i);
+ __clear_bit(i, plci->group_optimization_mask_table);
}
}
diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.h b/drivers/isdn/hardware/mISDN/hfcsusb.h
index 4157311..5f8f1d9 100644
--- a/drivers/isdn/hardware/mISDN/hfcsusb.h
+++ b/drivers/isdn/hardware/mISDN/hfcsusb.h
@@ -337,7 +337,7 @@ static const char *HFC_NT_LAYER1_STATES[HFC_MAX_NT_LAYER1_STATE + 1] = {
};
/* supported devices */
-static struct usb_device_id hfcsusb_idtab[] = {
+static const struct usb_device_id hfcsusb_idtab[] = {
{
USB_DEVICE(0x0959, 0x2bd0),
.driver_info = (unsigned long) &((struct hfcsusb_vdata)
diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c
index ef47480..e821218 100644
--- a/drivers/isdn/hisax/hfc_usb.c
+++ b/drivers/isdn/hisax/hfc_usb.c
@@ -65,7 +65,7 @@ typedef struct {
} hfcsusb_vdata;
/* VID/PID device list */
-static struct usb_device_id hfcusb_idtab[] = {
+static const struct usb_device_id hfcusb_idtab[] = {
{
USB_DEVICE(0x0959, 0x2bd0),
.driver_info = (unsigned long) &((hfcsusb_vdata)
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index b2f6556..8dff900 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -45,7 +45,6 @@ obj-$(CONFIG_ETHERNET) += ethernet/
obj-$(CONFIG_FDDI) += fddi/
obj-$(CONFIG_HIPPI) += hippi/
obj-$(CONFIG_HAMRADIO) += hamradio/
-obj-$(CONFIG_IRDA) += irda/
obj-$(CONFIG_PLIP) += plip/
obj-$(CONFIG_PPP) += ppp/
obj-$(CONFIG_PPP_ASYNC) += ppp/
diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index a306de4..9375cef 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -311,9 +311,7 @@ module_param(ipddp_mode, int, 0);
static int __init ipddp_init_module(void)
{
dev_ipddp = ipddp_init();
- if (IS_ERR(dev_ipddp))
- return PTR_ERR(dev_ipddp);
- return 0;
+ return PTR_ERR_OR_ZERO(dev_ipddp);
}
static void __exit ipddp_cleanup_module(void)
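
PTR_ERR_OR_ZERO() from <linux/err.h> is the canonical helper for exactly the pattern being deleted; the hypothetical helper below just spells out the equivalence:

	#include <linux/err.h>

	static int __init example_init(void)
	{
		struct net_device *dev = ipddp_init();

		/* same as: if (IS_ERR(dev)) return PTR_ERR(dev); return 0; */
		return PTR_ERR_OR_ZERO(dev);
	}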
diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h
index cbb4f85..d09b2b4 100644
--- a/drivers/net/arcnet/arcdevice.h
+++ b/drivers/net/arcnet/arcdevice.h
@@ -20,7 +20,7 @@
#include <linux/if_arcnet.h>
#ifdef __KERNEL__
-#include <linux/irqreturn.h>
+#include <linux/interrupt.h>
/*
* RECON_THRESHOLD is the maximum number of RECON messages to receive
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c
index 01cab95..eb7f767 100644
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -109,7 +109,7 @@ static struct attribute *com20020_state_attrs[] = {
NULL,
};
-static struct attribute_group com20020_state_group = {
+static const struct attribute_group com20020_state_group = {
.name = NULL,
.attrs = com20020_state_attrs,
};
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 770623a..040b493 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -759,7 +759,7 @@ static struct attribute *per_bond_attrs[] = {
NULL,
};
-static struct attribute_group bonding_group = {
+static const struct attribute_group bonding_group = {
.name = "bonding",
.attrs = per_bond_attrs,
};
diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index 0e0df0b..f37ce0e 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -1232,7 +1232,7 @@ static struct attribute *at91_sysfs_attrs[] = {
NULL,
};
-static struct attribute_group at91_sysfs_attr_group = {
+static const struct attribute_group at91_sysfs_attr_group = {
.attrs = at91_sysfs_attrs,
};
diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
index e36d105..46a746e 100644
--- a/drivers/net/can/c_can/c_can_platform.c
+++ b/drivers/net/can/c_can/c_can_platform.c
@@ -220,7 +220,7 @@ static const struct c_can_driver_data am3352_dcan_drvdata = {
.raminit_bits = am3352_raminit_bits,
};
-static struct platform_device_id c_can_id_table[] = {
+static const struct platform_device_id c_can_id_table[] = {
{
.name = KBUILD_MODNAME,
.driver_data = (kernel_ulong_t)&c_can_drvdata,
diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c
index 2ba1a81..12a53c8 100644
--- a/drivers/net/can/janz-ican3.c
+++ b/drivers/net/can/janz-ican3.c
@@ -1875,7 +1875,7 @@ static struct attribute *ican3_sysfs_attrs[] = {
NULL,
};
-static struct attribute_group ican3_sysfs_attr_group = {
+static const struct attribute_group ican3_sysfs_attr_group = {
.attrs = ican3_sysfs_attrs,
};
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 7f36d3e..274f367 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1053,49 +1053,6 @@ int b53_vlan_del(struct dsa_switch *ds, int port,
}
EXPORT_SYMBOL(b53_vlan_del);
-int b53_vlan_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb)
-{
- struct b53_device *dev = ds->priv;
- u16 vid, vid_start = 0, pvid;
- struct b53_vlan *vl;
- int err = 0;
-
- if (is5325(dev) || is5365(dev))
- vid_start = 1;
-
- b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid);
-
- /* Use our software cache for dumps, since we do not have any HW
- * operation returning only the used/valid VLANs
- */
- for (vid = vid_start; vid < dev->num_vlans; vid++) {
- vl = &dev->vlans[vid];
-
- if (!vl->valid)
- continue;
-
- if (!(vl->members & BIT(port)))
- continue;
-
- vlan->vid_begin = vlan->vid_end = vid;
- vlan->flags = 0;
-
- if (vl->untag & BIT(port))
- vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
- if (pvid == vid)
- vlan->flags |= BRIDGE_VLAN_INFO_PVID;
-
- err = cb(&vlan->obj);
- if (err)
- break;
- }
-
- return err;
-}
-EXPORT_SYMBOL(b53_vlan_dump);
-
/* Address Resolution Logic routines */
static int b53_arl_op_wait(struct b53_device *dev)
{
@@ -1213,9 +1170,8 @@ static int b53_arl_op(struct b53_device *dev, int op, int port,
return b53_arl_rw_op(dev, 0);
}
-int b53_fdb_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
+int b53_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
{
struct b53_device *priv = ds->priv;
@@ -1225,27 +1181,16 @@ int b53_fdb_prepare(struct dsa_switch *ds, int port,
if (is5325(priv) || is5365(priv))
return -EOPNOTSUPP;
- return 0;
-}
-EXPORT_SYMBOL(b53_fdb_prepare);
-
-void b53_fdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
-{
- struct b53_device *priv = ds->priv;
-
- if (b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, true))
- pr_err("%s: failed to add MAC address\n", __func__);
+ return b53_arl_op(priv, 0, port, addr, vid, true);
}
EXPORT_SYMBOL(b53_fdb_add);
int b53_fdb_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb)
+ const unsigned char *addr, u16 vid)
{
struct b53_device *priv = ds->priv;
- return b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, false);
+ return b53_arl_op(priv, 0, port, addr, vid, false);
}
EXPORT_SYMBOL(b53_fdb_del);
@@ -1282,8 +1227,7 @@ static void b53_arl_search_rd(struct b53_device *dev, u8 idx,
}
static int b53_fdb_copy(int port, const struct b53_arl_entry *ent,
- struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb)
+ dsa_fdb_dump_cb_t *cb, void *data)
{
if (!ent->is_valid)
return 0;
@@ -1291,16 +1235,11 @@ static int b53_fdb_copy(int port, const struct b53_arl_entry *ent,
if (port != ent->port)
return 0;
- ether_addr_copy(fdb->addr, ent->mac);
- fdb->vid = ent->vid;
- fdb->ndm_state = ent->is_static ? NUD_NOARP : NUD_REACHABLE;
-
- return cb(&fdb->obj);
+ return cb(ent->mac, ent->vid, ent->is_static, data);
}
int b53_fdb_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb)
+ dsa_fdb_dump_cb_t *cb, void *data)
{
struct b53_device *priv = ds->priv;
struct b53_arl_entry results[2];
@@ -1318,13 +1257,13 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
return ret;
b53_arl_search_rd(priv, 0, &results[0]);
- ret = b53_fdb_copy(port, &results[0], fdb, cb);
+ ret = b53_fdb_copy(port, &results[0], cb, data);
if (ret)
return ret;
if (priv->num_arl_entries > 2) {
b53_arl_search_rd(priv, 1, &results[1]);
- ret = b53_fdb_copy(port, &results[1], fdb, cb);
+ ret = b53_fdb_copy(port, &results[1], cb, data);
if (ret)
return ret;
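
The FDB dump changes in this and the following drivers all have the same shape: instead of filling a switchdev_obj_port_fdb and invoking a switchdev dump callback, the driver hands raw (addr, vid, is_static) tuples to a DSA-level callback. Assuming the dsa_fdb_dump_cb_t type introduced in include/net/dsa.h is int (*)(const unsigned char *addr, u16 vid, bool is_static, void *data), a toy consumer might look like:

	static int example_count_static(const unsigned char *addr, u16 vid,
					bool is_static, void *data)
	{
		unsigned int *count = data;	/* caller-supplied cookie */

		if (is_static)
			(*count)++;
		return 0;			/* non-zero stops the walk */
	}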
@@ -1564,8 +1503,6 @@ static const struct dsa_switch_ops b53_switch_ops = {
.port_vlan_prepare = b53_vlan_prepare,
.port_vlan_add = b53_vlan_add,
.port_vlan_del = b53_vlan_del,
- .port_vlan_dump = b53_vlan_dump,
- .port_fdb_prepare = b53_fdb_prepare,
.port_fdb_dump = b53_fdb_dump,
.port_fdb_add = b53_fdb_add,
.port_fdb_del = b53_fdb_del,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 155a9c4..01bd8cb 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -393,20 +393,12 @@ void b53_vlan_add(struct dsa_switch *ds, int port,
struct switchdev_trans *trans);
int b53_vlan_del(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan);
-int b53_vlan_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb);
-int b53_fdb_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans);
-void b53_fdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans);
+int b53_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid);
int b53_fdb_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb);
+ const unsigned char *addr, u16 vid);
int b53_fdb_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb);
+ dsa_fdb_dump_cb_t *cb, void *data);
int b53_mirror_add(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror, bool ingress);
void b53_mirror_del(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 9b6ce7c..d7b53d5 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -103,6 +103,7 @@ static void bcm_sf2_brcm_hdr_setup(struct bcm_sf2_priv *priv, int port)
static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+ unsigned int i;
u32 reg, offset;
if (priv->type == BCM7445_DEVICE_ID)
@@ -129,6 +130,14 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
reg |= MII_DUMB_FWDG_EN;
core_writel(priv, reg, CORE_SWITCH_CTRL);
+ /* Configure Traffic Class to QoS mapping, allow each priority to map
+ * to a different queue number
+ */
+ reg = core_readl(priv, CORE_PORT_TC2_QOS_MAP_PORT(port));
+ for (i = 0; i < SF2_NUM_EGRESS_QUEUES; i++)
+ reg |= i << (PRT_TO_QID_SHIFT * i);
+ core_writel(priv, reg, CORE_PORT_TC2_QOS_MAP_PORT(port));
+
bcm_sf2_brcm_hdr_setup(priv, port);
/* Force link status for IMP port */
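
For readers of the loop above: assuming PRT_TO_QID_SHIFT is 3 (one 3-bit queue field per traffic class) and ignoring the value read back from the register, the mapping it programs is the identity:

	u32 reg = 0;
	unsigned int i;

	for (i = 0; i < SF2_NUM_EGRESS_QUEUES; i++)	/* 8 queues */
		reg |= i << (PRT_TO_QID_SHIFT * i);
	/* reg ends up 0x00fac688: TC7 -> queue 7 (bits 23:21), ..., TC0 -> queue 0 (bits 2:0) */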
@@ -244,7 +253,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
* to a different queue number
*/
reg = core_readl(priv, CORE_PORT_TC2_QOS_MAP_PORT(port));
- for (i = 0; i < 8; i++)
+ for (i = 0; i < SF2_NUM_EGRESS_QUEUES; i++)
reg |= i << (PRT_TO_QID_SHIFT * i);
core_writel(priv, reg, CORE_PORT_TC2_QOS_MAP_PORT(port));
@@ -327,12 +336,8 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port,
static int bcm_sf2_eee_init(struct dsa_switch *ds, int port,
struct phy_device *phy)
{
- struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
- struct ethtool_eee *p = &priv->port_sts[port].eee;
int ret;
- p->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full);
-
ret = phy_init_eee(phy, 0);
if (ret)
return 0;
@@ -342,8 +347,8 @@ static int bcm_sf2_eee_init(struct dsa_switch *ds, int port,
return 1;
}
-static int bcm_sf2_sw_get_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *e)
+static int bcm_sf2_sw_get_mac_eee(struct dsa_switch *ds, int port,
+ struct ethtool_eee *e)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
struct ethtool_eee *p = &priv->port_sts[port].eee;
@@ -356,22 +361,14 @@ static int bcm_sf2_sw_get_eee(struct dsa_switch *ds, int port,
return 0;
}
-static int bcm_sf2_sw_set_eee(struct dsa_switch *ds, int port,
- struct phy_device *phydev,
- struct ethtool_eee *e)
+static int bcm_sf2_sw_set_mac_eee(struct dsa_switch *ds, int port,
+ struct ethtool_eee *e)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
struct ethtool_eee *p = &priv->port_sts[port].eee;
p->eee_enabled = e->eee_enabled;
-
- if (!p->eee_enabled) {
- bcm_sf2_eee_enable_set(ds, port, false);
- } else {
- p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev);
- if (!p->eee_enabled)
- return -EOPNOTSUPP;
- }
+ bcm_sf2_eee_enable_set(ds, port, e->eee_enabled);
return 0;
}
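
Context for the get_eee/set_eee to get_mac_eee/set_mac_eee rename here and in mv88e6xxx below: the DSA core now drives the PHY half of EEE through phylib, so switch drivers only manage their own MAC. A rough sketch of the resulting split (function and parameter names illustrative, not verbatim core code):

	static int example_set_eee(struct dsa_switch *ds, int port,
				   struct net_device *slave, struct ethtool_eee *e)
	{
		int ret;

		ret = ds->ops->set_mac_eee(ds, port, e);	/* MAC side: driver */
		if (ret)
			return ret;

		return phy_ethtool_set_eee(slave->phydev, e);	/* PHY side: core */
	}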
@@ -800,7 +797,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds)
static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port,
struct ethtool_wolinfo *wol)
{
- struct net_device *p = ds->dst[ds->index].cpu_dp->netdev;
+ struct net_device *p = ds->dst->cpu_dp->netdev;
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
struct ethtool_wolinfo pwol;
@@ -823,7 +820,7 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port,
static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port,
struct ethtool_wolinfo *wol)
{
- struct net_device *p = ds->dst[ds->index].cpu_dp->netdev;
+ struct net_device *p = ds->dst->cpu_dp->netdev;
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
s8 cpu_port = ds->dst->cpu_dp->index;
struct ethtool_wolinfo pwol;
@@ -995,7 +992,7 @@ static int bcm_sf2_core_write64(struct b53_device *dev, u8 page, u8 reg,
return 0;
}
-static struct b53_io_ops bcm_sf2_io_ops = {
+static const struct b53_io_ops bcm_sf2_io_ops = {
.read8 = bcm_sf2_core_read8,
.read16 = bcm_sf2_core_read16,
.read32 = bcm_sf2_core_read32,
@@ -1023,8 +1020,8 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
.set_wol = bcm_sf2_sw_set_wol,
.port_enable = bcm_sf2_port_setup,
.port_disable = bcm_sf2_port_disable,
- .get_eee = bcm_sf2_sw_get_eee,
- .set_eee = bcm_sf2_sw_set_eee,
+ .get_mac_eee = bcm_sf2_sw_get_mac_eee,
+ .set_mac_eee = bcm_sf2_sw_set_mac_eee,
.port_bridge_join = b53_br_join,
.port_bridge_leave = b53_br_leave,
.port_stp_state_set = b53_br_set_stp_state,
@@ -1033,8 +1030,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
.port_vlan_prepare = b53_vlan_prepare,
.port_vlan_add = b53_vlan_add,
.port_vlan_del = b53_vlan_del,
- .port_vlan_dump = b53_vlan_dump,
- .port_fdb_prepare = b53_fdb_prepare,
.port_fdb_dump = b53_fdb_dump,
.port_fdb_add = b53_fdb_add,
.port_fdb_del = b53_fdb_del,
@@ -1165,6 +1160,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
ds = dev->ds;
ds->ops = &bcm_sf2_ops;
+ /* Advertise the 8 egress queues */
+ ds->num_tx_queues = SF2_NUM_EGRESS_QUEUES;
+
dev_set_drvdata(&pdev->dev, priv);
spin_lock_init(&priv->indir_lock);
diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index 7f9125e..02c499f 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -131,12 +131,12 @@ static inline u32 bcm_sf2_mangle_addr(struct bcm_sf2_priv *priv, u32 off)
#define SF2_IO_MACRO(name) \
static inline u32 name##_readl(struct bcm_sf2_priv *priv, u32 off) \
{ \
- return __raw_readl(priv->name + off); \
+ return readl_relaxed(priv->name + off); \
} \
static inline void name##_writel(struct bcm_sf2_priv *priv, \
u32 val, u32 off) \
{ \
- __raw_writel(val, priv->name + off); \
+ writel_relaxed(val, priv->name + off); \
} \
/* Accesses to 64-bits register requires us to latch the hi/lo pairs
@@ -180,23 +180,23 @@ static inline void intrl2_##which##_mask_set(struct bcm_sf2_priv *priv, \
static inline u32 core_readl(struct bcm_sf2_priv *priv, u32 off)
{
u32 tmp = bcm_sf2_mangle_addr(priv, off);
- return __raw_readl(priv->core + tmp);
+ return readl_relaxed(priv->core + tmp);
}
static inline void core_writel(struct bcm_sf2_priv *priv, u32 val, u32 off)
{
u32 tmp = bcm_sf2_mangle_addr(priv, off);
- __raw_writel(val, priv->core + tmp);
+ writel_relaxed(val, priv->core + tmp);
}
static inline u32 reg_readl(struct bcm_sf2_priv *priv, u16 off)
{
- return __raw_readl(priv->reg + priv->reg_offsets[off]);
+ return readl_relaxed(priv->reg + priv->reg_offsets[off]);
}
static inline void reg_writel(struct bcm_sf2_priv *priv, u32 val, u16 off)
{
- __raw_writel(val, priv->reg + priv->reg_offsets[off]);
+ writel_relaxed(val, priv->reg + priv->reg_offsets[off]);
}
SF2_IO64_MACRO(core);
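
For readers wondering why __raw_readl/__raw_writel were replaced, the rough semantics of the accessor families involved (a sketch, not the actual per-arch implementations):

	/*
	 *	__raw_readl(a)		raw load, no byte-swap, no barriers
	 *	readl_relaxed(a)	le32_to_cpu(__raw_readl(a)), no barriers
	 *	readl(a)		readl_relaxed(a) plus ordering barriers
	 *
	 * The switch registers are little-endian, so the relaxed forms keep
	 * the raw accessors' performance while staying correct on a
	 * big-endian kernel.
	 */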
diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h
index 2605245..49695fc 100644
--- a/drivers/net/dsa/bcm_sf2_regs.h
+++ b/drivers/net/dsa/bcm_sf2_regs.h
@@ -401,4 +401,7 @@ enum bcm_sf2_reg_offs {
#define CFP_NUM_RULES 256
+/* Number of egress queues per port */
+#define SF2_NUM_EGRESS_QUEUES 8
+
#endif /* __BCM_SF2_REGS_H */
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index fdd8f38..d55051a 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -257,44 +257,7 @@ static int dsa_loop_port_vlan_del(struct dsa_switch *ds, int port,
return 0;
}
-static int dsa_loop_port_vlan_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_loop_priv *ps = ds->priv;
- struct mii_bus *bus = ps->bus;
- struct dsa_loop_vlan *vl;
- u16 vid, vid_start = 0;
- int err = 0;
-
- dev_dbg(ds->dev, "%s\n", __func__);
-
- /* Just do a sleeping operation to make lockdep checks effective */
- mdiobus_read(bus, ps->port_base + port, MII_BMSR);
-
- for (vid = vid_start; vid < DSA_LOOP_VLANS; vid++) {
- vl = &ps->vlans[vid];
-
- if (!(vl->members & BIT(port)))
- continue;
-
- vlan->vid_begin = vlan->vid_end = vid;
- vlan->flags = 0;
-
- if (vl->untagged & BIT(port))
- vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
- if (ps->pvid == vid)
- vlan->flags |= BRIDGE_VLAN_INFO_PVID;
-
- err = cb(&vlan->obj);
- if (err)
- break;
- }
-
- return err;
-}
-
-static struct dsa_switch_ops dsa_loop_driver = {
+static const struct dsa_switch_ops dsa_loop_driver = {
.get_tag_protocol = dsa_loop_get_protocol,
.setup = dsa_loop_setup,
.get_strings = dsa_loop_get_strings,
@@ -310,7 +273,6 @@ static struct dsa_switch_ops dsa_loop_driver = {
.port_vlan_prepare = dsa_loop_port_vlan_prepare,
.port_vlan_add = dsa_loop_port_vlan_add,
.port_vlan_del = dsa_loop_port_vlan_del,
- .port_vlan_dump = dsa_loop_port_vlan_dump,
};
static int dsa_loop_drv_probe(struct mdio_device *mdiodev)
@@ -390,7 +352,7 @@ static void __exit dsa_loop_exit(void)
mdio_driver_unregister(&dsa_loop_drv);
for (i = 0; i < NUM_FIXED_PHYS; i++)
- if (phydevs[i])
+ if (!IS_ERR(phydevs[i]))
fixed_phy_unregister(phydevs[i]);
}
module_exit(dsa_loop_exit);
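
The unregister fix matters because fixed_phy_register() never returns NULL on failure; it returns an ERR_PTR(), which a plain truth test happily passes along. Sketch of the matching registration side, assuming the four-argument fixed_phy_register() of this kernel as used by the driver's init path:

	phydevs[i] = fixed_phy_register(PHY_POLL, &status, -1, NULL);
	if (IS_ERR(phydevs[i]))
		pr_err("dsa_loop: failed to register fixed PHY %d\n", i);

	/* teardown: only entries that passed the ERR_PTR check are valid */
	if (!IS_ERR(phydevs[i]))
		fixed_phy_unregister(phydevs[i]);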
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index cd76e61..b471413 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -20,6 +20,11 @@
#include "lan9303.h"
+#define LAN9303_NUM_PORTS 3
+
+/* 13.2 System Control and Status Registers
+ * Multiply register number by 4 to get address offset.
+ */
#define LAN9303_CHIP_REV 0x14
# define LAN9303_CHIP_ID 0x9303
#define LAN9303_IRQ_CFG 0x15
@@ -53,6 +58,9 @@
#define LAN9303_VIRT_PHY_BASE 0x70
#define LAN9303_VIRT_SPECIAL_CTRL 0x77
+/* 13.4 Switch Fabric Control and Status Registers
+ * Accessed indirectly via SWITCH_CSR_CMD, SWITCH_CSR_DATA.
+ */
#define LAN9303_SW_DEV_ID 0x0000
#define LAN9303_SW_RESET 0x0001
#define LAN9303_SW_RESET_RESET BIT(0)
@@ -153,9 +161,7 @@
# define LAN9303_BM_EGRSS_PORT_TYPE_SPECIAL_TAG_PORT1 (BIT(9) | BIT(8))
# define LAN9303_BM_EGRSS_PORT_TYPE_SPECIAL_TAG_PORT0 (BIT(1) | BIT(0))
-#define LAN9303_PORT_0_OFFSET 0x400
-#define LAN9303_PORT_1_OFFSET 0x800
-#define LAN9303_PORT_2_OFFSET 0xc00
+#define LAN9303_SWITCH_PORT_REG(port, reg0) (0x400 * (port) + (reg0))
/* the built-in PHYs are of type LAN911X */
#define MII_LAN911X_SPECIAL_MODES 0x12
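
The three per-port offset constants collapse into one helper because the switch-fabric registers are banked at 0x400 strides and the existing *_0 register constants already include the port-0 bank. So, for any per-port register number R0 (illustrative):

	LAN9303_SWITCH_PORT_REG(0, R0)	/* == R0          (port 0) */
	LAN9303_SWITCH_PORT_REG(1, R0)	/* == R0 + 0x400  (port 1) */
	LAN9303_SWITCH_PORT_REG(2, R0)	/* == R0 + 0x800  (port 2) */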
@@ -242,7 +248,7 @@ static int lan9303_virt_phy_reg_write(struct lan9303 *chip, int regnum, u16 val)
return regmap_write(chip->regmap, LAN9303_VIRT_PHY_BASE + regnum, val);
}
-static int lan9303_port_phy_reg_wait_for_completion(struct lan9303 *chip)
+static int lan9303_indirect_phy_wait_for_completion(struct lan9303 *chip)
{
int ret, i;
u32 reg;
@@ -262,7 +268,7 @@ static int lan9303_port_phy_reg_wait_for_completion(struct lan9303 *chip)
return -EIO;
}
-static int lan9303_port_phy_reg_read(struct lan9303 *chip, int addr, int regnum)
+static int lan9303_indirect_phy_read(struct lan9303 *chip, int addr, int regnum)
{
int ret;
u32 val;
@@ -272,7 +278,7 @@ static int lan9303_port_phy_reg_read(struct lan9303 *chip, int addr, int regnum)
mutex_lock(&chip->indirect_mutex);
- ret = lan9303_port_phy_reg_wait_for_completion(chip);
+ ret = lan9303_indirect_phy_wait_for_completion(chip);
if (ret)
goto on_error;
@@ -281,7 +287,7 @@ static int lan9303_port_phy_reg_read(struct lan9303 *chip, int addr, int regnum)
if (ret)
goto on_error;
- ret = lan9303_port_phy_reg_wait_for_completion(chip);
+ ret = lan9303_indirect_phy_wait_for_completion(chip);
if (ret)
goto on_error;
@@ -299,8 +305,8 @@ on_error:
return ret;
}
-static int lan9303_phy_reg_write(struct lan9303 *chip, int addr, int regnum,
- unsigned int val)
+static int lan9303_indirect_phy_write(struct lan9303 *chip, int addr,
+ int regnum, u16 val)
{
int ret;
u32 reg;
@@ -311,7 +317,7 @@ static int lan9303_phy_reg_write(struct lan9303 *chip, int addr, int regnum,
mutex_lock(&chip->indirect_mutex);
- ret = lan9303_port_phy_reg_wait_for_completion(chip);
+ ret = lan9303_indirect_phy_wait_for_completion(chip);
if (ret)
goto on_error;
@@ -328,6 +334,12 @@ on_error:
return ret;
}
+const struct lan9303_phy_ops lan9303_indirect_phy_ops = {
+ .phy_read = lan9303_indirect_phy_read,
+ .phy_write = lan9303_indirect_phy_write,
+};
+EXPORT_SYMBOL_GPL(lan9303_indirect_phy_ops);
+
static int lan9303_switch_wait_for_completion(struct lan9303 *chip)
{
int ret, i;
@@ -416,6 +428,20 @@ on_error:
return ret;
}
+static int lan9303_write_switch_port(struct lan9303 *chip, int port,
+ u16 regnum, u32 val)
+{
+ return lan9303_write_switch_reg(
+ chip, LAN9303_SWITCH_PORT_REG(port, regnum), val);
+}
+
+static int lan9303_read_switch_port(struct lan9303 *chip, int port,
+ u16 regnum, u32 *val)
+{
+ return lan9303_read_switch_reg(
+ chip, LAN9303_SWITCH_PORT_REG(port, regnum), val);
+}
+
static int lan9303_detect_phy_setup(struct lan9303 *chip)
{
int reg;
@@ -427,14 +453,15 @@ static int lan9303_detect_phy_setup(struct lan9303 *chip)
* Special reg 18 of phy 3 reads as 0x0000, if 'phy_addr_sel_strap' is 0
* and the IDs are 0-1-2, else it contains something different from
* 0x0000, which means 'phy_addr_sel_strap' is 1 and the IDs are 1-2-3.
+ * 0xffff is returned on MDIO read with no response.
*/
- reg = lan9303_port_phy_reg_read(chip, 3, MII_LAN911X_SPECIAL_MODES);
+ reg = chip->ops->phy_read(chip, 3, MII_LAN911X_SPECIAL_MODES);
if (reg < 0) {
dev_err(chip->dev, "Failed to detect phy config: %d\n", reg);
return reg;
}
- if (reg != 0)
+ if ((reg != 0) && (reg != 0xffff))
chip->phy_addr_sel_strap = 1;
else
chip->phy_addr_sel_strap = 0;
@@ -445,40 +472,37 @@ static int lan9303_detect_phy_setup(struct lan9303 *chip)
return 0;
}
-#define LAN9303_MAC_RX_CFG_OFFS (LAN9303_MAC_RX_CFG_0 - LAN9303_PORT_0_OFFSET)
-#define LAN9303_MAC_TX_CFG_OFFS (LAN9303_MAC_TX_CFG_0 - LAN9303_PORT_0_OFFSET)
-
-static int lan9303_disable_packet_processing(struct lan9303 *chip,
- unsigned int port)
+static int lan9303_disable_processing_port(struct lan9303 *chip,
+ unsigned int port)
{
int ret;
/* disable RX, but keep register reset default values else */
- ret = lan9303_write_switch_reg(chip, LAN9303_MAC_RX_CFG_OFFS + port,
- LAN9303_MAC_RX_CFG_X_REJECT_MAC_TYPES);
+ ret = lan9303_write_switch_port(chip, port, LAN9303_MAC_RX_CFG_0,
+ LAN9303_MAC_RX_CFG_X_REJECT_MAC_TYPES);
if (ret)
return ret;
/* disable TX, but keep register reset default values else */
- return lan9303_write_switch_reg(chip, LAN9303_MAC_TX_CFG_OFFS + port,
+ return lan9303_write_switch_port(chip, port, LAN9303_MAC_TX_CFG_0,
LAN9303_MAC_TX_CFG_X_TX_IFG_CONFIG_DEFAULT |
LAN9303_MAC_TX_CFG_X_TX_PAD_ENABLE);
}
-static int lan9303_enable_packet_processing(struct lan9303 *chip,
- unsigned int port)
+static int lan9303_enable_processing_port(struct lan9303 *chip,
+ unsigned int port)
{
int ret;
/* enable RX and keep register reset default values else */
- ret = lan9303_write_switch_reg(chip, LAN9303_MAC_RX_CFG_OFFS + port,
- LAN9303_MAC_RX_CFG_X_REJECT_MAC_TYPES |
- LAN9303_MAC_RX_CFG_X_RX_ENABLE);
+ ret = lan9303_write_switch_port(chip, port, LAN9303_MAC_RX_CFG_0,
+ LAN9303_MAC_RX_CFG_X_REJECT_MAC_TYPES |
+ LAN9303_MAC_RX_CFG_X_RX_ENABLE);
if (ret)
return ret;
/* enable TX and keep register reset default values else */
- return lan9303_write_switch_reg(chip, LAN9303_MAC_TX_CFG_OFFS + port,
+ return lan9303_write_switch_port(chip, port, LAN9303_MAC_TX_CFG_0,
LAN9303_MAC_TX_CFG_X_TX_IFG_CONFIG_DEFAULT |
LAN9303_MAC_TX_CFG_X_TX_PAD_ENABLE |
LAN9303_MAC_TX_CFG_X_TX_ENABLE);
@@ -543,15 +567,16 @@ static int lan9303_handle_reset(struct lan9303 *chip)
/* stop processing packets for all ports */
static int lan9303_disable_processing(struct lan9303 *chip)
{
- int ret;
+ int p;
- ret = lan9303_disable_packet_processing(chip, LAN9303_PORT_0_OFFSET);
- if (ret)
- return ret;
- ret = lan9303_disable_packet_processing(chip, LAN9303_PORT_1_OFFSET);
- if (ret)
- return ret;
- return lan9303_disable_packet_processing(chip, LAN9303_PORT_2_OFFSET);
+ for (p = 0; p < LAN9303_NUM_PORTS; p++) {
+ int ret = lan9303_disable_processing_port(chip, p);
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
static int lan9303_check_device(struct lan9303 *chip)
@@ -621,7 +646,7 @@ static int lan9303_setup(struct dsa_switch *ds)
if (ret)
dev_err(chip->dev, "failed to separate ports %d\n", ret);
- ret = lan9303_enable_packet_processing(chip, LAN9303_PORT_0_OFFSET);
+ ret = lan9303_enable_processing_port(chip, 0);
if (ret)
dev_err(chip->dev, "failed to re-enable switching %d\n", ret);
@@ -687,19 +712,18 @@ static void lan9303_get_ethtool_stats(struct dsa_switch *ds, int port,
uint64_t *data)
{
struct lan9303 *chip = ds->priv;
- u32 reg;
- unsigned int u, poff;
- int ret;
-
- poff = port * 0x400;
+ unsigned int u;
for (u = 0; u < ARRAY_SIZE(lan9303_mib); u++) {
- ret = lan9303_read_switch_reg(chip,
- lan9303_mib[u].offset + poff,
- &reg);
+ u32 reg;
+ int ret;
+
+ ret = lan9303_read_switch_port(
+ chip, port, lan9303_mib[u].offset, &reg);
+
if (ret)
- dev_warn(chip->dev, "Reading status reg %u failed\n",
- lan9303_mib[u].offset + poff);
+ dev_warn(chip->dev, "Reading status port %d reg %u failed\n",
+ port, lan9303_mib[u].offset);
data[u] = reg;
}
}
@@ -719,7 +743,7 @@ static int lan9303_phy_read(struct dsa_switch *ds, int phy, int regnum)
if (phy > phy_base + 2)
return -ENODEV;
- return lan9303_port_phy_reg_read(chip, phy, regnum);
+ return chip->ops->phy_read(chip, phy, regnum);
}
static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum,
@@ -733,7 +757,7 @@ static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum,
if (phy > phy_base + 2)
return -ENODEV;
- return lan9303_phy_reg_write(chip, phy, regnum, val);
+ return chip->ops->phy_write(chip, phy, regnum, val);
}
static int lan9303_port_enable(struct dsa_switch *ds, int port,
@@ -744,11 +768,8 @@ static int lan9303_port_enable(struct dsa_switch *ds, int port,
/* enable internal packet processing */
switch (port) {
case 1:
- return lan9303_enable_packet_processing(chip,
- LAN9303_PORT_1_OFFSET);
case 2:
- return lan9303_enable_packet_processing(chip,
- LAN9303_PORT_2_OFFSET);
+ return lan9303_enable_processing_port(chip, port);
default:
dev_dbg(chip->dev,
"Error: request to power up invalid port %d\n", port);
@@ -765,14 +786,10 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port,
/* disable internal packet processing */
switch (port) {
case 1:
- lan9303_disable_packet_processing(chip, LAN9303_PORT_1_OFFSET);
- lan9303_phy_reg_write(chip, chip->phy_addr_sel_strap + 1,
- MII_BMCR, BMCR_PDOWN);
- break;
case 2:
- lan9303_disable_packet_processing(chip, LAN9303_PORT_2_OFFSET);
- lan9303_phy_reg_write(chip, chip->phy_addr_sel_strap + 2,
- MII_BMCR, BMCR_PDOWN);
+ lan9303_disable_processing_port(chip, port);
+ lan9303_phy_write(ds, chip->phy_addr_sel_strap + port,
+ MII_BMCR, BMCR_PDOWN);
break;
default:
dev_dbg(chip->dev,
@@ -780,7 +797,7 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port,
}
}
-static struct dsa_switch_ops lan9303_switch_ops = {
+static const struct dsa_switch_ops lan9303_switch_ops = {
.get_tag_protocol = lan9303_get_tag_protocol,
.setup = lan9303_setup,
.get_strings = lan9303_get_strings,
@@ -794,7 +811,7 @@ static struct dsa_switch_ops lan9303_switch_ops = {
static int lan9303_register_switch(struct lan9303 *chip)
{
- chip->ds = dsa_switch_alloc(chip->dev, DSA_MAX_PORTS);
+ chip->ds = dsa_switch_alloc(chip->dev, LAN9303_NUM_PORTS);
if (!chip->ds)
return -ENOMEM;
diff --git a/drivers/net/dsa/lan9303.h b/drivers/net/dsa/lan9303.h
index d1512da..4d8be55 100644
--- a/drivers/net/dsa/lan9303.h
+++ b/drivers/net/dsa/lan9303.h
@@ -2,6 +2,15 @@
#include <linux/device.h>
#include <net/dsa.h>
+struct lan9303;
+
+struct lan9303_phy_ops {
+ /* PHY 1 and 2 access */
+ int (*phy_read)(struct lan9303 *chip, int port, int regnum);
+ int (*phy_write)(struct lan9303 *chip, int port,
+ int regnum, u16 val);
+};
+
struct lan9303 {
struct device *dev;
struct regmap *regmap;
@@ -11,9 +20,11 @@ struct lan9303 {
bool phy_addr_sel_strap;
struct dsa_switch *ds;
struct mutex indirect_mutex; /* protect indexed register access */
+ const struct lan9303_phy_ops *ops;
};
extern const struct regmap_access_table lan9303_register_set;
+extern const struct lan9303_phy_ops lan9303_indirect_phy_ops;
int lan9303_probe(struct lan9303 *chip, struct device_node *np);
int lan9303_remove(struct lan9303 *chip);
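
Why the ops table: the two bus front-ends reach the built-in PHYs differently. Over I2C they are only reachable through the chip's indirect access window, while over MDIO they sit directly on the host bus. As the lan9303_i2c.c and lan9303_mdio.c hunks below show, each glue driver installs the matching table at probe time and the core only ever goes through the indirection (condensed, illustrative):

	/* bus glue, at probe time: */
	sw_dev->chip.ops = &lan9303_indirect_phy_ops;	/* lan9303_i2c.c  */
	sw_dev->chip.ops = &lan9303_mdio_phy_ops;	/* lan9303_mdio.c */

	/* core, everywhere else: */
	reg = chip->ops->phy_read(chip, phy, regnum);
	ret = chip->ops->phy_write(chip, phy, regnum, val);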
diff --git a/drivers/net/dsa/lan9303_i2c.c b/drivers/net/dsa/lan9303_i2c.c
index ab3ce0d..24ec20f 100644
--- a/drivers/net/dsa/lan9303_i2c.c
+++ b/drivers/net/dsa/lan9303_i2c.c
@@ -63,6 +63,8 @@ static int lan9303_i2c_probe(struct i2c_client *client,
i2c_set_clientdata(client, sw_dev);
sw_dev->chip.dev = &client->dev;
+ sw_dev->chip.ops = &lan9303_indirect_phy_ops;
+
ret = lan9303_probe(&sw_dev->chip, client->dev.of_node);
if (ret != 0)
return ret;
diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c
index 93c36c0..fc16668 100644
--- a/drivers/net/dsa/lan9303_mdio.c
+++ b/drivers/net/dsa/lan9303_mdio.c
@@ -40,6 +40,7 @@ static int lan9303_mdio_write(void *ctx, uint32_t reg, uint32_t val)
{
struct lan9303_mdio *sw_dev = (struct lan9303_mdio *)ctx;
+ reg <<= 2; /* reg num to offset */
mutex_lock(&sw_dev->device->bus->mdio_lock);
lan9303_mdio_real_write(sw_dev->device, reg, val & 0xffff);
lan9303_mdio_real_write(sw_dev->device, reg + 2, (val >> 16) & 0xffff);
@@ -57,6 +58,7 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val)
{
struct lan9303_mdio *sw_dev = (struct lan9303_mdio *)ctx;
+ reg <<= 2; /* reg num to offset */
mutex_lock(&sw_dev->device->bus->mdio_lock);
*val = lan9303_mdio_real_read(sw_dev->device, reg);
*val |= (lan9303_mdio_real_read(sw_dev->device, reg + 2) << 16);
@@ -65,6 +67,25 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val)
return 0;
}
+int lan9303_mdio_phy_write(struct lan9303 *chip, int phy, int reg, u16 val)
+{
+ struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev);
+
+ return mdiobus_write_nested(sw_dev->device->bus, phy, reg, val);
+}
+
+int lan9303_mdio_phy_read(struct lan9303 *chip, int phy, int reg)
+{
+ struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev);
+
+ return mdiobus_read_nested(sw_dev->device->bus, phy, reg);
+}
+
+static const struct lan9303_phy_ops lan9303_mdio_phy_ops = {
+ .phy_read = lan9303_mdio_phy_read,
+ .phy_write = lan9303_mdio_phy_write,
+};
+
static const struct regmap_config lan9303_mdio_regmap_config = {
.reg_bits = 8,
.val_bits = 32,
@@ -106,6 +127,8 @@ static int lan9303_mdio_probe(struct mdio_device *mdiodev)
dev_set_drvdata(&mdiodev->dev, sw_dev);
sw_dev->chip.dev = &mdiodev->dev;
+ sw_dev->chip.ops = &lan9303_mdio_phy_ops;
+
ret = lan9303_probe(&sw_dev->chip, mdiodev->dev.of_node);
if (ret != 0)
return ret;
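
A note on the _nested accessors above: the LAN9303 is itself a device on the host MDIO bus, so its PHY ops re-enter the very bus the switch hangs off. mdiobus_read_nested()/mdiobus_write_nested() take the bus's mdio_lock with a nested lockdep class, so this legitimate recursion is not reported as a self-deadlock. Minimal sketch:

	/* from a switch callback that may already hold bus-related locks: */
	int val = mdiobus_read_nested(bus, phy_addr, MII_BMSR);

	if (val >= 0)
		mdiobus_write_nested(bus, phy_addr, MII_BMCR, BMCR_ANENABLE);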
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index b313ecd..56cd6d3 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -638,55 +638,6 @@ static int ksz_port_vlan_del(struct dsa_switch *ds, int port,
return 0;
}
-static int ksz_port_vlan_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb)
-{
- struct ksz_device *dev = ds->priv;
- u16 vid;
- u16 data;
- struct vlan_table *vlan_cache;
- int err = 0;
-
- mutex_lock(&dev->vlan_mutex);
-
- /* use dev->vlan_cache due to lack of searching valid vlan entry */
- for (vid = vlan->vid_begin; vid < dev->num_vlans; vid++) {
- vlan_cache = &dev->vlan_cache[vid];
-
- if (!(vlan_cache->table[0] & VLAN_VALID))
- continue;
-
- vlan->vid_begin = vid;
- vlan->vid_end = vid;
- vlan->flags = 0;
- if (vlan_cache->table[2] & BIT(port)) {
- if (vlan_cache->table[1] & BIT(port))
- vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
- ksz_pread16(dev, port, REG_PORT_DEFAULT_VID, &data);
- if (vid == (data & 0xFFFFF))
- vlan->flags |= BRIDGE_VLAN_INFO_PVID;
-
- err = cb(&vlan->obj);
- if (err)
- break;
- }
- }
-
- mutex_unlock(&dev->vlan_mutex);
-
- return err;
-}
-
-static int ksz_port_fdb_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
-{
- /* nothing needed */
-
- return 0;
-}
-
struct alu_struct {
/* entry 1 */
u8 is_static:1;
@@ -706,30 +657,31 @@ struct alu_struct {
u8 mac[ETH_ALEN];
};
-static void ksz_port_fdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
+static int ksz_port_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
{
struct ksz_device *dev = ds->priv;
u32 alu_table[4];
u32 data;
+ int ret = 0;
mutex_lock(&dev->alu_mutex);
/* find any entry with mac & vid */
- data = fdb->vid << ALU_FID_INDEX_S;
- data |= ((fdb->addr[0] << 8) | fdb->addr[1]);
+ data = vid << ALU_FID_INDEX_S;
+ data |= ((addr[0] << 8) | addr[1]);
ksz_write32(dev, REG_SW_ALU_INDEX_0, data);
- data = ((fdb->addr[2] << 24) | (fdb->addr[3] << 16));
- data |= ((fdb->addr[4] << 8) | fdb->addr[5]);
+ data = ((addr[2] << 24) | (addr[3] << 16));
+ data |= ((addr[4] << 8) | addr[5]);
ksz_write32(dev, REG_SW_ALU_INDEX_1, data);
/* start read operation */
ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_READ | ALU_START);
/* wait to be finished */
- if (wait_alu_ready(dev, ALU_START, 1000) < 0) {
+ ret = wait_alu_ready(dev, ALU_START, 1000);
+ if (ret < 0) {
dev_dbg(dev->dev, "Failed to read ALU\n");
goto exit;
}
@@ -740,27 +692,30 @@ static void ksz_port_fdb_add(struct dsa_switch *ds, int port,
/* update ALU entry */
alu_table[0] = ALU_V_STATIC_VALID;
alu_table[1] |= BIT(port);
- if (fdb->vid)
+ if (vid)
alu_table[1] |= ALU_V_USE_FID;
- alu_table[2] = (fdb->vid << ALU_V_FID_S);
- alu_table[2] |= ((fdb->addr[0] << 8) | fdb->addr[1]);
- alu_table[3] = ((fdb->addr[2] << 24) | (fdb->addr[3] << 16));
- alu_table[3] |= ((fdb->addr[4] << 8) | fdb->addr[5]);
+ alu_table[2] = (vid << ALU_V_FID_S);
+ alu_table[2] |= ((addr[0] << 8) | addr[1]);
+ alu_table[3] = ((addr[2] << 24) | (addr[3] << 16));
+ alu_table[3] |= ((addr[4] << 8) | addr[5]);
write_table(ds, alu_table);
ksz_write32(dev, REG_SW_ALU_CTRL__4, ALU_WRITE | ALU_START);
/* wait to be finished */
- if (wait_alu_ready(dev, ALU_START, 1000) < 0)
- dev_dbg(dev->dev, "Failed to read ALU\n");
+ ret = wait_alu_ready(dev, ALU_START, 1000);
+ if (ret < 0)
+ dev_dbg(dev->dev, "Failed to write ALU\n");
exit:
mutex_unlock(&dev->alu_mutex);
+
+ return ret;
}
static int ksz_port_fdb_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb)
+ const unsigned char *addr, u16 vid)
{
struct ksz_device *dev = ds->priv;
u32 alu_table[4];
@@ -770,12 +725,12 @@ static int ksz_port_fdb_del(struct dsa_switch *ds, int port,
mutex_lock(&dev->alu_mutex);
/* read any entry with mac & vid */
- data = fdb->vid << ALU_FID_INDEX_S;
- data |= ((fdb->addr[0] << 8) | fdb->addr[1]);
+ data = vid << ALU_FID_INDEX_S;
+ data |= ((addr[0] << 8) | addr[1]);
ksz_write32(dev, REG_SW_ALU_INDEX_0, data);
- data = ((fdb->addr[2] << 24) | (fdb->addr[3] << 16));
- data |= ((fdb->addr[4] << 8) | fdb->addr[5]);
+ data = ((addr[2] << 24) | (addr[3] << 16));
+ data |= ((addr[4] << 8) | addr[5]);
ksz_write32(dev, REG_SW_ALU_INDEX_1, data);
/* start read operation */
@@ -850,12 +805,11 @@ static void convert_alu(struct alu_struct *alu, u32 *alu_table)
}
static int ksz_port_fdb_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb)
+ dsa_fdb_dump_cb_t *cb, void *data)
{
struct ksz_device *dev = ds->priv;
int ret = 0;
- u32 data;
+ u32 ksz_data;
u32 alu_table[4];
struct alu_struct alu;
int timeout;
@@ -868,8 +822,8 @@ static int ksz_port_fdb_dump(struct dsa_switch *ds, int port,
do {
timeout = 1000;
do {
- ksz_read32(dev, REG_SW_ALU_CTRL__4, &data);
- if ((data & ALU_VALID) || !(data & ALU_START))
+ ksz_read32(dev, REG_SW_ALU_CTRL__4, &ksz_data);
+ if ((ksz_data & ALU_VALID) || !(ksz_data & ALU_START))
break;
usleep_range(1, 10);
} while (timeout-- > 0);
@@ -886,18 +840,11 @@ static int ksz_port_fdb_dump(struct dsa_switch *ds, int port,
convert_alu(&alu, alu_table);
if (alu.port_forward & BIT(port)) {
- fdb->vid = alu.fid;
- if (alu.is_static)
- fdb->ndm_state = NUD_NOARP;
- else
- fdb->ndm_state = NUD_REACHABLE;
- ether_addr_copy(fdb->addr, alu.mac);
-
- ret = cb(&fdb->obj);
+ ret = cb(alu.mac, alu.fid, alu.is_static, data);
if (ret)
goto exit;
}
- } while (data & ALU_START);
+ } while (ksz_data & ALU_START);
exit:
@@ -1065,14 +1012,6 @@ exit:
return ret;
}
-static int ksz_port_mdb_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_mdb *mdb,
- switchdev_obj_dump_cb_t *cb)
-{
- /* this is not called by switch layer */
- return 0;
-}
-
static int ksz_port_mirror_add(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror,
bool ingress)
@@ -1129,15 +1068,12 @@ static const struct dsa_switch_ops ksz_switch_ops = {
.port_vlan_prepare = ksz_port_vlan_prepare,
.port_vlan_add = ksz_port_vlan_add,
.port_vlan_del = ksz_port_vlan_del,
- .port_vlan_dump = ksz_port_vlan_dump,
- .port_fdb_prepare = ksz_port_fdb_prepare,
.port_fdb_dump = ksz_port_fdb_dump,
.port_fdb_add = ksz_port_fdb_add,
.port_fdb_del = ksz_port_fdb_del,
.port_mdb_prepare = ksz_port_mdb_prepare,
.port_mdb_add = ksz_port_mdb_add,
.port_mdb_del = ksz_port_mdb_del,
- .port_mdb_dump = ksz_port_mdb_dump,
.port_mirror_add = ksz_port_mirror_add,
.port_mirror_del = ksz_port_mirror_del,
};
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 264b281..c142b97 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -839,49 +839,31 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
}
static int
-mt7530_port_fdb_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
+mt7530_port_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
{
struct mt7530_priv *priv = ds->priv;
int ret;
+ u8 port_mask = BIT(port);
- /* Because auto-learned entrie shares the same FDB table.
- * an entry is reserved with no port_mask to make sure fdb_add
- * is called while the entry is still available.
- */
mutex_lock(&priv->reg_mutex);
- mt7530_fdb_write(priv, fdb->vid, 0, fdb->addr, -1, STATIC_ENT);
+ mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_ENT);
ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, 0);
mutex_unlock(&priv->reg_mutex);
return ret;
}
-static void
-mt7530_port_fdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
-{
- struct mt7530_priv *priv = ds->priv;
- u8 port_mask = BIT(port);
-
- mutex_lock(&priv->reg_mutex);
- mt7530_fdb_write(priv, fdb->vid, port_mask, fdb->addr, -1, STATIC_ENT);
- mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, 0);
- mutex_unlock(&priv->reg_mutex);
-}
-
static int
mt7530_port_fdb_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb)
+ const unsigned char *addr, u16 vid)
{
struct mt7530_priv *priv = ds->priv;
int ret;
u8 port_mask = BIT(port);
mutex_lock(&priv->reg_mutex);
- mt7530_fdb_write(priv, fdb->vid, port_mask, fdb->addr, -1, STATIC_EMP);
+ mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_EMP);
ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, 0);
mutex_unlock(&priv->reg_mutex);
@@ -890,8 +872,7 @@ mt7530_port_fdb_del(struct dsa_switch *ds, int port,
static int
mt7530_port_fdb_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb)
+ dsa_fdb_dump_cb_t *cb, void *data)
{
struct mt7530_priv *priv = ds->priv;
struct mt7530_fdb _fdb = { 0 };
@@ -909,11 +890,8 @@ mt7530_port_fdb_dump(struct dsa_switch *ds, int port,
if (rsp & ATC_SRCH_HIT) {
mt7530_fdb_read(priv, &_fdb);
if (_fdb.port_mask & BIT(port)) {
- ether_addr_copy(fdb->addr, _fdb.mac);
- fdb->vid = _fdb.vid;
- fdb->ndm_state = _fdb.noarp ?
- NUD_NOARP : NUD_REACHABLE;
- ret = cb(&fdb->obj);
+ ret = cb(_fdb.mac, _fdb.vid, _fdb.noarp,
+ data);
if (ret < 0)
break;
}
@@ -1039,7 +1017,7 @@ mt7530_setup(struct dsa_switch *ds)
return 0;
}
-static struct dsa_switch_ops mt7530_switch_ops = {
+static const struct dsa_switch_ops mt7530_switch_ops = {
.get_tag_protocol = mtk_get_tag_protocol,
.setup = mt7530_setup,
.get_strings = mt7530_get_strings,
@@ -1053,7 +1031,6 @@ static struct dsa_switch_ops mt7530_switch_ops = {
.port_stp_state_set = mt7530_stp_state_set,
.port_bridge_join = mt7530_port_bridge_join,
.port_bridge_leave = mt7530_port_bridge_leave,
- .port_fdb_prepare = mt7530_port_fdb_prepare,
.port_fdb_add = mt7530_port_fdb_add,
.port_fdb_del = mt7530_port_fdb_del,
.port_fdb_dump = mt7530_port_fdb_dump,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 5bcdd33..c6678aa 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -307,7 +307,7 @@ out:
mutex_unlock(&chip->reg_lock);
}
-static struct irq_chip mv88e6xxx_g1_irq_chip = {
+static const struct irq_chip mv88e6xxx_g1_irq_chip = {
.name = "mv88e6xxx-g1",
.irq_mask = mv88e6xxx_g1_irq_mask,
.irq_unmask = mv88e6xxx_g1_irq_unmask,
@@ -810,63 +810,18 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
mutex_unlock(&chip->reg_lock);
}
-static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *e)
+static int mv88e6xxx_get_mac_eee(struct dsa_switch *ds, int port,
+ struct ethtool_eee *e)
{
- struct mv88e6xxx_chip *chip = ds->priv;
- u16 reg;
- int err;
-
- if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE))
- return -EOPNOTSUPP;
-
- mutex_lock(&chip->reg_lock);
-
- err = mv88e6xxx_phy_read(chip, port, 16, &reg);
- if (err)
- goto out;
-
- e->eee_enabled = !!(reg & 0x0200);
- e->tx_lpi_enabled = !!(reg & 0x0100);
-
- err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &reg);
- if (err)
- goto out;
-
- e->eee_active = !!(reg & MV88E6352_PORT_STS_EEE);
-out:
- mutex_unlock(&chip->reg_lock);
-
- return err;
+ /* Nothing to do on the port's MAC */
+ return 0;
}
-static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
- struct phy_device *phydev, struct ethtool_eee *e)
+static int mv88e6xxx_set_mac_eee(struct dsa_switch *ds, int port,
+ struct ethtool_eee *e)
{
- struct mv88e6xxx_chip *chip = ds->priv;
- u16 reg;
- int err;
-
- if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE))
- return -EOPNOTSUPP;
-
- mutex_lock(&chip->reg_lock);
-
- err = mv88e6xxx_phy_read(chip, port, 16, &reg);
- if (err)
- goto out;
-
- reg &= ~0x0300;
- if (e->eee_enabled)
- reg |= 0x0200;
- if (e->tx_lpi_enabled)
- reg |= 0x0100;
-
- err = mv88e6xxx_phy_write(chip, port, 16, reg);
-out:
- mutex_unlock(&chip->reg_lock);
-
- return err;
+ /* Nothing to do on the port's MAC */
+ return 0;
}
static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port)
@@ -926,6 +881,22 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
dev_err(ds->dev, "p%d: failed to update state\n", port);
}
+static int mv88e6xxx_pot_setup(struct mv88e6xxx_chip *chip)
+{
+ if (chip->info->ops->pot_clear)
+ return chip->info->ops->pot_clear(chip);
+
+ return 0;
+}
+
+static int mv88e6xxx_rsvd2cpu_setup(struct mv88e6xxx_chip *chip)
+{
+ if (chip->info->ops->mgmt_rsvd2cpu)
+ return chip->info->ops->mgmt_rsvd2cpu(chip);
+
+ return 0;
+}
+
static int mv88e6xxx_atu_setup(struct mv88e6xxx_chip *chip)
{
int err;
@@ -1040,61 +1011,6 @@ static int mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip,
return chip->info->ops->vtu_loadpurge(chip, entry);
}
-static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb)
-{
- struct mv88e6xxx_chip *chip = ds->priv;
- struct mv88e6xxx_vtu_entry next = {
- .vid = chip->info->max_vid,
- };
- u16 pvid;
- int err;
-
- if (!chip->info->max_vid)
- return -EOPNOTSUPP;
-
- mutex_lock(&chip->reg_lock);
-
- err = mv88e6xxx_port_get_pvid(chip, port, &pvid);
- if (err)
- goto unlock;
-
- do {
- err = mv88e6xxx_vtu_getnext(chip, &next);
- if (err)
- break;
-
- if (!next.valid)
- break;
-
- if (next.member[port] ==
- MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_NON_MEMBER)
- continue;
-
- /* reinit and dump this VLAN obj */
- vlan->vid_begin = next.vid;
- vlan->vid_end = next.vid;
- vlan->flags = 0;
-
- if (next.member[port] ==
- MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNTAGGED)
- vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
-
- if (next.vid == pvid)
- vlan->flags |= BRIDGE_VLAN_INFO_PVID;
-
- err = cb(&vlan->obj);
- if (err)
- break;
- } while (next.vid < chip->info->max_vid);
-
-unlock:
- mutex_unlock(&chip->reg_lock);
-
- return err;
-}
-
static int mv88e6xxx_atu_new(struct mv88e6xxx_chip *chip, u16 *fid)
{
DECLARE_BITMAP(fid_bitmap, MV88E6XXX_N_FID);
@@ -1435,38 +1351,28 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
return mv88e6xxx_g1_atu_loadpurge(chip, vlan.fid, &entry);
}
-static int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
-{
- /* We don't need any dynamic resource from the kernel (yet),
- * so skip the prepare phase.
- */
- return 0;
-}
-
-static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
+static int mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
{
struct mv88e6xxx_chip *chip = ds->priv;
+ int err;
mutex_lock(&chip->reg_lock);
- if (mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid,
- MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC))
- dev_err(ds->dev, "p%d: failed to load unicast MAC address\n",
- port);
+ err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid,
+ MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);
mutex_unlock(&chip->reg_lock);
+
+ return err;
}
static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb)
+ const unsigned char *addr, u16 vid)
{
struct mv88e6xxx_chip *chip = ds->priv;
int err;
mutex_lock(&chip->reg_lock);
- err = mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid,
+ err = mv88e6xxx_port_db_load_purge(chip, port, addr, vid,
MV88E6XXX_G1_ATU_DATA_STATE_UNUSED);
mutex_unlock(&chip->reg_lock);
@@ -1475,10 +1381,10 @@ static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
u16 fid, u16 vid, int port,
- struct switchdev_obj *obj,
- switchdev_obj_dump_cb_t *cb)
+ dsa_fdb_dump_cb_t *cb, void *data)
{
struct mv88e6xxx_atu_entry addr;
+ bool is_static;
int err;
addr.state = MV88E6XXX_G1_ATU_DATA_STATE_UNUSED;
@@ -1495,33 +1401,12 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
if (addr.trunk || (addr.portvec & BIT(port)) == 0)
continue;
- if (obj->id == SWITCHDEV_OBJ_ID_PORT_FDB) {
- struct switchdev_obj_port_fdb *fdb;
-
- if (!is_unicast_ether_addr(addr.mac))
- continue;
-
- fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
- fdb->vid = vid;
- ether_addr_copy(fdb->addr, addr.mac);
- if (addr.state == MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC)
- fdb->ndm_state = NUD_NOARP;
- else
- fdb->ndm_state = NUD_REACHABLE;
- } else if (obj->id == SWITCHDEV_OBJ_ID_PORT_MDB) {
- struct switchdev_obj_port_mdb *mdb;
-
- if (!is_multicast_ether_addr(addr.mac))
- continue;
-
- mdb = SWITCHDEV_OBJ_PORT_MDB(obj);
- mdb->vid = vid;
- ether_addr_copy(mdb->addr, addr.mac);
- } else {
- return -EOPNOTSUPP;
- }
+ if (!is_unicast_ether_addr(addr.mac))
+ continue;
- err = cb(obj);
+ is_static = (addr.state ==
+ MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC);
+ err = cb(addr.mac, vid, is_static, data);
if (err)
return err;
} while (!is_broadcast_ether_addr(addr.mac));
@@ -1530,8 +1415,7 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
}
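With the switchdev objects gone, the dump path hands each ATU entry to a generic dsa_fdb_dump_cb_t callback. A minimal sketch of a consumer, assuming only the callback signature visible in this hunk; the dump_ctx structure and the pr_info() reporting are illustrative:

struct dump_ctx {
	int count;
};

static int fdb_dump_cb(const unsigned char *addr, u16 vid,
		       bool is_static, void *data)
{
	struct dump_ctx *ctx = data;

	pr_info("fdb %pM vid %u %s\n", addr, vid,
		is_static ? "static" : "dynamic");
	ctx->count++;

	return 0;	/* a non-zero return aborts the walk */
}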
static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
- struct switchdev_obj *obj,
- switchdev_obj_dump_cb_t *cb)
+ dsa_fdb_dump_cb_t *cb, void *data)
{
struct mv88e6xxx_vtu_entry vlan = {
.vid = chip->info->max_vid,
@@ -1544,7 +1428,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
if (err)
return err;
- err = mv88e6xxx_port_db_dump_fid(chip, fid, 0, port, obj, cb);
+ err = mv88e6xxx_port_db_dump_fid(chip, fid, 0, port, cb, data);
if (err)
return err;
@@ -1558,7 +1442,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
break;
err = mv88e6xxx_port_db_dump_fid(chip, vlan.fid, vlan.vid, port,
- obj, cb);
+ cb, data);
if (err)
return err;
} while (vlan.vid < chip->info->max_vid);
@@ -1567,14 +1451,13 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
}
static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb)
+ dsa_fdb_dump_cb_t *cb, void *data)
{
struct mv88e6xxx_chip *chip = ds->priv;
int err;
mutex_lock(&chip->reg_lock);
- err = mv88e6xxx_port_db_dump(chip, port, &fdb->obj, cb);
+ err = mv88e6xxx_port_db_dump(chip, port, cb, data);
mutex_unlock(&chip->reg_lock);
return err;
@@ -2116,7 +1999,7 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
goto unlock;
/* Setup Switch Global 2 Registers */
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) {
+ if (chip->info->global2_addr) {
err = mv88e6xxx_g2_setup(chip);
if (err)
goto unlock;
@@ -2142,16 +2025,13 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
if (err)
goto unlock;
- /* Some generations have the configuration of sending reserved
- * management frames to the CPU in global2, others in
- * global1. Hence it does not fit the two setup functions
- * above.
- */
- if (chip->info->ops->mgmt_rsvd2cpu) {
- err = chip->info->ops->mgmt_rsvd2cpu(chip);
- if (err)
- goto unlock;
- }
+ err = mv88e6xxx_pot_setup(chip);
+ if (err)
+ goto unlock;
+
+ err = mv88e6xxx_rsvd2cpu_setup(chip);
+ if (err)
+ goto unlock;
unlock:
mutex_unlock(&chip->reg_lock);
@@ -2236,7 +2116,7 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
if (np) {
bus->name = np->full_name;
- snprintf(bus->id, MII_BUS_ID_SIZE, "%s", np->full_name);
+ snprintf(bus->id, MII_BUS_ID_SIZE, "%pOF", np);
} else {
bus->name = "mv88e6xxx SMI";
snprintf(bus->id, MII_BUS_ID_SIZE, "mv88e6xxx-%d", index++);
@@ -2385,7 +2265,8 @@ static const struct mv88e6xxx_ops mv88e6085_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.ppu_enable = mv88e6185_g1_ppu_enable,
.ppu_disable = mv88e6185_g1_ppu_disable,
.reset = mv88e6185_g1_reset,
@@ -2408,7 +2289,7 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
.stats_get_sset_count = mv88e6095_stats_get_sset_count,
.stats_get_strings = mv88e6095_stats_get_strings,
.stats_get_stats = mv88e6095_stats_get_stats,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
.ppu_enable = mv88e6185_g1_ppu_enable,
.ppu_disable = mv88e6185_g1_ppu_disable,
.reset = mv88e6185_g1_reset,
@@ -2441,7 +2322,8 @@ static const struct mv88e6xxx_ops mv88e6097_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2467,7 +2349,8 @@ static const struct mv88e6xxx_ops mv88e6123_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2496,7 +2379,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
.ppu_enable = mv88e6185_g1_ppu_enable,
.ppu_disable = mv88e6185_g1_ppu_disable,
.reset = mv88e6185_g1_reset,
@@ -2533,6 +2416,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = {
.set_egress_port = mv88e6390_g1_set_egress_port,
.watchdog_ops = &mv88e6390_watchdog_ops,
.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2563,7 +2447,8 @@ static const struct mv88e6xxx_ops mv88e6161_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2587,7 +2472,8 @@ static const struct mv88e6xxx_ops mv88e6165_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2619,7 +2505,8 @@ static const struct mv88e6xxx_ops mv88e6171_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2653,7 +2540,8 @@ static const struct mv88e6xxx_ops mv88e6172_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2686,7 +2574,8 @@ static const struct mv88e6xxx_ops mv88e6175_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2720,7 +2609,8 @@ static const struct mv88e6xxx_ops mv88e6176_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2746,7 +2636,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6185_g2_mgmt_rsvd2cpu,
.ppu_enable = mv88e6185_g1_ppu_enable,
.ppu_disable = mv88e6185_g1_ppu_disable,
.reset = mv88e6185_g1_reset,
@@ -2782,6 +2672,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
.set_egress_port = mv88e6390_g1_set_egress_port,
.watchdog_ops = &mv88e6390_watchdog_ops,
.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -2816,6 +2707,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
.set_egress_port = mv88e6390_g1_set_egress_port,
.watchdog_ops = &mv88e6390_watchdog_ops,
.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -2850,6 +2742,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
.set_egress_port = mv88e6390_g1_set_egress_port,
.watchdog_ops = &mv88e6390_watchdog_ops,
.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -2884,7 +2777,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -2920,6 +2814,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
.set_egress_port = mv88e6390_g1_set_egress_port,
.watchdog_ops = &mv88e6390_watchdog_ops,
.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -2952,14 +2847,15 @@ static const struct mv88e6xxx_ops mv88e6320_ops = {
.stats_get_stats = mv88e6320_stats_get_stats,
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6185_g1_vtu_getnext,
.vtu_loadpurge = mv88e6185_g1_vtu_loadpurge,
};
static const struct mv88e6xxx_ops mv88e6321_ops = {
- /* MV88E6XXX_FAMILY_6321 */
+ /* MV88E6XXX_FAMILY_6320 */
.irl_init_all = mv88e6352_g2_irl_init_all,
.get_eeprom = mv88e6xxx_g2_get_eeprom16,
.set_eeprom = mv88e6xxx_g2_set_eeprom16,
@@ -3018,6 +2914,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = {
.set_egress_port = mv88e6390_g1_set_egress_port,
.watchdog_ops = &mv88e6390_watchdog_ops,
.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -3049,7 +2946,8 @@ static const struct mv88e6xxx_ops mv88e6350_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -3081,7 +2979,8 @@ static const struct mv88e6xxx_ops mv88e6351_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -3115,7 +3014,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = {
.set_cpu_port = mv88e6095_g1_set_cpu_port,
.set_egress_port = mv88e6095_g1_set_egress_port,
.watchdog_ops = &mv88e6097_watchdog_ops,
- .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu,
+ .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6352_g1_vtu_getnext,
.vtu_loadpurge = mv88e6352_g1_vtu_loadpurge,
@@ -3153,6 +3053,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
.set_egress_port = mv88e6390_g1_set_egress_port,
.watchdog_ops = &mv88e6390_watchdog_ops,
.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -3190,6 +3091,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
.set_egress_port = mv88e6390_g1_set_egress_port,
.watchdog_ops = &mv88e6390_watchdog_ops,
.mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu,
+ .pot_clear = mv88e6xxx_g2_pot_clear,
.reset = mv88e6352_g1_reset,
.vtu_getnext = mv88e6390_g1_vtu_getnext,
.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
@@ -3206,12 +3108,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 8,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6097,
.ops = &mv88e6085_ops,
},
@@ -3224,11 +3128,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 8,
.atu_move_port_mask = 0xf,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6095,
.ops = &mv88e6095_ops,
},
@@ -3241,12 +3146,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 8,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6097,
.ops = &mv88e6097_ops,
},
@@ -3259,12 +3166,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6165,
.ops = &mv88e6123_ops,
},
@@ -3277,11 +3186,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
.atu_move_port_mask = 0xf,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6185,
.ops = &mv88e6131_ops,
},
@@ -3294,11 +3204,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 3750,
.atu_move_port_mask = 0x1f,
+ .g2_irqs = 10,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6341,
.ops = &mv88e6141_ops,
},
@@ -3311,12 +3223,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6165,
.ops = &mv88e6161_ops,
},
@@ -3329,12 +3243,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6165,
.ops = &mv88e6165_ops,
},
@@ -3347,12 +3263,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6351,
.ops = &mv88e6171_ops,
},
@@ -3365,12 +3283,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6352,
.ops = &mv88e6172_ops,
},
@@ -3383,12 +3303,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6351,
.ops = &mv88e6175_ops,
},
@@ -3401,12 +3323,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6352,
.ops = &mv88e6176_ops,
},
@@ -3419,11 +3343,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 8,
.atu_move_port_mask = 0xf,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6185,
.ops = &mv88e6185_ops,
},
@@ -3436,12 +3361,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.tag_protocol = DSA_TAG_PROTO_DSA,
.age_time_coeff = 3750,
.g1_irqs = 9,
+ .g2_irqs = 14,
.pvt = true,
+ .multi_chip = true,
.atu_move_port_mask = 0x1f,
- .flags = MV88E6XXX_FLAGS_FAMILY_6390,
.ops = &mv88e6190_ops,
},
@@ -3454,12 +3381,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 3750,
.g1_irqs = 9,
+ .g2_irqs = 14,
.atu_move_port_mask = 0x1f,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6390,
.ops = &mv88e6190x_ops,
},
@@ -3472,12 +3401,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 3750,
.g1_irqs = 9,
+ .g2_irqs = 14,
.atu_move_port_mask = 0x1f,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6390,
.ops = &mv88e6191_ops,
},
@@ -3490,12 +3421,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6352,
.ops = &mv88e6240_ops,
},
@@ -3508,12 +3441,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 3750,
.g1_irqs = 9,
+ .g2_irqs = 14,
.atu_move_port_mask = 0x1f,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6390,
.ops = &mv88e6290_ops,
},
@@ -3526,12 +3461,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 8,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6320,
.ops = &mv88e6320_ops,
},
@@ -3544,11 +3480,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 8,
.atu_move_port_mask = 0xf,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6320,
.ops = &mv88e6321_ops,
},
@@ -3561,11 +3498,13 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 3750,
.atu_move_port_mask = 0x1f,
+ .g2_irqs = 10,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6341,
.ops = &mv88e6341_ops,
},
@@ -3578,12 +3517,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6351,
.ops = &mv88e6350_ops,
},
@@ -3596,12 +3537,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6351,
.ops = &mv88e6351_ops,
},
@@ -3614,12 +3557,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 4095,
.port_base_addr = 0x10,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 15000,
.g1_irqs = 9,
+ .g2_irqs = 10,
.atu_move_port_mask = 0xf,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_EDSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6352,
.ops = &mv88e6352_ops,
},
[MV88E6390] = {
@@ -3631,12 +3576,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 3750,
.g1_irqs = 9,
+ .g2_irqs = 14,
.atu_move_port_mask = 0x1f,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6390,
.ops = &mv88e6390_ops,
},
[MV88E6390X] = {
@@ -3648,12 +3595,14 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.max_vid = 8191,
.port_base_addr = 0x0,
.global1_addr = 0x1b,
+ .global2_addr = 0x1c,
.age_time_coeff = 3750,
.g1_irqs = 9,
+ .g2_irqs = 14,
.atu_move_port_mask = 0x1f,
.pvt = true,
+ .multi_chip = true,
.tag_protocol = DSA_TAG_PROTO_DSA,
- .flags = MV88E6XXX_FLAGS_FAMILY_6390,
.ops = &mv88e6390x_ops,
},
};
@@ -3723,7 +3672,7 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
{
if (sw_addr == 0)
chip->smi_ops = &mv88e6xxx_smi_single_chip_ops;
- else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_MULTI_CHIP))
+ else if (chip->info->multi_chip)
chip->smi_ops = &mv88e6xxx_smi_multi_chip_ops;
else
return -EINVAL;
@@ -3828,20 +3777,6 @@ static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, int port,
return err;
}
-static int mv88e6xxx_port_mdb_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_mdb *mdb,
- switchdev_obj_dump_cb_t *cb)
-{
- struct mv88e6xxx_chip *chip = ds->priv;
- int err;
-
- mutex_lock(&chip->reg_lock);
- err = mv88e6xxx_port_db_dump(chip, port, &mdb->obj, cb);
- mutex_unlock(&chip->reg_lock);
-
- return err;
-}
-
static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.probe = mv88e6xxx_drv_probe,
.get_tag_protocol = mv88e6xxx_get_tag_protocol,
@@ -3853,8 +3788,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.get_sset_count = mv88e6xxx_get_sset_count,
.port_enable = mv88e6xxx_port_enable,
.port_disable = mv88e6xxx_port_disable,
- .set_eee = mv88e6xxx_set_eee,
- .get_eee = mv88e6xxx_get_eee,
+ .get_mac_eee = mv88e6xxx_get_mac_eee,
+ .set_mac_eee = mv88e6xxx_set_mac_eee,
.get_eeprom_len = mv88e6xxx_get_eeprom_len,
.get_eeprom = mv88e6xxx_get_eeprom,
.set_eeprom = mv88e6xxx_set_eeprom,
@@ -3869,15 +3804,12 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.port_vlan_prepare = mv88e6xxx_port_vlan_prepare,
.port_vlan_add = mv88e6xxx_port_vlan_add,
.port_vlan_del = mv88e6xxx_port_vlan_del,
- .port_vlan_dump = mv88e6xxx_port_vlan_dump,
- .port_fdb_prepare = mv88e6xxx_port_fdb_prepare,
.port_fdb_add = mv88e6xxx_port_fdb_add,
.port_fdb_del = mv88e6xxx_port_fdb_del,
.port_fdb_dump = mv88e6xxx_port_fdb_dump,
.port_mdb_prepare = mv88e6xxx_port_mdb_prepare,
.port_mdb_add = mv88e6xxx_port_mdb_add,
.port_mdb_del = mv88e6xxx_port_mdb_del,
- .port_mdb_dump = mv88e6xxx_port_mdb_dump,
.crosschip_bridge_join = mv88e6xxx_crosschip_bridge_join,
.crosschip_bridge_leave = mv88e6xxx_crosschip_bridge_leave,
};
@@ -3971,7 +3903,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
if (err)
goto out;
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT)) {
+ if (chip->info->g2_irqs > 0) {
err = mv88e6xxx_g2_irq_setup(chip);
if (err)
goto out_g1_irq;
@@ -3991,7 +3923,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
out_mdio:
mv88e6xxx_mdios_unregister(chip);
out_g2_irq:
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT) && chip->irq > 0)
+ if (chip->info->g2_irqs > 0 && chip->irq > 0)
mv88e6xxx_g2_irq_free(chip);
out_g1_irq:
if (chip->irq > 0) {
@@ -4013,7 +3945,7 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
mv88e6xxx_mdios_unregister(chip);
if (chip->irq > 0) {
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT))
+ if (chip->info->g2_irqs > 0)
mv88e6xxx_g2_irq_free(chip);
mv88e6xxx_g1_irq_free(chip);
}
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 0864440..334f6f7 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -97,133 +97,6 @@ enum mv88e6xxx_family {
MV88E6XXX_FAMILY_6390, /* 6190 6190X 6191 6290 6390 6390X */
};
-enum mv88e6xxx_cap {
- /* Energy Efficient Ethernet.
- */
- MV88E6XXX_CAP_EEE,
-
- /* Multi-chip Addressing Mode.
- * Some chips respond to only 2 registers of its own SMI device address
- * when it is non-zero, and use indirect access to internal registers.
- */
- MV88E6XXX_CAP_SMI_CMD, /* (0x00) SMI Command */
- MV88E6XXX_CAP_SMI_DATA, /* (0x01) SMI Data */
-
- /* Switch Global (1) Registers.
- */
- MV88E6XXX_CAP_G1_ATU_FID, /* (0x01) ATU FID Register */
- MV88E6XXX_CAP_G1_VTU_FID, /* (0x02) VTU FID Register */
-
- /* Switch Global 2 Registers.
- * The device contains a second set of global 16-bit registers.
- */
- MV88E6XXX_CAP_GLOBAL2,
- MV88E6XXX_CAP_G2_INT, /* (0x00) Interrupt Status */
- MV88E6XXX_CAP_G2_MGMT_EN_2X, /* (0x02) MGMT Enable Register 2x */
- MV88E6XXX_CAP_G2_MGMT_EN_0X, /* (0x03) MGMT Enable Register 0x */
- MV88E6XXX_CAP_G2_POT, /* (0x0f) Priority Override Table */
-
- /* Per VLAN Spanning Tree Unit (STU).
- * The Port State database, if present, is accessed through VTU
- * operations and dedicated SID registers. See MV88E6352_G1_VTU_SID.
- */
- MV88E6XXX_CAP_STU,
-
- /* VLAN Table Unit.
- * The VTU is used to program 802.1Q VLANs. See MV88E6XXX_G1_VTU_OP.
- */
- MV88E6XXX_CAP_VTU,
-};
-
-/* Bitmask of capabilities */
-#define MV88E6XXX_FLAG_EEE BIT_ULL(MV88E6XXX_CAP_EEE)
-
-#define MV88E6XXX_FLAG_SMI_CMD BIT_ULL(MV88E6XXX_CAP_SMI_CMD)
-#define MV88E6XXX_FLAG_SMI_DATA BIT_ULL(MV88E6XXX_CAP_SMI_DATA)
-
-#define MV88E6XXX_FLAG_G1_VTU_FID BIT_ULL(MV88E6XXX_CAP_G1_VTU_FID)
-
-#define MV88E6XXX_FLAG_GLOBAL2 BIT_ULL(MV88E6XXX_CAP_GLOBAL2)
-#define MV88E6XXX_FLAG_G2_INT BIT_ULL(MV88E6XXX_CAP_G2_INT)
-#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X)
-#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X)
-#define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT)
-
-/* Multi-chip Addressing Mode */
-#define MV88E6XXX_FLAGS_MULTI_CHIP \
- (MV88E6XXX_FLAG_SMI_CMD | \
- MV88E6XXX_FLAG_SMI_DATA)
-
-#define MV88E6XXX_FLAGS_FAMILY_6095 \
- (MV88E6XXX_FLAG_GLOBAL2 | \
- MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
- MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6097 \
- (MV88E6XXX_FLAG_G1_VTU_FID | \
- MV88E6XXX_FLAG_GLOBAL2 | \
- MV88E6XXX_FLAG_G2_INT | \
- MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
- MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
- MV88E6XXX_FLAG_G2_POT | \
- MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6165 \
- (MV88E6XXX_FLAG_G1_VTU_FID | \
- MV88E6XXX_FLAG_GLOBAL2 | \
- MV88E6XXX_FLAG_G2_INT | \
- MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
- MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
- MV88E6XXX_FLAG_G2_POT | \
- MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6185 \
- (MV88E6XXX_FLAG_GLOBAL2 | \
- MV88E6XXX_FLAG_G2_INT | \
- MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
- MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6320 \
- (MV88E6XXX_FLAG_EEE | \
- MV88E6XXX_FLAG_GLOBAL2 | \
- MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
- MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
- MV88E6XXX_FLAG_G2_POT | \
- MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6341 \
- (MV88E6XXX_FLAG_EEE | \
- MV88E6XXX_FLAG_G1_VTU_FID | \
- MV88E6XXX_FLAG_GLOBAL2 | \
- MV88E6XXX_FLAG_G2_INT | \
- MV88E6XXX_FLAG_G2_POT | \
- MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6351 \
- (MV88E6XXX_FLAG_G1_VTU_FID | \
- MV88E6XXX_FLAG_GLOBAL2 | \
- MV88E6XXX_FLAG_G2_INT | \
- MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
- MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
- MV88E6XXX_FLAG_G2_POT | \
- MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6352 \
- (MV88E6XXX_FLAG_EEE | \
- MV88E6XXX_FLAG_G1_VTU_FID | \
- MV88E6XXX_FLAG_GLOBAL2 | \
- MV88E6XXX_FLAG_G2_INT | \
- MV88E6XXX_FLAG_G2_MGMT_EN_2X | \
- MV88E6XXX_FLAG_G2_MGMT_EN_0X | \
- MV88E6XXX_FLAG_G2_POT | \
- MV88E6XXX_FLAGS_MULTI_CHIP)
-
-#define MV88E6XXX_FLAGS_FAMILY_6390 \
- (MV88E6XXX_FLAG_EEE | \
- MV88E6XXX_FLAG_GLOBAL2 | \
- MV88E6XXX_FLAG_G2_INT | \
- MV88E6XXX_FLAGS_MULTI_CHIP)
-
struct mv88e6xxx_ops;
struct mv88e6xxx_info {
@@ -235,11 +108,18 @@ struct mv88e6xxx_info {
unsigned int max_vid;
unsigned int port_base_addr;
unsigned int global1_addr;
+ unsigned int global2_addr;
unsigned int age_time_coeff;
unsigned int g1_irqs;
+ unsigned int g2_irqs;
bool pvt;
+
+ /* Multi-chip Addressing Mode.
+ * Some chips respond to only 2 registers of its own SMI device address
+ * when it is non-zero, and use indirect access to internal registers.
+ */
+ bool multi_chip;
enum dsa_tag_protocol tag_protocol;
- unsigned long long flags;
/* Mask for FromPort and ToPort value of PortVec used in ATU Move
* operation. 0 means that the ATU Move operation is not supported.
@@ -359,6 +239,9 @@ struct mv88e6xxx_ops {
struct mii_bus *bus,
int addr, int reg, u16 val);
+ /* Priority Override Table operations */
+ int (*pot_clear)(struct mv88e6xxx_chip *chip);
+
/* PHY Polling Unit (PPU) operations */
int (*ppu_enable)(struct mv88e6xxx_chip *chip);
int (*ppu_disable)(struct mv88e6xxx_chip *chip);
@@ -449,7 +332,6 @@ struct mv88e6xxx_ops {
int (*set_egress_port)(struct mv88e6xxx_chip *chip, int port);
const struct mv88e6xxx_irq_ops *watchdog_ops;
- /* Can be either in g1 or g2, so don't use a prefix */
int (*mgmt_rsvd2cpu)(struct mv88e6xxx_chip *chip);
/* Power on/off a SERDES interface */
@@ -482,12 +364,6 @@ struct mv88e6xxx_hw_stat {
int type;
};
-static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip,
- unsigned long flags)
-{
- return (chip->info->flags & flags) == flags;
-}
-
static inline bool mv88e6xxx_has_pvt(struct mv88e6xxx_chip *chip)
{
return chip->info->pvt;
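Feature tests that used to go through the removed flags bitmask now read dedicated info fields, in the same style as mv88e6xxx_has_pvt() above. A hypothetical helper shown only to illustrate the pattern (this exact function is not part of the patch):

static inline bool mv88e6xxx_has_g2(struct mv88e6xxx_chip *chip)
{
	/* a zero global2_addr means the chip has no Global 2 registers */
	return chip->info->global2_addr != 0;
}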
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index 158d0f4..af07278 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -22,48 +22,99 @@
static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
{
- return mv88e6xxx_read(chip, MV88E6XXX_G2, reg, val);
+ return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val);
}
static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
{
- return mv88e6xxx_write(chip, MV88E6XXX_G2, reg, val);
+ return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val);
}
static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
{
- return mv88e6xxx_update(chip, MV88E6XXX_G2, reg, update);
+ return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update);
}
static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
{
- return mv88e6xxx_wait(chip, MV88E6XXX_G2, reg, mask);
+ return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask);
+}
+
+/* Offset 0x00: Interrupt Source Register */
+
+static int mv88e6xxx_g2_int_source(struct mv88e6xxx_chip *chip, u16 *src)
+{
+ /* Read (and clear most of) the Interrupt Source bits */
+ return mv88e6xxx_g2_read(chip, MV88E6XXX_G2_INT_SRC, src);
+}
+
+/* Offset 0x01: Interrupt Mask Register */
+
+static int mv88e6xxx_g2_int_mask(struct mv88e6xxx_chip *chip, u16 mask)
+{
+ return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_INT_MASK, mask);
}
/* Offset 0x02: Management Enable 2x */
+
+static int mv88e6xxx_g2_mgmt_enable_2x(struct mv88e6xxx_chip *chip, u16 en2x)
+{
+ return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_2X, en2x);
+}
+
/* Offset 0x03: Management Enable 0x */
-int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+static int mv88e6xxx_g2_mgmt_enable_0x(struct mv88e6xxx_chip *chip, u16 en0x)
+{
+ return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_0X, en0x);
+}
+
+/* Offset 0x05: Switch Management Register */
+
+static int mv88e6xxx_g2_switch_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip,
+ bool enable)
+{
+ u16 val;
+ int err;
+
+ err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SWITCH_MGMT, &val);
+ if (err)
+ return err;
+
+ if (enable)
+ val |= MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU;
+ else
+ val &= ~MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU;
+
+ return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, val);
+}
+
+int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
{
int err;
/* Consider the frames with reserved multicast destination
- * addresses matching 01:80:c2:00:00:2x as MGMT.
+ * addresses matching 01:80:c2:00:00:0x as MGMT.
*/
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) {
- err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_2X, 0xffff);
- if (err)
- return err;
- }
+ err = mv88e6xxx_g2_mgmt_enable_0x(chip, 0xffff);
+ if (err)
+ return err;
+
+ return mv88e6xxx_g2_switch_mgmt_rsvd2cpu(chip, true);
+}
+
+int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+{
+ int err;
/* Consider the frames with reserved multicast destination
- * addresses matching 01:80:c2:00:00:0x as MGMT.
+ * addresses matching 01:80:c2:00:00:2x as MGMT.
*/
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X))
- return mv88e6xxx_g2_write(chip, MV88E6XXX_G2_MGMT_EN_0X,
- 0xffff);
+ err = mv88e6xxx_g2_mgmt_enable_2x(chip, 0xffff);
+ if (err)
+ return err;
- return 0;
+ return mv88e6185_g2_mgmt_rsvd2cpu(chip);
}
/* Offset 0x06: Device Mapping Table register */
@@ -260,7 +311,7 @@ static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer,
return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_PRIO_OVERRIDE, val);
}
-static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip)
+int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
{
int i, err;
@@ -933,7 +984,7 @@ static irqreturn_t mv88e6xxx_g2_irq_thread_fn(int irq, void *dev_id)
u16 reg;
mutex_lock(&chip->reg_lock);
- err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_INT_SOURCE, &reg);
+ err = mv88e6xxx_g2_int_source(chip, &reg);
mutex_unlock(&chip->reg_lock);
if (err)
goto out;
@@ -959,13 +1010,16 @@ static void mv88e6xxx_g2_irq_bus_lock(struct irq_data *d)
static void mv88e6xxx_g2_irq_bus_sync_unlock(struct irq_data *d)
{
struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d);
+ int err;
- mv88e6xxx_g2_write(chip, MV88E6XXX_G2_INT_MASK, ~chip->g2_irq.masked);
+ err = mv88e6xxx_g2_int_mask(chip, ~chip->g2_irq.masked);
+ if (err)
+ dev_err(chip->dev, "failed to mask interrupts\n");
mutex_unlock(&chip->reg_lock);
}
-static struct irq_chip mv88e6xxx_g2_irq_chip = {
+static const struct irq_chip mv88e6xxx_g2_irq_chip = {
.name = "mv88e6xxx-g2",
.irq_mask = mv88e6xxx_g2_irq_mask,
.irq_unmask = mv88e6xxx_g2_irq_unmask,
@@ -1063,9 +1117,6 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
* port at the highest priority.
*/
reg = MV88E6XXX_G2_SWITCH_MGMT_FORCE_FLOW_CTL_PRI | (0x7 << 4);
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) ||
- mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X))
- reg |= MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU | 0x7;
err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SWITCH_MGMT, reg);
if (err)
return err;
@@ -1080,12 +1131,5 @@ int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
if (err)
return err;
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) {
- /* Clear the priority override table. */
- err = mv88e6xxx_g2_clear_pot(chip);
- if (err)
- return err;
- }
-
return 0;
}
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 317ffd8..669f590 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -17,14 +17,27 @@
#include "chip.h"
-#define MV88E6XXX_G2 0x1c
-
/* Offset 0x00: Interrupt Source Register */
-#define MV88E6XXX_G2_INT_SOURCE 0x00
+#define MV88E6XXX_G2_INT_SRC 0x00
+#define MV88E6XXX_G2_INT_SRC_WDOG 0x8000
+#define MV88E6XXX_G2_INT_SRC_JAM_LIMIT 0x4000
+#define MV88E6XXX_G2_INT_SRC_DUPLEX_MISMATCH 0x2000
+#define MV88E6XXX_G2_INT_SRC_WAKE_EVENT 0x1000
+#define MV88E6352_G2_INT_SRC_SERDES 0x0800
+#define MV88E6352_G2_INT_SRC_PHY 0x001f
+#define MV88E6390_G2_INT_SRC_PHY 0x07fe
+
#define MV88E6XXX_G2_INT_SOURCE_WATCHDOG 15
/* Offset 0x01: Interrupt Mask Register */
-#define MV88E6XXX_G2_INT_MASK 0x01
+#define MV88E6XXX_G2_INT_MASK 0x01
+#define MV88E6XXX_G2_INT_MASK_WDOG 0x8000
+#define MV88E6XXX_G2_INT_MASK_JAM_LIMIT 0x4000
+#define MV88E6XXX_G2_INT_MASK_DUPLEX_MISMATCH 0x2000
+#define MV88E6XXX_G2_INT_MASK_WAKE_EVENT 0x1000
+#define MV88E6352_G2_INT_MASK_SERDES 0x0800
+#define MV88E6352_G2_INT_MASK_PHY 0x001f
+#define MV88E6390_G2_INT_MASK_PHY 0x07fe
/* Offset 0x02: MGMT Enable Register 2x */
#define MV88E6XXX_G2_MGMT_EN_2X 0x02
@@ -245,7 +258,11 @@ int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip);
int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip);
void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip);
-int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
+
+int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
+int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
+
+int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip);
extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
@@ -254,7 +271,7 @@ extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
{
- if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) {
+ if (chip->info->global2_addr) {
dev_err(chip->dev, "this chip requires CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 enabled\n");
return -EOPNOTSUPP;
}
@@ -347,7 +364,17 @@ static inline void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip)
{
}
-static inline int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+static inline int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
{
return -EOPNOTSUPP;
}
diff --git a/drivers/net/dsa/mv88e6xxx/phy.c b/drivers/net/dsa/mv88e6xxx/phy.c
index 3500ac0..436668b 100644
--- a/drivers/net/dsa/mv88e6xxx/phy.c
+++ b/drivers/net/dsa/mv88e6xxx/phy.c
@@ -13,7 +13,6 @@
#include <linux/mdio.h>
#include <linux/module.h>
-#include <net/dsa.h>
#include "chip.h"
#include "phy.h"
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index 8f3991b..b16d5f0 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -216,9 +216,6 @@
/* Offset 0x13: OutFiltered Counter */
#define MV88E6XXX_PORT_OUT_FILTERED 0x13
-/* Offset 0x16: LED Control */
-#define MV88E6XXX_PORT_LED_CONTROL 0x16
-
/* Offset 0x18: IEEE Priority Mapping Table */
#define MV88E6390_PORT_IEEE_PRIO_MAP_TABLE 0x18
#define MV88E6390_PORT_IEEE_PRIO_MAP_TABLE_UPDATE 0x8000
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index b3bee7e..5ada7a41 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -250,7 +250,7 @@ static const struct regmap_range qca8k_readable_ranges[] = {
};
-static struct regmap_access_table qca8k_readable_table = {
+static const struct regmap_access_table qca8k_readable_table = {
.yes_ranges = qca8k_readable_ranges,
.n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges),
};
@@ -637,8 +637,8 @@ qca8k_get_sset_count(struct dsa_switch *ds)
return ARRAY_SIZE(ar8327_mib);
}
-static void
-qca8k_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
+static int
+qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *eee)
{
struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port);
@@ -646,73 +646,21 @@ qca8k_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
mutex_lock(&priv->reg_mutex);
reg = qca8k_read(priv, QCA8K_REG_EEE_CTRL);
- if (enable)
+ if (eee->eee_enabled)
reg |= lpi_en;
else
reg &= ~lpi_en;
qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg);
mutex_unlock(&priv->reg_mutex);
-}
-
-static int
-qca8k_eee_init(struct dsa_switch *ds, int port,
- struct phy_device *phy)
-{
- struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
- struct ethtool_eee *p = &priv->port_sts[port].eee;
- int ret;
-
- p->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full);
-
- ret = phy_init_eee(phy, 0);
- if (ret)
- return ret;
-
- qca8k_eee_enable_set(ds, port, true);
return 0;
}
static int
-qca8k_set_eee(struct dsa_switch *ds, int port,
- struct phy_device *phydev,
- struct ethtool_eee *e)
+qca8k_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e)
{
- struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
- struct ethtool_eee *p = &priv->port_sts[port].eee;
- int ret = 0;
-
- p->eee_enabled = e->eee_enabled;
-
- if (e->eee_enabled) {
- p->eee_enabled = qca8k_eee_init(ds, port, phydev);
- if (!p->eee_enabled)
- ret = -EOPNOTSUPP;
- }
- qca8k_eee_enable_set(ds, port, p->eee_enabled);
-
- return ret;
-}
-
-static int
-qca8k_get_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *e)
-{
- struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
- struct ethtool_eee *p = &priv->port_sts[port].eee;
- struct net_device *netdev = ds->ports[port].netdev;
- int ret;
-
- ret = phy_ethtool_get_eee(netdev->phydev, p);
- if (!ret)
- e->eee_active =
- !!(p->supported & p->advertised & p->lp_advertised);
- else
- e->eee_active = 0;
-
- e->eee_enabled = p->eee_enabled;
-
- return ret;
+ /* Nothing to do on the port's MAC */
+ return 0;
}
static void
@@ -829,69 +777,44 @@ qca8k_port_fdb_insert(struct qca8k_priv *priv, const u8 *addr,
}
static int
-qca8k_port_fdb_prepare(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
-{
- struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
-
- /* The FDB table for static and auto learned entries is the same. We
- * need to reserve an entry with no port_mask set to make sure that
- * when port_fdb_add is called an entry is still available. Otherwise
- * the last free entry might have been used up by auto learning
- */
- return qca8k_port_fdb_insert(priv, fdb->addr, 0, fdb->vid);
-}
-
-static void
qca8k_port_fdb_add(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
+ const unsigned char *addr, u16 vid)
{
struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
u16 port_mask = BIT(port);
- /* Update the FDB entry adding the port_mask */
- qca8k_port_fdb_insert(priv, fdb->addr, port_mask, fdb->vid);
+ return qca8k_port_fdb_insert(priv, addr, port_mask, vid);
}
static int
qca8k_port_fdb_del(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb)
+ const unsigned char *addr, u16 vid)
{
struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
u16 port_mask = BIT(port);
- u16 vid = fdb->vid;
if (!vid)
vid = 1;
- return qca8k_fdb_del(priv, fdb->addr, port_mask, vid);
+ return qca8k_fdb_del(priv, addr, port_mask, vid);
}
static int
qca8k_port_fdb_dump(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb)
+ dsa_fdb_dump_cb_t *cb, void *data)
{
struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
struct qca8k_fdb _fdb = { 0 };
int cnt = QCA8K_NUM_FDB_RECORDS;
+ bool is_static;
int ret = 0;
mutex_lock(&priv->reg_mutex);
while (cnt-- && !qca8k_fdb_next(priv, &_fdb, port)) {
if (!_fdb.aging)
break;
-
- ether_addr_copy(fdb->addr, _fdb.mac);
- fdb->vid = _fdb.vid;
- if (_fdb.aging == QCA8K_ATU_STATUS_STATIC)
- fdb->ndm_state = NUD_NOARP;
- else
- fdb->ndm_state = NUD_REACHABLE;
-
- ret = cb(&fdb->obj);
+ is_static = (_fdb.aging == QCA8K_ATU_STATUS_STATIC);
+ ret = cb(_fdb.mac, _fdb.vid, is_static, data);
if (ret)
break;
}
@@ -914,14 +837,13 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
.phy_write = qca8k_phy_write,
.get_ethtool_stats = qca8k_get_ethtool_stats,
.get_sset_count = qca8k_get_sset_count,
- .get_eee = qca8k_get_eee,
- .set_eee = qca8k_set_eee,
+ .get_mac_eee = qca8k_get_mac_eee,
+ .set_mac_eee = qca8k_set_mac_eee,
.port_enable = qca8k_port_enable,
.port_disable = qca8k_port_disable,
.port_stp_state_set = qca8k_port_stp_state_set,
.port_bridge_join = qca8k_port_bridge_join,
.port_bridge_leave = qca8k_port_bridge_leave,
- .port_fdb_prepare = qca8k_port_fdb_prepare,
.port_fdb_add = qca8k_port_fdb_add,
.port_fdb_del = qca8k_port_fdb_del,
.port_fdb_dump = qca8k_port_fdb_dump,
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
index 1ed4fac..1cf8a92 100644
--- a/drivers/net/dsa/qca8k.h
+++ b/drivers/net/dsa/qca8k.h
@@ -156,7 +156,6 @@ enum qca8k_fdb_cmd {
};
struct ar8xxx_port_status {
- struct ethtool_eee eee;
int enabled;
};
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index d0c165d..d0a1f9c 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -345,7 +345,7 @@ static void dummy_setup(struct net_device *dev)
dev->flags &= ~IFF_MULTICAST;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST;
- dev->features |= NETIF_F_ALL_TSO | NETIF_F_UFO;
+ dev->features |= NETIF_F_ALL_TSO;
dev->features |= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX;
dev->features |= NETIF_F_GSO_ENCAP_ALL;
dev->hw_features |= dev->features;
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index f66c971..b223769 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -392,7 +392,7 @@ static struct isa_driver el3_isa_driver = {
static int isa_registered;
#ifdef CONFIG_PNP
-static struct pnp_device_id el3_pnp_ids[] = {
+static const struct pnp_device_id el3_pnp_ids[] = {
{ .id = "TCM5090" }, /* 3Com Etherlink III (TP) */
{ .id = "TCM5091" }, /* 3Com Etherlink III */
{ .id = "TCM5094" }, /* 3Com Etherlink III (combo) */
@@ -474,7 +474,7 @@ static int pnp_registered;
#endif /* CONFIG_PNP */
#ifdef CONFIG_EISA
-static struct eisa_device_id el3_eisa_ids[] = {
+static const struct eisa_device_id el3_eisa_ids[] = {
{ "TCM5090" },
{ "TCM5091" },
{ "TCM5092" },
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 3b516eb..402d909 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -900,7 +900,7 @@ static const struct dev_pm_ops vortex_pm_ops = {
#endif /* !CONFIG_PM */
#ifdef CONFIG_EISA
-static struct eisa_device_id vortex_eisa_ids[] = {
+static const struct eisa_device_id vortex_eisa_ids[] = {
{ "TCM5920", CH_3C592 },
{ "TCM5970", CH_3C597 },
{ "" }
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 05d9d3e..2455547 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -585,7 +585,7 @@ static int ax_bb_get_data(struct mdiobb_ctrl *ctrl)
return reg_memr & AX_MEMR_MDI ? 1 : 0;
}
-static struct mdiobb_ops bb_ops = {
+static const struct mdiobb_ops bb_ops = {
.owner = THIS_MODULE,
.set_mdc = ax_bb_mdc,
.set_mdio_dir = ax_bb_dir,
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index edae15ac..c604213 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -78,6 +78,7 @@ source "drivers/net/ethernet/freescale/Kconfig"
source "drivers/net/ethernet/fujitsu/Kconfig"
source "drivers/net/ethernet/hisilicon/Kconfig"
source "drivers/net/ethernet/hp/Kconfig"
+source "drivers/net/ethernet/huawei/Kconfig"
source "drivers/net/ethernet/ibm/Kconfig"
source "drivers/net/ethernet/intel/Kconfig"
source "drivers/net/ethernet/i825xx/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index bf7f450..a0a03d4 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_NET_VENDOR_FREESCALE) += freescale/
obj-$(CONFIG_NET_VENDOR_FUJITSU) += fujitsu/
obj-$(CONFIG_NET_VENDOR_HISILICON) += hisilicon/
obj-$(CONFIG_NET_VENDOR_HP) += hp/
+obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/
obj-$(CONFIG_NET_VENDOR_IBM) += ibm/
obj-$(CONFIG_NET_VENDOR_INTEL) += intel/
obj-$(CONFIG_NET_VENDOR_I825XX) += i825xx/
diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c
index a9ac58c..a251de8 100644
--- a/drivers/net/ethernet/adi/bfin_mac.c
+++ b/drivers/net/ethernet/adi/bfin_mac.c
@@ -986,7 +986,7 @@ static int bfin_ptp_enable(struct ptp_clock_info *ptp,
return -EOPNOTSUPP;
}
-static struct ptp_clock_info bfin_ptp_caps = {
+static const struct ptp_clock_info bfin_ptp_caps = {
.owner = THIS_MODULE,
.name = "BF518 clock",
.max_adj = 0,
diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c
index ee4b94e..e22f976 100644
--- a/drivers/net/ethernet/amd/a2065.c
+++ b/drivers/net/ethernet/amd/a2065.c
@@ -643,7 +643,7 @@ static int a2065_init_one(struct zorro_dev *z,
static void a2065_remove_one(struct zorro_dev *z);
-static struct zorro_device_id a2065_zorro_tbl[] = {
+static const struct zorro_device_id a2065_zorro_tbl[] = {
{ ZORRO_PROD_CBM_A2065_1 },
{ ZORRO_PROD_CBM_A2065_2 },
{ ZORRO_PROD_AMERISTAR_A2065 },
diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c
index 5fd7b15..4b6a5cb 100644
--- a/drivers/net/ethernet/amd/ariadne.c
+++ b/drivers/net/ethernet/amd/ariadne.c
@@ -692,7 +692,7 @@ static void ariadne_remove_one(struct zorro_dev *z)
free_netdev(dev);
}
-static struct zorro_device_id ariadne_zorro_tbl[] = {
+static const struct zorro_device_id ariadne_zorro_tbl[] = {
{ ZORRO_PROD_VILLAGE_TRONIC_ARIADNE },
{ 0 }
};
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index 9795419..7ea72ef 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -210,11 +210,15 @@
#define DMA_CH_CR_PBLX8_WIDTH 1
#define DMA_CH_CR_SPH_INDEX 24
#define DMA_CH_CR_SPH_WIDTH 1
-#define DMA_CH_IER_AIE_INDEX 15
+#define DMA_CH_IER_AIE20_INDEX 15
+#define DMA_CH_IER_AIE20_WIDTH 1
+#define DMA_CH_IER_AIE_INDEX 14
#define DMA_CH_IER_AIE_WIDTH 1
#define DMA_CH_IER_FBEE_INDEX 12
#define DMA_CH_IER_FBEE_WIDTH 1
-#define DMA_CH_IER_NIE_INDEX 16
+#define DMA_CH_IER_NIE20_INDEX 16
+#define DMA_CH_IER_NIE20_WIDTH 1
+#define DMA_CH_IER_NIE_INDEX 15
#define DMA_CH_IER_NIE_WIDTH 1
#define DMA_CH_IER_RBUE_INDEX 7
#define DMA_CH_IER_RBUE_WIDTH 1
@@ -298,6 +302,7 @@
#define MAC_RWKPFR 0x00c4
#define MAC_LPICSR 0x00d0
#define MAC_LPITCR 0x00d4
+#define MAC_TIR 0x00e0
#define MAC_VR 0x0110
#define MAC_DR 0x0114
#define MAC_HWF0R 0x011c
@@ -364,6 +369,8 @@
#define MAC_HWF0R_TXCOESEL_WIDTH 1
#define MAC_HWF0R_VLHASH_INDEX 4
#define MAC_HWF0R_VLHASH_WIDTH 1
+#define MAC_HWF0R_VXN_INDEX 29
+#define MAC_HWF0R_VXN_WIDTH 1
#define MAC_HWF1R_ADDR64_INDEX 14
#define MAC_HWF1R_ADDR64_WIDTH 2
#define MAC_HWF1R_ADVTHWORD_INDEX 13
@@ -448,6 +455,8 @@
#define MAC_PFR_PR_WIDTH 1
#define MAC_PFR_VTFE_INDEX 16
#define MAC_PFR_VTFE_WIDTH 1
+#define MAC_PFR_VUCC_INDEX 22
+#define MAC_PFR_VUCC_WIDTH 1
#define MAC_PMTCSR_MGKPKTEN_INDEX 1
#define MAC_PMTCSR_MGKPKTEN_WIDTH 1
#define MAC_PMTCSR_PWRDWN_INDEX 0
@@ -510,6 +519,12 @@
#define MAC_TCR_SS_WIDTH 2
#define MAC_TCR_TE_INDEX 0
#define MAC_TCR_TE_WIDTH 1
+#define MAC_TCR_VNE_INDEX 24
+#define MAC_TCR_VNE_WIDTH 1
+#define MAC_TCR_VNM_INDEX 25
+#define MAC_TCR_VNM_WIDTH 1
+#define MAC_TIR_TNID_INDEX 0
+#define MAC_TIR_TNID_WIDTH 16
#define MAC_TSCR_AV8021ASMEN_INDEX 28
#define MAC_TSCR_AV8021ASMEN_WIDTH 1
#define MAC_TSCR_SNAPTYPSEL_INDEX 16
@@ -1153,11 +1168,17 @@
#define RX_PACKET_ATTRIBUTES_RSS_HASH_WIDTH 1
#define RX_PACKET_ATTRIBUTES_FIRST_INDEX 7
#define RX_PACKET_ATTRIBUTES_FIRST_WIDTH 1
+#define RX_PACKET_ATTRIBUTES_TNP_INDEX 8
+#define RX_PACKET_ATTRIBUTES_TNP_WIDTH 1
+#define RX_PACKET_ATTRIBUTES_TNPCSUM_DONE_INDEX 9
+#define RX_PACKET_ATTRIBUTES_TNPCSUM_DONE_WIDTH 1
#define RX_NORMAL_DESC0_OVT_INDEX 0
#define RX_NORMAL_DESC0_OVT_WIDTH 16
#define RX_NORMAL_DESC2_HL_INDEX 0
#define RX_NORMAL_DESC2_HL_WIDTH 10
+#define RX_NORMAL_DESC2_TNP_INDEX 11
+#define RX_NORMAL_DESC2_TNP_WIDTH 1
#define RX_NORMAL_DESC3_CDA_INDEX 27
#define RX_NORMAL_DESC3_CDA_WIDTH 1
#define RX_NORMAL_DESC3_CTXT_INDEX 30
@@ -1184,9 +1205,11 @@
#define RX_DESC3_L34T_IPV4_TCP 1
#define RX_DESC3_L34T_IPV4_UDP 2
#define RX_DESC3_L34T_IPV4_ICMP 3
+#define RX_DESC3_L34T_IPV4_UNKNOWN 7
#define RX_DESC3_L34T_IPV6_TCP 9
#define RX_DESC3_L34T_IPV6_UDP 10
#define RX_DESC3_L34T_IPV6_ICMP 11
+#define RX_DESC3_L34T_IPV6_UNKNOWN 15
#define RX_CONTEXT_DESC3_TSA_INDEX 4
#define RX_CONTEXT_DESC3_TSA_WIDTH 1
@@ -1201,6 +1224,8 @@
#define TX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH 1
#define TX_PACKET_ATTRIBUTES_PTP_INDEX 3
#define TX_PACKET_ATTRIBUTES_PTP_WIDTH 1
+#define TX_PACKET_ATTRIBUTES_VXLAN_INDEX 4
+#define TX_PACKET_ATTRIBUTES_VXLAN_WIDTH 1
#define TX_CONTEXT_DESC2_MSS_INDEX 0
#define TX_CONTEXT_DESC2_MSS_WIDTH 15
@@ -1241,8 +1266,11 @@
#define TX_NORMAL_DESC3_TCPPL_WIDTH 18
#define TX_NORMAL_DESC3_TSE_INDEX 18
#define TX_NORMAL_DESC3_TSE_WIDTH 1
+#define TX_NORMAL_DESC3_VNP_INDEX 23
+#define TX_NORMAL_DESC3_VNP_WIDTH 3
#define TX_NORMAL_DESC2_VLAN_INSERT 0x2
+#define TX_NORMAL_DESC3_VXLAN_PACKET 0x3
/* MDIO undefined or vendor specific registers */
#ifndef MDIO_PMA_10GBR_PMD_CTRL
@@ -1339,6 +1367,7 @@
#define XGBE_AN_CL37_PCS_MODE_BASEX 0x00
#define XGBE_AN_CL37_PCS_MODE_SGMII 0x04
#define XGBE_AN_CL37_TX_CONFIG_MASK 0x08
+#define XGBE_AN_CL37_MII_CTRL_8BIT 0x0100
/* Bit setting and getting macros
* The get macro will extract the current bit field value from within
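Every register field above comes as an _INDEX/_WIDTH pair, consumed by generic mask-and-shift helpers such as the XGMAC_GET_BITS()/XGMAC_SET_BITS() calls seen throughout the later hunks. A minimal sketch of how such accessors are conventionally built (illustrative, not the header's verbatim definitions):

/* Extract a field: shift it down to bit 0, then mask to its width */
#define GET_BITS(_var, _index, _width) \
	(((_var) >> (_index)) & ((0x1 << (_width)) - 1))

/* Update a field: clear its old bits, then OR in the masked value */
#define SET_BITS(_var, _index, _width, _val) \
do { \
	(_var) &= ~(((0x1 << (_width)) - 1) << (_index)); \
	(_var) |= (((_val) & ((0x1 << (_width)) - 1)) << (_index)); \
} while (0)

/* e.g. SET_BITS(ier, DMA_CH_IER_NIE_INDEX, DMA_CH_IER_NIE_WIDTH, 1)
 * is what a wrapper like XGMAC_SET_BITS(ier, DMA_CH_IER, NIE, 1)
 * would token-paste its way down to.
 */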
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
index 7546b66..7d128be 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
@@ -527,3 +527,28 @@ void xgbe_debugfs_exit(struct xgbe_prv_data *pdata)
debugfs_remove_recursive(pdata->xgbe_debugfs);
pdata->xgbe_debugfs = NULL;
}
+
+void xgbe_debugfs_rename(struct xgbe_prv_data *pdata)
+{
+ struct dentry *pfile;
+ char *buf;
+
+ if (!pdata->xgbe_debugfs)
+ return;
+
+ buf = kasprintf(GFP_KERNEL, "amd-xgbe-%s", pdata->netdev->name);
+ if (!buf)
+ return;
+
+ if (!strcmp(pdata->xgbe_debugfs->d_name.name, buf))
+ goto out;
+
+ pfile = debugfs_rename(pdata->xgbe_debugfs->d_parent,
+ pdata->xgbe_debugfs,
+ pdata->xgbe_debugfs->d_parent, buf);
+ if (!pfile)
+ netdev_err(pdata->netdev, "debugfs_rename failed\n");
+
+out:
+ kfree(buf);
+}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 06f953e..e107e18 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -479,6 +479,50 @@ static bool xgbe_is_pfc_queue(struct xgbe_prv_data *pdata,
return false;
}
+static void xgbe_set_vxlan_id(struct xgbe_prv_data *pdata)
+{
+ /* Program the VXLAN port */
+ XGMAC_IOWRITE_BITS(pdata, MAC_TIR, TNID, pdata->vxlan_port);
+
+ netif_dbg(pdata, drv, pdata->netdev, "VXLAN tunnel id set to %hx\n",
+ pdata->vxlan_port);
+}
+
+static void xgbe_enable_vxlan(struct xgbe_prv_data *pdata)
+{
+ if (!pdata->hw_feat.vxn)
+ return;
+
+ /* Program the VXLAN port */
+ xgbe_set_vxlan_id(pdata);
+
+ /* Allow for IPv6/UDP zero-checksum VXLAN packets */
+ XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VUCC, 1);
+
+ /* Enable VXLAN tunneling mode */
+ XGMAC_IOWRITE_BITS(pdata, MAC_TCR, VNM, 0);
+ XGMAC_IOWRITE_BITS(pdata, MAC_TCR, VNE, 1);
+
+ netif_dbg(pdata, drv, pdata->netdev, "VXLAN acceleration enabled\n");
+}
+
+static void xgbe_disable_vxlan(struct xgbe_prv_data *pdata)
+{
+ if (!pdata->hw_feat.vxn)
+ return;
+
+ /* Disable tunneling mode */
+ XGMAC_IOWRITE_BITS(pdata, MAC_TCR, VNE, 0);
+
+ /* Clear IPv6/UDP zero-checksum VXLAN packets setting */
+ XGMAC_IOWRITE_BITS(pdata, MAC_PFR, VUCC, 0);
+
+ /* Clear the VXLAN port */
+ XGMAC_IOWRITE_BITS(pdata, MAC_TIR, TNID, 0);
+
+ netif_dbg(pdata, drv, pdata->netdev, "VXLAN acceleration disabled\n");
+}
+
static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata)
{
unsigned int max_q_count, q_count;
@@ -605,32 +649,38 @@ static void xgbe_config_flow_control(struct xgbe_prv_data *pdata)
static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
{
struct xgbe_channel *channel;
- unsigned int dma_ch_isr, dma_ch_ier;
- unsigned int i;
+ unsigned int i, ver;
/* Set the interrupt mode if supported */
if (pdata->channel_irq_mode)
XGMAC_IOWRITE_BITS(pdata, DMA_MR, INTM,
pdata->channel_irq_mode);
+ ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER);
+
for (i = 0; i < pdata->channel_count; i++) {
channel = pdata->channel[i];
/* Clear all the interrupts which are set */
- dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR);
- XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_ch_isr);
+ XGMAC_DMA_IOWRITE(channel, DMA_CH_SR,
+ XGMAC_DMA_IOREAD(channel, DMA_CH_SR));
/* Clear all interrupt enable bits */
- dma_ch_ier = 0;
+ channel->curr_ier = 0;
/* Enable the following interrupts
* NIE - Normal Interrupt Summary Enable
* AIE - Abnormal Interrupt Summary Enable
* FBEE - Fatal Bus Error Enable
*/
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, NIE, 1);
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, AIE, 1);
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, FBEE, 1);
+ if (ver < 0x21) {
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, NIE20, 1);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, AIE20, 1);
+ } else {
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, NIE, 1);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, AIE, 1);
+ }
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, FBEE, 1);
if (channel->tx_ring) {
/* Enable the following Tx interrupts
@@ -639,7 +689,8 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
* mode)
*/
if (!pdata->per_channel_irq || pdata->channel_irq_mode)
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 1);
+ XGMAC_SET_BITS(channel->curr_ier,
+ DMA_CH_IER, TIE, 1);
}
if (channel->rx_ring) {
/* Enable the following Rx interrupts
@@ -648,12 +699,13 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
* per channel interrupts in edge triggered
* mode)
*/
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RBUE, 1);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RBUE, 1);
if (!pdata->per_channel_irq || pdata->channel_irq_mode)
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 1);
+ XGMAC_SET_BITS(channel->curr_ier,
+ DMA_CH_IER, RIE, 1);
}
- XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, dma_ch_ier);
+ XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, channel->curr_ier);
}
}
@@ -1608,7 +1660,8 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
struct xgbe_ring_data *rdata;
struct xgbe_ring_desc *rdesc;
struct xgbe_packet_data *packet = &ring->packet_data;
- unsigned int csum, tso, vlan;
+ unsigned int tx_packets, tx_bytes;
+ unsigned int csum, tso, vlan, vxlan;
unsigned int tso_context, vlan_context;
unsigned int tx_set_ic;
int start_index = ring->cur;
@@ -1617,12 +1670,17 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
DBGPR("-->xgbe_dev_xmit\n");
+ tx_packets = packet->tx_packets;
+ tx_bytes = packet->tx_bytes;
+
csum = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
CSUM_ENABLE);
tso = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
TSO_ENABLE);
vlan = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
VLAN_CTAG);
+ vxlan = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+ VXLAN);
if (tso && (packet->mss != ring->tx.cur_mss))
tso_context = 1;
@@ -1644,13 +1702,12 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
* - Addition of Tx frame count to the frame count since the
* last interrupt was set does not exceed the frame count setting
*/
- ring->coalesce_count += packet->tx_packets;
+ ring->coalesce_count += tx_packets;
if (!pdata->tx_frames)
tx_set_ic = 0;
- else if (packet->tx_packets > pdata->tx_frames)
+ else if (tx_packets > pdata->tx_frames)
tx_set_ic = 1;
- else if ((ring->coalesce_count % pdata->tx_frames) <
- packet->tx_packets)
+ else if ((ring->coalesce_count % pdata->tx_frames) < tx_packets)
tx_set_ic = 1;
else
tx_set_ic = 0;
@@ -1740,7 +1797,7 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, TCPHDRLEN,
packet->tcp_header_len / 4);
- pdata->ext_stats.tx_tso_packets++;
+ pdata->ext_stats.tx_tso_packets += tx_packets;
} else {
/* Enable CRC and Pad Insertion */
XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, CPC, 0);
@@ -1755,6 +1812,13 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
packet->length);
}
+ if (vxlan) {
+ XGMAC_SET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, VNP,
+ TX_NORMAL_DESC3_VXLAN_PACKET);
+
+ pdata->ext_stats.tx_vxlan_packets += packet->tx_packets;
+ }
+
for (i = cur_index - start_index + 1; i < packet->rdesc_count; i++) {
cur_index++;
rdata = XGBE_GET_DESC_DATA(ring, cur_index);
@@ -1788,8 +1852,11 @@ static void xgbe_dev_xmit(struct xgbe_channel *channel)
XGMAC_SET_BITS_LE(rdesc->desc2, TX_NORMAL_DESC2, IC, 1);
/* Save the Tx info to report back during cleanup */
- rdata->tx.packets = packet->tx_packets;
- rdata->tx.bytes = packet->tx_bytes;
+ rdata->tx.packets = tx_packets;
+ rdata->tx.bytes = tx_bytes;
+
+ pdata->ext_stats.txq_packets[channel->queue_index] += tx_packets;
+ pdata->ext_stats.txq_bytes[channel->queue_index] += tx_bytes;
/* In case the Tx DMA engine is running, make sure everything
* is written to the descriptor(s) before setting the OWN bit
@@ -1913,9 +1980,28 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL);
/* Set checksum done indicator as appropriate */
- if (netdev->features & NETIF_F_RXCSUM)
+ if (netdev->features & NETIF_F_RXCSUM) {
XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
CSUM_DONE, 1);
+ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+ TNPCSUM_DONE, 1);
+ }
+
+ /* Set the tunneled packet indicator */
+ if (XGMAC_GET_BITS_LE(rdesc->desc2, RX_NORMAL_DESC2, TNP)) {
+ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+ TNP, 1);
+ pdata->ext_stats.rx_vxlan_packets++;
+
+ l34t = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, L34T);
+ switch (l34t) {
+ case RX_DESC3_L34T_IPV4_UNKNOWN:
+ case RX_DESC3_L34T_IPV6_UNKNOWN:
+ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+ TNPCSUM_DONE, 0);
+ break;
+ }
+ }
/* Check for errors (only valid in last descriptor) */
err = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, ES);
@@ -1935,14 +2021,30 @@ static int xgbe_dev_read(struct xgbe_channel *channel)
packet->vlan_ctag);
}
} else {
- if ((etlt == 0x05) || (etlt == 0x06))
+ unsigned int tnp = XGMAC_GET_BITS(packet->attributes,
+ RX_PACKET_ATTRIBUTES, TNP);
+
+ if ((etlt == 0x05) || (etlt == 0x06)) {
XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
CSUM_DONE, 0);
- else
+ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+ TNPCSUM_DONE, 0);
+ pdata->ext_stats.rx_csum_errors++;
+ } else if (tnp && ((etlt == 0x09) || (etlt == 0x0a))) {
+ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+ CSUM_DONE, 0);
+ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES,
+ TNPCSUM_DONE, 0);
+ pdata->ext_stats.rx_vxlan_csum_errors++;
+ } else {
XGMAC_SET_BITS(packet->errors, RX_PACKET_ERRORS,
FRAME, 1);
+ }
}
+ pdata->ext_stats.rxq_packets[channel->queue_index]++;
+ pdata->ext_stats.rxq_bytes[channel->queue_index] += rdata->rx.len;
+
DBGPR("<--xgbe_dev_read: %s - descriptor=%u (cur=%d)\n", channel->name,
ring->cur & (ring->rdesc_count - 1), ring->cur);
@@ -1964,44 +2066,40 @@ static int xgbe_is_last_desc(struct xgbe_ring_desc *rdesc)
static int xgbe_enable_int(struct xgbe_channel *channel,
enum xgbe_int int_id)
{
- unsigned int dma_ch_ier;
-
- dma_ch_ier = XGMAC_DMA_IOREAD(channel, DMA_CH_IER);
-
switch (int_id) {
case XGMAC_INT_DMA_CH_SR_TI:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 1);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TIE, 1);
break;
case XGMAC_INT_DMA_CH_SR_TPS:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TXSE, 1);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TXSE, 1);
break;
case XGMAC_INT_DMA_CH_SR_TBU:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TBUE, 1);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TBUE, 1);
break;
case XGMAC_INT_DMA_CH_SR_RI:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 1);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RIE, 1);
break;
case XGMAC_INT_DMA_CH_SR_RBU:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RBUE, 1);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RBUE, 1);
break;
case XGMAC_INT_DMA_CH_SR_RPS:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RSE, 1);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RSE, 1);
break;
case XGMAC_INT_DMA_CH_SR_TI_RI:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 1);
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 1);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TIE, 1);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RIE, 1);
break;
case XGMAC_INT_DMA_CH_SR_FBE:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, FBEE, 1);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, FBEE, 1);
break;
case XGMAC_INT_DMA_ALL:
- dma_ch_ier |= channel->saved_ier;
+ channel->curr_ier |= channel->saved_ier;
break;
default:
return -1;
}
- XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, dma_ch_ier);
+ XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, channel->curr_ier);
return 0;
}
@@ -2009,45 +2107,41 @@ static int xgbe_enable_int(struct xgbe_channel *channel,
static int xgbe_disable_int(struct xgbe_channel *channel,
enum xgbe_int int_id)
{
- unsigned int dma_ch_ier;
-
- dma_ch_ier = XGMAC_DMA_IOREAD(channel, DMA_CH_IER);
-
switch (int_id) {
case XGMAC_INT_DMA_CH_SR_TI:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 0);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TIE, 0);
break;
case XGMAC_INT_DMA_CH_SR_TPS:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TXSE, 0);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TXSE, 0);
break;
case XGMAC_INT_DMA_CH_SR_TBU:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TBUE, 0);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TBUE, 0);
break;
case XGMAC_INT_DMA_CH_SR_RI:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 0);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RIE, 0);
break;
case XGMAC_INT_DMA_CH_SR_RBU:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RBUE, 0);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RBUE, 0);
break;
case XGMAC_INT_DMA_CH_SR_RPS:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RSE, 0);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RSE, 0);
break;
case XGMAC_INT_DMA_CH_SR_TI_RI:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 0);
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 0);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, TIE, 0);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, RIE, 0);
break;
case XGMAC_INT_DMA_CH_SR_FBE:
- XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, FBEE, 0);
+ XGMAC_SET_BITS(channel->curr_ier, DMA_CH_IER, FBEE, 0);
break;
case XGMAC_INT_DMA_ALL:
- channel->saved_ier = dma_ch_ier & XGBE_DMA_INTERRUPT_MASK;
- dma_ch_ier &= ~XGBE_DMA_INTERRUPT_MASK;
+ channel->saved_ier = channel->curr_ier;
+ channel->curr_ier = 0;
break;
default:
return -1;
}
- XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, dma_ch_ier);
+ XGMAC_DMA_IOWRITE(channel, DMA_CH_IER, channel->curr_ier);
return 0;
}
@@ -3534,5 +3628,10 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
hw_if->disable_ecc_ded = xgbe_disable_ecc_ded;
hw_if->disable_ecc_sec = xgbe_disable_ecc_sec;
+ /* For VXLAN */
+ hw_if->enable_vxlan = xgbe_enable_vxlan;
+ hw_if->disable_vxlan = xgbe_disable_vxlan;
+ hw_if->set_vxlan_id = xgbe_set_vxlan_id;
+
DBGPR("<--xgbe_init_function_ptrs\n");
}
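The xgbe_enable_int()/xgbe_disable_int() rework above swaps a read-modify-write of DMA_CH_IER for a per-channel software shadow (curr_ier): the interrupt paths now update the cached word and write it out, never reading the register back over MMIO. A minimal sketch of that shadow-register pattern, with hypothetical names:

#include <linux/io.h>
#include <linux/types.h>

struct example_channel {
	void __iomem *ier_reg;	/* MMIO address of the enable register */
	u32 curr_ier;		/* shadow of the last value written */
};

static void example_irq_set(struct example_channel *ch, u32 mask, bool on)
{
	/* Modify the cached copy instead of reading the device back... */
	if (on)
		ch->curr_ier |= mask;
	else
		ch->curr_ier &= ~mask;

	/* ...then push the whole shadow out with a single write */
	writel(ch->curr_ier, ch->ier_reg);
}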
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index ecef3ee..608693d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -124,6 +124,7 @@
#include <linux/if_ether.h>
#include <linux/net_tstamp.h>
#include <linux/phy.h>
+#include <net/vxlan.h>
#include "xgbe.h"
#include "xgbe-common.h"
@@ -732,8 +733,6 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata)
unsigned int mac_hfr0, mac_hfr1, mac_hfr2;
struct xgbe_hw_features *hw_feat = &pdata->hw_feat;
- DBGPR("-->xgbe_get_all_hw_features\n");
-
mac_hfr0 = XGMAC_IOREAD(pdata, MAC_HWF0R);
mac_hfr1 = XGMAC_IOREAD(pdata, MAC_HWF1R);
mac_hfr2 = XGMAC_IOREAD(pdata, MAC_HWF2R);
@@ -758,6 +757,7 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata)
ADDMACADRSEL);
hw_feat->ts_src = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, TSSTSSEL);
hw_feat->sa_vlan_ins = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, SAVLANINS);
+ hw_feat->vxn = XGMAC_GET_BITS(mac_hfr0, MAC_HWF0R, VXN);
/* Hardware feature register 1 */
hw_feat->rx_fifo_size = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R,
@@ -828,7 +828,193 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata)
hw_feat->rx_fifo_size = 1 << (hw_feat->rx_fifo_size + 7);
hw_feat->tx_fifo_size = 1 << (hw_feat->tx_fifo_size + 7);
- DBGPR("<--xgbe_get_all_hw_features\n");
+ if (netif_msg_probe(pdata)) {
+ dev_dbg(pdata->dev, "Hardware features:\n");
+
+ /* Hardware feature register 0 */
+ dev_dbg(pdata->dev, " 1GbE support : %s\n",
+ hw_feat->gmii ? "yes" : "no");
+ dev_dbg(pdata->dev, " VLAN hash filter : %s\n",
+ hw_feat->vlhash ? "yes" : "no");
+ dev_dbg(pdata->dev, " MDIO interface : %s\n",
+ hw_feat->sma ? "yes" : "no");
+ dev_dbg(pdata->dev, " Wake-up packet support : %s\n",
+ hw_feat->rwk ? "yes" : "no");
+ dev_dbg(pdata->dev, " Magic packet support : %s\n",
+ hw_feat->mgk ? "yes" : "no");
+ dev_dbg(pdata->dev, " Management counters : %s\n",
+ hw_feat->mmc ? "yes" : "no");
+ dev_dbg(pdata->dev, " ARP offload : %s\n",
+ hw_feat->aoe ? "yes" : "no");
+ dev_dbg(pdata->dev, " IEEE 1588-2008 Timestamp : %s\n",
+ hw_feat->ts ? "yes" : "no");
+ dev_dbg(pdata->dev, " Energy Efficient Ethernet : %s\n",
+ hw_feat->eee ? "yes" : "no");
+ dev_dbg(pdata->dev, " TX checksum offload : %s\n",
+ hw_feat->tx_coe ? "yes" : "no");
+ dev_dbg(pdata->dev, " RX checksum offload : %s\n",
+ hw_feat->rx_coe ? "yes" : "no");
+ dev_dbg(pdata->dev, " Additional MAC addresses : %u\n",
+ hw_feat->addn_mac);
+ dev_dbg(pdata->dev, " Timestamp source : %s\n",
+ (hw_feat->ts_src == 1) ? "internal" :
+ (hw_feat->ts_src == 2) ? "external" :
+ (hw_feat->ts_src == 3) ? "internal/external" : "n/a");
+ dev_dbg(pdata->dev, " SA/VLAN insertion : %s\n",
+ hw_feat->sa_vlan_ins ? "yes" : "no");
+ dev_dbg(pdata->dev, " VXLAN/NVGRE support : %s\n",
+ hw_feat->vxn ? "yes" : "no");
+
+ /* Hardware feature register 1 */
+ dev_dbg(pdata->dev, " RX fifo size : %u\n",
+ hw_feat->rx_fifo_size);
+ dev_dbg(pdata->dev, " TX fifo size : %u\n",
+ hw_feat->tx_fifo_size);
+ dev_dbg(pdata->dev, " IEEE 1588 high word : %s\n",
+ hw_feat->adv_ts_hi ? "yes" : "no");
+ dev_dbg(pdata->dev, " DMA width : %u\n",
+ hw_feat->dma_width);
+ dev_dbg(pdata->dev, " Data Center Bridging : %s\n",
+ hw_feat->dcb ? "yes" : "no");
+ dev_dbg(pdata->dev, " Split header : %s\n",
+ hw_feat->sph ? "yes" : "no");
+ dev_dbg(pdata->dev, " TCP Segmentation Offload : %s\n",
+ hw_feat->tso ? "yes" : "no");
+ dev_dbg(pdata->dev, " Debug memory interface : %s\n",
+ hw_feat->dma_debug ? "yes" : "no");
+ dev_dbg(pdata->dev, " Receive Side Scaling : %s\n",
+ hw_feat->rss ? "yes" : "no");
+ dev_dbg(pdata->dev, " Traffic Class count : %u\n",
+ hw_feat->tc_cnt);
+ dev_dbg(pdata->dev, " Hash table size : %u\n",
+ hw_feat->hash_table_size);
+ dev_dbg(pdata->dev, " L3/L4 Filters : %u\n",
+ hw_feat->l3l4_filter_num);
+
+ /* Hardware feature register 2 */
+ dev_dbg(pdata->dev, " RX queue count : %u\n",
+ hw_feat->rx_q_cnt);
+ dev_dbg(pdata->dev, " TX queue count : %u\n",
+ hw_feat->tx_q_cnt);
+ dev_dbg(pdata->dev, " RX DMA channel count : %u\n",
+ hw_feat->rx_ch_cnt);
+ dev_dbg(pdata->dev, " TX DMA channel count : %u\n",
+ hw_feat->rx_ch_cnt);
+ dev_dbg(pdata->dev, " PPS outputs : %u\n",
+ hw_feat->pps_out_num);
+ dev_dbg(pdata->dev, " Auxiliary snapshot inputs : %u\n",
+ hw_feat->aux_snap_num);
+ }
+}
+
+static void xgbe_disable_vxlan_offloads(struct xgbe_prv_data *pdata)
+{
+ struct net_device *netdev = pdata->netdev;
+
+ if (!pdata->vxlan_offloads_set)
+ return;
+
+ netdev_info(netdev, "disabling VXLAN offloads\n");
+
+ netdev->hw_enc_features &= ~(NETIF_F_SG |
+ NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM |
+ NETIF_F_RXCSUM |
+ NETIF_F_TSO |
+ NETIF_F_TSO6 |
+ NETIF_F_GRO |
+ NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM);
+
+ netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM);
+
+ pdata->vxlan_offloads_set = 0;
+}
+
+static void xgbe_disable_vxlan_hw(struct xgbe_prv_data *pdata)
+{
+ if (!pdata->vxlan_port_set)
+ return;
+
+ pdata->hw_if.disable_vxlan(pdata);
+
+ pdata->vxlan_port_set = 0;
+ pdata->vxlan_port = 0;
+}
+
+static void xgbe_disable_vxlan_accel(struct xgbe_prv_data *pdata)
+{
+ xgbe_disable_vxlan_offloads(pdata);
+
+ xgbe_disable_vxlan_hw(pdata);
+}
+
+static void xgbe_enable_vxlan_offloads(struct xgbe_prv_data *pdata)
+{
+ struct net_device *netdev = pdata->netdev;
+
+ if (pdata->vxlan_offloads_set)
+ return;
+
+ netdev_info(netdev, "enabling VXLAN offloads\n");
+
+ netdev->hw_enc_features |= NETIF_F_SG |
+ NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM |
+ NETIF_F_RXCSUM |
+ NETIF_F_TSO |
+ NETIF_F_TSO6 |
+ NETIF_F_GRO |
+ pdata->vxlan_features;
+
+ netdev->features |= pdata->vxlan_features;
+
+ pdata->vxlan_offloads_set = 1;
+}
+
+static void xgbe_enable_vxlan_hw(struct xgbe_prv_data *pdata)
+{
+ struct xgbe_vxlan_data *vdata;
+
+ if (pdata->vxlan_port_set)
+ return;
+
+ if (list_empty(&pdata->vxlan_ports))
+ return;
+
+ vdata = list_first_entry(&pdata->vxlan_ports,
+ struct xgbe_vxlan_data, list);
+
+ pdata->vxlan_port_set = 1;
+ pdata->vxlan_port = be16_to_cpu(vdata->port);
+
+ pdata->hw_if.enable_vxlan(pdata);
+}
+
+static void xgbe_enable_vxlan_accel(struct xgbe_prv_data *pdata)
+{
+ /* VXLAN acceleration desired? */
+ if (!pdata->vxlan_features)
+ return;
+
+ /* VXLAN acceleration possible? */
+ if (pdata->vxlan_force_disable)
+ return;
+
+ xgbe_enable_vxlan_hw(pdata);
+
+ xgbe_enable_vxlan_offloads(pdata);
+}
+
+static void xgbe_reset_vxlan_accel(struct xgbe_prv_data *pdata)
+{
+ xgbe_disable_vxlan_hw(pdata);
+
+ if (pdata->vxlan_features)
+ xgbe_enable_vxlan_offloads(pdata);
+
+ pdata->vxlan_force_disable = 0;
}
static void xgbe_napi_enable(struct xgbe_prv_data *pdata, unsigned int add)
@@ -887,7 +1073,7 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
(unsigned long)pdata);
ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0,
- netdev->name, pdata);
+ netdev_name(netdev), pdata);
if (ret) {
netdev_alert(netdev, "error requesting irq %d\n",
pdata->dev_irq);
@@ -1154,6 +1340,8 @@ static int xgbe_start(struct xgbe_prv_data *pdata)
hw_if->enable_tx(pdata);
hw_if->enable_rx(pdata);
+ udp_tunnel_get_rx_info(netdev);
+
netif_tx_start_all_queues(netdev);
xgbe_start_timers(pdata);
@@ -1195,6 +1383,8 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
xgbe_stop_timers(pdata);
flush_workqueue(pdata->dev_workqueue);
+ xgbe_reset_vxlan_accel(pdata);
+
hw_if->disable_tx(pdata);
hw_if->disable_rx(pdata);
@@ -1483,10 +1673,18 @@ static int xgbe_prep_tso(struct sk_buff *skb, struct xgbe_packet_data *packet)
if (ret)
return ret;
- packet->header_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
- packet->tcp_header_len = tcp_hdrlen(skb);
+ if (XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, VXLAN)) {
+ packet->header_len = skb_inner_transport_offset(skb) +
+ inner_tcp_hdrlen(skb);
+ packet->tcp_header_len = inner_tcp_hdrlen(skb);
+ } else {
+ packet->header_len = skb_transport_offset(skb) +
+ tcp_hdrlen(skb);
+ packet->tcp_header_len = tcp_hdrlen(skb);
+ }
packet->tcp_payload_len = skb->len - packet->header_len;
packet->mss = skb_shinfo(skb)->gso_size;
+
DBGPR(" packet->header_len=%u\n", packet->header_len);
DBGPR(" packet->tcp_header_len=%u, packet->tcp_payload_len=%u\n",
packet->tcp_header_len, packet->tcp_payload_len);
@@ -1501,6 +1699,49 @@ static int xgbe_prep_tso(struct sk_buff *skb, struct xgbe_packet_data *packet)
return 0;
}
+static bool xgbe_is_vxlan(struct xgbe_prv_data *pdata, struct sk_buff *skb)
+{
+ struct xgbe_vxlan_data *vdata;
+
+ if (pdata->vxlan_force_disable)
+ return false;
+
+ if (!skb->encapsulation)
+ return false;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return false;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ if (ip_hdr(skb)->protocol != IPPROTO_UDP)
+ return false;
+ break;
+
+ case htons(ETH_P_IPV6):
+ if (ipv6_hdr(skb)->nexthdr != IPPROTO_UDP)
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ /* See if we have the UDP port in our list */
+ list_for_each_entry(vdata, &pdata->vxlan_ports, list) {
+ if ((skb->protocol == htons(ETH_P_IP)) &&
+ (vdata->sa_family == AF_INET) &&
+ (vdata->port == udp_hdr(skb)->dest))
+ return true;
+ else if ((skb->protocol == htons(ETH_P_IPV6)) &&
+ (vdata->sa_family == AF_INET6) &&
+ (vdata->port == udp_hdr(skb)->dest))
+ return true;
+ }
+
+ return false;
+}
+
static int xgbe_is_tso(struct sk_buff *skb)
{
if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -1549,6 +1790,10 @@ static void xgbe_packet_info(struct xgbe_prv_data *pdata,
XGMAC_SET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
CSUM_ENABLE, 1);
+ if (xgbe_is_vxlan(pdata, skb))
+ XGMAC_SET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+ VXLAN, 1);
+
if (skb_vlan_tag_present(skb)) {
/* VLAN requires an extra descriptor if tag is different */
if (skb_vlan_tag_get(skb) != ring->tx.cur_vlan_ctag)
@@ -1589,16 +1834,42 @@ static int xgbe_open(struct net_device *netdev)
DBGPR("-->xgbe_open\n");
+ /* Create the various names based on netdev name */
+ snprintf(pdata->an_name, sizeof(pdata->an_name) - 1, "%s-pcs",
+ netdev_name(netdev));
+
+ snprintf(pdata->ecc_name, sizeof(pdata->ecc_name) - 1, "%s-ecc",
+ netdev_name(netdev));
+
+ snprintf(pdata->i2c_name, sizeof(pdata->i2c_name) - 1, "%s-i2c",
+ netdev_name(netdev));
+
+ /* Create workqueues */
+ pdata->dev_workqueue =
+ create_singlethread_workqueue(netdev_name(netdev));
+ if (!pdata->dev_workqueue) {
+ netdev_err(netdev, "device workqueue creation failed\n");
+ return -ENOMEM;
+ }
+
+ pdata->an_workqueue =
+ create_singlethread_workqueue(pdata->an_name);
+ if (!pdata->an_workqueue) {
+ netdev_err(netdev, "phy workqueue creation failed\n");
+ ret = -ENOMEM;
+ goto err_dev_wq;
+ }
+
/* Reset the phy settings */
ret = xgbe_phy_reset(pdata);
if (ret)
- return ret;
+ goto err_an_wq;
/* Enable the clocks */
ret = clk_prepare_enable(pdata->sysclk);
if (ret) {
netdev_alert(netdev, "dma clk_prepare_enable failed\n");
- return ret;
+ goto err_an_wq;
}
ret = clk_prepare_enable(pdata->ptpclk);
@@ -1651,6 +1922,12 @@ err_ptpclk:
err_sysclk:
clk_disable_unprepare(pdata->sysclk);
+err_an_wq:
+ destroy_workqueue(pdata->an_workqueue);
+
+err_dev_wq:
+ destroy_workqueue(pdata->dev_workqueue);
+
return ret;
}
@@ -1674,6 +1951,12 @@ static int xgbe_close(struct net_device *netdev)
clk_disable_unprepare(pdata->ptpclk);
clk_disable_unprepare(pdata->sysclk);
+ flush_workqueue(pdata->an_workqueue);
+ destroy_workqueue(pdata->an_workqueue);
+
+ flush_workqueue(pdata->dev_workqueue);
+ destroy_workqueue(pdata->dev_workqueue);
+
set_bit(XGBE_DOWN, &pdata->dev_state);
DBGPR("<--xgbe_close\n");
@@ -1918,18 +2201,18 @@ static void xgbe_poll_controller(struct net_device *netdev)
}
#endif /* End CONFIG_NET_POLL_CONTROLLER */
-static int xgbe_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
- __be16 proto,
- struct tc_to_netdev *tc_to_netdev)
+static int xgbe_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+ void *type_data)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
+ struct tc_mqprio_qopt *mqprio = type_data;
u8 tc;
- if (tc_to_netdev->type != TC_SETUP_MQPRIO)
- return -EINVAL;
+ if (type != TC_SETUP_MQPRIO)
+ return -EOPNOTSUPP;
- tc_to_netdev->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
- tc = tc_to_netdev->mqprio->num_tc;
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ tc = mqprio->num_tc;
if (tc > pdata->hw_feat.tc_cnt)
return -EINVAL;
@@ -1940,18 +2223,83 @@ static int xgbe_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
return 0;
}
+static netdev_features_t xgbe_fix_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct xgbe_prv_data *pdata = netdev_priv(netdev);
+ netdev_features_t vxlan_base, vxlan_mask;
+
+ vxlan_base = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_RX_UDP_TUNNEL_PORT;
+ vxlan_mask = vxlan_base | NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+ pdata->vxlan_features = features & vxlan_mask;
+
+ /* Only fix VXLAN-related features */
+ if (!pdata->vxlan_features)
+ return features;
+
+ /* If VXLAN isn't supported then clear any features:
+ * This is needed because NETIF_F_RX_UDP_TUNNEL_PORT gets
+ * automatically set if ndo_udp_tunnel_add is set.
+ */
+ if (!pdata->hw_feat.vxn)
+ return features & ~vxlan_mask;
+
+ /* VXLAN CSUM requires VXLAN base */
+ if ((features & NETIF_F_GSO_UDP_TUNNEL_CSUM) &&
+ !(features & NETIF_F_GSO_UDP_TUNNEL)) {
+ netdev_notice(netdev,
+ "forcing tx udp tunnel support\n");
+ features |= NETIF_F_GSO_UDP_TUNNEL;
+ }
+
+ /* Can't do one without doing the other */
+ if ((features & vxlan_base) != vxlan_base) {
+ netdev_notice(netdev,
+ "forcing both tx and rx udp tunnel support\n");
+ features |= vxlan_base;
+ }
+
+ if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
+ if (!(features & NETIF_F_GSO_UDP_TUNNEL_CSUM)) {
+ netdev_notice(netdev,
+ "forcing tx udp tunnel checksumming on\n");
+ features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ }
+ } else {
+ if (features & NETIF_F_GSO_UDP_TUNNEL_CSUM) {
+ netdev_notice(netdev,
+ "forcing tx udp tunnel checksumming off\n");
+ features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ }
+ }
+
+ pdata->vxlan_features = features & vxlan_mask;
+
+ /* Adjust UDP Tunnel based on current state */
+ if (pdata->vxlan_force_disable) {
+ netdev_notice(netdev,
+ "VXLAN acceleration disabled, turning off udp tunnel features\n");
+ features &= ~vxlan_mask;
+ }
+
+ return features;
+}
+
static int xgbe_set_features(struct net_device *netdev,
netdev_features_t features)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
struct xgbe_hw_if *hw_if = &pdata->hw_if;
netdev_features_t rxhash, rxcsum, rxvlan, rxvlan_filter;
+ netdev_features_t udp_tunnel;
int ret = 0;
rxhash = pdata->netdev_features & NETIF_F_RXHASH;
rxcsum = pdata->netdev_features & NETIF_F_RXCSUM;
rxvlan = pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_RX;
rxvlan_filter = pdata->netdev_features & NETIF_F_HW_VLAN_CTAG_FILTER;
+ udp_tunnel = pdata->netdev_features & NETIF_F_GSO_UDP_TUNNEL;
if ((features & NETIF_F_RXHASH) && !rxhash)
ret = hw_if->enable_rss(pdata);
@@ -1975,6 +2323,11 @@ static int xgbe_set_features(struct net_device *netdev,
else if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER) && rxvlan_filter)
hw_if->disable_rx_vlan_filtering(pdata);
+ if ((features & NETIF_F_GSO_UDP_TUNNEL) && !udp_tunnel)
+ xgbe_enable_vxlan_accel(pdata);
+ else if (!(features & NETIF_F_GSO_UDP_TUNNEL) && udp_tunnel)
+ xgbe_disable_vxlan_accel(pdata);
+
pdata->netdev_features = features;
DBGPR("<--xgbe_set_features\n");
@@ -1982,6 +2335,111 @@ static int xgbe_set_features(struct net_device *netdev,
return 0;
}
+static void xgbe_udp_tunnel_add(struct net_device *netdev,
+ struct udp_tunnel_info *ti)
+{
+ struct xgbe_prv_data *pdata = netdev_priv(netdev);
+ struct xgbe_vxlan_data *vdata;
+
+ if (!pdata->hw_feat.vxn)
+ return;
+
+ if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+ return;
+
+ pdata->vxlan_port_count++;
+
+ netif_dbg(pdata, drv, netdev,
+ "adding VXLAN tunnel, family=%hx/port=%hx\n",
+ ti->sa_family, be16_to_cpu(ti->port));
+
+ if (pdata->vxlan_force_disable)
+ return;
+
+ vdata = kzalloc(sizeof(*vdata), GFP_ATOMIC);
+ if (!vdata) {
+ /* Can no longer properly track VXLAN ports */
+ pdata->vxlan_force_disable = 1;
+ netif_dbg(pdata, drv, netdev,
+ "internal error, disabling VXLAN accelerations\n");
+
+ xgbe_disable_vxlan_accel(pdata);
+
+ return;
+ }
+ vdata->sa_family = ti->sa_family;
+ vdata->port = ti->port;
+
+ list_add_tail(&vdata->list, &pdata->vxlan_ports);
+
+ /* First port added? */
+ if (pdata->vxlan_port_count == 1) {
+ xgbe_enable_vxlan_accel(pdata);
+
+ return;
+ }
+}
+
+static void xgbe_udp_tunnel_del(struct net_device *netdev,
+ struct udp_tunnel_info *ti)
+{
+ struct xgbe_prv_data *pdata = netdev_priv(netdev);
+ struct xgbe_vxlan_data *vdata;
+
+ if (!pdata->hw_feat.vxn)
+ return;
+
+ if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+ return;
+
+ netif_dbg(pdata, drv, netdev,
+ "deleting VXLAN tunnel, family=%hx/port=%hx\n",
+ ti->sa_family, be16_to_cpu(ti->port));
+
+ /* No need for the _safe variant: the loop breaks right after the deletion */
+ list_for_each_entry(vdata, &pdata->vxlan_ports, list) {
+ if (vdata->sa_family != ti->sa_family)
+ continue;
+
+ if (vdata->port != ti->port)
+ continue;
+
+ list_del(&vdata->list);
+ kfree(vdata);
+
+ break;
+ }
+
+ pdata->vxlan_port_count--;
+ if (!pdata->vxlan_port_count) {
+ xgbe_reset_vxlan_accel(pdata);
+
+ return;
+ }
+
+ if (pdata->vxlan_force_disable)
+ return;
+
+ /* See if VXLAN tunnel id needs to be changed */
+ vdata = list_first_entry(&pdata->vxlan_ports,
+ struct xgbe_vxlan_data, list);
+ if (pdata->vxlan_port == be16_to_cpu(vdata->port))
+ return;
+
+ pdata->vxlan_port = be16_to_cpu(vdata->port);
+ pdata->hw_if.set_vxlan_id(pdata);
+}
+
+static netdev_features_t xgbe_features_check(struct sk_buff *skb,
+ struct net_device *netdev,
+ netdev_features_t features)
+{
+ features = vlan_features_check(skb, features);
+ features = vxlan_features_check(skb, features);
+
+ return features;
+}
+
static const struct net_device_ops xgbe_netdev_ops = {
.ndo_open = xgbe_open,
.ndo_stop = xgbe_close,
@@ -1999,7 +2457,11 @@ static const struct net_device_ops xgbe_netdev_ops = {
.ndo_poll_controller = xgbe_poll_controller,
#endif
.ndo_setup_tc = xgbe_setup_tc,
+ .ndo_fix_features = xgbe_fix_features,
.ndo_set_features = xgbe_set_features,
+ .ndo_udp_tunnel_add = xgbe_udp_tunnel_add,
+ .ndo_udp_tunnel_del = xgbe_udp_tunnel_del,
+ .ndo_features_check = xgbe_features_check,
};
const struct net_device_ops *xgbe_get_netdev_ops(void)
@@ -2312,6 +2774,15 @@ skip_data:
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (XGMAC_GET_BITS(packet->attributes,
+ RX_PACKET_ATTRIBUTES, TNP)) {
+ skb->encapsulation = 1;
+
+ if (XGMAC_GET_BITS(packet->attributes,
+ RX_PACKET_ATTRIBUTES, TNPCSUM_DONE))
+ skb->csum_level = 1;
+ }
+
+ if (XGMAC_GET_BITS(packet->attributes,
RX_PACKET_ATTRIBUTES, VLAN_CTAG))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
packet->vlan_ctag);
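The tunnel add/del callbacks above deal with hardware that can match only one VXLAN UDP port at a time: every port the stack announces is queued on pdata->vxlan_ports, but only the list head is programmed, and deleting the programmed port promotes the next entry. A minimal sketch of that policy in isolation, with hypothetical names:

#include <asm/byteorder.h>
#include <linux/list.h>
#include <linux/types.h>

struct example_vxlan_port {
	struct list_head list;
	__be16 port;			/* as announced by the stack */
};

/* The device matches a single port, so offload the head of the list */
static void example_sync_hw_port(struct list_head *ports, u16 *hw_port)
{
	struct example_vxlan_port *e;

	if (list_empty(ports)) {
		*hw_port = 0;		/* nothing left to offload */
		return;
	}

	e = list_first_entry(ports, struct example_vxlan_port, list);
	if (*hw_port != be16_to_cpu(e->port))
		*hw_port = be16_to_cpu(e->port); /* reprogram MAC_TIR here */
}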
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index 67a2e52..ff397bb 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -146,6 +146,7 @@ static const struct xgbe_stats xgbe_gstring_stats[] = {
XGMAC_MMC_STAT("tx_broadcast_packets", txbroadcastframes_gb),
XGMAC_MMC_STAT("tx_multicast_packets", txmulticastframes_gb),
XGMAC_MMC_STAT("tx_vlan_packets", txvlanframes_g),
+ XGMAC_EXT_STAT("tx_vxlan_packets", tx_vxlan_packets),
XGMAC_EXT_STAT("tx_tso_packets", tx_tso_packets),
XGMAC_MMC_STAT("tx_64_byte_packets", tx64octets_gb),
XGMAC_MMC_STAT("tx_65_to_127_byte_packets", tx65to127octets_gb),
@@ -162,6 +163,7 @@ static const struct xgbe_stats xgbe_gstring_stats[] = {
XGMAC_MMC_STAT("rx_broadcast_packets", rxbroadcastframes_g),
XGMAC_MMC_STAT("rx_multicast_packets", rxmulticastframes_g),
XGMAC_MMC_STAT("rx_vlan_packets", rxvlanframes_gb),
+ XGMAC_EXT_STAT("rx_vxlan_packets", rx_vxlan_packets),
XGMAC_MMC_STAT("rx_64_byte_packets", rx64octets_gb),
XGMAC_MMC_STAT("rx_65_to_127_byte_packets", rx65to127octets_gb),
XGMAC_MMC_STAT("rx_128_to_255_byte_packets", rx128to255octets_gb),
@@ -177,6 +179,8 @@ static const struct xgbe_stats xgbe_gstring_stats[] = {
XGMAC_MMC_STAT("rx_out_of_range_errors", rxoutofrangetype),
XGMAC_MMC_STAT("rx_fifo_overflow_errors", rxfifooverflow),
XGMAC_MMC_STAT("rx_watchdog_errors", rxwatchdogerror),
+ XGMAC_EXT_STAT("rx_csum_errors", rx_csum_errors),
+ XGMAC_EXT_STAT("rx_vxlan_csum_errors", rx_vxlan_csum_errors),
XGMAC_MMC_STAT("rx_pause_frames", rxpauseframes),
XGMAC_EXT_STAT("rx_split_header_packets", rx_split_header_packets),
XGMAC_EXT_STAT("rx_buffer_unavailable", rx_buffer_unavailable),
@@ -186,6 +190,7 @@ static const struct xgbe_stats xgbe_gstring_stats[] = {
static void xgbe_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
+ struct xgbe_prv_data *pdata = netdev_priv(netdev);
int i;
switch (stringset) {
@@ -195,6 +200,18 @@ static void xgbe_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
ETH_GSTRING_LEN);
data += ETH_GSTRING_LEN;
}
+ for (i = 0; i < pdata->tx_ring_count; i++) {
+ sprintf(data, "txq_%u_packets", i);
+ data += ETH_GSTRING_LEN;
+ sprintf(data, "txq_%u_bytes", i);
+ data += ETH_GSTRING_LEN;
+ }
+ for (i = 0; i < pdata->rx_ring_count; i++) {
+ sprintf(data, "rxq_%u_packets", i);
+ data += ETH_GSTRING_LEN;
+ sprintf(data, "rxq_%u_bytes", i);
+ data += ETH_GSTRING_LEN;
+ }
break;
}
}
@@ -211,15 +228,26 @@ static void xgbe_get_ethtool_stats(struct net_device *netdev,
stat = (u8 *)pdata + xgbe_gstring_stats[i].stat_offset;
*data++ = *(u64 *)stat;
}
+ for (i = 0; i < pdata->tx_ring_count; i++) {
+ *data++ = pdata->ext_stats.txq_packets[i];
+ *data++ = pdata->ext_stats.txq_bytes[i];
+ }
+ for (i = 0; i < pdata->rx_ring_count; i++) {
+ *data++ = pdata->ext_stats.rxq_packets[i];
+ *data++ = pdata->ext_stats.rxq_bytes[i];
+ }
}
static int xgbe_get_sset_count(struct net_device *netdev, int stringset)
{
+ struct xgbe_prv_data *pdata = netdev_priv(netdev);
int ret;
switch (stringset) {
case ETH_SS_STATS:
- ret = XGBE_STATS_COUNT;
+ ret = XGBE_STATS_COUNT +
+ (pdata->tx_ring_count * 2) +
+ (pdata->rx_ring_count * 2);
break;
default:
@@ -243,6 +271,7 @@ static int xgbe_set_pauseparam(struct net_device *netdev,
struct ethtool_pauseparam *pause)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
int ret = 0;
if (pause->autoneg && (pdata->phy.autoneg != AUTONEG_ENABLE)) {
@@ -255,16 +284,21 @@ static int xgbe_set_pauseparam(struct net_device *netdev,
pdata->phy.tx_pause = pause->tx_pause;
pdata->phy.rx_pause = pause->rx_pause;
- pdata->phy.advertising &= ~ADVERTISED_Pause;
- pdata->phy.advertising &= ~ADVERTISED_Asym_Pause;
+ XGBE_CLR_ADV(lks, Pause);
+ XGBE_CLR_ADV(lks, Asym_Pause);
if (pause->rx_pause) {
- pdata->phy.advertising |= ADVERTISED_Pause;
- pdata->phy.advertising |= ADVERTISED_Asym_Pause;
+ XGBE_SET_ADV(lks, Pause);
+ XGBE_SET_ADV(lks, Asym_Pause);
}
- if (pause->tx_pause)
- pdata->phy.advertising ^= ADVERTISED_Asym_Pause;
+ if (pause->tx_pause) {
+ /* Equivalent to XOR of Asym_Pause */
+ if (XGBE_ADV(lks, Asym_Pause))
+ XGBE_CLR_ADV(lks, Asym_Pause);
+ else
+ XGBE_SET_ADV(lks, Asym_Pause);
+ }
if (netif_running(netdev))
ret = pdata->phy_if.phy_config_aneg(pdata);
@@ -276,22 +310,20 @@ static int xgbe_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *cmd)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
cmd->base.phy_address = pdata->phy.address;
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
- pdata->phy.supported);
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
- pdata->phy.advertising);
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising,
- pdata->phy.lp_advertising);
-
cmd->base.autoneg = pdata->phy.autoneg;
cmd->base.speed = pdata->phy.speed;
cmd->base.duplex = pdata->phy.duplex;
cmd->base.port = PORT_NONE;
+ XGBE_LM_COPY(cmd, supported, lks, supported);
+ XGBE_LM_COPY(cmd, advertising, lks, advertising);
+ XGBE_LM_COPY(cmd, lp_advertising, lks, lp_advertising);
+
return 0;
}
@@ -299,7 +331,8 @@ static int xgbe_set_link_ksettings(struct net_device *netdev,
const struct ethtool_link_ksettings *cmd)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
- u32 advertising;
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
u32 speed;
int ret;
@@ -331,15 +364,17 @@ static int xgbe_set_link_ksettings(struct net_device *netdev,
}
}
- ethtool_convert_link_mode_to_legacy_u32(&advertising,
- cmd->link_modes.advertising);
-
netif_dbg(pdata, link, netdev,
- "requested advertisement %#x, phy supported %#x\n",
- advertising, pdata->phy.supported);
+ "requested advertisement 0x%*pb, phy supported 0x%*pb\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, cmd->link_modes.advertising,
+ __ETHTOOL_LINK_MODE_MASK_NBITS, lks->link_modes.supported);
+
+ bitmap_and(advertising,
+ cmd->link_modes.advertising, lks->link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
- advertising &= pdata->phy.supported;
- if ((cmd->base.autoneg == AUTONEG_ENABLE) && !advertising) {
+ if ((cmd->base.autoneg == AUTONEG_ENABLE) &&
+ bitmap_empty(advertising, __ETHTOOL_LINK_MODE_MASK_NBITS)) {
netdev_err(netdev,
"unsupported requested advertisement\n");
return -EINVAL;
@@ -349,12 +384,13 @@ static int xgbe_set_link_ksettings(struct net_device *netdev,
pdata->phy.autoneg = cmd->base.autoneg;
pdata->phy.speed = speed;
pdata->phy.duplex = cmd->base.duplex;
- pdata->phy.advertising = advertising;
+ bitmap_copy(lks->link_modes.advertising, advertising,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
if (cmd->base.autoneg == AUTONEG_ENABLE)
- pdata->phy.advertising |= ADVERTISED_Autoneg;
+ XGBE_SET_ADV(lks, Autoneg);
else
- pdata->phy.advertising &= ~ADVERTISED_Autoneg;
+ XGBE_CLR_ADV(lks, Autoneg);
if (netif_running(netdev))
ret = pdata->phy_if.phy_config_aneg(pdata);
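The per-queue statistics added above touch three ethtool callbacks that must stay in lockstep: get_sset_count() declares how many u64 values exist, get_strings() emits exactly that many ETH_GSTRING_LEN names, and get_ethtool_stats() fills exactly that many values in the same order. A compressed sketch of the counting invariant (the EXAMPLE_* constants are placeholders for the driver's fixed-stat and ring counts):

#include <linux/ethtool.h>
#include <linux/netdevice.h>

#define EXAMPLE_FIXED_STATS	16	/* hypothetical */
#define EXAMPLE_TXQ_COUNT	4	/* hypothetical */
#define EXAMPLE_RXQ_COUNT	4	/* hypothetical */

static int example_get_sset_count(struct net_device *netdev, int sset)
{
	if (sset != ETH_SS_STATS)
		return -EOPNOTSUPP;

	/* One u64 per string: get_strings() and get_ethtool_stats()
	 * must walk the queues in this same order and emit this many
	 * entries each.
	 */
	return EXAMPLE_FIXED_STATS +
	       (EXAMPLE_TXQ_COUNT * 2) +	/* txq_N_packets/_bytes */
	       (EXAMPLE_RXQ_COUNT * 2);		/* rxq_N_packets/_bytes */
}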
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 500147d..d91fa59 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -120,6 +120,7 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/io.h>
+#include <linux/notifier.h>
#include "xgbe.h"
#include "xgbe-common.h"
@@ -192,6 +193,7 @@ struct xgbe_prv_data *xgbe_alloc_pdata(struct device *dev)
mutex_init(&pdata->i2c_mutex);
init_completion(&pdata->i2c_complete);
init_completion(&pdata->mdio_complete);
+ INIT_LIST_HEAD(&pdata->vxlan_ports);
pdata->msg_enable = netif_msg_init(debug, default_msg_level);
@@ -373,6 +375,28 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
if (pdata->hw_feat.rss)
netdev->hw_features |= NETIF_F_RXHASH;
+ if (pdata->hw_feat.vxn) {
+ netdev->hw_enc_features = NETIF_F_SG |
+ NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM |
+ NETIF_F_RXCSUM |
+ NETIF_F_TSO |
+ NETIF_F_TSO6 |
+ NETIF_F_GRO |
+ NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM |
+ NETIF_F_RX_UDP_TUNNEL_PORT;
+
+ netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM |
+ NETIF_F_RX_UDP_TUNNEL_PORT;
+
+ pdata->vxlan_offloads_set = 1;
+ pdata->vxlan_features = NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM |
+ NETIF_F_RX_UDP_TUNNEL_PORT;
+ }
+
netdev->vlan_features |= NETIF_F_SG |
NETIF_F_IP_CSUM |
NETIF_F_IPV6_CSUM |
@@ -399,35 +423,6 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
return ret;
}
- /* Create the PHY/ANEG name based on netdev name */
- snprintf(pdata->an_name, sizeof(pdata->an_name) - 1, "%s-pcs",
- netdev_name(netdev));
-
- /* Create the ECC name based on netdev name */
- snprintf(pdata->ecc_name, sizeof(pdata->ecc_name) - 1, "%s-ecc",
- netdev_name(netdev));
-
- /* Create the I2C name based on netdev name */
- snprintf(pdata->i2c_name, sizeof(pdata->i2c_name) - 1, "%s-i2c",
- netdev_name(netdev));
-
- /* Create workqueues */
- pdata->dev_workqueue =
- create_singlethread_workqueue(netdev_name(netdev));
- if (!pdata->dev_workqueue) {
- netdev_err(netdev, "device workqueue creation failed\n");
- ret = -ENOMEM;
- goto err_netdev;
- }
-
- pdata->an_workqueue =
- create_singlethread_workqueue(pdata->an_name);
- if (!pdata->an_workqueue) {
- netdev_err(netdev, "phy workqueue creation failed\n");
- ret = -ENOMEM;
- goto err_wq;
- }
-
if (IS_REACHABLE(CONFIG_PTP_1588_CLOCK))
xgbe_ptp_register(pdata);
@@ -439,14 +434,6 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
pdata->rx_ring_count);
return 0;
-
-err_wq:
- destroy_workqueue(pdata->dev_workqueue);
-
-err_netdev:
- unregister_netdev(netdev);
-
- return ret;
}
void xgbe_deconfig_netdev(struct xgbe_prv_data *pdata)
@@ -458,21 +445,45 @@ void xgbe_deconfig_netdev(struct xgbe_prv_data *pdata)
if (IS_REACHABLE(CONFIG_PTP_1588_CLOCK))
xgbe_ptp_unregister(pdata);
+ unregister_netdev(netdev);
+
pdata->phy_if.phy_exit(pdata);
+}
- flush_workqueue(pdata->an_workqueue);
- destroy_workqueue(pdata->an_workqueue);
+static int xgbe_netdev_event(struct notifier_block *nb, unsigned long event,
+ void *data)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(data);
+ struct xgbe_prv_data *pdata = netdev_priv(netdev);
- flush_workqueue(pdata->dev_workqueue);
- destroy_workqueue(pdata->dev_workqueue);
+ if (netdev->netdev_ops != xgbe_get_netdev_ops())
+ goto out;
- unregister_netdev(netdev);
+ switch (event) {
+ case NETDEV_CHANGENAME:
+ xgbe_debugfs_rename(pdata);
+ break;
+
+ default:
+ break;
+ }
+
+out:
+ return NOTIFY_DONE;
}
+static struct notifier_block xgbe_netdev_notifier = {
+ .notifier_call = xgbe_netdev_event,
+};
+
static int __init xgbe_mod_init(void)
{
int ret;
+ ret = register_netdevice_notifier(&xgbe_netdev_notifier);
+ if (ret)
+ return ret;
+
ret = xgbe_platform_init();
if (ret)
return ret;
@@ -489,6 +500,8 @@ static void __exit xgbe_mod_exit(void)
xgbe_pci_exit();
xgbe_platform_exit();
+
+ unregister_netdevice_notifier(&xgbe_netdev_notifier);
}
module_init(xgbe_mod_init);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 8068491..072b9f6 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -615,12 +615,14 @@ static enum xgbe_an xgbe_an73_page_received(struct xgbe_prv_data *pdata)
static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
+
/* Be sure we aren't looping trying to negotiate */
if (xgbe_in_kr_mode(pdata)) {
pdata->kr_state = XGBE_RX_ERROR;
- if (!(pdata->phy.advertising & ADVERTISED_1000baseKX_Full) &&
- !(pdata->phy.advertising & ADVERTISED_2500baseX_Full))
+ if (!XGBE_ADV(lks, 1000baseKX_Full) &&
+ !XGBE_ADV(lks, 2500baseX_Full))
return XGBE_AN_NO_LINK;
if (pdata->kx_state != XGBE_RX_BPA)
@@ -628,7 +630,7 @@ static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata)
} else {
pdata->kx_state = XGBE_RX_ERROR;
- if (!(pdata->phy.advertising & ADVERTISED_10000baseKR_Full))
+ if (!XGBE_ADV(lks, 10000baseKR_Full))
return XGBE_AN_NO_LINK;
if (pdata->kr_state != XGBE_RX_BPA)
@@ -944,18 +946,19 @@ static void xgbe_an_state_machine(struct work_struct *work)
static void xgbe_an37_init(struct xgbe_prv_data *pdata)
{
- unsigned int advertising, reg;
+ struct ethtool_link_ksettings lks;
+ unsigned int reg;
- advertising = pdata->phy_if.phy_impl.an_advertising(pdata);
+ pdata->phy_if.phy_impl.an_advertising(pdata, &lks);
/* Set up Advertisement register */
reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE);
- if (advertising & ADVERTISED_Pause)
+ if (XGBE_ADV(&lks, Pause))
reg |= 0x100;
else
reg &= ~0x100;
- if (advertising & ADVERTISED_Asym_Pause)
+ if (XGBE_ADV(&lks, Asym_Pause))
reg |= 0x80;
else
reg &= ~0x80;
@@ -982,6 +985,8 @@ static void xgbe_an37_init(struct xgbe_prv_data *pdata)
break;
}
+ reg |= XGBE_AN_CL37_MII_CTRL_8BIT;
+
XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg);
netif_dbg(pdata, link, pdata->netdev, "CL37 AN (%s) initialized\n",
@@ -990,13 +995,14 @@ static void xgbe_an37_init(struct xgbe_prv_data *pdata)
static void xgbe_an73_init(struct xgbe_prv_data *pdata)
{
- unsigned int advertising, reg;
+ struct ethtool_link_ksettings lks;
+ unsigned int reg;
- advertising = pdata->phy_if.phy_impl.an_advertising(pdata);
+ pdata->phy_if.phy_impl.an_advertising(pdata, &lks);
/* Set up Advertisement register 3 first */
reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
- if (advertising & ADVERTISED_10000baseR_FEC)
+ if (XGBE_ADV(&lks, 10000baseR_FEC))
reg |= 0xc000;
else
reg &= ~0xc000;
@@ -1005,13 +1011,13 @@ static void xgbe_an73_init(struct xgbe_prv_data *pdata)
/* Set up Advertisement register 2 next */
reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
- if (advertising & ADVERTISED_10000baseKR_Full)
+ if (XGBE_ADV(&lks, 10000baseKR_Full))
reg |= 0x80;
else
reg &= ~0x80;
- if ((advertising & ADVERTISED_1000baseKX_Full) ||
- (advertising & ADVERTISED_2500baseX_Full))
+ if (XGBE_ADV(&lks, 1000baseKX_Full) ||
+ XGBE_ADV(&lks, 2500baseX_Full))
reg |= 0x20;
else
reg &= ~0x20;
@@ -1020,12 +1026,12 @@ static void xgbe_an73_init(struct xgbe_prv_data *pdata)
/* Set up Advertisement register 1 last */
reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
- if (advertising & ADVERTISED_Pause)
+ if (XGBE_ADV(&lks, Pause))
reg |= 0x400;
else
reg &= ~0x400;
- if (advertising & ADVERTISED_Asym_Pause)
+ if (XGBE_ADV(&lks, Asym_Pause))
reg |= 0x800;
else
reg &= ~0x800;
@@ -1281,9 +1287,10 @@ static enum xgbe_mode xgbe_phy_status_aneg(struct xgbe_prv_data *pdata)
static void xgbe_phy_status_result(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
enum xgbe_mode mode;
- pdata->phy.lp_advertising = 0;
+ XGBE_ZERO_LP_ADV(lks);
if ((pdata->phy.autoneg != AUTONEG_ENABLE) || pdata->parallel_detect)
mode = xgbe_cur_mode(pdata);
@@ -1513,17 +1520,21 @@ static void xgbe_dump_phy_registers(struct xgbe_prv_data *pdata)
static int xgbe_phy_best_advertised_speed(struct xgbe_prv_data *pdata)
{
- if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full)
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
+
+ if (XGBE_ADV(lks, 10000baseKR_Full))
return SPEED_10000;
- else if (pdata->phy.advertising & ADVERTISED_10000baseT_Full)
+ else if (XGBE_ADV(lks, 10000baseT_Full))
return SPEED_10000;
- else if (pdata->phy.advertising & ADVERTISED_2500baseX_Full)
+ else if (XGBE_ADV(lks, 2500baseX_Full))
return SPEED_2500;
- else if (pdata->phy.advertising & ADVERTISED_1000baseKX_Full)
+ else if (XGBE_ADV(lks, 2500baseT_Full))
+ return SPEED_2500;
+ else if (XGBE_ADV(lks, 1000baseKX_Full))
return SPEED_1000;
- else if (pdata->phy.advertising & ADVERTISED_1000baseT_Full)
+ else if (XGBE_ADV(lks, 1000baseT_Full))
return SPEED_1000;
- else if (pdata->phy.advertising & ADVERTISED_100baseT_Full)
+ else if (XGBE_ADV(lks, 100baseT_Full))
return SPEED_100;
return SPEED_UNKNOWN;
@@ -1531,13 +1542,12 @@ static int xgbe_phy_best_advertised_speed(struct xgbe_prv_data *pdata)
static void xgbe_phy_exit(struct xgbe_prv_data *pdata)
{
- xgbe_phy_stop(pdata);
-
pdata->phy_if.phy_impl.exit(pdata);
}
static int xgbe_phy_init(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
int ret;
mutex_init(&pdata->an_mutex);
@@ -1555,11 +1565,13 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
ret = pdata->phy_if.phy_impl.init(pdata);
if (ret)
return ret;
- pdata->phy.advertising = pdata->phy.supported;
+
+ /* Copy supported link modes to advertising link modes */
+ XGBE_LM_COPY(lks, advertising, lks, supported);
pdata->phy.address = 0;
- if (pdata->phy.advertising & ADVERTISED_Autoneg) {
+ if (XGBE_ADV(lks, Autoneg)) {
pdata->phy.autoneg = AUTONEG_ENABLE;
pdata->phy.speed = SPEED_UNKNOWN;
pdata->phy.duplex = DUPLEX_UNKNOWN;
@@ -1576,16 +1588,21 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
pdata->phy.rx_pause = pdata->rx_pause;
/* Fix up Flow Control advertising */
- pdata->phy.advertising &= ~ADVERTISED_Pause;
- pdata->phy.advertising &= ~ADVERTISED_Asym_Pause;
+ XGBE_CLR_ADV(lks, Pause);
+ XGBE_CLR_ADV(lks, Asym_Pause);
if (pdata->rx_pause) {
- pdata->phy.advertising |= ADVERTISED_Pause;
- pdata->phy.advertising |= ADVERTISED_Asym_Pause;
+ XGBE_SET_ADV(lks, Pause);
+ XGBE_SET_ADV(lks, Asym_Pause);
}
- if (pdata->tx_pause)
- pdata->phy.advertising ^= ADVERTISED_Asym_Pause;
+ if (pdata->tx_pause) {
+ /* Equivalent to XOR of Asym_Pause */
+ if (XGBE_ADV(lks, Asym_Pause))
+ XGBE_CLR_ADV(lks, Asym_Pause);
+ else
+ XGBE_SET_ADV(lks, Asym_Pause);
+ }
if (netif_msg_drv(pdata))
xgbe_dump_phy_registers(pdata);
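The MDIO conversion above retires the legacy 32-bit ADVERTISED_* masks in favour of ethtool link-mode bitmaps; the XGBE_ADV()/XGBE_SET_ADV()/XGBE_CLR_ADV()/XGBE_LM_COPY() helpers it leans on can be expressed as thin wrappers over the generic link-mode accessors — a sketch of plausible definitions (illustrative, not the header's verbatim contents):

#include <linux/bitmap.h>
#include <linux/ethtool.h>

/* Test/set/clear one mode bit in the advertising bitmap */
#define XGBE_ADV(_ls, _mode) \
	ethtool_link_ksettings_test_link_mode((_ls), advertising, _mode)
#define XGBE_SET_ADV(_ls, _mode) \
	ethtool_link_ksettings_add_link_mode((_ls), advertising, _mode)
#define XGBE_CLR_ADV(_ls, _mode) \
	ethtool_link_ksettings_del_link_mode((_ls), advertising, _mode)

/* Copy one whole bitmap (supported/advertising/lp_advertising) */
#define XGBE_LM_COPY(_dst, _dname, _src, _sname) \
	bitmap_copy((_dst)->link_modes._dname, \
		    (_src)->link_modes._sname, \
		    __ETHTOOL_LINK_MODE_MASK_NBITS)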
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
index 1e56ad7..3e5833c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -292,6 +292,10 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pdata->xpcs_window_size = 1 << (pdata->xpcs_window_size + 7);
pdata->xpcs_window_mask = pdata->xpcs_window_size - 1;
if (netif_msg_probe(pdata)) {
+ dev_dbg(dev, "xpcs window def = %#010x\n",
+ pdata->xpcs_window_def_reg);
+ dev_dbg(dev, "xpcs window sel = %#010x\n",
+ pdata->xpcs_window_sel_reg);
dev_dbg(dev, "xpcs window = %#010x\n",
pdata->xpcs_window);
dev_dbg(dev, "xpcs window size = %#010x\n",
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c
index c75edca..d16eae4 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c
@@ -231,20 +231,21 @@ static void xgbe_phy_kr_training_post(struct xgbe_prv_data *pdata)
static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
struct xgbe_phy_data *phy_data = pdata->phy_data;
enum xgbe_mode mode;
unsigned int ad_reg, lp_reg;
- pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
- pdata->phy.lp_advertising |= ADVERTISED_Backplane;
+ XGBE_SET_LP_ADV(lks, Autoneg);
+ XGBE_SET_LP_ADV(lks, Backplane);
/* Compare Advertisement and Link Partner register 1 */
ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA);
if (lp_reg & 0x400)
- pdata->phy.lp_advertising |= ADVERTISED_Pause;
+ XGBE_SET_LP_ADV(lks, Pause);
if (lp_reg & 0x800)
- pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
+ XGBE_SET_LP_ADV(lks, Asym_Pause);
if (pdata->phy.pause_autoneg) {
/* Set flow control based on auto-negotiation result */
@@ -266,12 +267,12 @@ static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata)
ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1);
if (lp_reg & 0x80)
- pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full;
+ XGBE_SET_LP_ADV(lks, 10000baseKR_Full);
if (lp_reg & 0x20) {
if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000)
- pdata->phy.lp_advertising |= ADVERTISED_2500baseX_Full;
+ XGBE_SET_LP_ADV(lks, 2500baseX_Full);
else
- pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full;
+ XGBE_SET_LP_ADV(lks, 1000baseKX_Full);
}
ad_reg &= lp_reg;
@@ -290,14 +291,17 @@ static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata)
ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2);
if (lp_reg & 0xc000)
- pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC;
+ XGBE_SET_LP_ADV(lks, 10000baseR_FEC);
return mode;
}
-static unsigned int xgbe_phy_an_advertising(struct xgbe_prv_data *pdata)
+static void xgbe_phy_an_advertising(struct xgbe_prv_data *pdata,
+ struct ethtool_link_ksettings *dlks)
{
- return pdata->phy.advertising;
+ struct ethtool_link_ksettings *slks = &pdata->phy.lks;
+
+ XGBE_LM_COPY(dlks, advertising, slks, advertising);
}
static int xgbe_phy_an_config(struct xgbe_prv_data *pdata)
@@ -565,11 +569,10 @@ static void xgbe_phy_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode)
}
static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata,
- enum xgbe_mode mode, u32 advert)
+ enum xgbe_mode mode, bool advert)
{
if (pdata->phy.autoneg == AUTONEG_ENABLE) {
- if (pdata->phy.advertising & advert)
- return true;
+ return advert;
} else {
enum xgbe_mode cur_mode;
@@ -583,16 +586,18 @@ static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata,
static bool xgbe_phy_use_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
+
switch (mode) {
case XGBE_MODE_KX_1000:
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_1000baseKX_Full);
+ XGBE_ADV(lks, 1000baseKX_Full));
case XGBE_MODE_KX_2500:
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_2500baseX_Full);
+ XGBE_ADV(lks, 2500baseX_Full));
case XGBE_MODE_KR:
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_10000baseKR_Full);
+ XGBE_ADV(lks, 10000baseKR_Full));
default:
return false;
}
@@ -672,6 +677,7 @@ static void xgbe_phy_exit(struct xgbe_prv_data *pdata)
static int xgbe_phy_init(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
struct xgbe_phy_data *phy_data;
int ret;
@@ -790,21 +796,23 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
}
/* Initialize supported features */
- pdata->phy.supported = SUPPORTED_Autoneg;
- pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
- pdata->phy.supported |= SUPPORTED_Backplane;
- pdata->phy.supported |= SUPPORTED_10000baseKR_Full;
+ XGBE_ZERO_SUP(lks);
+ XGBE_SET_SUP(lks, Autoneg);
+ XGBE_SET_SUP(lks, Pause);
+ XGBE_SET_SUP(lks, Asym_Pause);
+ XGBE_SET_SUP(lks, Backplane);
+ XGBE_SET_SUP(lks, 10000baseKR_Full);
switch (phy_data->speed_set) {
case XGBE_SPEEDSET_1000_10000:
- pdata->phy.supported |= SUPPORTED_1000baseKX_Full;
+ XGBE_SET_SUP(lks, 1000baseKX_Full);
break;
case XGBE_SPEEDSET_2500_10000:
- pdata->phy.supported |= SUPPORTED_2500baseX_Full;
+ XGBE_SET_SUP(lks, 2500baseX_Full);
break;
}
if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
- pdata->phy.supported |= SUPPORTED_10000baseR_FEC;
+ XGBE_SET_SUP(lks, 10000baseR_FEC);
pdata->phy_data = phy_data;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 04b5c14..3304a29 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -709,18 +709,13 @@ static int xgbe_phy_mii_read(struct mii_bus *mii, int addr, int reg)
static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
struct xgbe_phy_data *phy_data = pdata->phy_data;
if (!phy_data->sfp_mod_absent && !phy_data->sfp_changed)
return;
- pdata->phy.supported &= ~SUPPORTED_Autoneg;
- pdata->phy.supported &= ~(SUPPORTED_Pause | SUPPORTED_Asym_Pause);
- pdata->phy.supported &= ~SUPPORTED_TP;
- pdata->phy.supported &= ~SUPPORTED_FIBRE;
- pdata->phy.supported &= ~SUPPORTED_100baseT_Full;
- pdata->phy.supported &= ~SUPPORTED_1000baseT_Full;
- pdata->phy.supported &= ~SUPPORTED_10000baseT_Full;
+ XGBE_ZERO_SUP(lks);
if (phy_data->sfp_mod_absent) {
pdata->phy.speed = SPEED_UNKNOWN;
@@ -728,18 +723,13 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
pdata->phy.autoneg = AUTONEG_ENABLE;
pdata->phy.pause_autoneg = AUTONEG_ENABLE;
- pdata->phy.supported |= SUPPORTED_Autoneg;
- pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
- pdata->phy.supported |= SUPPORTED_TP;
- pdata->phy.supported |= SUPPORTED_FIBRE;
- if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
- pdata->phy.supported |= SUPPORTED_100baseT_Full;
- if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
- pdata->phy.supported |= SUPPORTED_1000baseT_Full;
- if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
- pdata->phy.supported |= SUPPORTED_10000baseT_Full;
+ XGBE_SET_SUP(lks, Autoneg);
+ XGBE_SET_SUP(lks, Pause);
+ XGBE_SET_SUP(lks, Asym_Pause);
+ XGBE_SET_SUP(lks, TP);
+ XGBE_SET_SUP(lks, FIBRE);
- pdata->phy.advertising = pdata->phy.supported;
+ XGBE_LM_COPY(lks, advertising, lks, supported);
return;
}
@@ -753,8 +743,18 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
pdata->phy.duplex = DUPLEX_UNKNOWN;
pdata->phy.autoneg = AUTONEG_ENABLE;
pdata->phy.pause_autoneg = AUTONEG_ENABLE;
- pdata->phy.supported |= SUPPORTED_Autoneg;
- pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+ XGBE_SET_SUP(lks, Autoneg);
+ XGBE_SET_SUP(lks, Pause);
+ XGBE_SET_SUP(lks, Asym_Pause);
+ if (phy_data->sfp_base == XGBE_SFP_BASE_1000_T) {
+ if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
+ XGBE_SET_SUP(lks, 100baseT_Full);
+ if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
+ XGBE_SET_SUP(lks, 1000baseT_Full);
+ } else {
+ if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
+ XGBE_SET_SUP(lks, 1000baseX_Full);
+ }
break;
case XGBE_SFP_BASE_10000_SR:
case XGBE_SFP_BASE_10000_LR:
@@ -765,6 +765,27 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
pdata->phy.duplex = DUPLEX_FULL;
pdata->phy.autoneg = AUTONEG_DISABLE;
pdata->phy.pause_autoneg = AUTONEG_DISABLE;
+ if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
+ switch (phy_data->sfp_base) {
+ case XGBE_SFP_BASE_10000_SR:
+ XGBE_SET_SUP(lks, 10000baseSR_Full);
+ break;
+ case XGBE_SFP_BASE_10000_LR:
+ XGBE_SET_SUP(lks, 10000baseLR_Full);
+ break;
+ case XGBE_SFP_BASE_10000_LRM:
+ XGBE_SET_SUP(lks, 10000baseLRM_Full);
+ break;
+ case XGBE_SFP_BASE_10000_ER:
+ XGBE_SET_SUP(lks, 10000baseER_Full);
+ break;
+ case XGBE_SFP_BASE_10000_CR:
+ XGBE_SET_SUP(lks, 10000baseCR_Full);
+ break;
+ default:
+ break;
+ }
+ }
break;
default:
pdata->phy.speed = SPEED_UNKNOWN;
@@ -778,38 +799,14 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
case XGBE_SFP_BASE_1000_T:
case XGBE_SFP_BASE_1000_CX:
case XGBE_SFP_BASE_10000_CR:
- pdata->phy.supported |= SUPPORTED_TP;
+ XGBE_SET_SUP(lks, TP);
break;
default:
- pdata->phy.supported |= SUPPORTED_FIBRE;
- }
-
- switch (phy_data->sfp_speed) {
- case XGBE_SFP_SPEED_100_1000:
- if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
- pdata->phy.supported |= SUPPORTED_100baseT_Full;
- if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
- pdata->phy.supported |= SUPPORTED_1000baseT_Full;
- break;
- case XGBE_SFP_SPEED_1000:
- if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
- pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+ XGBE_SET_SUP(lks, FIBRE);
break;
- case XGBE_SFP_SPEED_10000:
- if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
- pdata->phy.supported |= SUPPORTED_10000baseT_Full;
- break;
- default:
- /* Choose the fastest supported speed */
- if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
- pdata->phy.supported |= SUPPORTED_10000baseT_Full;
- else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
- pdata->phy.supported |= SUPPORTED_1000baseT_Full;
- else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
- pdata->phy.supported |= SUPPORTED_100baseT_Full;
}
- pdata->phy.advertising = pdata->phy.supported;
+ XGBE_LM_COPY(lks, advertising, lks, supported);
}
static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom,
@@ -886,8 +883,10 @@ static void xgbe_phy_external_phy_quirks(struct xgbe_prv_data *pdata)
static int xgbe_phy_find_phy_device(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
struct xgbe_phy_data *phy_data = pdata->phy_data;
struct phy_device *phydev;
+ u32 advertising;
int ret;
/* If we already have a PHY, just return */
@@ -943,7 +942,10 @@ static int xgbe_phy_find_phy_device(struct xgbe_prv_data *pdata)
phy_data->phydev = phydev;
xgbe_phy_external_phy_quirks(pdata);
- phydev->advertising &= pdata->phy.advertising;
+
+ ethtool_convert_link_mode_to_legacy_u32(&advertising,
+ lks->link_modes.advertising);
+ phydev->advertising &= advertising;
phy_start_aneg(phy_data->phydev);
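
A minimal sketch of the conversion the hunk above performs, assuming only what the patch shows: ethtool_convert_link_mode_to_legacy_u32() collapses the wide link-mode bitmap into the legacy u32 mask that phylib of this vintage still keeps in phydev->advertising (the sketch_* name is illustrative):

    #include <linux/ethtool.h>
    #include <linux/phy.h>

    static void sketch_limit_phy_advertising(struct phy_device *phydev,
                    const struct ethtool_link_ksettings *lks)
    {
            u32 advertising;

            /* Link modes with no legacy u32 bit are silently dropped;
             * the helper returns false in that case if a caller cares.
             */
            ethtool_convert_link_mode_to_legacy_u32(&advertising,
                            lks->link_modes.advertising);
            phydev->advertising &= advertising;
    }
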
@@ -1277,6 +1279,7 @@ put:
static void xgbe_phy_phydev_flowctrl(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
struct xgbe_phy_data *phy_data = pdata->phy_data;
u16 lcl_adv = 0, rmt_adv = 0;
u8 fc;
@@ -1293,11 +1296,11 @@ static void xgbe_phy_phydev_flowctrl(struct xgbe_prv_data *pdata)
lcl_adv |= ADVERTISE_PAUSE_ASYM;
if (phy_data->phydev->pause) {
- pdata->phy.lp_advertising |= ADVERTISED_Pause;
+ XGBE_SET_LP_ADV(lks, Pause);
rmt_adv |= LPA_PAUSE_CAP;
}
if (phy_data->phydev->asym_pause) {
- pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
+ XGBE_SET_LP_ADV(lks, Asym_Pause);
rmt_adv |= LPA_PAUSE_ASYM;
}
@@ -1310,10 +1313,11 @@ static void xgbe_phy_phydev_flowctrl(struct xgbe_prv_data *pdata)
static enum xgbe_mode xgbe_phy_an37_sgmii_outcome(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
enum xgbe_mode mode;
- pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
- pdata->phy.lp_advertising |= ADVERTISED_TP;
+ XGBE_SET_LP_ADV(lks, Autoneg);
+ XGBE_SET_LP_ADV(lks, TP);
/* Use external PHY to determine flow control */
if (pdata->phy.pause_autoneg)
@@ -1322,21 +1326,21 @@ static enum xgbe_mode xgbe_phy_an37_sgmii_outcome(struct xgbe_prv_data *pdata)
switch (pdata->an_status & XGBE_SGMII_AN_LINK_SPEED) {
case XGBE_SGMII_AN_LINK_SPEED_100:
if (pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX) {
- pdata->phy.lp_advertising |= ADVERTISED_100baseT_Full;
+ XGBE_SET_LP_ADV(lks, 100baseT_Full);
mode = XGBE_MODE_SGMII_100;
} else {
/* Half-duplex not supported */
- pdata->phy.lp_advertising |= ADVERTISED_100baseT_Half;
+ XGBE_SET_LP_ADV(lks, 100baseT_Half);
mode = XGBE_MODE_UNKNOWN;
}
break;
case XGBE_SGMII_AN_LINK_SPEED_1000:
if (pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX) {
- pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Full;
+ XGBE_SET_LP_ADV(lks, 1000baseT_Full);
mode = XGBE_MODE_SGMII_1000;
} else {
/* Half-duplex not supported */
- pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Half;
+ XGBE_SET_LP_ADV(lks, 1000baseT_Half);
mode = XGBE_MODE_UNKNOWN;
}
break;
@@ -1349,19 +1353,20 @@ static enum xgbe_mode xgbe_phy_an37_sgmii_outcome(struct xgbe_prv_data *pdata)
static enum xgbe_mode xgbe_phy_an37_outcome(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
enum xgbe_mode mode;
unsigned int ad_reg, lp_reg;
- pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
- pdata->phy.lp_advertising |= ADVERTISED_FIBRE;
+ XGBE_SET_LP_ADV(lks, Autoneg);
+ XGBE_SET_LP_ADV(lks, FIBRE);
/* Compare Advertisement and Link Partner register */
ad_reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE);
lp_reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_LP_ABILITY);
if (lp_reg & 0x100)
- pdata->phy.lp_advertising |= ADVERTISED_Pause;
+ XGBE_SET_LP_ADV(lks, Pause);
if (lp_reg & 0x80)
- pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
+ XGBE_SET_LP_ADV(lks, Asym_Pause);
if (pdata->phy.pause_autoneg) {
/* Set flow control based on auto-negotiation result */
@@ -1379,10 +1384,8 @@ static enum xgbe_mode xgbe_phy_an37_outcome(struct xgbe_prv_data *pdata)
}
}
- if (lp_reg & 0x40)
- pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Half;
if (lp_reg & 0x20)
- pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Full;
+ XGBE_SET_LP_ADV(lks, 1000baseX_Full);
/* Half duplex is not supported */
ad_reg &= lp_reg;
@@ -1393,12 +1396,13 @@ static enum xgbe_mode xgbe_phy_an37_outcome(struct xgbe_prv_data *pdata)
static enum xgbe_mode xgbe_phy_an73_redrv_outcome(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
struct xgbe_phy_data *phy_data = pdata->phy_data;
enum xgbe_mode mode;
unsigned int ad_reg, lp_reg;
- pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
- pdata->phy.lp_advertising |= ADVERTISED_Backplane;
+ XGBE_SET_LP_ADV(lks, Autoneg);
+ XGBE_SET_LP_ADV(lks, Backplane);
/* Use external PHY to determine flow control */
if (pdata->phy.pause_autoneg)
@@ -1408,9 +1412,9 @@ static enum xgbe_mode xgbe_phy_an73_redrv_outcome(struct xgbe_prv_data *pdata)
ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1);
if (lp_reg & 0x80)
- pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full;
+ XGBE_SET_LP_ADV(lks, 10000baseKR_Full);
if (lp_reg & 0x20)
- pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full;
+ XGBE_SET_LP_ADV(lks, 1000baseKX_Full);
ad_reg &= lp_reg;
if (ad_reg & 0x80) {
@@ -1463,26 +1467,27 @@ static enum xgbe_mode xgbe_phy_an73_redrv_outcome(struct xgbe_prv_data *pdata)
ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2);
if (lp_reg & 0xc000)
- pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC;
+ XGBE_SET_LP_ADV(lks, 10000baseR_FEC);
return mode;
}
static enum xgbe_mode xgbe_phy_an73_outcome(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
enum xgbe_mode mode;
unsigned int ad_reg, lp_reg;
- pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
- pdata->phy.lp_advertising |= ADVERTISED_Backplane;
+ XGBE_SET_LP_ADV(lks, Autoneg);
+ XGBE_SET_LP_ADV(lks, Backplane);
/* Compare Advertisement and Link Partner register 1 */
ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA);
if (lp_reg & 0x400)
- pdata->phy.lp_advertising |= ADVERTISED_Pause;
+ XGBE_SET_LP_ADV(lks, Pause);
if (lp_reg & 0x800)
- pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
+ XGBE_SET_LP_ADV(lks, Asym_Pause);
if (pdata->phy.pause_autoneg) {
/* Set flow control based on auto-negotiation result */
@@ -1504,9 +1509,9 @@ static enum xgbe_mode xgbe_phy_an73_outcome(struct xgbe_prv_data *pdata)
ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1);
if (lp_reg & 0x80)
- pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full;
+ XGBE_SET_LP_ADV(lks, 10000baseKR_Full);
if (lp_reg & 0x20)
- pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full;
+ XGBE_SET_LP_ADV(lks, 1000baseKX_Full);
ad_reg &= lp_reg;
if (ad_reg & 0x80)
@@ -1520,7 +1525,7 @@ static enum xgbe_mode xgbe_phy_an73_outcome(struct xgbe_prv_data *pdata)
ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2);
if (lp_reg & 0xc000)
- pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC;
+ XGBE_SET_LP_ADV(lks, 10000baseR_FEC);
return mode;
}
@@ -1541,41 +1546,43 @@ static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata)
}
}
-static unsigned int xgbe_phy_an_advertising(struct xgbe_prv_data *pdata)
+static void xgbe_phy_an_advertising(struct xgbe_prv_data *pdata,
+ struct ethtool_link_ksettings *dlks)
{
+ struct ethtool_link_ksettings *slks = &pdata->phy.lks;
struct xgbe_phy_data *phy_data = pdata->phy_data;
- unsigned int advertising;
+
+ XGBE_LM_COPY(dlks, advertising, slks, advertising);
/* Without a re-driver, just return current advertising */
if (!phy_data->redrv)
- return pdata->phy.advertising;
+ return;
/* With the KR re-driver we need to advertise a single speed */
- advertising = pdata->phy.advertising;
- advertising &= ~ADVERTISED_1000baseKX_Full;
- advertising &= ~ADVERTISED_10000baseKR_Full;
+ XGBE_CLR_ADV(dlks, 1000baseKX_Full);
+ XGBE_CLR_ADV(dlks, 10000baseKR_Full);
switch (phy_data->port_mode) {
case XGBE_PORT_MODE_BACKPLANE:
- advertising |= ADVERTISED_10000baseKR_Full;
+ XGBE_SET_ADV(dlks, 10000baseKR_Full);
break;
case XGBE_PORT_MODE_BACKPLANE_2500:
- advertising |= ADVERTISED_1000baseKX_Full;
+ XGBE_SET_ADV(dlks, 1000baseKX_Full);
break;
case XGBE_PORT_MODE_1000BASE_T:
case XGBE_PORT_MODE_1000BASE_X:
case XGBE_PORT_MODE_NBASE_T:
- advertising |= ADVERTISED_1000baseKX_Full;
+ XGBE_SET_ADV(dlks, 1000baseKX_Full);
break;
case XGBE_PORT_MODE_10GBASE_T:
if (phy_data->phydev &&
(phy_data->phydev->speed == SPEED_10000))
- advertising |= ADVERTISED_10000baseKR_Full;
+ XGBE_SET_ADV(dlks, 10000baseKR_Full);
else
- advertising |= ADVERTISED_1000baseKX_Full;
+ XGBE_SET_ADV(dlks, 1000baseKX_Full);
break;
case XGBE_PORT_MODE_10GBASE_R:
- advertising |= ADVERTISED_10000baseKR_Full;
+ XGBE_SET_ADV(dlks, 10000baseKR_Full);
break;
case XGBE_PORT_MODE_SFP:
switch (phy_data->sfp_base) {
@@ -1583,24 +1590,24 @@ static unsigned int xgbe_phy_an_advertising(struct xgbe_prv_data *pdata)
case XGBE_SFP_BASE_1000_SX:
case XGBE_SFP_BASE_1000_LX:
case XGBE_SFP_BASE_1000_CX:
- advertising |= ADVERTISED_1000baseKX_Full;
+ XGBE_SET_ADV(dlks, 1000baseKX_Full);
break;
default:
- advertising |= ADVERTISED_10000baseKR_Full;
+ XGBE_SET_ADV(dlks, 10000baseKR_Full);
break;
}
break;
default:
- advertising |= ADVERTISED_10000baseKR_Full;
+ XGBE_SET_ADV(dlks, 10000baseKR_Full);
break;
}
-
- return advertising;
}
static int xgbe_phy_an_config(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
struct xgbe_phy_data *phy_data = pdata->phy_data;
+ u32 advertising;
int ret;
ret = xgbe_phy_find_phy_device(pdata);
@@ -1610,9 +1617,12 @@ static int xgbe_phy_an_config(struct xgbe_prv_data *pdata)
if (!phy_data->phydev)
return 0;
+ ethtool_convert_link_mode_to_legacy_u32(&advertising,
+ lks->link_modes.advertising);
+
phy_data->phydev->autoneg = pdata->phy.autoneg;
phy_data->phydev->advertising = phy_data->phydev->supported &
- pdata->phy.advertising;
+ advertising;
if (pdata->phy.autoneg != AUTONEG_ENABLE) {
phy_data->phydev->speed = pdata->phy.speed;
@@ -2073,11 +2083,10 @@ static void xgbe_phy_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode)
}
static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata,
- enum xgbe_mode mode, u32 advert)
+ enum xgbe_mode mode, bool advert)
{
if (pdata->phy.autoneg == AUTONEG_ENABLE) {
- if (pdata->phy.advertising & advert)
- return true;
+ return advert;
} else {
enum xgbe_mode cur_mode;
@@ -2092,13 +2101,15 @@ static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata,
static bool xgbe_phy_use_basex_mode(struct xgbe_prv_data *pdata,
enum xgbe_mode mode)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
+
switch (mode) {
case XGBE_MODE_X:
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_1000baseT_Full);
+ XGBE_ADV(lks, 1000baseX_Full));
case XGBE_MODE_KR:
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_10000baseT_Full);
+ XGBE_ADV(lks, 10000baseKR_Full));
default:
return false;
}
@@ -2107,19 +2118,21 @@ static bool xgbe_phy_use_basex_mode(struct xgbe_prv_data *pdata,
static bool xgbe_phy_use_baset_mode(struct xgbe_prv_data *pdata,
enum xgbe_mode mode)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
+
switch (mode) {
case XGBE_MODE_SGMII_100:
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_100baseT_Full);
+ XGBE_ADV(lks, 100baseT_Full));
case XGBE_MODE_SGMII_1000:
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_1000baseT_Full);
+ XGBE_ADV(lks, 1000baseT_Full));
case XGBE_MODE_KX_2500:
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_2500baseX_Full);
+ XGBE_ADV(lks, 2500baseT_Full));
case XGBE_MODE_KR:
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_10000baseT_Full);
+ XGBE_ADV(lks, 10000baseT_Full));
default:
return false;
}
@@ -2128,6 +2141,7 @@ static bool xgbe_phy_use_baset_mode(struct xgbe_prv_data *pdata,
static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata,
enum xgbe_mode mode)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
struct xgbe_phy_data *phy_data = pdata->phy_data;
switch (mode) {
@@ -2135,22 +2149,26 @@ static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata,
if (phy_data->sfp_base == XGBE_SFP_BASE_1000_T)
return false;
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_1000baseT_Full);
+ XGBE_ADV(lks, 1000baseX_Full));
case XGBE_MODE_SGMII_100:
if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T)
return false;
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_100baseT_Full);
+ XGBE_ADV(lks, 100baseT_Full));
case XGBE_MODE_SGMII_1000:
if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T)
return false;
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_1000baseT_Full);
+ XGBE_ADV(lks, 1000baseT_Full));
case XGBE_MODE_SFI:
if (phy_data->sfp_mod_absent)
return true;
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_10000baseT_Full);
+ XGBE_ADV(lks, 10000baseSR_Full) ||
+ XGBE_ADV(lks, 10000baseLR_Full) ||
+ XGBE_ADV(lks, 10000baseLRM_Full) ||
+ XGBE_ADV(lks, 10000baseER_Full) ||
+ XGBE_ADV(lks, 10000baseCR_Full));
default:
return false;
}
@@ -2159,10 +2177,12 @@ static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata,
static bool xgbe_phy_use_bp_2500_mode(struct xgbe_prv_data *pdata,
enum xgbe_mode mode)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
+
switch (mode) {
case XGBE_MODE_KX_2500:
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_2500baseX_Full);
+ XGBE_ADV(lks, 2500baseX_Full));
default:
return false;
}
@@ -2171,13 +2191,15 @@ static bool xgbe_phy_use_bp_2500_mode(struct xgbe_prv_data *pdata,
static bool xgbe_phy_use_bp_mode(struct xgbe_prv_data *pdata,
enum xgbe_mode mode)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
+
switch (mode) {
case XGBE_MODE_KX_1000:
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_1000baseKX_Full);
+ XGBE_ADV(lks, 1000baseKX_Full));
case XGBE_MODE_KR:
return xgbe_phy_check_mode(pdata, mode,
- ADVERTISED_10000baseKR_Full);
+ XGBE_ADV(lks, 10000baseKR_Full));
default:
return false;
}
@@ -2744,6 +2766,7 @@ static void xgbe_phy_exit(struct xgbe_prv_data *pdata)
static int xgbe_phy_init(struct xgbe_prv_data *pdata)
{
+ struct ethtool_link_ksettings *lks = &pdata->phy.lks;
struct xgbe_phy_data *phy_data;
struct mii_bus *mii;
unsigned int reg;
@@ -2823,32 +2846,33 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
phy_data->cur_mode = XGBE_MODE_UNKNOWN;
/* Initialize supported features */
- pdata->phy.supported = 0;
+ XGBE_ZERO_SUP(lks);
switch (phy_data->port_mode) {
/* Backplane support */
case XGBE_PORT_MODE_BACKPLANE:
- pdata->phy.supported |= SUPPORTED_Autoneg;
- pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
- pdata->phy.supported |= SUPPORTED_Backplane;
+ XGBE_SET_SUP(lks, Autoneg);
+ XGBE_SET_SUP(lks, Pause);
+ XGBE_SET_SUP(lks, Asym_Pause);
+ XGBE_SET_SUP(lks, Backplane);
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
- pdata->phy.supported |= SUPPORTED_1000baseKX_Full;
+ XGBE_SET_SUP(lks, 1000baseKX_Full);
phy_data->start_mode = XGBE_MODE_KX_1000;
}
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
- pdata->phy.supported |= SUPPORTED_10000baseKR_Full;
+ XGBE_SET_SUP(lks, 10000baseKR_Full);
if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
- pdata->phy.supported |=
- SUPPORTED_10000baseR_FEC;
+ XGBE_SET_SUP(lks, 10000baseR_FEC);
phy_data->start_mode = XGBE_MODE_KR;
}
phy_data->phydev_mode = XGBE_MDIO_MODE_NONE;
break;
case XGBE_PORT_MODE_BACKPLANE_2500:
- pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
- pdata->phy.supported |= SUPPORTED_Backplane;
- pdata->phy.supported |= SUPPORTED_2500baseX_Full;
+ XGBE_SET_SUP(lks, Pause);
+ XGBE_SET_SUP(lks, Asym_Pause);
+ XGBE_SET_SUP(lks, Backplane);
+ XGBE_SET_SUP(lks, 2500baseX_Full);
phy_data->start_mode = XGBE_MODE_KX_2500;
phy_data->phydev_mode = XGBE_MDIO_MODE_NONE;
@@ -2856,15 +2880,16 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
/* MDIO 1GBase-T support */
case XGBE_PORT_MODE_1000BASE_T:
- pdata->phy.supported |= SUPPORTED_Autoneg;
- pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
- pdata->phy.supported |= SUPPORTED_TP;
+ XGBE_SET_SUP(lks, Autoneg);
+ XGBE_SET_SUP(lks, Pause);
+ XGBE_SET_SUP(lks, Asym_Pause);
+ XGBE_SET_SUP(lks, TP);
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) {
- pdata->phy.supported |= SUPPORTED_100baseT_Full;
+ XGBE_SET_SUP(lks, 100baseT_Full);
phy_data->start_mode = XGBE_MODE_SGMII_100;
}
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
- pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+ XGBE_SET_SUP(lks, 1000baseT_Full);
phy_data->start_mode = XGBE_MODE_SGMII_1000;
}
@@ -2873,10 +2898,11 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
/* MDIO Base-X support */
case XGBE_PORT_MODE_1000BASE_X:
- pdata->phy.supported |= SUPPORTED_Autoneg;
- pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
- pdata->phy.supported |= SUPPORTED_FIBRE;
- pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+ XGBE_SET_SUP(lks, Autoneg);
+ XGBE_SET_SUP(lks, Pause);
+ XGBE_SET_SUP(lks, Asym_Pause);
+ XGBE_SET_SUP(lks, FIBRE);
+ XGBE_SET_SUP(lks, 1000baseX_Full);
phy_data->start_mode = XGBE_MODE_X;
phy_data->phydev_mode = XGBE_MDIO_MODE_CL22;
@@ -2884,19 +2910,20 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
/* MDIO NBase-T support */
case XGBE_PORT_MODE_NBASE_T:
- pdata->phy.supported |= SUPPORTED_Autoneg;
- pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
- pdata->phy.supported |= SUPPORTED_TP;
+ XGBE_SET_SUP(lks, Autoneg);
+ XGBE_SET_SUP(lks, Pause);
+ XGBE_SET_SUP(lks, Asym_Pause);
+ XGBE_SET_SUP(lks, TP);
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) {
- pdata->phy.supported |= SUPPORTED_100baseT_Full;
+ XGBE_SET_SUP(lks, 100baseT_Full);
phy_data->start_mode = XGBE_MODE_SGMII_100;
}
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
- pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+ XGBE_SET_SUP(lks, 1000baseT_Full);
phy_data->start_mode = XGBE_MODE_SGMII_1000;
}
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_2500) {
- pdata->phy.supported |= SUPPORTED_2500baseX_Full;
+ XGBE_SET_SUP(lks, 2500baseT_Full);
phy_data->start_mode = XGBE_MODE_KX_2500;
}
@@ -2905,33 +2932,38 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
/* 10GBase-T support */
case XGBE_PORT_MODE_10GBASE_T:
- pdata->phy.supported |= SUPPORTED_Autoneg;
- pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
- pdata->phy.supported |= SUPPORTED_TP;
+ XGBE_SET_SUP(lks, Autoneg);
+ XGBE_SET_SUP(lks, Pause);
+ XGBE_SET_SUP(lks, Asym_Pause);
+ XGBE_SET_SUP(lks, TP);
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) {
- pdata->phy.supported |= SUPPORTED_100baseT_Full;
+ XGBE_SET_SUP(lks, 100baseT_Full);
phy_data->start_mode = XGBE_MODE_SGMII_100;
}
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
- pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+ XGBE_SET_SUP(lks, 1000baseT_Full);
phy_data->start_mode = XGBE_MODE_SGMII_1000;
}
if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
- pdata->phy.supported |= SUPPORTED_10000baseT_Full;
+ XGBE_SET_SUP(lks, 10000baseT_Full);
phy_data->start_mode = XGBE_MODE_KR;
}
- phy_data->phydev_mode = XGBE_MDIO_MODE_NONE;
+ phy_data->phydev_mode = XGBE_MDIO_MODE_CL45;
break;
/* 10GBase-R support */
case XGBE_PORT_MODE_10GBASE_R:
- pdata->phy.supported |= SUPPORTED_Autoneg;
- pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
- pdata->phy.supported |= SUPPORTED_TP;
- pdata->phy.supported |= SUPPORTED_10000baseT_Full;
+ XGBE_SET_SUP(lks, Autoneg);
+ XGBE_SET_SUP(lks, Pause);
+ XGBE_SET_SUP(lks, Asym_Pause);
+ XGBE_SET_SUP(lks, FIBRE);
+ XGBE_SET_SUP(lks, 10000baseSR_Full);
+ XGBE_SET_SUP(lks, 10000baseLR_Full);
+ XGBE_SET_SUP(lks, 10000baseLRM_Full);
+ XGBE_SET_SUP(lks, 10000baseER_Full);
if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
- pdata->phy.supported |= SUPPORTED_10000baseR_FEC;
+ XGBE_SET_SUP(lks, 10000baseR_FEC);
phy_data->start_mode = XGBE_MODE_SFI;
phy_data->phydev_mode = XGBE_MDIO_MODE_NONE;
@@ -2939,22 +2971,17 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
/* SFP support */
case XGBE_PORT_MODE_SFP:
- pdata->phy.supported |= SUPPORTED_Autoneg;
- pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
- pdata->phy.supported |= SUPPORTED_TP;
- pdata->phy.supported |= SUPPORTED_FIBRE;
- if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) {
- pdata->phy.supported |= SUPPORTED_100baseT_Full;
+ XGBE_SET_SUP(lks, Autoneg);
+ XGBE_SET_SUP(lks, Pause);
+ XGBE_SET_SUP(lks, Asym_Pause);
+ XGBE_SET_SUP(lks, TP);
+ XGBE_SET_SUP(lks, FIBRE);
+ if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
phy_data->start_mode = XGBE_MODE_SGMII_100;
- }
- if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
- pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+ if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
phy_data->start_mode = XGBE_MODE_SGMII_1000;
- }
- if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
- pdata->phy.supported |= SUPPORTED_10000baseT_Full;
+ if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
phy_data->start_mode = XGBE_MODE_SFI;
- }
phy_data->phydev_mode = XGBE_MDIO_MODE_CL22;
@@ -2965,8 +2992,9 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
}
if (netif_msg_probe(pdata))
- dev_dbg(pdata->dev, "phy supported=%#x\n",
- pdata->phy.supported);
+ dev_dbg(pdata->dev, "phy supported=0x%*pb\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ lks->link_modes.supported);
if ((phy_data->conn_type & XGBE_CONN_TYPE_MDIO) &&
(phy_data->phydev_mode != XGBE_MDIO_MODE_NONE)) {
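
The "%*pb" in the dev_dbg() above is the printk bitmap extension: the field-width argument supplies the number of valid bits and the pointer argument is the bitmap itself. A minimal sketch, with an illustrative mask name:

    #include <linux/ethtool.h>
    #include <linux/printk.h>

    static void sketch_dump_supported(const unsigned long *mask)
    {
            /* Prints the bitmap as hex; width = number of bits. */
            pr_debug("supported=0x%*pb\n",
                     __ETHTOOL_LINK_MODE_MASK_NBITS, mask);
    }
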
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 0938294..ad102c8 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -129,6 +129,10 @@
#include <net/dcbnl.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/dcache.h>
+#include <linux/ethtool.h>
+#include <linux/list.h>
#define XGBE_DRV_NAME "amd-xgbe"
#define XGBE_DRV_VERSION "1.0.3"
@@ -180,8 +184,6 @@
#define XGBE_IRQ_MODE_EDGE 0
#define XGBE_IRQ_MODE_LEVEL 1
-#define XGBE_DMA_INTERRUPT_MASK 0x31c7
-
#define XGMAC_MIN_PACKET 60
#define XGMAC_STD_PACKET_MTU 1500
#define XGMAC_MAX_STD_PACKET 1518
@@ -296,6 +298,48 @@
/* MDIO port types */
#define XGMAC_MAX_C22_PORT 3
+/* Link mode bit operations */
+#define XGBE_ZERO_SUP(_ls) \
+ ethtool_link_ksettings_zero_link_mode((_ls), supported)
+
+#define XGBE_SET_SUP(_ls, _mode) \
+ ethtool_link_ksettings_add_link_mode((_ls), supported, _mode)
+
+#define XGBE_CLR_SUP(_ls, _mode) \
+ ethtool_link_ksettings_del_link_mode((_ls), supported, _mode)
+
+#define XGBE_IS_SUP(_ls, _mode) \
+ ethtool_link_ksettings_test_link_mode((_ls), supported, _mode)
+
+#define XGBE_ZERO_ADV(_ls) \
+ ethtool_link_ksettings_zero_link_mode((_ls), advertising)
+
+#define XGBE_SET_ADV(_ls, _mode) \
+ ethtool_link_ksettings_add_link_mode((_ls), advertising, _mode)
+
+#define XGBE_CLR_ADV(_ls, _mode) \
+ ethtool_link_ksettings_del_link_mode((_ls), advertising, _mode)
+
+#define XGBE_ADV(_ls, _mode) \
+ ethtool_link_ksettings_test_link_mode((_ls), advertising, _mode)
+
+#define XGBE_ZERO_LP_ADV(_ls) \
+ ethtool_link_ksettings_zero_link_mode((_ls), lp_advertising)
+
+#define XGBE_SET_LP_ADV(_ls, _mode) \
+ ethtool_link_ksettings_add_link_mode((_ls), lp_advertising, _mode)
+
+#define XGBE_CLR_LP_ADV(_ls, _mode) \
+ ethtool_link_ksettings_del_link_mode((_ls), lp_advertising, _mode)
+
+#define XGBE_LP_ADV(_ls, _mode) \
+ ethtool_link_ksettings_test_link_mode((_ls), lp_advertising, _mode)
+
+#define XGBE_LM_COPY(_dst, _dname, _src, _sname) \
+ bitmap_copy((_dst)->link_modes._dname, \
+ (_src)->link_modes._sname, \
+ __ETHTOOL_LINK_MODE_MASK_NBITS)
+
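
Each wrapper above expands to one of the ethtool_link_ksettings link-mode helpers, so driver code can say XGBE_SET_SUP(lks, 10000baseKR_Full) instead of spelling out the ETHTOOL_LINK_MODE_..._BIT names. A minimal sketch of what a call sequence boils down to (sketch_* is illustrative):

    #include <linux/bitmap.h>
    #include <linux/ethtool.h>

    static void sketch_init_modes(struct ethtool_link_ksettings *lks)
    {
            /* XGBE_ZERO_SUP() / XGBE_SET_SUP() */
            ethtool_link_ksettings_zero_link_mode(lks, supported);
            ethtool_link_ksettings_add_link_mode(lks, supported, Autoneg);
            ethtool_link_ksettings_add_link_mode(lks, supported,
                                                 10000baseKR_Full);

            /* XGBE_LM_COPY(lks, advertising, lks, supported) */
            bitmap_copy(lks->link_modes.advertising,
                        lks->link_modes.supported,
                        __ETHTOOL_LINK_MODE_MASK_NBITS);
    }
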
struct xgbe_prv_data;
struct xgbe_packet_data {
@@ -460,6 +504,8 @@ struct xgbe_channel {
/* Netdev related settings */
struct napi_struct napi;
+ /* Per channel interrupt enablement tracker */
+ unsigned int curr_ier;
unsigned int saved_ier;
unsigned int tx_timer_active;
@@ -561,9 +607,7 @@ enum xgbe_mdio_mode {
};
struct xgbe_phy {
- u32 supported;
- u32 advertising;
- u32 lp_advertising;
+ struct ethtool_link_ksettings lks;
int address;
@@ -666,6 +710,16 @@ struct xgbe_ext_stats {
u64 tx_tso_packets;
u64 rx_split_header_packets;
u64 rx_buffer_unavailable;
+
+ u64 txq_packets[XGBE_MAX_DMA_CHANNELS];
+ u64 txq_bytes[XGBE_MAX_DMA_CHANNELS];
+ u64 rxq_packets[XGBE_MAX_DMA_CHANNELS];
+ u64 rxq_bytes[XGBE_MAX_DMA_CHANNELS];
+
+ u64 tx_vxlan_packets;
+ u64 rx_vxlan_packets;
+ u64 rx_csum_errors;
+ u64 rx_vxlan_csum_errors;
};
struct xgbe_hw_if {
@@ -769,6 +823,11 @@ struct xgbe_hw_if {
/* For ECC */
void (*disable_ecc_ded)(struct xgbe_prv_data *);
void (*disable_ecc_sec)(struct xgbe_prv_data *, enum xgbe_ecc_sec);
+
+ /* For VXLAN */
+ void (*enable_vxlan)(struct xgbe_prv_data *);
+ void (*disable_vxlan)(struct xgbe_prv_data *);
+ void (*set_vxlan_id)(struct xgbe_prv_data *);
};
/* This structure represents implementation specific routines for an
@@ -810,7 +869,8 @@ struct xgbe_phy_impl_if {
int (*an_config)(struct xgbe_prv_data *);
/* Set/override auto-negotiation advertisement settings */
- unsigned int (*an_advertising)(struct xgbe_prv_data *);
+ void (*an_advertising)(struct xgbe_prv_data *,
+ struct ethtool_link_ksettings *);
/* Process results of auto-negotiation */
enum xgbe_mode (*an_outcome)(struct xgbe_prv_data *);
@@ -892,6 +952,7 @@ struct xgbe_hw_features {
unsigned int addn_mac; /* Additional MAC Addresses */
unsigned int ts_src; /* Timestamp Source */
unsigned int sa_vlan_ins; /* Source Address or VLAN Insertion */
+ unsigned int vxn; /* VXLAN/NVGRE */
/* HW Feature Register1 */
unsigned int rx_fifo_size; /* MTL Receive FIFO Size */
@@ -930,6 +991,12 @@ struct xgbe_version_data {
unsigned int rx_desc_prefetch;
};
+struct xgbe_vxlan_data {
+ struct list_head list;
+ sa_family_t sa_family;
+ __be16 port;
+};
+
struct xgbe_prv_data {
struct net_device *netdev;
struct pci_dev *pcidev;
@@ -1071,6 +1138,15 @@ struct xgbe_prv_data {
u32 rss_table[XGBE_RSS_MAX_TABLE_SIZE];
u32 rss_options;
+ /* VXLAN settings */
+ unsigned int vxlan_port_set;
+ unsigned int vxlan_offloads_set;
+ unsigned int vxlan_force_disable;
+ unsigned int vxlan_port_count;
+ struct list_head vxlan_ports;
+ u16 vxlan_port;
+ netdev_features_t vxlan_features;
+
/* Netdev related settings */
unsigned char mac_addr[ETH_ALEN];
netdev_features_t netdev_features;
@@ -1171,7 +1247,6 @@ struct xgbe_prv_data {
struct tasklet_struct tasklet_i2c;
struct tasklet_struct tasklet_an;
-#ifdef CONFIG_DEBUG_FS
struct dentry *xgbe_debugfs;
unsigned int debugfs_xgmac_reg;
@@ -1182,7 +1257,6 @@ struct xgbe_prv_data {
unsigned int debugfs_xprop_reg;
unsigned int debugfs_xi2c_reg;
-#endif
};
/* Function prototypes */
@@ -1231,9 +1305,11 @@ void xgbe_init_tx_coalesce(struct xgbe_prv_data *);
#ifdef CONFIG_DEBUG_FS
void xgbe_debugfs_init(struct xgbe_prv_data *);
void xgbe_debugfs_exit(struct xgbe_prv_data *);
+void xgbe_debugfs_rename(struct xgbe_prv_data *pdata);
#else
static inline void xgbe_debugfs_init(struct xgbe_prv_data *pdata) {}
static inline void xgbe_debugfs_exit(struct xgbe_prv_data *pdata) {}
+static inline void xgbe_debugfs_rename(struct xgbe_prv_data *pdata) {}
#endif /* CONFIG_DEBUG_FS */
/* NOTE: Uncomment for function trace log messages in KERNEL LOG */
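
The #ifdef/#else pair above is the usual compile-out idiom: when CONFIG_DEBUG_FS is off, empty static inlines replace the real functions so call sites stay unconditional. A minimal sketch with an invented CONFIG_FOO feature (all names here are illustrative):

    struct foo;

    #ifdef CONFIG_FOO
    void foo_init(struct foo *f);
    #else
    static inline void foo_init(struct foo *f) {}
    #endif
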
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index e45b587..3188f55 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -468,7 +468,6 @@ static void xgene_enet_configure_clock(struct xgene_enet_pdata *pdata)
static void xgene_gmac_set_speed(struct xgene_enet_pdata *pdata)
{
- struct device *dev = &pdata->pdev->dev;
u32 icm0, icm2, mc2;
u32 intf_ctl, rgmii, value;
@@ -500,10 +499,8 @@ static void xgene_gmac_set_speed(struct xgene_enet_pdata *pdata)
intf_ctl |= ENET_GHD_MODE;
CFG_MACMODE_SET(&icm0, 2);
CFG_WAITASYNCRD_SET(&icm2, 0);
- if (dev->of_node) {
- CFG_TXCLK_MUXSEL0_SET(&rgmii, pdata->tx_delay);
- CFG_RXCLK_MUXSEL0_SET(&rgmii, pdata->rx_delay);
- }
+ CFG_TXCLK_MUXSEL0_SET(&rgmii, pdata->tx_delay);
+ CFG_RXCLK_MUXSEL0_SET(&rgmii, pdata->rx_delay);
rgmii |= CFG_SPEED_1250;
xgene_enet_rd_csr(pdata, DEBUG_REG_ADDR, &value);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 6e253d9..3b889ef 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -1591,7 +1591,7 @@ static int xgene_get_tx_delay(struct xgene_enet_pdata *pdata)
struct device *dev = &pdata->pdev->dev;
int delay, ret;
- ret = of_property_read_u32(dev->of_node, "tx-delay", &delay);
+ ret = device_property_read_u32(dev, "tx-delay", &delay);
if (ret) {
pdata->tx_delay = 4;
return 0;
@@ -1612,7 +1612,7 @@ static int xgene_get_rx_delay(struct xgene_enet_pdata *pdata)
struct device *dev = &pdata->pdev->dev;
int delay, ret;
- ret = of_property_read_u32(dev->of_node, "rx-delay", &delay);
+ ret = device_property_read_u32(dev, "rx-delay", &delay);
if (ret) {
pdata->rx_delay = 2;
return 0;
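
device_property_read_u32() reads from whichever firmware interface describes the device (DT or ACPI), which is why it replaces the of_property_read_u32() calls above. A minimal sketch of the read-with-fallback shape, using the "tx-delay" property and the default of 4 from the first hunk:

    #include <linux/device.h>
    #include <linux/property.h>

    static u32 sketch_get_tx_delay(struct device *dev)
    {
            u32 delay;

            /* Keep the hardware default when the property is absent. */
            if (device_property_read_u32(dev, "tx-delay", &delay))
                    return 4;

            return delay;
    }
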
@@ -1674,8 +1674,6 @@ static void xgene_enet_check_phy_handle(struct xgene_enet_pdata *pdata)
ret = xgene_enet_phy_connect(pdata->ndev);
if (!ret)
pdata->mdio_driver = true;
-
- return;
}
static void xgene_enet_gpiod_get(struct xgene_enet_pdata *pdata)
diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c
index 96dd530..e58b157 100644
--- a/drivers/net/ethernet/apple/mace.c
+++ b/drivers/net/ethernet/apple/mace.c
@@ -114,8 +114,8 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
int j, rev, rc = -EBUSY;
if (macio_resource_count(mdev) != 3 || macio_irq_count(mdev) != 3) {
- printk(KERN_ERR "can't use MACE %s: need 3 addrs and 3 irqs\n",
- mace->full_name);
+ printk(KERN_ERR "can't use MACE %pOF: need 3 addrs and 3 irqs\n",
+ mace);
return -ENODEV;
}
@@ -123,8 +123,8 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
if (addr == NULL) {
addr = of_get_property(mace, "local-mac-address", NULL);
if (addr == NULL) {
- printk(KERN_ERR "Can't get mac-address for MACE %s\n",
- mace->full_name);
+ printk(KERN_ERR "Can't get mac-address for MACE %pOF\n",
+ mace);
return -ENODEV;
}
}
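
"%pOF" prints the full path of a struct device_node, replacing direct use of the node's full_name string as in the two hunks above. A minimal sketch:

    #include <linux/of.h>
    #include <linux/printk.h>

    static void sketch_report_node(const struct device_node *np)
    {
            /* The node pointer itself is passed; printk walks the path. */
            pr_err("can't use node %pOF\n", np);
    }
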
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index ec5579f..4eee199 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -133,7 +133,10 @@ static inline unsigned int aq_ring_dx_in_range(unsigned int h, unsigned int i,
}
#define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
-int aq_ring_rx_clean(struct aq_ring_s *self, int *work_done, int budget)
+int aq_ring_rx_clean(struct aq_ring_s *self,
+ struct napi_struct *napi,
+ int *work_done,
+ int budget)
{
struct net_device *ndev = aq_nic_get_ndev(self->aq_nic);
int err = 0;
@@ -239,7 +242,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, int *work_done, int budget)
skb_record_rx_queue(skb, self->idx);
- netif_receive_skb(skb);
+ napi_gro_receive(napi, skb);
++self->stats.rx.packets;
self->stats.rx.bytes += skb->len;
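
Switching from netif_receive_skb() to napi_gro_receive() lets the GRO layer coalesce consecutive TCP segments before they climb the stack, usually a throughput win for 10G-class hardware. A minimal sketch of the call as made from a NAPI poll handler (sketch_* is illustrative):

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    static void sketch_rx_one(struct napi_struct *napi, struct sk_buff *skb)
    {
            /* Must run in the NAPI poll context that owns @napi;
             * GRO may hold the skb to merge it with later ones.
             */
            napi_gro_receive(napi, skb);
    }
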
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
index eecd6d1..782176c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -148,7 +148,10 @@ int aq_ring_init(struct aq_ring_s *self);
void aq_ring_rx_deinit(struct aq_ring_s *self);
void aq_ring_free(struct aq_ring_s *self);
void aq_ring_tx_clean(struct aq_ring_s *self);
-int aq_ring_rx_clean(struct aq_ring_s *self, int *work_done, int budget);
+int aq_ring_rx_clean(struct aq_ring_s *self,
+ struct napi_struct *napi,
+ int *work_done,
+ int budget);
int aq_ring_rx_fill(struct aq_ring_s *self);
#endif /* AQ_RING_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
index fee446a..ebf5880 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
@@ -76,6 +76,7 @@ static int aq_vec_poll(struct napi_struct *napi, int budget)
if (ring[AQ_VEC_RX_ID].sw_head !=
ring[AQ_VEC_RX_ID].hw_head) {
err = aq_ring_rx_clean(&ring[AQ_VEC_RX_ID],
+ napi,
&work_done,
budget - work_done);
if (err < 0)
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index 68de2f2..3241af1 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -720,6 +720,18 @@ static int arc_emac_set_address(struct net_device *ndev, void *p)
return 0;
}
+static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+ if (!netif_running(dev))
+ return -EINVAL;
+
+ if (!dev->phydev)
+ return -ENODEV;
+
+ return phy_mii_ioctl(dev->phydev, rq, cmd);
+}
+
+
static const struct net_device_ops arc_emac_netdev_ops = {
.ndo_open = arc_emac_open,
.ndo_stop = arc_emac_stop,
@@ -727,6 +739,7 @@ static const struct net_device_ops arc_emac_netdev_ops = {
.ndo_set_mac_address = arc_emac_set_address,
.ndo_get_stats = arc_emac_stats,
.ndo_set_rx_mode = arc_emac_set_rx_mode,
+ .ndo_do_ioctl = arc_emac_ioctl,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = arc_emac_poll_controller,
#endif
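
Wiring phy_mii_ioctl() into .ndo_do_ioctl as above makes the standard MII ioctls work on the interface. A minimal userspace sketch, assuming an AF_INET datagram socket and the classic mii-tool cast idiom (names are illustrative):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <net/if.h>
    #include <linux/mii.h>
    #include <linux/sockios.h>

    static int sketch_read_bmsr(int sock, const char *ifname)
    {
            struct ifreq ifr;
            struct mii_ioctl_data *mii =
                    (struct mii_ioctl_data *)&ifr.ifr_data;

            memset(&ifr, 0, sizeof(ifr));
            strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);

            if (ioctl(sock, SIOCGMIIPHY, &ifr) < 0) /* fills phy_id */
                    return -1;

            mii->reg_num = MII_BMSR;
            if (ioctl(sock, SIOCGMIIREG, &ifr) < 0)
                    return -1;

            return mii->val_out;
    }
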
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 9641380..67134ec 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -61,10 +61,12 @@ config BCM63XX_ENET
config BCMGENET
tristate "Broadcom GENET internal MAC support"
+ depends on OF && HAS_IOMEM
select MII
select PHYLIB
select FIXED_PHY
select BCM7XXX_PHY
+ select MDIO_BCM_UNIMAC
help
This driver supports the built-in Ethernet MACs found in the
Broadcom BCM7xxx Set Top Box family chipset.
@@ -193,6 +195,7 @@ config SYSTEMPORT
config BNXT
tristate "Broadcom NetXtreme-C/E support"
depends on PCI
+ depends on MAY_USE_DEVLINK
select FW_LOADER
select LIBCRC32C
---help---
@@ -209,6 +212,15 @@ config BNXT_SRIOV
Virtualization support in the NetXtreme-C/E products. This
allows for virtual function acceleration in virtual environments.
+config BNXT_FLOWER_OFFLOAD
+ bool "TC Flower offload support for NetXtreme-C/E"
+ depends on BNXT
+ default y
+ ---help---
+ This configuration parameter enables TC Flower packet classifier
+ offload for the eswitch, which in turn enables SR-IOV switchdev
+ eswitch offload.
+
config BNXT_DCB
bool "Data Center Bridging (DCB) Support"
default n
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 61a88b6..4f3845a 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -2674,7 +2674,7 @@ static int bcm_enetsw_set_ringparam(struct net_device *dev,
return 0;
}
-static struct ethtool_ops bcm_enetsw_ethtool_ops = {
+static const struct ethtool_ops bcm_enetsw_ethtool_ops = {
.get_strings = bcm_enetsw_get_strings,
.get_sset_count = bcm_enetsw_get_sset_count,
.get_ethtool_stats = bcm_enetsw_get_ethtool_stats,
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index c28fa5a..a6572b5 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -32,13 +32,13 @@
#define BCM_SYSPORT_IO_MACRO(name, offset) \
static inline u32 name##_readl(struct bcm_sysport_priv *priv, u32 off) \
{ \
- u32 reg = __raw_readl(priv->base + offset + off); \
+ u32 reg = readl_relaxed(priv->base + offset + off); \
return reg; \
} \
static inline void name##_writel(struct bcm_sysport_priv *priv, \
u32 val, u32 off) \
{ \
- __raw_writel(val, priv->base + offset + off); \
+ writel_relaxed(val, priv->base + offset + off); \
} \
BCM_SYSPORT_IO_MACRO(intrl2_0, SYS_PORT_INTRL2_0_OFFSET);
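
readl_relaxed()/writel_relaxed() behave like readl()/writel() minus the memory barriers, which is safe for plain register accesses that need no ordering against DMA buffers and is noticeably cheaper on ARM. A minimal sketch of the accessor shape the macro above generates:

    #include <linux/io.h>
    #include <linux/types.h>

    static u32 sketch_reg_read(void __iomem *base, u32 off)
    {
            /* No barrier: callers must not infer DMA ordering from this. */
            return readl_relaxed(base + off);
    }
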
@@ -59,14 +59,14 @@ static inline u32 rdma_readl(struct bcm_sysport_priv *priv, u32 off)
{
if (priv->is_lite && off >= RDMA_STATUS)
off += 4;
- return __raw_readl(priv->base + SYS_PORT_RDMA_OFFSET + off);
+ return readl_relaxed(priv->base + SYS_PORT_RDMA_OFFSET + off);
}
static inline void rdma_writel(struct bcm_sysport_priv *priv, u32 val, u32 off)
{
if (priv->is_lite && off >= RDMA_STATUS)
off += 4;
- __raw_writel(val, priv->base + SYS_PORT_RDMA_OFFSET + off);
+ writel_relaxed(val, priv->base + SYS_PORT_RDMA_OFFSET + off);
}
static inline u32 tdma_control_bit(struct bcm_sysport_priv *priv, u32 bit)
@@ -110,10 +110,10 @@ static inline void dma_desc_set_addr(struct bcm_sysport_priv *priv,
dma_addr_t addr)
{
#ifdef CONFIG_PHYS_ADDR_T_64BIT
- __raw_writel(upper_32_bits(addr) & DESC_ADDR_HI_MASK,
+ writel_relaxed(upper_32_bits(addr) & DESC_ADDR_HI_MASK,
d + DESC_ADDR_HI_STATUS_LEN);
#endif
- __raw_writel(lower_32_bits(addr), d + DESC_ADDR_LO);
+ writel_relaxed(lower_32_bits(addr), d + DESC_ADDR_LO);
}
static inline void tdma_port_write_desc_addr(struct bcm_sysport_priv *priv,
@@ -201,10 +201,10 @@ static int bcm_sysport_set_features(struct net_device *dev,
*/
static const struct bcm_sysport_stats bcm_sysport_gstrings_stats[] = {
/* general stats */
- STAT_NETDEV(rx_packets),
- STAT_NETDEV(tx_packets),
- STAT_NETDEV(rx_bytes),
- STAT_NETDEV(tx_bytes),
+ STAT_NETDEV64(rx_packets),
+ STAT_NETDEV64(tx_packets),
+ STAT_NETDEV64(rx_bytes),
+ STAT_NETDEV64(tx_bytes),
STAT_NETDEV(rx_errors),
STAT_NETDEV(tx_errors),
STAT_NETDEV(rx_dropped),
@@ -316,6 +316,7 @@ static inline bool bcm_sysport_lite_stat_valid(enum bcm_sysport_stat_type type)
{
switch (type) {
case BCM_SYSPORT_STAT_NETDEV:
+ case BCM_SYSPORT_STAT_NETDEV64:
case BCM_SYSPORT_STAT_RXCHK:
case BCM_SYSPORT_STAT_RBUF:
case BCM_SYSPORT_STAT_SOFT:
@@ -398,6 +399,7 @@ static void bcm_sysport_update_mib_counters(struct bcm_sysport_priv *priv)
s = &bcm_sysport_gstrings_stats[i];
switch (s->type) {
case BCM_SYSPORT_STAT_NETDEV:
+ case BCM_SYSPORT_STAT_NETDEV64:
case BCM_SYSPORT_STAT_SOFT:
continue;
case BCM_SYSPORT_STAT_MIB_RX:
@@ -434,7 +436,10 @@ static void bcm_sysport_get_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct bcm_sysport_priv *priv = netdev_priv(dev);
+ struct bcm_sysport_stats64 *stats64 = &priv->stats64;
+ struct u64_stats_sync *syncp = &priv->syncp;
struct bcm_sysport_tx_ring *ring;
+ unsigned int start;
int i, j;
if (netif_running(dev))
@@ -447,14 +452,22 @@ static void bcm_sysport_get_stats(struct net_device *dev,
s = &bcm_sysport_gstrings_stats[i];
if (s->type == BCM_SYSPORT_STAT_NETDEV)
p = (char *)&dev->stats;
+ else if (s->type == BCM_SYSPORT_STAT_NETDEV64)
+ p = (char *)stats64;
else
p = (char *)priv;
if (priv->is_lite && !bcm_sysport_lite_stat_valid(s->type))
continue;
-
p += s->stat_offset;
- data[j] = *(unsigned long *)p;
+
+ if (s->stat_sizeof == sizeof(u64))
+ do {
+ start = u64_stats_fetch_begin_irq(syncp);
+ data[j] = *(u64 *)p;
+ } while (u64_stats_fetch_retry_irq(syncp, start));
+ else
+ data[j] = *(u32 *)p;
j++;
}
@@ -666,6 +679,7 @@ static int bcm_sysport_alloc_rx_bufs(struct bcm_sysport_priv *priv)
static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
unsigned int budget)
{
+ struct bcm_sysport_stats64 *stats64 = &priv->stats64;
struct net_device *ndev = priv->netdev;
unsigned int processed = 0, to_process;
struct bcm_sysport_cb *cb;
@@ -769,6 +783,10 @@ static unsigned int bcm_sysport_desc_rx(struct bcm_sysport_priv *priv,
skb->protocol = eth_type_trans(skb, ndev);
ndev->stats.rx_packets++;
ndev->stats.rx_bytes += len;
+ u64_stats_update_begin(&priv->syncp);
+ stats64->rx_packets++;
+ stats64->rx_bytes += len;
+ u64_stats_update_end(&priv->syncp);
napi_gro_receive(&priv->napi, skb);
next:
@@ -791,17 +809,15 @@ static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_tx_ring *ring,
struct device *kdev = &priv->pdev->dev;
if (cb->skb) {
- ring->bytes += cb->skb->len;
*bytes_compl += cb->skb->len;
dma_unmap_single(kdev, dma_unmap_addr(cb, dma_addr),
dma_unmap_len(cb, dma_len),
DMA_TO_DEVICE);
- ring->packets++;
(*pkts_compl)++;
bcm_sysport_free_cb(cb);
/* SKB fragment */
} else if (dma_unmap_addr(cb, dma_addr)) {
- ring->bytes += dma_unmap_len(cb, dma_len);
+ *bytes_compl += dma_unmap_len(cb, dma_len);
dma_unmap_page(kdev, dma_unmap_addr(cb, dma_addr),
dma_unmap_len(cb, dma_len), DMA_TO_DEVICE);
dma_unmap_addr_set(cb, dma_addr, 0);
@@ -812,9 +828,9 @@ static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_tx_ring *ring,
static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
struct bcm_sysport_tx_ring *ring)
{
- struct net_device *ndev = priv->netdev;
unsigned int c_index, last_c_index, last_tx_cn, num_tx_cbs;
unsigned int pkts_compl = 0, bytes_compl = 0;
+ struct net_device *ndev = priv->netdev;
struct bcm_sysport_cb *cb;
u32 hw_ind;
@@ -853,6 +869,11 @@ static unsigned int __bcm_sysport_tx_reclaim(struct bcm_sysport_priv *priv,
last_c_index &= (num_tx_cbs - 1);
}
+ u64_stats_update_begin(&priv->syncp);
+ ring->packets += pkts_compl;
+ ring->bytes += bytes_compl;
+ u64_stats_update_end(&priv->syncp);
+
ring->c_index = c_index;
netif_dbg(priv, tx_done, ndev,
@@ -1371,6 +1392,19 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
tdma_writel(priv, RING_IGNORE_STATUS, TDMA_DESC_RING_MAPPING(index));
tdma_writel(priv, 0, TDMA_DESC_RING_PCP_DEI_VID(index));
+ /* Do not use tdma_control_bit() here because TSB_SWAP1 collides
+ * with the original definition of ACB_ALGO
+ */
+ reg = tdma_readl(priv, TDMA_CONTROL);
+ if (priv->is_lite)
+ reg &= ~BIT(TSB_SWAP1);
+ /* Set a correct TSB format based on host endian */
+ if (!IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ reg |= tdma_control_bit(priv, TSB_SWAP0);
+ else
+ reg &= ~tdma_control_bit(priv, TSB_SWAP0);
+ tdma_writel(priv, reg, TDMA_CONTROL);
+
/* Program the number of descriptors as MAX_THRESHOLD and half of
* its size for the hysteresis trigger
*/
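
IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) evaluates to a compile-time 0 or 1, so the endian-dependent branch above is folded away by the compiler. A minimal sketch of the select-a-swap-bit shape (sketch_* is illustrative):

    #include <linux/kernel.h>

    static u32 sketch_pick_swap(u32 reg, u32 swap_bit)
    {
            if (!IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
                    reg |= swap_bit;   /* LE host: byte-swap the status block */
            else
                    reg &= ~swap_bit;

            return reg;
    }
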
@@ -1677,22 +1711,41 @@ static int bcm_sysport_change_mac(struct net_device *dev, void *p)
return 0;
}
-static struct net_device_stats *bcm_sysport_get_nstats(struct net_device *dev)
+static void bcm_sysport_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
{
struct bcm_sysport_priv *priv = netdev_priv(dev);
- unsigned long tx_bytes = 0, tx_packets = 0;
+ struct bcm_sysport_stats64 *stats64 = &priv->stats64;
struct bcm_sysport_tx_ring *ring;
+ u64 tx_packets = 0, tx_bytes = 0;
+ unsigned int start;
unsigned int q;
+ netdev_stats_to_stats64(stats, &dev->stats);
+
for (q = 0; q < dev->num_tx_queues; q++) {
ring = &priv->tx_rings[q];
- tx_bytes += ring->bytes;
- tx_packets += ring->packets;
+ do {
+ start = u64_stats_fetch_begin_irq(&priv->syncp);
+ tx_bytes = ring->bytes;
+ tx_packets = ring->packets;
+ } while (u64_stats_fetch_retry_irq(&priv->syncp, start));
+
+ stats->tx_bytes += tx_bytes;
+ stats->tx_packets += tx_packets;
}
- dev->stats.tx_bytes = tx_bytes;
- dev->stats.tx_packets = tx_packets;
- return &dev->stats;
+ /* lockless update of tx_bytes and tx_packets */
+ u64_stats_update_begin(&priv->syncp);
+ stats64->tx_bytes = stats->tx_bytes;
+ stats64->tx_packets = stats->tx_packets;
+ u64_stats_update_end(&priv->syncp);
+
+ do {
+ start = u64_stats_fetch_begin_irq(&priv->syncp);
+ stats->rx_packets = stats64->rx_packets;
+ stats->rx_bytes = stats64->rx_bytes;
+ } while (u64_stats_fetch_retry_irq(&priv->syncp, start));
}
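
The syncp dance above is the standard u64_stats_sync pattern for 64-bit counters that 32-bit hosts cannot update atomically: writers bracket updates with update_begin/update_end, and readers retry until they observe an unchanged sequence. On 64-bit SMP builds the helpers compile to nothing. A minimal sketch (sketch_* is illustrative):

    #include <linux/u64_stats_sync.h>

    struct sketch_stats {
            u64 packets;
            u64 bytes;
            struct u64_stats_sync syncp;
    };

    static void sketch_update(struct sketch_stats *s, unsigned int len)
    {
            u64_stats_update_begin(&s->syncp);
            s->packets++;
            s->bytes += len;
            u64_stats_update_end(&s->syncp);
    }

    static void sketch_fetch(struct sketch_stats *s, u64 *packets, u64 *bytes)
    {
            unsigned int start;

            do {
                    start = u64_stats_fetch_begin_irq(&s->syncp);
                    *packets = s->packets;
                    *bytes = s->bytes;
            } while (u64_stats_fetch_retry_irq(&s->syncp, start));
    }
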
static void bcm_sysport_netif_start(struct net_device *dev)
@@ -1724,10 +1777,14 @@ static void rbuf_init(struct bcm_sysport_priv *priv)
reg = rbuf_readl(priv, RBUF_CONTROL);
reg |= RBUF_4B_ALGN | RBUF_RSB_EN;
/* Set a correct RSB format on SYSTEMPORT Lite */
- if (priv->is_lite) {
+ if (priv->is_lite)
reg &= ~RBUF_RSB_SWAP1;
+
+ /* Set a correct RSB format based on host endian */
+ if (!IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
reg |= RBUF_RSB_SWAP0;
- }
+ else
+ reg &= ~RBUF_RSB_SWAP0;
rbuf_writel(priv, reg, RBUF_CONTROL);
}
@@ -1956,7 +2013,7 @@ static const struct net_device_ops bcm_sysport_netdev_ops = {
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = bcm_sysport_poll_controller,
#endif
- .ndo_get_stats = bcm_sysport_get_nstats,
+ .ndo_get_stats64 = bcm_sysport_get_stats64,
};
#define REV_FMT "v%2x.%02x"
@@ -2104,6 +2161,8 @@ static int bcm_sysport_probe(struct platform_device *pdev)
/* libphy will adjust the link state accordingly */
netif_carrier_off(dev);
+ u64_stats_init(&priv->syncp);
+
ret = register_netdev(dev);
if (ret) {
dev_err(&pdev->dev, "failed to register net_device\n");
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index 77a51c1..82e401d 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -449,7 +449,8 @@ struct bcm_rsb {
/* Uses 2 bits on SYSTEMPORT Lite and shifts everything by 1 bit; we
* keep the SYSTEMPORT layout here and adjust with tdma_control_bit()
*/
-#define TSB_SWAP 2
+#define TSB_SWAP0 2
+#define TSB_SWAP1 3
#define ACB_ALGO 3
#define BUF_DATA_OFFSET_SHIFT 4
#define BUF_DATA_OFFSET_MASK 0x3ff
@@ -603,6 +604,7 @@ struct bcm_sysport_mib {
/* HW maintains a large list of counters */
enum bcm_sysport_stat_type {
BCM_SYSPORT_STAT_NETDEV = -1,
+ BCM_SYSPORT_STAT_NETDEV64,
BCM_SYSPORT_STAT_MIB_RX,
BCM_SYSPORT_STAT_MIB_TX,
BCM_SYSPORT_STAT_RUNT,
@@ -619,6 +621,13 @@ enum bcm_sysport_stat_type {
.type = BCM_SYSPORT_STAT_NETDEV, \
}
+#define STAT_NETDEV64(m) { \
+ .stat_string = __stringify(m), \
+ .stat_sizeof = sizeof(((struct bcm_sysport_stats64 *)0)->m), \
+ .stat_offset = offsetof(struct bcm_sysport_stats64, m), \
+ .type = BCM_SYSPORT_STAT_NETDEV64, \
+}
+
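
STAT_NETDEV64() records each field's name, size and offset so the generic ethtool strings/stats code can walk one table; the sizeof-on-a-NULL-pointer idiom measures a member without needing a live object. A minimal sketch of the same table shape (sketch_* is illustrative):

    #include <linux/stddef.h>
    #include <linux/stringify.h>
    #include <linux/types.h>

    struct sketch_stats64 {
            u64 rx_packets;
            u64 rx_bytes;
    };

    struct sketch_stat_desc {
            const char *name;
            int size;
            int offset;
    };

    #define SKETCH_STAT64(m) {                                      \
            .name   = __stringify(m),                               \
            .size   = sizeof(((struct sketch_stats64 *)0)->m),      \
            .offset = offsetof(struct sketch_stats64, m),           \
    }

    static const struct sketch_stat_desc sketch_table[] = {
            SKETCH_STAT64(rx_packets),
            SKETCH_STAT64(rx_bytes),
    };
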
#define STAT_MIB(str, m, _type) { \
.stat_string = str, \
.stat_sizeof = sizeof(((struct bcm_sysport_priv *)0)->m), \
@@ -659,6 +668,14 @@ struct bcm_sysport_stats {
u16 reg_offset;
};
+struct bcm_sysport_stats64 {
+ /* 64-bit stats on both 32-bit and 64-bit machines */
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+};
+
/* Software housekeeping helper structure */
struct bcm_sysport_cb {
struct sk_buff *skb; /* SKB for RX packets */
@@ -743,5 +760,10 @@ struct bcm_sysport_priv {
/* Ethtool */
u32 msg_enable;
+
+ struct bcm_sysport_stats64 stats64;
+
+ /* For atomically updating 64-bit values on 32-bit machines */
+ struct u64_stats_sync syncp;
};
#endif /* __BCM_SYSPORT_H */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 67fe3d8..1216c1f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -4284,15 +4284,17 @@ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc)
return 0;
}
-int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *tc)
+int __bnx2x_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
{
- if (tc->type != TC_SETUP_MQPRIO)
- return -EINVAL;
+ struct tc_mqprio_qopt *mqprio = type_data;
+
+ if (type != TC_SETUP_MQPRIO)
+ return -EOPNOTSUPP;
- tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
- return bnx2x_setup_tc(dev, tc->mqprio->num_tc);
+ return bnx2x_setup_tc(dev, mqprio->num_tc);
}
/* called with rtnl_lock */
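
The conversion above reflects the reworked setup_tc contract: drivers now receive an enum tc_setup_type plus an opaque type_data pointer, and return -EOPNOTSUPP (not -EINVAL) for types they do not handle. A minimal sketch of a conforming callback (sketch_* is illustrative):

    #include <linux/netdevice.h>
    #include <net/pkt_cls.h>

    static int sketch_setup_tc(struct net_device *dev,
                               enum tc_setup_type type, void *type_data)
    {
            struct tc_mqprio_qopt *mqprio = type_data;

            if (type != TC_SETUP_MQPRIO)
                    return -EOPNOTSUPP;

            /* Program mqprio->num_tc traffic classes into the HW here. */
            netdev_info(dev, "mqprio: %u TCs\n", mqprio->num_tc);
            return 0;
    }
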
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index c26688d..a5265e1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -486,8 +486,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev);
/* setup_tc callback */
int bnx2x_setup_tc(struct net_device *dev, u8 num_tc);
-int __bnx2x_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *tc);
+int __bnx2x_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data);
int bnx2x_get_vf_config(struct net_device *dev, int vf,
struct ifla_vf_info *ivi);
diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index a7ca45b..4f0cb8e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_BNXT) += bnxt_en.o
-bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o
+bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_tc.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index f20b3d2..aacec8b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -33,6 +33,7 @@
#include <linux/mii.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
+#include <linux/if_bridge.h>
#include <linux/rtc.h>
#include <linux/bpf.h>
#include <net/ip.h>
@@ -48,6 +49,8 @@
#include <linux/aer.h>
#include <linux/bitmap.h>
#include <linux/cpu_rmap.h>
+#include <linux/cpumask.h>
+#include <net/pkt_cls.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
@@ -56,6 +59,8 @@
#include "bnxt_ethtool.h"
#include "bnxt_dcb.h"
#include "bnxt_xdp.h"
+#include "bnxt_vfr.h"
+#include "bnxt_tc.h"
#define BNXT_TX_TIMEOUT (5 * HZ)
@@ -101,6 +106,8 @@ enum board_idx {
BCM57416_NPAR,
BCM57452,
BCM57454,
+ BCM58802,
+ BCM58808,
NETXTREME_E_VF,
NETXTREME_C_VF,
};
@@ -109,39 +116,42 @@ enum board_idx {
static const struct {
char *name;
} board_info[] = {
- { "Broadcom BCM57301 NetXtreme-C 10Gb Ethernet" },
- { "Broadcom BCM57302 NetXtreme-C 10Gb/25Gb Ethernet" },
- { "Broadcom BCM57304 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" },
- { "Broadcom BCM57417 NetXtreme-E Ethernet Partition" },
- { "Broadcom BCM58700 Nitro 1Gb/2.5Gb/10Gb Ethernet" },
- { "Broadcom BCM57311 NetXtreme-C 10Gb Ethernet" },
- { "Broadcom BCM57312 NetXtreme-C 10Gb/25Gb Ethernet" },
- { "Broadcom BCM57402 NetXtreme-E 10Gb Ethernet" },
- { "Broadcom BCM57404 NetXtreme-E 10Gb/25Gb Ethernet" },
- { "Broadcom BCM57406 NetXtreme-E 10GBase-T Ethernet" },
- { "Broadcom BCM57402 NetXtreme-E Ethernet Partition" },
- { "Broadcom BCM57407 NetXtreme-E 10GBase-T Ethernet" },
- { "Broadcom BCM57412 NetXtreme-E 10Gb Ethernet" },
- { "Broadcom BCM57414 NetXtreme-E 10Gb/25Gb Ethernet" },
- { "Broadcom BCM57416 NetXtreme-E 10GBase-T Ethernet" },
- { "Broadcom BCM57417 NetXtreme-E 10GBase-T Ethernet" },
- { "Broadcom BCM57412 NetXtreme-E Ethernet Partition" },
- { "Broadcom BCM57314 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" },
- { "Broadcom BCM57417 NetXtreme-E 10Gb/25Gb Ethernet" },
- { "Broadcom BCM57416 NetXtreme-E 10Gb Ethernet" },
- { "Broadcom BCM57404 NetXtreme-E Ethernet Partition" },
- { "Broadcom BCM57406 NetXtreme-E Ethernet Partition" },
- { "Broadcom BCM57407 NetXtreme-E 25Gb Ethernet" },
- { "Broadcom BCM57407 NetXtreme-E Ethernet Partition" },
- { "Broadcom BCM57414 NetXtreme-E Ethernet Partition" },
- { "Broadcom BCM57416 NetXtreme-E Ethernet Partition" },
- { "Broadcom BCM57452 NetXtreme-E 10Gb/25Gb/40Gb/50Gb Ethernet" },
- { "Broadcom BCM57454 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
- { "Broadcom NetXtreme-E Ethernet Virtual Function" },
- { "Broadcom NetXtreme-C Ethernet Virtual Function" },
+ [BCM57301] = { "Broadcom BCM57301 NetXtreme-C 10Gb Ethernet" },
+ [BCM57302] = { "Broadcom BCM57302 NetXtreme-C 10Gb/25Gb Ethernet" },
+ [BCM57304] = { "Broadcom BCM57304 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" },
+ [BCM57417_NPAR] = { "Broadcom BCM57417 NetXtreme-E Ethernet Partition" },
+ [BCM58700] = { "Broadcom BCM58700 Nitro 1Gb/2.5Gb/10Gb Ethernet" },
+ [BCM57311] = { "Broadcom BCM57311 NetXtreme-C 10Gb Ethernet" },
+ [BCM57312] = { "Broadcom BCM57312 NetXtreme-C 10Gb/25Gb Ethernet" },
+ [BCM57402] = { "Broadcom BCM57402 NetXtreme-E 10Gb Ethernet" },
+ [BCM57404] = { "Broadcom BCM57404 NetXtreme-E 10Gb/25Gb Ethernet" },
+ [BCM57406] = { "Broadcom BCM57406 NetXtreme-E 10GBase-T Ethernet" },
+ [BCM57402_NPAR] = { "Broadcom BCM57402 NetXtreme-E Ethernet Partition" },
+ [BCM57407] = { "Broadcom BCM57407 NetXtreme-E 10GBase-T Ethernet" },
+ [BCM57412] = { "Broadcom BCM57412 NetXtreme-E 10Gb Ethernet" },
+ [BCM57414] = { "Broadcom BCM57414 NetXtreme-E 10Gb/25Gb Ethernet" },
+ [BCM57416] = { "Broadcom BCM57416 NetXtreme-E 10GBase-T Ethernet" },
+ [BCM57417] = { "Broadcom BCM57417 NetXtreme-E 10GBase-T Ethernet" },
+ [BCM57412_NPAR] = { "Broadcom BCM57412 NetXtreme-E Ethernet Partition" },
+ [BCM57314] = { "Broadcom BCM57314 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" },
+ [BCM57417_SFP] = { "Broadcom BCM57417 NetXtreme-E 10Gb/25Gb Ethernet" },
+ [BCM57416_SFP] = { "Broadcom BCM57416 NetXtreme-E 10Gb Ethernet" },
+ [BCM57404_NPAR] = { "Broadcom BCM57404 NetXtreme-E Ethernet Partition" },
+ [BCM57406_NPAR] = { "Broadcom BCM57406 NetXtreme-E Ethernet Partition" },
+ [BCM57407_SFP] = { "Broadcom BCM57407 NetXtreme-E 25Gb Ethernet" },
+ [BCM57407_NPAR] = { "Broadcom BCM57407 NetXtreme-E Ethernet Partition" },
+ [BCM57414_NPAR] = { "Broadcom BCM57414 NetXtreme-E Ethernet Partition" },
+ [BCM57416_NPAR] = { "Broadcom BCM57416 NetXtreme-E Ethernet Partition" },
+ [BCM57452] = { "Broadcom BCM57452 NetXtreme-E 10Gb/25Gb/40Gb/50Gb Ethernet" },
+ [BCM57454] = { "Broadcom BCM57454 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
+ [BCM58802] = { "Broadcom BCM58802 NetXtreme-S 10Gb/25Gb/40Gb/50Gb Ethernet" },
+ [BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" },
+ [NETXTREME_E_VF] = { "Broadcom NetXtreme-E Ethernet Virtual Function" },
+ [NETXTREME_C_VF] = { "Broadcom NetXtreme-C Ethernet Virtual Function" },
};
static const struct pci_device_id bnxt_pci_tbl[] = {
+ { PCI_VDEVICE(BROADCOM, 0x1614), .driver_data = BCM57454 },
{ PCI_VDEVICE(BROADCOM, 0x16c0), .driver_data = BCM57417_NPAR },
{ PCI_VDEVICE(BROADCOM, 0x16c8), .driver_data = BCM57301 },
{ PCI_VDEVICE(BROADCOM, 0x16c9), .driver_data = BCM57302 },
@@ -172,8 +182,9 @@ static const struct pci_device_id bnxt_pci_tbl[] = {
{ PCI_VDEVICE(BROADCOM, 0x16ed), .driver_data = BCM57414_NPAR },
{ PCI_VDEVICE(BROADCOM, 0x16ee), .driver_data = BCM57416_NPAR },
{ PCI_VDEVICE(BROADCOM, 0x16ef), .driver_data = BCM57416_NPAR },
+ { PCI_VDEVICE(BROADCOM, 0x16f0), .driver_data = BCM58808 },
{ PCI_VDEVICE(BROADCOM, 0x16f1), .driver_data = BCM57452 },
- { PCI_VDEVICE(BROADCOM, 0x1614), .driver_data = BCM57454 },
+ { PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 },
#ifdef CONFIG_BNXT_SRIOV
{ PCI_VDEVICE(BROADCOM, 0x1606), .driver_data = NETXTREME_E_VF },
{ PCI_VDEVICE(BROADCOM, 0x1609), .driver_data = NETXTREME_E_VF },
@@ -243,6 +254,16 @@ const u16 bnxt_lhint_arr[] = {
TX_BD_FLAGS_LHINT_2048_AND_LARGER,
};
+static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb)
+{
+ struct metadata_dst *md_dst = skb_metadata_dst(skb);
+
+ if (!md_dst || md_dst->type != METADATA_HW_PORT_MUX)
+ return 0;
+
+ return md_dst->u.port_info.port_id;
+}
+
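Note: the cfa_action consumed above is produced by the VF representor's transmit path in the new bnxt_vfr.c, which is not part of this hunk. A minimal sketch of that producer side, assuming the struct bnxt_vf_rep fields declared later in bnxt.h (names and shape are assumptions, not the patch's code):

	/* Sketch only: a VF-rep ndo_start_xmit tags the skb with its
	 * METADATA_HW_PORT_MUX metadata_dst (port_info.port_id holds the
	 * per-VF tx_cfa_action) and requeues it on the PF netdev, where
	 * bnxt_xmit_get_cfa_action() above picks the action up.
	 */
	static netdev_tx_t sketch_vf_rep_xmit(struct sk_buff *skb,
					      struct net_device *dev)
	{
		struct bnxt_vf_rep *vf_rep = netdev_priv(dev);

		skb_dst_drop(skb);
		dst_hold((struct dst_entry *)vf_rep->dst);
		skb_dst_set(skb, (struct dst_entry *)vf_rep->dst);
		skb->dev = vf_rep->bp->dev;	/* hand off to the PF */

		return dev_queue_xmit(skb);
	}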
static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bnxt *bp = netdev_priv(dev);
@@ -287,7 +308,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
tx_buf->nr_frags = last_frag;
vlan_tag_flags = 0;
- cfa_action = 0;
+ cfa_action = bnxt_xmit_get_cfa_action(skb);
if (skb_vlan_tag_present(skb)) {
vlan_tag_flags = TX_BD_CFA_META_KEY_VLAN |
skb_vlan_tag_get(skb);
@@ -322,7 +343,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
tx_push1->tx_bd_hsize_lflags = 0;
tx_push1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
- tx_push1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
+ tx_push1->tx_bd_cfa_action =
+ cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT);
end = pdata + length;
end = PTR_ALIGN(end, 8) - 1;
@@ -427,7 +449,8 @@ normal_tx:
txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
txbd1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
- txbd1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
+ txbd1->tx_bd_cfa_action =
+ cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT);
for (i = 0; i < last_frag; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -1032,7 +1055,10 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
bnxt_sched_reset(bp, rxr);
return;
}
-
+ /* Store cfa_code in tpa_info to use in tpa_end
+ * completion processing.
+ */
+ tpa_info->cfa_code = TPA_START_CFA_CODE(tpa_start1);
prod_rx_buf->data = tpa_info->data;
prod_rx_buf->data_ptr = tpa_info->data_ptr;
@@ -1267,6 +1293,17 @@ static inline struct sk_buff *bnxt_gro_skb(struct bnxt *bp,
return skb;
}
+/* Given the cfa_code of a received packet, determine which
+ * netdev (vf-rep or PF) the packet is destined for.
+ */
+static struct net_device *bnxt_get_pkt_dev(struct bnxt *bp, u16 cfa_code)
+{
+ struct net_device *dev = bnxt_get_vf_rep(bp, cfa_code);
+
+ /* if vf-rep dev is NULL, the packet must belong to the PF */
+ return dev ? dev : bp->dev;
+}
+
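bnxt_get_vf_rep() itself lands in the new bnxt_vfr.c and is not shown in this hunk. A plausible shape, assuming the cfa_code_map and vf_reps members added to struct bnxt in bnxt.h below (the sentinel name VF_IDX_INVALID is an assumption):

	/* Sketch only: O(1) demux from an RX completion's cfa_code to the
	 * registered VF representor, if any.
	 */
	static struct net_device *sketch_get_vf_rep(struct bnxt *bp, u16 cfa_code)
	{
		u16 vf_idx;

		if (!bp->cfa_code_map)		/* not in switchdev mode */
			return NULL;

		vf_idx = bp->cfa_code_map[cfa_code];
		if (vf_idx == VF_IDX_INVALID)	/* assumed sentinel */
			return NULL;

		return bp->vf_reps[vf_idx]->dev;
	}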
static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
struct bnxt_napi *bnapi,
u32 *raw_cons,
@@ -1360,7 +1397,9 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
return NULL;
}
}
- skb->protocol = eth_type_trans(skb, bp->dev);
+
+ skb->protocol =
+ eth_type_trans(skb, bnxt_get_pkt_dev(bp, tpa_info->cfa_code));
if (tpa_info->hash_type != PKT_HASH_TYPE_NONE)
skb_set_hash(skb, tpa_info->rss_hash, tpa_info->hash_type);
@@ -1387,6 +1426,18 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
return skb;
}
+static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi,
+ struct sk_buff *skb)
+{
+ if (skb->dev != bp->dev) {
+ /* this packet belongs to a vf-rep */
+ bnxt_vf_rep_rx(bp, skb);
+ return;
+ }
+ skb_record_rx_queue(skb, bnapi->index);
+ napi_gro_receive(&bnapi->napi, skb);
+}
+
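bnxt_vf_rep_rx(), called above, is likewise part of bnxt_vfr.c. A minimal sketch, assuming the rx_stats field of struct bnxt_vf_rep declared in bnxt.h below:

	/* Sketch only: skb->dev was already switched to the vf-rep netdev
	 * by eth_type_trans() in the caller; account the packet against
	 * the representor and hand it to the stack.
	 */
	static void sketch_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb)
	{
		struct bnxt_vf_rep *vf_rep = netdev_priv(skb->dev);

		vf_rep->rx_stats.bytes += skb->len;
		vf_rep->rx_stats.packets++;

		netif_receive_skb(skb);
	}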
/* returns the following:
* 1 - 1 packet successfully received
* 0 - successful TPA_START, packet not completed yet
@@ -1403,7 +1454,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
struct rx_cmp *rxcmp;
struct rx_cmp_ext *rxcmp1;
u32 tmp_raw_cons = *raw_cons;
- u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons);
+ u16 cfa_code, cons, prod, cp_cons = RING_CMP(tmp_raw_cons);
struct bnxt_sw_rx_bd *rx_buf;
unsigned int len;
u8 *data_ptr, agg_bufs, cmp_type;
@@ -1445,8 +1496,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
rc = -ENOMEM;
if (likely(skb)) {
- skb_record_rx_queue(skb, bnapi->index);
- napi_gro_receive(&bnapi->napi, skb);
+ bnxt_deliver_skb(bp, bnapi, skb);
rc = 1;
}
*event |= BNXT_RX_EVENT;
@@ -1535,7 +1585,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
skb_set_hash(skb, le32_to_cpu(rxcmp->rx_cmp_rss_hash), type);
}
- skb->protocol = eth_type_trans(skb, dev);
+ cfa_code = RX_CMP_CFA_CODE(rxcmp1);
+ skb->protocol = eth_type_trans(skb, bnxt_get_pkt_dev(bp, cfa_code));
if ((rxcmp1->rx_cmp_flags2 &
cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) &&
@@ -1560,8 +1611,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
}
}
- skb_record_rx_queue(skb, bnapi->index);
- napi_gro_receive(&bnapi->napi, skb);
+ bnxt_deliver_skb(bp, bnapi, skb);
rc = 1;
next_rx:
@@ -1802,6 +1852,13 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
&event);
if (likely(rc >= 0))
rx_pkts += rc;
+ /* Increment rx_pkts when rc is -ENOMEM to count towards
+ * the NAPI budget. Otherwise, we may potentially loop
+ * here forever if we consistently cannot allocate
+ * buffers.
+ */
+ else if (rc == -ENOMEM)
+ rx_pkts++;
else if (rc == -EBUSY) /* partial completion */
break;
} else if (unlikely((TX_CMP_TYPE(txcmp) ==
@@ -4420,9 +4477,33 @@ static int bnxt_hwrm_reserve_tx_rings(struct bnxt *bp, int *tx_rings)
mutex_lock(&bp->hwrm_cmd_lock);
rc = __bnxt_hwrm_get_tx_rings(bp, 0xffff, tx_rings);
mutex_unlock(&bp->hwrm_cmd_lock);
+ if (!rc)
+ bp->tx_reserved_rings = *tx_rings;
return rc;
}
+static int bnxt_hwrm_check_tx_rings(struct bnxt *bp, int tx_rings)
+{
+ struct hwrm_func_cfg_input req = {0};
+ int rc;
+
+ if (bp->hwrm_spec_code < 0x10801)
+ return 0;
+
+ if (BNXT_VF(bp))
+ return 0;
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
+ req.fid = cpu_to_le16(0xffff);
+ req.flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST);
+ req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS);
+ req.num_tx_rings = cpu_to_le16(tx_rings);
+ rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (rc)
+ return -ENOMEM;
+ return 0;
+}
+
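This is effectively a query-only counterpart to bnxt_hwrm_reserve_tx_rings(): the new FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST flag makes the FUNC_CFG call validate the requested TX ring count without applying it, so bnxt_check_rings() below can fail early with -ENOMEM while the actual reservation is deferred to bnxt_init_chip() (see the tx_reserved_rings hunk further down).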
static void bnxt_hwrm_set_coal_params(struct bnxt *bp, u32 max_bufs,
u32 buf_tmrs, u16 flags,
struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req)
@@ -4577,6 +4658,7 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
{
struct hwrm_func_qcfg_input req = {0};
struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+ u16 flags;
int rc;
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
@@ -4593,15 +4675,15 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK;
}
#endif
- if (BNXT_PF(bp)) {
- u16 flags = le16_to_cpu(resp->flags);
-
- if (flags & (FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED |
- FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED))
- bp->flags |= BNXT_FLAG_FW_LLDP_AGENT;
- if (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST)
- bp->flags |= BNXT_FLAG_MULTI_HOST;
+ flags = le16_to_cpu(resp->flags);
+ if (flags & (FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED |
+ FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED)) {
+ bp->flags |= BNXT_FLAG_FW_LLDP_AGENT;
+ if (flags & FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED)
+ bp->flags |= BNXT_FLAG_FW_DCBX_AGENT;
}
+ if (BNXT_PF(bp) && (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST))
+ bp->flags |= BNXT_FLAG_MULTI_HOST;
switch (resp->port_partition_type) {
case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0:
@@ -4610,6 +4692,13 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
bp->port_partition_type = resp->port_partition_type;
break;
}
+ if (bp->hwrm_spec_code < 0x10707 ||
+ resp->evb_mode == FUNC_QCFG_RESP_EVB_MODE_VEB)
+ bp->br_mode = BRIDGE_MODE_VEB;
+ else if (resp->evb_mode == FUNC_QCFG_RESP_EVB_MODE_VEPA)
+ bp->br_mode = BRIDGE_MODE_VEPA;
+ else
+ bp->br_mode = BRIDGE_MODE_UNDEF;
func_qcfg_exit:
mutex_unlock(&bp->hwrm_cmd_lock);
@@ -4900,6 +4989,26 @@ static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path,
}
}
+static int bnxt_hwrm_set_br_mode(struct bnxt *bp, u16 br_mode)
+{
+ struct hwrm_func_cfg_input req = {0};
+ int rc;
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
+ req.fid = cpu_to_le16(0xffff);
+ req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_EVB_MODE);
+ if (br_mode == BRIDGE_MODE_VEB)
+ req.evb_mode = FUNC_CFG_REQ_EVB_MODE_VEB;
+ else if (br_mode == BRIDGE_MODE_VEPA)
+ req.evb_mode = FUNC_CFG_REQ_EVB_MODE_VEPA;
+ else
+ return -EINVAL;
+ rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (rc)
+ rc = -EIO;
+ return rc;
+}
+
static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id)
{
struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
@@ -5035,6 +5144,15 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
rc);
goto err_out;
}
+ if (bp->tx_reserved_rings != bp->tx_nr_rings) {
+ int tx = bp->tx_nr_rings;
+
+ if (bnxt_hwrm_reserve_tx_rings(bp, &tx) ||
+ tx < bp->tx_nr_rings) {
+ rc = -ENOMEM;
+ goto err_out;
+ }
+ }
}
rc = bnxt_hwrm_ring_alloc(bp);
@@ -5441,8 +5559,15 @@ static void bnxt_free_irq(struct bnxt *bp)
for (i = 0; i < bp->cp_nr_rings; i++) {
irq = &bp->irq_tbl[i];
- if (irq->requested)
+ if (irq->requested) {
+ if (irq->have_cpumask) {
+ irq_set_affinity_hint(irq->vector, NULL);
+ free_cpumask_var(irq->cpu_mask);
+ irq->have_cpumask = 0;
+ }
free_irq(irq->vector, bp->bnapi[i]);
+ }
+
irq->requested = 0;
}
}
@@ -5475,6 +5600,21 @@ static int bnxt_request_irq(struct bnxt *bp)
break;
irq->requested = 1;
+
+ if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) {
+ int numa_node = dev_to_node(&bp->pdev->dev);
+
+ irq->have_cpumask = 1;
+ cpumask_set_cpu(cpumask_local_spread(i, numa_node),
+ irq->cpu_mask);
+ rc = irq_set_affinity_hint(irq->vector, irq->cpu_mask);
+ if (rc) {
+ netdev_warn(bp->dev,
+ "Set affinity failed, IRQ = %d\n",
+ irq->vector);
+ break;
+ }
+ }
}
return rc;
}
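cpumask_local_spread(i, node) returns the i-th CPU, counting first through the CPUs of the given NUMA node and then the remaining online CPUs, so consecutive vectors get distinct CPUs near the device. A worked illustration under an assumed topology:

	/* Assume CPUs 0-3 on node 0, CPUs 4-7 on node 1, device on node 1:
	 *   cpumask_local_spread(0, 1) -> 4
	 *   cpumask_local_spread(1, 1) -> 5
	 *   cpumask_local_spread(2, 1) -> 6
	 *   cpumask_local_spread(3, 1) -> 7
	 * so a 4-ring config pins its IRQ affinity hints to CPUs 4-7.
	 */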
@@ -5548,12 +5688,10 @@ void bnxt_tx_disable(struct bnxt *bp)
{
int i;
struct bnxt_tx_ring_info *txr;
- struct netdev_queue *txq;
if (bp->tx_ring) {
for (i = 0; i < bp->tx_nr_rings; i++) {
txr = &bp->tx_ring[i];
- txq = netdev_get_tx_queue(bp->dev, i);
txr->dev_state = BNXT_DEV_STATE_CLOSING;
}
}
@@ -5566,11 +5704,9 @@ void bnxt_tx_enable(struct bnxt *bp)
{
int i;
struct bnxt_tx_ring_info *txr;
- struct netdev_queue *txq;
for (i = 0; i < bp->tx_nr_rings; i++) {
txr = &bp->tx_ring[i];
- txq = netdev_get_tx_queue(bp->dev, i);
txr->dev_state = 0;
}
netif_tx_wake_all_queues(bp->dev);
@@ -5635,7 +5771,7 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
if (rc)
goto hwrm_phy_qcaps_exit;
- if (resp->eee_supported & PORT_PHY_QCAPS_RESP_EEE_SUPPORTED) {
+ if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED) {
struct ethtool_eee *eee = &bp->eee;
u16 fw_speeds = le16_to_cpu(resp->supported_speeds_eee_mode);
@@ -5650,6 +5786,8 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
link_info->support_auto_speeds =
le16_to_cpu(resp->supported_speeds_auto_mode);
+ bp->port_count = resp->port_cnt;
+
hwrm_phy_qcaps_exit:
mutex_unlock(&bp->hwrm_cmd_lock);
return rc;
@@ -5675,13 +5813,15 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
memcpy(&link_info->phy_qcfg_resp, resp, sizeof(*resp));
link_info->phy_link_status = resp->link;
- link_info->duplex = resp->duplex;
+ link_info->duplex = resp->duplex_cfg;
+ if (bp->hwrm_spec_code >= 0x10800)
+ link_info->duplex = resp->duplex_state;
link_info->pause = resp->pause;
link_info->auto_mode = resp->auto_mode;
link_info->auto_pause_setting = resp->auto_pause;
link_info->lp_pause = resp->link_partner_adv_pause;
link_info->force_pause_setting = resp->force_pause;
- link_info->duplex_setting = resp->duplex;
+ link_info->duplex_setting = resp->duplex_cfg;
if (link_info->phy_link_status == BNXT_LINK_LINK)
link_info->link_speed = le16_to_cpu(resp->link_speed);
else
@@ -6203,6 +6343,9 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
/* Poll link status and check for SFP+ module status */
bnxt_get_port_module_status(bp);
+ /* VF-reps may need to be re-opened after the PF is re-opened */
+ if (BNXT_PF(bp))
+ bnxt_vf_reps_open(bp);
return 0;
open_err:
@@ -6291,6 +6434,10 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
if (rc)
netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete!\n");
}
+
+ /* Close the VF-reps before closing the PF */
+ if (BNXT_PF(bp))
+ bnxt_vf_reps_close(bp);
#endif
/* Change device state to avoid TX queue wake up's */
bnxt_tx_disable(bp);
@@ -6802,7 +6949,8 @@ static void bnxt_timer(unsigned long data)
if (atomic_read(&bp->intr_sem) != 0)
goto bnxt_restart_timer;
- if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS)) {
+ if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS) &&
+ bp->stats_coal_ticks) {
set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event);
schedule_work(&bp->sp_task);
}
@@ -6912,8 +7060,8 @@ static void bnxt_sp_task(struct work_struct *work)
}
/* Under rtnl_lock */
-int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
- int tx_xdp)
+int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
+ int tx_xdp)
{
int max_rx, max_tx, tx_sets = 1;
int tx_rings_needed;
@@ -6933,10 +7081,7 @@ int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
if (max_tx < tx_rings_needed)
return -ENOMEM;
- if (bnxt_hwrm_reserve_tx_rings(bp, &tx_rings_needed) ||
- tx_rings_needed < (tx * tx_sets + tx_xdp))
- return -ENOMEM;
- return 0;
+ return bnxt_hwrm_check_tx_rings(bp, tx_rings_needed);
}
static void bnxt_unmap_bars(struct bnxt *bp, struct pci_dev *pdev)
@@ -7125,8 +7270,8 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
if (bp->flags & BNXT_FLAG_SHARED_RINGS)
sh = true;
- rc = bnxt_reserve_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
- sh, tc, bp->tx_nr_rings_xdp);
+ rc = bnxt_check_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
+ sh, tc, bp->tx_nr_rings_xdp);
if (rc)
return rc;
@@ -7152,15 +7297,33 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
return 0;
}
-static int bnxt_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *ntc)
+static int bnxt_setup_flower(struct net_device *dev,
+ struct tc_cls_flower_offload *cls_flower)
{
- if (ntc->type != TC_SETUP_MQPRIO)
- return -EINVAL;
+ struct bnxt *bp = netdev_priv(dev);
- ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ if (BNXT_VF(bp))
+ return -EOPNOTSUPP;
- return bnxt_setup_mq_tc(dev, ntc->mqprio->num_tc);
+ return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, cls_flower);
+}
+
+static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return bnxt_setup_flower(dev, type_data);
+ case TC_SETUP_MQPRIO: {
+ struct tc_mqprio_qopt *mqprio = type_data;
+
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+ return bnxt_setup_mq_tc(dev, mqprio->num_tc);
+ }
+ default:
+ return -EOPNOTSUPP;
+ }
}
#ifdef CONFIG_RFS_ACCEL
@@ -7412,6 +7575,102 @@ static void bnxt_udp_tunnel_del(struct net_device *dev,
schedule_work(&bp->sp_task);
}
+static int bnxt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev, u32 filter_mask,
+ int nlflags)
+{
+ struct bnxt *bp = netdev_priv(dev);
+
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bp->br_mode, 0, 0,
+ nlflags, filter_mask, NULL);
+}
+
+static int bnxt_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
+ u16 flags)
+{
+ struct bnxt *bp = netdev_priv(dev);
+ struct nlattr *attr, *br_spec;
+ int rem, rc = 0;
+
+ if (bp->hwrm_spec_code < 0x10708 || !BNXT_SINGLE_PF(bp))
+ return -EOPNOTSUPP;
+
+ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+ if (!br_spec)
+ return -EINVAL;
+
+ nla_for_each_nested(attr, br_spec, rem) {
+ u16 mode;
+
+ if (nla_type(attr) != IFLA_BRIDGE_MODE)
+ continue;
+
+ if (nla_len(attr) < sizeof(mode))
+ return -EINVAL;
+
+ mode = nla_get_u16(attr);
+ if (mode == bp->br_mode)
+ break;
+
+ rc = bnxt_hwrm_set_br_mode(bp, mode);
+ if (!rc)
+ bp->br_mode = mode;
+ break;
+ }
+ return rc;
+}
+
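For reference, these bridge hooks are driven from userspace via RTM_SETLINK/RTM_GETLINK with an IFLA_AF_SPEC nest carrying IFLA_BRIDGE_MODE; with iproute2 that is roughly "bridge link set dev <pf> hwmode vepa" (illustrative command, not from this patch), which this code translates into the firmware EVB mode via bnxt_hwrm_set_br_mode() above.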
+static int bnxt_get_phys_port_name(struct net_device *dev, char *buf,
+ size_t len)
+{
+ struct bnxt *bp = netdev_priv(dev);
+ int rc;
+
+ /* The PF and its VF-reps only support the switchdev framework */
+ if (!BNXT_PF(bp))
+ return -EOPNOTSUPP;
+
+ rc = snprintf(buf, len, "p%d", bp->pf.port_id);
+
+ if (rc >= len)
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr)
+{
+ if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+ return -EOPNOTSUPP;
+
+ /* The PF and its VF-reps only support the switchdev framework */
+ if (!BNXT_PF(bp))
+ return -EOPNOTSUPP;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
+ /* In SRIOV, each PF-pool (PF + child VFs) serves as a
+ * switching domain; the PF's permanent MAC address can be
+ * used as the unique parent-id.
+ */
+ attr->u.ppid.id_len = ETH_ALEN;
+ ether_addr_copy(attr->u.ppid.id, bp->pf.mac_addr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int bnxt_swdev_port_attr_get(struct net_device *dev,
+ struct switchdev_attr *attr)
+{
+ return bnxt_port_attr_get(netdev_priv(dev), attr);
+}
+
+static const struct switchdev_ops bnxt_switchdev_ops = {
+ .switchdev_port_attr_get = bnxt_swdev_port_attr_get
+};
+
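With these switchdev ops plus .ndo_get_phys_port_name wired up, userspace can discover the switch topology generically: the PF and its VF-reps expose the same parent ID (the PF's permanent MAC) through /sys/class/net/<dev>/phys_switch_id, and the PF reports p<port_id> through /sys/class/net/<dev>/phys_port_name.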
static const struct net_device_ops bnxt_netdev_ops = {
.ndo_open = bnxt_open,
.ndo_start_xmit = bnxt_start_xmit,
@@ -7443,6 +7702,9 @@ static const struct net_device_ops bnxt_netdev_ops = {
.ndo_udp_tunnel_add = bnxt_udp_tunnel_add,
.ndo_udp_tunnel_del = bnxt_udp_tunnel_del,
.ndo_xdp = bnxt_xdp,
+ .ndo_bridge_getlink = bnxt_bridge_getlink,
+ .ndo_bridge_setlink = bnxt_bridge_setlink,
+ .ndo_get_phys_port_name = bnxt_get_phys_port_name
};
static void bnxt_remove_one(struct pci_dev *pdev)
@@ -7450,11 +7712,14 @@ static void bnxt_remove_one(struct pci_dev *pdev)
struct net_device *dev = pci_get_drvdata(pdev);
struct bnxt *bp = netdev_priv(dev);
- if (BNXT_PF(bp))
+ if (BNXT_PF(bp)) {
bnxt_sriov_disable(bp);
+ bnxt_dl_unregister(bp);
+ }
pci_disable_pcie_error_reporting(pdev);
unregister_netdev(dev);
+ bnxt_shutdown_tc(bp);
cancel_work_sync(&bp->sp_task);
bp->sp_event = 0;
@@ -7623,6 +7888,9 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh)
if (sh)
bp->flags |= BNXT_FLAG_SHARED_RINGS;
dflt_rings = netif_get_num_default_rss_queues();
+ /* Limit the default ring count to reduce memory usage on multi-port cards */
+ if (bp->port_count > 1)
+ dflt_rings = min_t(int, dflt_rings, 4);
rc = bnxt_get_dflt_rings(bp, &max_rx_rings, &max_tx_rings, sh);
if (rc)
return rc;
@@ -7722,6 +7990,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dev->netdev_ops = &bnxt_netdev_ops;
dev->watchdog_timeo = BNXT_TX_TIMEOUT;
dev->ethtool_ops = &bnxt_ethtool_ops;
+ SWITCHDEV_SET_OPS(dev, &bnxt_switchdev_ops);
pci_set_drvdata(pdev, dev);
rc = bnxt_alloc_hwrm_resources(bp);
@@ -7776,6 +8045,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
#ifdef CONFIG_BNXT_SRIOV
init_waitqueue_head(&bp->sriov_cfg_wait);
+ mutex_init(&bp->sriov_lock);
#endif
bp->gro_func = bnxt_gro_func_5730x;
if (BNXT_CHIP_P4_PLUS(bp))
@@ -7820,6 +8090,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
bnxt_ethtool_init(bp);
bnxt_dcb_init(bp);
+ rc = bnxt_probe_phy(bp);
+ if (rc)
+ goto init_err_pci_clean;
+
bnxt_set_rx_skb_mode(bp, false);
bnxt_set_tpa_flags(bp);
bnxt_set_ring_params(bp);
@@ -7854,10 +8128,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (dev->hw_features & NETIF_F_HW_VLAN_CTAG_RX)
bp->flags |= BNXT_FLAG_STRIP_VLAN;
- rc = bnxt_probe_phy(bp);
- if (rc)
- goto init_err_pci_clean;
-
rc = bnxt_init_int_mode(bp);
if (rc)
goto init_err_pci_clean;
@@ -7868,9 +8138,15 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
else
device_set_wakeup_capable(&pdev->dev, false);
+ if (BNXT_PF(bp))
+ bnxt_init_tc(bp);
+
rc = register_netdev(dev);
if (rc)
- goto init_err_clr_int;
+ goto init_err_cleanup_tc;
+
+ if (BNXT_PF(bp))
+ bnxt_dl_register(bp);
netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
board_info[ent->driver_data].name,
@@ -7880,7 +8156,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
-init_err_clr_int:
+init_err_cleanup_tc:
+ bnxt_shutdown_tc(bp);
bnxt_clear_int_mode(bp);
init_err_pci_clean:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index f34691f..7b888d4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -12,13 +12,17 @@
#define BNXT_H
#define DRV_MODULE_NAME "bnxt_en"
-#define DRV_MODULE_VERSION "1.7.0"
+#define DRV_MODULE_VERSION "1.8.0"
#define DRV_VER_MAJ 1
-#define DRV_VER_MIN 7
+#define DRV_VER_MIN 8
#define DRV_VER_UPD 0
#include <linux/interrupt.h>
+#include <linux/rhashtable.h>
+#include <net/devlink.h>
+#include <net/dst_metadata.h>
+#include <net/switchdev.h>
struct tx_bd {
__le32 tx_bd_len_flags_type;
@@ -242,6 +246,10 @@ struct rx_cmp_ext {
((le32_to_cpu((rxcmp1)->rx_cmp_flags2) & \
RX_CMP_FLAGS2_T_L4_CS_CALC) >> 3)
+#define RX_CMP_CFA_CODE(rxcmpl1) \
+ ((le32_to_cpu((rxcmpl1)->rx_cmp_cfa_code_errors_v2) & \
+ RX_CMPL_CFA_CODE_MASK) >> RX_CMPL_CFA_CODE_SFT)
+
struct rx_agg_cmp {
__le32 rx_agg_cmp_len_flags_type;
#define RX_AGG_CMP_TYPE (0x3f << 0)
@@ -311,6 +319,10 @@ struct rx_tpa_start_cmp_ext {
__le32 rx_tpa_start_cmp_hdr_info;
};
+#define TPA_START_CFA_CODE(rx_tpa_start) \
+ ((le32_to_cpu((rx_tpa_start)->rx_tpa_start_cmp_cfa_code_v2) & \
+ RX_TPA_START_CMP_CFA_CODE) >> RX_TPA_START_CMPL_CFA_CODE_SHIFT)
+
struct rx_tpa_end_cmp {
__le32 rx_tpa_end_cmp_len_flags_type;
#define RX_TPA_END_CMP_TYPE (0x3f << 0)
@@ -618,6 +630,8 @@ struct bnxt_tpa_info {
#define BNXT_TPA_OUTER_L3_OFF(hdr_info) \
((hdr_info) & 0x1ff)
+
+ u16 cfa_code; /* cfa_code in TPA start compl */
};
struct bnxt_rx_ring_info {
@@ -688,8 +702,10 @@ struct bnxt_napi {
struct bnxt_irq {
irq_handler_t handler;
unsigned int vector;
- u8 requested;
+ u8 requested:1;
+ u8 have_cpumask:1;
char name[IFNAMSIZ + 2];
+ cpumask_var_t cpu_mask;
};
#define HWRM_RING_ALLOC_TX 0x1
@@ -825,8 +841,8 @@ struct bnxt_link_info {
u8 loop_back;
u8 link_up;
u8 duplex;
-#define BNXT_LINK_DUPLEX_HALF PORT_PHY_QCFG_RESP_DUPLEX_HALF
-#define BNXT_LINK_DUPLEX_FULL PORT_PHY_QCFG_RESP_DUPLEX_FULL
+#define BNXT_LINK_DUPLEX_HALF PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF
+#define BNXT_LINK_DUPLEX_FULL PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL
u8 pause;
#define BNXT_LINK_PAUSE_TX PORT_PHY_QCFG_RESP_PAUSE_TX
#define BNXT_LINK_PAUSE_RX PORT_PHY_QCFG_RESP_PAUSE_RX
@@ -928,6 +944,45 @@ struct bnxt_test_info {
#define BNXT_CAG_REG_LEGACY_INT_STATUS 0x4014
#define BNXT_CAG_REG_BASE 0x300000
+struct bnxt_tc_info {
+ bool enabled;
+
+ /* hash table to store TC offloaded flows */
+ struct rhashtable flow_table;
+ struct rhashtable_params flow_ht_params;
+
+ /* hash table to store L2 keys of TC flows */
+ struct rhashtable l2_table;
+ struct rhashtable_params l2_ht_params;
+
+ /* lock to atomically add/del an l2 node when a flow is
+ * added or deleted.
+ */
+ struct mutex lock;
+
+ /* Stat counter mask (width) */
+ u64 bytes_mask;
+ u64 packets_mask;
+};
+
+struct bnxt_vf_rep_stats {
+ u64 packets;
+ u64 bytes;
+ u64 dropped;
+};
+
+struct bnxt_vf_rep {
+ struct bnxt *bp;
+ struct net_device *dev;
+ struct metadata_dst *dst;
+ u16 vf_idx;
+ u16 tx_cfa_action;
+ u16 rx_cfa_code;
+
+ struct bnxt_vf_rep_stats rx_stats;
+ struct bnxt_vf_rep_stats tx_stats;
+};
+
struct bnxt {
void __iomem *bar0;
void __iomem *bar1;
@@ -957,6 +1012,9 @@ struct bnxt {
#define CHIP_NUM_5745X 0xd730
+#define CHIP_NUM_58802 0xd802
+#define CHIP_NUM_58808 0xd808
+
#define BNXT_CHIP_NUM_5730X(chip_num) \
((chip_num) >= CHIP_NUM_57301 && \
(chip_num) <= CHIP_NUM_57304)
@@ -988,6 +1046,10 @@ struct bnxt {
#define BNXT_CHIP_NUM_57X1X(chip_num) \
(BNXT_CHIP_NUM_5731X(chip_num) || BNXT_CHIP_NUM_5741X(chip_num))
+#define BNXT_CHIP_NUM_588XX(chip_num) \
+ ((chip_num) == CHIP_NUM_58802 || \
+ (chip_num) == CHIP_NUM_58808)
+
struct net_device *dev;
struct pci_dev *pdev;
@@ -1027,6 +1089,7 @@ struct bnxt {
#define BNXT_FLAG_MULTI_HOST 0x100000
#define BNXT_FLAG_SHORT_CMD 0x200000
#define BNXT_FLAG_DOUBLE_DB 0x400000
+ #define BNXT_FLAG_FW_DCBX_AGENT 0x800000
#define BNXT_FLAG_CHIP_NITRO_A0 0x1000000
#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \
@@ -1045,6 +1108,7 @@ struct bnxt {
#define BNXT_CHIP_P4_PLUS(bp) \
(BNXT_CHIP_NUM_57X1X((bp)->chip_num) || \
BNXT_CHIP_NUM_5745X((bp)->chip_num) || \
+ BNXT_CHIP_NUM_588XX((bp)->chip_num) || \
(BNXT_CHIP_NUM_58700((bp)->chip_num) && \
!BNXT_CHIP_TYPE_NITRO_A0(bp)))
@@ -1086,6 +1150,7 @@ struct bnxt {
int tx_nr_rings;
int tx_nr_rings_per_tc;
int tx_nr_rings_xdp;
+ int tx_reserved_rings;
int tx_wake_thresh;
int tx_push_thresh;
@@ -1164,6 +1229,8 @@ struct bnxt {
u8 nge_port_cnt;
__le16 nge_fw_dst_port_id;
u8 port_partition_type;
+ u8 port_count;
+ u16 br_mode;
u16 rx_coal_ticks;
u16 rx_coal_ticks_irq;
@@ -1206,6 +1273,12 @@ struct bnxt {
wait_queue_head_t sriov_cfg_wait;
bool sriov_cfg;
#define BNXT_SRIOV_CFG_WAIT_TMO msecs_to_jiffies(10000)
+
+ /* lock to protect VF-rep creation/cleanup via
+ * multiple paths such as ->sriov_configure() and
+ * devlink ->eswitch_mode_set()
+ */
+ struct mutex sriov_lock;
#endif
#define BNXT_NTP_FLTR_MAX_FLTR 4096
@@ -1232,6 +1305,13 @@ struct bnxt {
struct bnxt_led_info leds[BNXT_MAX_LED];
struct bpf_prog *xdp_prog;
+
+ /* devlink interface and vf-rep structs */
+ struct devlink *dl;
+ enum devlink_eswitch_mode eswitch_mode;
+ struct bnxt_vf_rep **vf_reps; /* array of vf-rep ptrs */
+ u16 *cfa_code_map; /* cfa_code -> vf_idx map */
+ struct bnxt_tc_info tc_info;
};
#define BNXT_RX_STATS_OFFSET(counter) \
@@ -1301,9 +1381,10 @@ int bnxt_open_nic(struct bnxt *, bool, bool);
int bnxt_half_open_nic(struct bnxt *bp);
void bnxt_half_close_nic(struct bnxt *bp);
int bnxt_close_nic(struct bnxt *, bool, bool);
-int bnxt_reserve_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
- int tx_xdp);
+int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
+ int tx_xdp);
int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
int bnxt_get_max_rings(struct bnxt *, int *, int *, bool);
void bnxt_restore_pf_fw_resources(struct bnxt *bp);
+int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr);
#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
index 5c6dd0c..aa1f3a2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -93,6 +93,12 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
cos2bw.tsa =
QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_ETS;
cos2bw.bw_weight = ets->tc_tx_bw[i];
+ /* older firmware requires min_bw to be set to the
+ * same weight value in percent.
+ */
+ cos2bw.min_bw =
+ cpu_to_le32((ets->tc_tx_bw[i] * 100) |
+ BW_VALUE_UNIT_PERCENT1_100);
}
memcpy(data, &cos2bw.queue_id, sizeof(cos2bw) - 4);
if (i == 0) {
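Worked example of the encoding above: for ets->tc_tx_bw[i] = 25 (25%), min_bw becomes (25 * 100) | BW_VALUE_UNIT_PERCENT1_100, i.e. 2500 in 1/100th-percent units with the unit bit (bit 29) set, which older firmware reads as 25.00%.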
@@ -549,13 +555,18 @@ static u8 bnxt_dcbnl_setdcbx(struct net_device *dev, u8 mode)
{
struct bnxt *bp = netdev_priv(dev);
- /* only support IEEE */
- if ((mode & DCB_CAP_DCBX_VER_CEE) || !(mode & DCB_CAP_DCBX_VER_IEEE))
+ /* All firmware DCBX settings are set in NVRAM */
+ if (bp->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)
return 1;
if (mode & DCB_CAP_DCBX_HOST) {
if (BNXT_VF(bp) || (bp->flags & BNXT_FLAG_FW_LLDP_AGENT))
return 1;
+
+ /* only support IEEE */
+ if ((mode & DCB_CAP_DCBX_VER_CEE) ||
+ !(mode & DCB_CAP_DCBX_VER_IEEE))
+ return 1;
}
if (mode == bp->dcbx_cap)
@@ -584,7 +595,7 @@ void bnxt_dcb_init(struct bnxt *bp)
bp->dcbx_cap = DCB_CAP_DCBX_VER_IEEE;
if (BNXT_PF(bp) && !(bp->flags & BNXT_FLAG_FW_LLDP_AGENT))
bp->dcbx_cap |= DCB_CAP_DCBX_HOST;
- else
+ else if (bp->flags & BNXT_FLAG_FW_DCBX_AGENT)
bp->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED;
bp->dev->dcbnl_ops = &dcbnl_ops;
}
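Net effect of these two DCB hunks: DCB_CAP_DCBX_HOST is advertised only when the firmware runs no LLDP agent, DCB_CAP_DCBX_LLD_MANAGED only when the firmware actually provides a DCBX agent (the new BNXT_FLAG_FW_DCBX_AGENT), and setdcbx refuses to change firmware-managed settings, since those are configured in NVRAM.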
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h
index ecd0a5e..d2e0af9 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h
@@ -26,6 +26,7 @@ struct bnxt_cos2bw_cfg {
u8 queue_id;
__le32 min_bw;
__le32 max_bw;
+#define BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
u8 tsa;
u8 pri_lvl;
u8 bw_weight;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index be6acad..8eff05a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -86,9 +86,11 @@ static int bnxt_set_coalesce(struct net_device *dev,
if (bp->stats_coal_ticks != coal->stats_block_coalesce_usecs) {
u32 stats_ticks = coal->stats_block_coalesce_usecs;
- stats_ticks = clamp_t(u32, stats_ticks,
- BNXT_MIN_STATS_COAL_TICKS,
- BNXT_MAX_STATS_COAL_TICKS);
+ /* Allow 0, which means disable. */
+ if (stats_ticks)
+ stats_ticks = clamp_t(u32, stats_ticks,
+ BNXT_MIN_STATS_COAL_TICKS,
+ BNXT_MAX_STATS_COAL_TICKS);
stats_ticks = rounddown(stats_ticks, BNXT_MIN_STATS_COAL_TICKS);
bp->stats_coal_ticks = stats_ticks;
update_stats = true;
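Paired with the bnxt_timer() change earlier, which skips the periodic port-stats request when stats_coal_ticks is 0, this lets users disable the stats DMA entirely, e.g. "ethtool -C <dev> stats-block-usecs 0" (illustrative command).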
@@ -198,19 +200,23 @@ static const struct {
#define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr)
+static int bnxt_get_num_stats(struct bnxt *bp)
+{
+ int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
+
+ if (bp->flags & BNXT_FLAG_PORT_STATS)
+ num_stats += BNXT_NUM_PORT_STATS;
+
+ return num_stats;
+}
+
static int bnxt_get_sset_count(struct net_device *dev, int sset)
{
struct bnxt *bp = netdev_priv(dev);
switch (sset) {
- case ETH_SS_STATS: {
- int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
-
- if (bp->flags & BNXT_FLAG_PORT_STATS)
- num_stats += BNXT_NUM_PORT_STATS;
-
- return num_stats;
- }
+ case ETH_SS_STATS:
+ return bnxt_get_num_stats(bp);
case ETH_SS_TEST:
if (!bp->num_tests)
return -EOPNOTSUPP;
@@ -225,11 +231,8 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
{
u32 i, j = 0;
struct bnxt *bp = netdev_priv(dev);
- u32 buf_size = sizeof(struct ctx_hw_stats) * bp->cp_nr_rings;
u32 stat_fields = sizeof(struct ctx_hw_stats) / 8;
- memset(buf, 0, buf_size);
-
if (!bp->bnapi)
return;
@@ -432,8 +435,7 @@ static int bnxt_set_channels(struct net_device *dev,
}
tx_xdp = req_rx_rings;
}
- rc = bnxt_reserve_rings(bp, req_tx_rings, req_rx_rings, sh, tcs,
- tx_xdp);
+ rc = bnxt_check_rings(bp, req_tx_rings, req_rx_rings, sh, tcs, tx_xdp);
if (rc) {
netdev_warn(dev, "Unable to allocate the requested rings\n");
return rc;
@@ -520,7 +522,7 @@ static int bnxt_grxclsrule(struct bnxt *bp, struct ethtool_rxnfc *cmd)
struct flow_keys *fkeys;
int i, rc = -EINVAL;
- if (fs->location < 0 || fs->location >= BNXT_NTP_FLTR_MAX_FLTR)
+ if (fs->location >= BNXT_NTP_FLTR_MAX_FLTR)
return rc;
for (i = 0; i < BNXT_NTP_FLTR_HASH_SIZE; i++) {
@@ -835,7 +837,7 @@ static void bnxt_get_drvinfo(struct net_device *dev,
strlcpy(info->fw_version, bp->fw_ver_str,
sizeof(info->fw_version));
strlcpy(info->bus_info, pci_name(bp->pdev), sizeof(info->bus_info));
- info->n_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
+ info->n_stats = bnxt_get_num_stats(bp);
info->testinfo_len = bp->num_tests;
/* TODO CHIMP_FW: eeprom dump details */
info->eedump_len = 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 7dc71bb..cb04cc7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -11,14 +11,14 @@
#ifndef BNXT_HSI_H
#define BNXT_HSI_H
-/* HSI and HWRM Specification 1.7.6 */
+/* HSI and HWRM Specification 1.8.1 */
#define HWRM_VERSION_MAJOR 1
-#define HWRM_VERSION_MINOR 7
-#define HWRM_VERSION_UPDATE 6
+#define HWRM_VERSION_MINOR 8
+#define HWRM_VERSION_UPDATE 1
-#define HWRM_VERSION_RSVD 2 /* non-zero means beta version */
+#define HWRM_VERSION_RSVD 4 /* non-zero means beta version */
-#define HWRM_VERSION_STR "1.7.6.2"
+#define HWRM_VERSION_STR "1.8.1.4"
/*
* Following is the signature for HWRM message field that indicates not
* applicable (All F's). Need to cast it the size of the field if needed.
@@ -813,7 +813,7 @@ struct hwrm_func_qcfg_output {
#define FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED 0x4UL
#define FUNC_QCFG_RESP_FLAGS_STD_TX_RING_MODE_ENABLED 0x8UL
#define FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED 0x10UL
- #define FUNC_QCFG_RESP_FLAGS_MULTI_HOST 0x20UL
+ #define FUNC_QCFG_RESP_FLAGS_MULTI_HOST 0x20UL
u8 mac_address[6];
__le16 pci_id;
__le16 alloc_rsscos_ctx;
@@ -835,9 +835,8 @@ struct hwrm_func_qcfg_output {
u8 port_pf_cnt;
#define FUNC_QCFG_RESP_PORT_PF_CNT_UNAVAIL 0x0UL
__le16 dflt_vnic_id;
- u8 host_cnt;
- #define FUNC_QCFG_RESP_HOST_CNT_UNAVAIL 0x0UL
u8 unused_0;
+ u8 unused_1;
__le32 min_bw;
#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_MASK 0xfffffffUL
#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_SFT 0
@@ -874,12 +873,56 @@ struct hwrm_func_qcfg_output {
#define FUNC_QCFG_RESP_EVB_MODE_NO_EVB 0x0UL
#define FUNC_QCFG_RESP_EVB_MODE_VEB 0x1UL
#define FUNC_QCFG_RESP_EVB_MODE_VEPA 0x2UL
- u8 unused_1;
+ u8 unused_2;
__le16 alloc_vfs;
__le32 alloc_mcast_filters;
__le32 alloc_hw_ring_grps;
__le16 alloc_sp_tx_rings;
+ u8 unused_3;
+ u8 valid;
+};
+
+/* hwrm_func_vlan_cfg */
+/* Input (48 bytes) */
+struct hwrm_func_vlan_cfg_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 fid;
+ u8 unused_0;
+ u8 unused_1;
+ __le32 enables;
+ #define FUNC_VLAN_CFG_REQ_ENABLES_STAG_VID 0x1UL
+ #define FUNC_VLAN_CFG_REQ_ENABLES_CTAG_VID 0x2UL
+ #define FUNC_VLAN_CFG_REQ_ENABLES_STAG_PCP 0x4UL
+ #define FUNC_VLAN_CFG_REQ_ENABLES_CTAG_PCP 0x8UL
+ #define FUNC_VLAN_CFG_REQ_ENABLES_STAG_TPID 0x10UL
+ #define FUNC_VLAN_CFG_REQ_ENABLES_CTAG_TPID 0x20UL
+ __le16 stag_vid;
+ u8 stag_pcp;
+ u8 unused_2;
+ __be16 stag_tpid;
+ __le16 ctag_vid;
+ u8 ctag_pcp;
+ u8 unused_3;
+ __be16 ctag_tpid;
+ __le32 rsvd1;
+ __le32 rsvd2;
+ __le32 unused_4;
+};
+
+/* Output (16 bytes) */
+struct hwrm_func_vlan_cfg_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le32 unused_0;
+ u8 unused_1;
u8 unused_2;
+ u8 unused_3;
u8 valid;
};
@@ -902,6 +945,8 @@ struct hwrm_func_cfg_input {
#define FUNC_CFG_REQ_FLAGS_STD_TX_RING_MODE_ENABLE 0x200UL
#define FUNC_CFG_REQ_FLAGS_STD_TX_RING_MODE_DISABLE 0x400UL
#define FUNC_CFG_REQ_FLAGS_VIRT_MAC_PERSIST 0x800UL
+ #define FUNC_CFG_REQ_FLAGS_NO_AUTOCLEAR_STATISTIC 0x1000UL
+ #define FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST 0x2000UL
__le32 enables;
#define FUNC_CFG_REQ_ENABLES_MTU 0x1UL
#define FUNC_CFG_REQ_ENABLES_MRU 0x2UL
@@ -1456,9 +1501,9 @@ struct hwrm_port_phy_qcfg_output {
#define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB 0x1f4UL
#define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB 0x3e8UL
#define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB 0xffffUL
- u8 duplex;
- #define PORT_PHY_QCFG_RESP_DUPLEX_HALF 0x0UL
- #define PORT_PHY_QCFG_RESP_DUPLEX_FULL 0x1UL
+ u8 duplex_cfg;
+ #define PORT_PHY_QCFG_RESP_DUPLEX_CFG_HALF 0x0UL
+ #define PORT_PHY_QCFG_RESP_DUPLEX_CFG_FULL 0x1UL
u8 pause;
#define PORT_PHY_QCFG_RESP_PAUSE_TX 0x1UL
#define PORT_PHY_QCFG_RESP_PAUSE_RX 0x2UL
@@ -1573,6 +1618,9 @@ struct hwrm_port_phy_qcfg_output {
#define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_BASELR4 0x16UL
#define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_BASEER4 0x17UL
#define PORT_PHY_QCFG_RESP_PHY_TYPE_40G_ACTIVE_CABLE 0x18UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASET 0x19UL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASESX 0x1aUL
+ #define PORT_PHY_QCFG_RESP_PHY_TYPE_1G_BASECX 0x1bUL
u8 media_type;
#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL
#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP 0x1UL
@@ -1651,14 +1699,16 @@ struct hwrm_port_phy_qcfg_output {
#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_ENABLED 0x10UL
#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_SUPPORTED 0x20UL
#define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_ENABLED 0x40UL
+ u8 duplex_state;
+ #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF 0x0UL
+ #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL 0x1UL
u8 unused_1;
- u8 unused_2;
char phy_vendor_name[16];
char phy_vendor_partnumber[16];
- __le32 unused_3;
+ __le32 unused_2;
+ u8 unused_3;
u8 unused_4;
u8 unused_5;
- u8 unused_6;
u8 valid;
};
@@ -1744,6 +1794,51 @@ struct hwrm_port_mac_cfg_output {
u8 valid;
};
+/* hwrm_port_mac_ptp_qcfg */
+/* Input (24 bytes) */
+struct hwrm_port_mac_ptp_qcfg_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 port_id;
+ __le16 unused_0[3];
+};
+
+/* Output (80 bytes) */
+struct hwrm_port_mac_ptp_qcfg_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ u8 flags;
+ #define PORT_MAC_PTP_QCFG_RESP_FLAGS_DIRECT_ACCESS 0x1UL
+ #define PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS 0x2UL
+ u8 unused_0;
+ __le16 unused_1;
+ __le32 rx_ts_reg_off_lower;
+ __le32 rx_ts_reg_off_upper;
+ __le32 rx_ts_reg_off_seq_id;
+ __le32 rx_ts_reg_off_src_id_0;
+ __le32 rx_ts_reg_off_src_id_1;
+ __le32 rx_ts_reg_off_src_id_2;
+ __le32 rx_ts_reg_off_domain_id;
+ __le32 rx_ts_reg_off_fifo;
+ __le32 rx_ts_reg_off_fifo_adv;
+ __le32 rx_ts_reg_off_granularity;
+ __le32 tx_ts_reg_off_lower;
+ __le32 tx_ts_reg_off_upper;
+ __le32 tx_ts_reg_off_seq_id;
+ __le32 tx_ts_reg_off_fifo;
+ __le32 tx_ts_reg_off_granularity;
+ __le32 unused_2;
+ u8 unused_3;
+ u8 unused_4;
+ u8 unused_5;
+ u8 valid;
+};
+
/* hwrm_port_qstats */
/* Input (40 bytes) */
struct hwrm_port_qstats_input {
@@ -1874,11 +1969,16 @@ struct hwrm_port_phy_qcaps_output {
__le16 req_type;
__le16 seq_id;
__le16 resp_len;
- u8 eee_supported;
- #define PORT_PHY_QCAPS_RESP_EEE_SUPPORTED 0x1UL
- #define PORT_PHY_QCAPS_RESP_RSVD1_MASK 0xfeUL
- #define PORT_PHY_QCAPS_RESP_RSVD1_SFT 1
- u8 unused_0;
+ u8 flags;
+ #define PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED 0x1UL
+ #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_MASK 0xfeUL
+ #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_SFT 1
+ u8 port_cnt;
+ #define PORT_PHY_QCAPS_RESP_PORT_CNT_UNKNOWN 0x0UL
+ #define PORT_PHY_QCAPS_RESP_PORT_CNT_1 0x1UL
+ #define PORT_PHY_QCAPS_RESP_PORT_CNT_2 0x2UL
+ #define PORT_PHY_QCAPS_RESP_PORT_CNT_3 0x3UL
+ #define PORT_PHY_QCAPS_RESP_PORT_CNT_4 0x4UL
__le16 supported_speeds_force_mode;
#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD 0x1UL
#define PORT_PHY_QCAPS_RESP_SUPPORTED_SPEEDS_FORCE_MODE_100MB 0x2UL
@@ -3152,6 +3252,95 @@ struct hwrm_queue_cos2bw_cfg_output {
u8 valid;
};
+/* hwrm_queue_dscp_qcaps */
+/* Input (24 bytes) */
+struct hwrm_queue_dscp_qcaps_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ u8 port_id;
+ u8 unused_0[7];
+};
+
+/* Output (16 bytes) */
+struct hwrm_queue_dscp_qcaps_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ u8 num_dscp_bits;
+ u8 unused_0;
+ __le16 max_entries;
+ u8 unused_1;
+ u8 unused_2;
+ u8 unused_3;
+ u8 valid;
+};
+
+/* hwrm_queue_dscp2pri_qcfg */
+/* Input (32 bytes) */
+struct hwrm_queue_dscp2pri_qcfg_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le64 dest_data_addr;
+ u8 port_id;
+ u8 unused_0;
+ __le16 dest_data_buffer_size;
+ __le32 unused_1;
+};
+
+/* Output (16 bytes) */
+struct hwrm_queue_dscp2pri_qcfg_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le16 entry_cnt;
+ u8 default_pri;
+ u8 unused_0;
+ u8 unused_1;
+ u8 unused_2;
+ u8 unused_3;
+ u8 valid;
+};
+
+/* hwrm_queue_dscp2pri_cfg */
+/* Input (40 bytes) */
+struct hwrm_queue_dscp2pri_cfg_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le64 src_data_addr;
+ __le32 flags;
+ #define QUEUE_DSCP2PRI_CFG_REQ_FLAGS_USE_HW_DEFAULT_PRI 0x1UL
+ __le32 enables;
+ #define QUEUE_DSCP2PRI_CFG_REQ_ENABLES_DEFAULT_PRI 0x1UL
+ u8 port_id;
+ u8 default_pri;
+ __le16 entry_cnt;
+ __le32 unused_0;
+};
+
+/* Output (16 bytes) */
+struct hwrm_queue_dscp2pri_cfg_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le32 unused_0;
+ u8 unused_1;
+ u8 unused_2;
+ u8 unused_3;
+ u8 valid;
+};
+
/* hwrm_vnic_alloc */
/* Input (24 bytes) */
struct hwrm_vnic_alloc_input {
@@ -4038,7 +4227,7 @@ struct hwrm_cfa_encap_record_alloc_input {
#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE 0x8UL
u8 unused_0;
__le16 unused_1;
- __le32 encap_data[16];
+ __le32 encap_data[20];
};
/* Output (16 bytes) */
@@ -4120,8 +4309,8 @@ struct hwrm_cfa_ntuple_filter_alloc_input {
#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 0x6UL
u8 ip_protocol;
#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN 0x0UL
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP 0x6UL
- #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP 0x11UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP 0x6UL
+ #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP 0x11UL
__le16 dst_id;
__le16 mirror_vnic_id;
u8 tunnel_type;
@@ -4224,6 +4413,216 @@ struct hwrm_cfa_ntuple_filter_cfg_output {
u8 valid;
};
+/* hwrm_cfa_flow_alloc */
+/* Input (128 bytes) */
+struct hwrm_cfa_flow_alloc_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 flags;
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL 0x1UL
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_MASK 0x6UL
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_SFT 1
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_NONE (0x0UL << 1)
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE (0x1UL << 1)
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_TWO (0x2UL << 1)
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_LAST CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_TWO
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_MASK 0x38UL
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_SFT 3
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2 (0x0UL << 3)
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4 (0x1UL << 3)
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6 (0x2UL << 3)
+ #define CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_LAST CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6
+ __le16 src_fid;
+ __le32 tunnel_handle;
+ __le16 action_flags;
+ #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD 0x1UL
+ #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_RECYCLE 0x2UL
+ #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP 0x4UL
+ #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_METER 0x8UL
+ #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL 0x10UL
+ #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC 0x20UL
+ #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST 0x40UL
+ #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS 0x80UL
+ #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE 0x100UL
+ #define CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TTL_DECREMENT 0x200UL
+ __le16 dst_fid;
+ __be16 l2_rewrite_vlan_tpid;
+ __be16 l2_rewrite_vlan_tci;
+ __le16 act_meter_id;
+ __le16 ref_flow_handle;
+ __be16 ethertype;
+ __be16 outer_vlan_tci;
+ __be16 dmac[3];
+ __be16 inner_vlan_tci;
+ __be16 smac[3];
+ u8 ip_dst_mask_len;
+ u8 ip_src_mask_len;
+ __be32 ip_dst[4];
+ __be32 ip_src[4];
+ __be16 l4_src_port;
+ __be16 l4_src_port_mask;
+ __be16 l4_dst_port;
+ __be16 l4_dst_port_mask;
+ __be32 nat_ip_address[4];
+ __be16 l2_rewrite_dmac[3];
+ __be16 nat_port;
+ __be16 l2_rewrite_smac[3];
+ u8 ip_proto;
+ u8 unused_0;
+};
+
+/* Output (16 bytes) */
+struct hwrm_cfa_flow_alloc_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le16 flow_handle;
+ u8 unused_0;
+ u8 unused_1;
+ u8 unused_2;
+ u8 unused_3;
+ u8 unused_4;
+ u8 valid;
+};
+
+/* hwrm_cfa_flow_free */
+/* Input (24 bytes) */
+struct hwrm_cfa_flow_free_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 flow_handle;
+ __le16 unused_0[3];
+};
+
+/* Output (32 bytes) */
+struct hwrm_cfa_flow_free_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le64 packet;
+ __le64 byte;
+ __le32 unused_0;
+ u8 unused_1;
+ u8 unused_2;
+ u8 unused_3;
+ u8 valid;
+};
+
+/* hwrm_cfa_flow_stats */
+/* Input (40 bytes) */
+struct hwrm_cfa_flow_stats_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 num_flows;
+ __le16 flow_handle_0;
+ __le16 flow_handle_1;
+ __le16 flow_handle_2;
+ __le16 flow_handle_3;
+ __le16 flow_handle_4;
+ __le16 flow_handle_5;
+ __le16 flow_handle_6;
+ __le16 flow_handle_7;
+ __le16 flow_handle_8;
+ __le16 flow_handle_9;
+ __le16 unused_0;
+};
+
+/* Output (176 bytes) */
+struct hwrm_cfa_flow_stats_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le64 packet_0;
+ __le64 packet_1;
+ __le64 packet_2;
+ __le64 packet_3;
+ __le64 packet_4;
+ __le64 packet_5;
+ __le64 packet_6;
+ __le64 packet_7;
+ __le64 packet_8;
+ __le64 packet_9;
+ __le64 byte_0;
+ __le64 byte_1;
+ __le64 byte_2;
+ __le64 byte_3;
+ __le64 byte_4;
+ __le64 byte_5;
+ __le64 byte_6;
+ __le64 byte_7;
+ __le64 byte_8;
+ __le64 byte_9;
+ __le32 unused_0;
+ u8 unused_1;
+ u8 unused_2;
+ u8 unused_3;
+ u8 valid;
+};
+
+/* hwrm_cfa_vfr_alloc */
+/* Input (32 bytes) */
+struct hwrm_cfa_vfr_alloc_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ __le16 vf_id;
+ __le16 reserved;
+ __le32 unused_0;
+ char vfr_name[32];
+};
+
+/* Output (16 bytes) */
+struct hwrm_cfa_vfr_alloc_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le16 rx_cfa_code;
+ __le16 tx_cfa_action;
+ u8 unused_0;
+ u8 unused_1;
+ u8 unused_2;
+ u8 valid;
+};
+
+/* hwrm_cfa_vfr_free */
+/* Input (24 bytes) */
+struct hwrm_cfa_vfr_free_input {
+ __le16 req_type;
+ __le16 cmpl_ring;
+ __le16 seq_id;
+ __le16 target_id;
+ __le64 resp_addr;
+ char vfr_name[32];
+};
+
+/* Output (16 bytes) */
+struct hwrm_cfa_vfr_free_output {
+ __le16 error_code;
+ __le16 req_type;
+ __le16 seq_id;
+ __le16 resp_len;
+ __le32 unused_0;
+ u8 unused_1;
+ u8 unused_2;
+ u8 unused_3;
+ u8 valid;
+};
+
/* hwrm_tunnel_dst_port_query */
/* Input (24 bytes) */
struct hwrm_tunnel_dst_port_query_input {
@@ -4448,12 +4847,13 @@ struct hwrm_fw_reset_input {
#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL
#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL
#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL
- #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD 0x4UL
+ #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST 0x4UL
u8 selfrst_status;
#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE 0x0UL
#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP 0x1UL
#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST 0x2UL
- __le16 unused_0[3];
+ u8 host_idx;
+ u8 unused_0[5];
};
/* Output (16 bytes) */
@@ -4487,7 +4887,7 @@ struct hwrm_fw_qstatus_input {
#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL
#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL
#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL
- #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD 0x4UL
+ #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_HOST 0x4UL
u8 unused_0[7];
};
@@ -4572,6 +4972,16 @@ struct hwrm_fw_set_structured_data_output {
u8 valid;
};
+/* Command specific Error Codes (8 bytes) */
+struct hwrm_fw_set_structured_data_cmd_err {
+ u8 code;
+ #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL
+ #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_HDR_CNT 0x1UL
+ #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_FMT 0x2UL
+ #define FW_SET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL
+ u8 unused_0[7];
+};
+
/* hwrm_fw_get_structured_data */
/* Input (32 bytes) */
struct hwrm_fw_get_structured_data_input {
@@ -4611,6 +5021,14 @@ struct hwrm_fw_get_structured_data_output {
u8 valid;
};
+/* Command specific Error Codes (8 bytes) */
+struct hwrm_fw_get_structured_data_cmd_err {
+ u8 code;
+ #define FW_GET_STRUCTURED_DATA_CMD_ERR_CODE_UNKNOWN 0x0UL
+ #define FW_GET_STRUCTURED_DATA_CMD_ERR_CODE_BAD_ID 0x3UL
+ u8 unused_0[7];
+};
+
/* hwrm_exec_fwd_resp */
/* Input (128 bytes) */
struct hwrm_exec_fwd_resp_input {
@@ -5280,11 +5698,15 @@ struct hwrm_selftest_qlist_output {
#define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_LINK_TEST 0x2UL
#define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_REGISTER_TEST 0x4UL
#define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_MEMORY_TEST 0x8UL
+ #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_PCIE_EYE_TEST 0x10UL
+ #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_ETHERNET_EYE_TEST 0x20UL
u8 offline_tests;
#define SELFTEST_QLIST_RESP_OFFLINE_TESTS_NVM_TEST 0x1UL
#define SELFTEST_QLIST_RESP_OFFLINE_TESTS_LINK_TEST 0x2UL
#define SELFTEST_QLIST_RESP_OFFLINE_TESTS_REGISTER_TEST 0x4UL
#define SELFTEST_QLIST_RESP_OFFLINE_TESTS_MEMORY_TEST 0x8UL
+ #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_PCIE_EYE_TEST 0x10UL
+ #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_ETHERNET_EYE_TEST 0x20UL
u8 unused_0;
__le16 test_timeout;
u8 unused_1;
@@ -5312,6 +5734,8 @@ struct hwrm_selftest_exec_input {
#define SELFTEST_EXEC_REQ_FLAGS_LINK_TEST 0x2UL
#define SELFTEST_EXEC_REQ_FLAGS_REGISTER_TEST 0x4UL
#define SELFTEST_EXEC_REQ_FLAGS_MEMORY_TEST 0x8UL
+ #define SELFTEST_EXEC_REQ_FLAGS_PCIE_EYE_TEST 0x10UL
+ #define SELFTEST_EXEC_REQ_FLAGS_ETHERNET_EYE_TEST 0x20UL
u8 unused_0[7];
};
@@ -5326,11 +5750,15 @@ struct hwrm_selftest_exec_output {
#define SELFTEST_EXEC_RESP_REQUESTED_TESTS_LINK_TEST 0x2UL
#define SELFTEST_EXEC_RESP_REQUESTED_TESTS_REGISTER_TEST 0x4UL
#define SELFTEST_EXEC_RESP_REQUESTED_TESTS_MEMORY_TEST 0x8UL
+ #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_PCIE_EYE_TEST 0x10UL
+ #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_ETHERNET_EYE_TEST 0x20UL
u8 test_success;
#define SELFTEST_EXEC_RESP_TEST_SUCCESS_NVM_TEST 0x1UL
#define SELFTEST_EXEC_RESP_TEST_SUCCESS_LINK_TEST 0x2UL
#define SELFTEST_EXEC_RESP_TEST_SUCCESS_REGISTER_TEST 0x4UL
#define SELFTEST_EXEC_RESP_TEST_SUCCESS_MEMORY_TEST 0x8UL
+ #define SELFTEST_EXEC_RESP_TEST_SUCCESS_PCIE_EYE_TEST 0x10UL
+ #define SELFTEST_EXEC_RESP_TEST_SUCCESS_ETHERNET_EYE_TEST 0x20UL
__le16 unused_0[3];
};
@@ -5411,7 +5839,7 @@ struct cmd_nums {
#define HWRM_PORT_LPBK_CLR_STATS (0x26UL)
#define HWRM_PORT_PHY_QCFG (0x27UL)
#define HWRM_PORT_MAC_QCFG (0x28UL)
- #define RESERVED7 (0x29UL)
+ #define HWRM_PORT_MAC_PTP_QCFG (0x29UL)
#define HWRM_PORT_PHY_QCAPS (0x2aUL)
#define HWRM_PORT_PHY_I2C_WRITE (0x2bUL)
#define HWRM_PORT_PHY_I2C_READ (0x2cUL)
@@ -5421,14 +5849,17 @@ struct cmd_nums {
#define HWRM_QUEUE_QPORTCFG (0x30UL)
#define HWRM_QUEUE_QCFG (0x31UL)
#define HWRM_QUEUE_CFG (0x32UL)
- #define RESERVED2 (0x33UL)
- #define RESERVED3 (0x34UL)
+ #define HWRM_FUNC_VLAN_CFG (0x33UL)
+ #define HWRM_FUNC_VLAN_QCFG (0x34UL)
#define HWRM_QUEUE_PFCENABLE_QCFG (0x35UL)
#define HWRM_QUEUE_PFCENABLE_CFG (0x36UL)
#define HWRM_QUEUE_PRI2COS_QCFG (0x37UL)
#define HWRM_QUEUE_PRI2COS_CFG (0x38UL)
#define HWRM_QUEUE_COS2BW_QCFG (0x39UL)
#define HWRM_QUEUE_COS2BW_CFG (0x3aUL)
+ #define HWRM_QUEUE_DSCP_QCAPS (0x3bUL)
+ #define HWRM_QUEUE_DSCP2PRI_QCFG (0x3cUL)
+ #define HWRM_QUEUE_DSCP2PRI_CFG (0x3dUL)
#define HWRM_VNIC_ALLOC (0x40UL)
#define HWRM_VNIC_FREE (0x41UL)
#define HWRM_VNIC_CFG (0x42UL)
@@ -5455,7 +5886,7 @@ struct cmd_nums {
#define HWRM_CFA_L2_FILTER_FREE (0x91UL)
#define HWRM_CFA_L2_FILTER_CFG (0x92UL)
#define HWRM_CFA_L2_SET_RX_MASK (0x93UL)
- #define RESERVED4 (0x94UL)
+ #define HWRM_CFA_VLAN_ANTISPOOF_CFG (0x94UL)
#define HWRM_CFA_TUNNEL_FILTER_ALLOC (0x95UL)
#define HWRM_CFA_TUNNEL_FILTER_FREE (0x96UL)
#define HWRM_CFA_ENCAP_RECORD_ALLOC (0x97UL)
@@ -5494,6 +5925,8 @@ struct cmd_nums {
#define HWRM_CFA_METER_PROFILE_CFG (0xf7UL)
#define HWRM_CFA_METER_INSTANCE_ALLOC (0xf8UL)
#define HWRM_CFA_METER_INSTANCE_FREE (0xf9UL)
+ #define HWRM_CFA_VFR_ALLOC (0xfdUL)
+ #define HWRM_CFA_VFR_FREE (0xfeUL)
#define HWRM_CFA_VF_PAIR_ALLOC (0x100UL)
#define HWRM_CFA_VF_PAIR_FREE (0x101UL)
#define HWRM_CFA_VF_PAIR_INFO (0x102UL)
@@ -5502,14 +5935,20 @@ struct cmd_nums {
#define HWRM_CFA_FLOW_FLUSH (0x105UL)
#define HWRM_CFA_FLOW_STATS (0x106UL)
#define HWRM_CFA_FLOW_INFO (0x107UL)
+ #define HWRM_CFA_DECAP_FILTER_ALLOC (0x108UL)
+ #define HWRM_CFA_DECAP_FILTER_FREE (0x109UL)
+ #define HWRM_CFA_VLAN_ANTISPOOF_QCFG (0x10aUL)
#define HWRM_SELFTEST_QLIST (0x200UL)
#define HWRM_SELFTEST_EXEC (0x201UL)
#define HWRM_SELFTEST_IRQ (0x202UL)
+ #define HWRM_SELFTEST_RETREIVE_EYE_DATA (0x203UL)
#define HWRM_DBG_READ_DIRECT (0xff10UL)
#define HWRM_DBG_READ_INDIRECT (0xff11UL)
#define HWRM_DBG_WRITE_DIRECT (0xff12UL)
#define HWRM_DBG_WRITE_INDIRECT (0xff13UL)
#define HWRM_DBG_DUMP (0xff14UL)
+ #define HWRM_DBG_ERASE_NVM (0xff15UL)
+ #define HWRM_DBG_CFG (0xff16UL)
#define HWRM_NVM_FACTORY_DEFAULTS (0xffeeUL)
#define HWRM_NVM_VALIDATE_OPTION (0xffefUL)
#define HWRM_NVM_FLUSH (0xfff0UL)
@@ -5720,6 +6159,7 @@ struct hwrm_struct_hdr {
#define STRUCT_HDR_STRUCT_ID_LLDP_DEVICE 0x426UL
#define STRUCT_HDR_STRUCT_ID_AFM_OPAQUE 0x1UL
#define STRUCT_HDR_STRUCT_ID_PORT_DESCRIPTION 0xaUL
+ #define STRUCT_HDR_STRUCT_ID_RSS_V2 0x64UL
__le16 len;
u8 version;
u8 count;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index b8e7248..d37925a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -18,6 +18,7 @@
#include "bnxt.h"
#include "bnxt_ulp.h"
#include "bnxt_sriov.h"
+#include "bnxt_vfr.h"
#include "bnxt_ethtool.h"
#ifdef CONFIG_BNXT_SRIOV
@@ -587,6 +588,10 @@ void bnxt_sriov_disable(struct bnxt *bp)
if (!num_vfs)
return;
+ /* synchronize VF and VF-rep create and destroy */
+ mutex_lock(&bp->sriov_lock);
+ bnxt_vf_reps_destroy(bp);
+
if (pci_vfs_assigned(bp->pdev)) {
bnxt_hwrm_fwd_async_event_cmpl(
bp, NULL, ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD);
@@ -597,6 +602,7 @@ void bnxt_sriov_disable(struct bnxt *bp)
/* Free the HW resources reserved for various VF's */
bnxt_hwrm_func_vf_resource_free(bp, num_vfs);
}
+ mutex_unlock(&bp->sriov_lock);
bnxt_free_vf_resources(bp);
@@ -794,8 +800,10 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf)
PORT_PHY_QCFG_RESP_LINK_LINK;
phy_qcfg_resp.link_speed = cpu_to_le16(
PORT_PHY_QCFG_RESP_LINK_SPEED_10GB);
- phy_qcfg_resp.duplex =
- PORT_PHY_QCFG_RESP_DUPLEX_FULL;
+ phy_qcfg_resp.duplex_cfg =
+ PORT_PHY_QCFG_RESP_DUPLEX_CFG_FULL;
+ phy_qcfg_resp.duplex_state =
+ PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL;
phy_qcfg_resp.pause =
(PORT_PHY_QCFG_RESP_PAUSE_TX |
PORT_PHY_QCFG_RESP_PAUSE_RX);
@@ -804,7 +812,8 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf)
/* force link down */
phy_qcfg_resp.link = PORT_PHY_QCFG_RESP_LINK_NO_LINK;
phy_qcfg_resp.link_speed = 0;
- phy_qcfg_resp.duplex = PORT_PHY_QCFG_RESP_DUPLEX_HALF;
+ phy_qcfg_resp.duplex_state =
+ PORT_PHY_QCFG_RESP_DUPLEX_STATE_HALF;
phy_qcfg_resp.pause = 0;
}
rc = bnxt_hwrm_fwd_resp(bp, vf, &phy_qcfg_resp,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
new file mode 100644
index 0000000..ccd699f
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -0,0 +1,834 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/if_vlan.h>
+#include <net/flow_dissector.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_skbedit.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_sriov.h"
+#include "bnxt_tc.h"
+#include "bnxt_vfr.h"
+
+#ifdef CONFIG_BNXT_FLOWER_OFFLOAD
+
+#define BNXT_FID_INVALID 0xffff
+#define VLAN_TCI(vid, prio) ((vid) | ((prio) << VLAN_PRIO_SHIFT))
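+/* e.g. VLAN_TCI(5, 3) == 0x6005: VID 5 in bits 0-11, PCP 3 in bits 13-15 */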
+
+/* Return the dst fid of the func for flow forwarding
+ * For PFs: src_fid is the fid of the PF
+ * For VF-reps: src_fid is the fid of the VF
+ */
+static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev)
+{
+ struct bnxt *bp;
+
+ /* check if dev belongs to the same switch */
+ if (!switchdev_port_same_parent_id(pf_bp->dev, dev)) {
+ netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch",
+ dev->ifindex);
+ return BNXT_FID_INVALID;
+ }
+
+ /* Is dev a VF-rep? */
+ if (dev != pf_bp->dev)
+ return bnxt_vf_rep_get_fid(dev);
+
+ bp = netdev_priv(dev);
+ return bp->pf.fw_fid;
+}
+
+static int bnxt_tc_parse_redir(struct bnxt *bp,
+ struct bnxt_tc_actions *actions,
+ const struct tc_action *tc_act)
+{
+ int ifindex = tcf_mirred_ifindex(tc_act);
+ struct net_device *dev;
+ u16 dst_fid;
+
+ dev = __dev_get_by_index(dev_net(bp->dev), ifindex);
+ if (!dev) {
+ netdev_info(bp->dev, "no dev for ifindex=%d", ifindex);
+ return -EINVAL;
+ }
+
+ /* find the FID from dev */
+ dst_fid = bnxt_flow_get_dst_fid(bp, dev);
+ if (dst_fid == BNXT_FID_INVALID) {
+ netdev_info(bp->dev, "can't get fid for ifindex=%d", ifindex);
+ return -EINVAL;
+ }
+
+ actions->flags |= BNXT_TC_ACTION_FLAG_FWD;
+ actions->dst_fid = dst_fid;
+ actions->dst_dev = dev;
+ return 0;
+}
+
+static void bnxt_tc_parse_vlan(struct bnxt *bp,
+ struct bnxt_tc_actions *actions,
+ const struct tc_action *tc_act)
+{
+ if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_POP) {
+ actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN;
+ } else if (tcf_vlan_action(tc_act) == TCA_VLAN_ACT_PUSH) {
+ actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN;
+ actions->push_vlan_tci = htons(tcf_vlan_push_vid(tc_act));
+ actions->push_vlan_tpid = tcf_vlan_push_proto(tc_act);
+ }
+}
+
+static int bnxt_tc_parse_actions(struct bnxt *bp,
+ struct bnxt_tc_actions *actions,
+ struct tcf_exts *tc_exts)
+{
+ const struct tc_action *tc_act;
+ LIST_HEAD(tc_actions);
+ int rc;
+
+ if (!tcf_exts_has_actions(tc_exts)) {
+ netdev_info(bp->dev, "no actions");
+ return -EINVAL;
+ }
+
+ tcf_exts_to_list(tc_exts, &tc_actions);
+ list_for_each_entry(tc_act, &tc_actions, list) {
+ /* Drop action */
+ if (is_tcf_gact_shot(tc_act)) {
+ actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
+ return 0; /* don't bother with other actions */
+ }
+
+ /* Redirect action */
+ if (is_tcf_mirred_egress_redirect(tc_act)) {
+ rc = bnxt_tc_parse_redir(bp, actions, tc_act);
+ if (rc)
+ return rc;
+ continue;
+ }
+
+ /* Push/pop VLAN */
+ if (is_tcf_vlan(tc_act)) {
+ bnxt_tc_parse_vlan(bp, actions, tc_act);
+ continue;
+ }
+ }
+
+ return 0;
+}
+
+#define GET_KEY(flow_cmd, key_type) \
+ skb_flow_dissector_target((flow_cmd)->dissector, key_type,\
+ (flow_cmd)->key)
+#define GET_MASK(flow_cmd, key_type) \
+ skb_flow_dissector_target((flow_cmd)->dissector, key_type,\
+ (flow_cmd)->mask)
+
+static int bnxt_tc_parse_flow(struct bnxt *bp,
+ struct tc_cls_flower_offload *tc_flow_cmd,
+ struct bnxt_tc_flow *flow)
+{
+ struct flow_dissector *dissector = tc_flow_cmd->dissector;
+ u16 addr_type = 0;
+
+ /* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
+ if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
+ (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) {
+ netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x",
+ dissector->used_keys);
+ return -EOPNOTSUPP;
+ }
+
+ if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_CONTROL);
+
+ addr_type = key->addr_type;
+ }
+
+ if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_BASIC);
+ struct flow_dissector_key_basic *mask =
+ GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_BASIC);
+
+ flow->l2_key.ether_type = key->n_proto;
+ flow->l2_mask.ether_type = mask->n_proto;
+
+ if (key->n_proto == htons(ETH_P_IP) ||
+ key->n_proto == htons(ETH_P_IPV6)) {
+ flow->l4_key.ip_proto = key->ip_proto;
+ flow->l4_mask.ip_proto = mask->ip_proto;
+ }
+ }
+
+ if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_dissector_key_eth_addrs *key =
+ GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ETH_ADDRS);
+ struct flow_dissector_key_eth_addrs *mask =
+ GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ETH_ADDRS);
+
+ flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS;
+ ether_addr_copy(flow->l2_key.dmac, key->dst);
+ ether_addr_copy(flow->l2_mask.dmac, mask->dst);
+ ether_addr_copy(flow->l2_key.smac, key->src);
+ ether_addr_copy(flow->l2_mask.smac, mask->src);
+ }
+
+ if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_dissector_key_vlan *key =
+ GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_VLAN);
+ struct flow_dissector_key_vlan *mask =
+ GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_VLAN);
+
+ flow->l2_key.inner_vlan_tci =
+ cpu_to_be16(VLAN_TCI(key->vlan_id, key->vlan_priority));
+ flow->l2_mask.inner_vlan_tci =
+ cpu_to_be16((VLAN_TCI(mask->vlan_id, mask->vlan_priority)));
+ flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q);
+ flow->l2_mask.inner_vlan_tpid = htons(0xffff);
+ flow->l2_key.num_vlans = 1;
+ }
+
+ if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+ struct flow_dissector_key_ipv4_addrs *key =
+ GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+ struct flow_dissector_key_ipv4_addrs *mask =
+ GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+
+ flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS;
+ flow->l3_key.ipv4.daddr.s_addr = key->dst;
+ flow->l3_mask.ipv4.daddr.s_addr = mask->dst;
+ flow->l3_key.ipv4.saddr.s_addr = key->src;
+ flow->l3_mask.ipv4.saddr.s_addr = mask->src;
+ } else if (dissector_uses_key(dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+ struct flow_dissector_key_ipv6_addrs *key =
+ GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+
+ flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS;
+ flow->l3_key.ipv6.daddr = key->dst;
+ flow->l3_mask.ipv6.daddr = mask->dst;
+ flow->l3_key.ipv6.saddr = key->src;
+ flow->l3_mask.ipv6.saddr = mask->src;
+ }
+
+ if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_dissector_key_ports *key =
+ GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_PORTS);
+ struct flow_dissector_key_ports *mask =
+ GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_PORTS);
+
+ flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS;
+ flow->l4_key.ports.dport = key->dst;
+ flow->l4_mask.ports.dport = mask->dst;
+ flow->l4_key.ports.sport = key->src;
+ flow->l4_mask.ports.sport = mask->src;
+ }
+
+ if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ICMP)) {
+ struct flow_dissector_key_icmp *key =
+ GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ICMP);
+ struct flow_dissector_key_icmp *mask =
+ GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ICMP);
+
+ flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP;
+ flow->l4_key.icmp.type = key->type;
+ flow->l4_key.icmp.code = key->code;
+ flow->l4_mask.icmp.type = mask->type;
+ flow->l4_mask.icmp.code = mask->code;
+ }
+
+ return bnxt_tc_parse_actions(bp, &flow->actions, tc_flow_cmd->exts);
+}
+
+static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp, __le16 flow_handle)
+{
+ struct hwrm_cfa_flow_free_input req = { 0 };
+ int rc;
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_FREE, -1, -1);
+ req.flow_handle = flow_handle;
+
+ rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (rc)
+ netdev_info(bp->dev, "Error: %s: flow_handle=0x%x rc=%d",
+ __func__, flow_handle, rc);
+ return rc;
+}
+
+static int ipv6_mask_len(struct in6_addr *mask)
+{
+ int mask_len = 0, i;
+
+ for (i = 0; i < 4; i++)
+ mask_len += inet_mask_len(mask->s6_addr32[i]);
+
+ return mask_len;
+}
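+
+/* e.g. a /64 mask (ffff:ffff:ffff:ffff::) yields 32 + 32 + 0 + 0 = 64 */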
+
+static bool is_wildcard(void *mask, int len)
+{
+ const u8 *p = mask;
+ int i;
+
+ for (i = 0; i < len; i++) {
+ if (p[i] != 0)
+ return false;
+ }
+ return true;
+}
+
+static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
+ __le16 ref_flow_handle, __le16 *flow_handle)
+{
+ struct hwrm_cfa_flow_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+ struct bnxt_tc_actions *actions = &flow->actions;
+ struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
+ struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
+ struct hwrm_cfa_flow_alloc_input req = { 0 };
+ u16 flow_flags = 0, action_flags = 0;
+ int rc;
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_ALLOC, -1, -1);
+
+ req.src_fid = cpu_to_le16(flow->src_fid);
+ req.ref_flow_handle = ref_flow_handle;
+ req.ethertype = flow->l2_key.ether_type;
+ req.ip_proto = flow->l4_key.ip_proto;
+
+ if (flow->flags & BNXT_TC_FLOW_FLAGS_ETH_ADDRS) {
+ memcpy(req.dmac, flow->l2_key.dmac, ETH_ALEN);
+ memcpy(req.smac, flow->l2_key.smac, ETH_ALEN);
+ }
+
+ if (flow->l2_key.num_vlans > 0) {
+ flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE;
+ /* FW expects the inner_vlan_tci value to be set
+ * in outer_vlan_tci when num_vlans is 1 (which is
+ * always the case in TC).
+ */
+ req.outer_vlan_tci = flow->l2_key.inner_vlan_tci;
+ }
+
+ /* If all IP and L4 fields are wildcarded then this is an L2 flow */
+ if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
+ is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
+ flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
+ } else {
+ flow_flags |= flow->l2_key.ether_type == htons(ETH_P_IP) ?
+ CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4 :
+ CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6;
+
+ if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV4_ADDRS) {
+ req.ip_dst[0] = l3_key->ipv4.daddr.s_addr;
+ req.ip_dst_mask_len =
+ inet_mask_len(l3_mask->ipv4.daddr.s_addr);
+ req.ip_src[0] = l3_key->ipv4.saddr.s_addr;
+ req.ip_src_mask_len =
+ inet_mask_len(l3_mask->ipv4.saddr.s_addr);
+ } else if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV6_ADDRS) {
+ memcpy(req.ip_dst, l3_key->ipv6.daddr.s6_addr32,
+ sizeof(req.ip_dst));
+ req.ip_dst_mask_len =
+ ipv6_mask_len(&l3_mask->ipv6.daddr);
+ memcpy(req.ip_src, l3_key->ipv6.saddr.s6_addr32,
+ sizeof(req.ip_src));
+ req.ip_src_mask_len =
+ ipv6_mask_len(&l3_mask->ipv6.saddr);
+ }
+ }
+
+ if (flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) {
+ req.l4_src_port = flow->l4_key.ports.sport;
+ req.l4_src_port_mask = flow->l4_mask.ports.sport;
+ req.l4_dst_port = flow->l4_key.ports.dport;
+ req.l4_dst_port_mask = flow->l4_mask.ports.dport;
+ } else if (flow->flags & BNXT_TC_FLOW_FLAGS_ICMP) {
+ /* l4 ports serve as type/code when ip_proto is ICMP */
+ req.l4_src_port = htons(flow->l4_key.icmp.type);
+ req.l4_src_port_mask = htons(flow->l4_mask.icmp.type);
+ req.l4_dst_port = htons(flow->l4_key.icmp.code);
+ req.l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
+ }
+ req.flags = cpu_to_le16(flow_flags);
+
+ if (actions->flags & BNXT_TC_ACTION_FLAG_DROP) {
+ action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP;
+ } else {
+ if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
+ action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD;
+ req.dst_fid = cpu_to_le16(actions->dst_fid);
+ }
+ if (actions->flags & BNXT_TC_ACTION_FLAG_PUSH_VLAN) {
+ action_flags |=
+ CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
+ req.l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
+ req.l2_rewrite_vlan_tci = actions->push_vlan_tci;
+ memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
+ memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
+ }
+ if (actions->flags & BNXT_TC_ACTION_FLAG_POP_VLAN) {
+ action_flags |=
+ CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
+ /* Rewrite config with tpid = 0 implies vlan pop */
+ req.l2_rewrite_vlan_tpid = 0;
+ memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
+ memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
+ }
+ }
+ req.action_flags = cpu_to_le16(action_flags);
+
+ mutex_lock(&bp->hwrm_cmd_lock);
+
+ rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (!rc)
+ *flow_handle = resp->flow_handle;
+
+ mutex_unlock(&bp->hwrm_cmd_lock);
+
+ return rc;
+}
+
+/* Add val to accum while handling a possible wraparound
+ * of val. Even though val is of type u64, its actual width
+ * is denoted by mask, and it will wrap around beyond that width.
+ */
+static void accumulate_val(u64 *accum, u64 val, u64 mask)
+{
+#define low_bits(x, mask) ((x) & (mask))
+#define high_bits(x, mask) ((x) & ~(mask))
+ bool wrapped = val < low_bits(*accum, mask);
+
+ *accum = high_bits(*accum, mask) + val;
+ if (wrapped)
+ *accum += (mask + 1);
+}
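+
+/* Example with a hypothetical 4-bit counter (mask = 0xf): if *accum is
+ * 0x1e and the HW now reports val = 0x2, then val < low_bits(*accum) = 0xe,
+ * so the counter must have wrapped once:
+ *
+ *	u64 accum = 0x1e;
+ *	accumulate_val(&accum, 0x2, 0xf);
+ *	// accum == high_bits(0x1e) + 0x2 + (0xf + 1) == 0x22
+ */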
+
+/* The HW counters' width is much less than 64 bits.
+ * Handle a possible wrap-around while updating the stat counters.
+ */
+static void bnxt_flow_stats_fix_wraparound(struct bnxt_tc_info *tc_info,
+ struct bnxt_tc_flow_stats *stats,
+ struct bnxt_tc_flow_stats *hw_stats)
+{
+ accumulate_val(&stats->bytes, hw_stats->bytes, tc_info->bytes_mask);
+ accumulate_val(&stats->packets, hw_stats->packets,
+ tc_info->packets_mask);
+}
+
+/* Fix possible wraparound of the stats queried from HW, calculate
+ * the delta from prev_stats, and also update the prev_stats.
+ * The HW flow stats are fetched under the hwrm_cmd_lock mutex.
+ * This routine is best called while under the mutex so that the
+ * stats processing happens atomically.
+ */
+static void bnxt_flow_stats_calc(struct bnxt_tc_info *tc_info,
+ struct bnxt_tc_flow *flow,
+ struct bnxt_tc_flow_stats *stats)
+{
+ struct bnxt_tc_flow_stats *acc_stats, *prev_stats;
+
+ acc_stats = &flow->stats;
+ bnxt_flow_stats_fix_wraparound(tc_info, acc_stats, stats);
+
+ prev_stats = &flow->prev_stats;
+ stats->bytes = acc_stats->bytes - prev_stats->bytes;
+ stats->packets = acc_stats->packets - prev_stats->packets;
+ *prev_stats = *acc_stats;
+}
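+
+/* The stats passed up to TC are thus per-query deltas: e.g. if the
+ * accumulated count is 100 packets on one query and 130 on the next,
+ * the second query reports 30.
+ */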
+
+static int bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp,
+ __le16 flow_handle,
+ struct bnxt_tc_flow *flow,
+ struct bnxt_tc_flow_stats *stats)
+{
+ struct hwrm_cfa_flow_stats_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_cfa_flow_stats_input req = { 0 };
+ int rc;
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
+ req.num_flows = cpu_to_le16(1);
+ req.flow_handle_0 = flow_handle;
+
+ mutex_lock(&bp->hwrm_cmd_lock);
+ rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (!rc) {
+ stats->packets = le64_to_cpu(resp->packet_0);
+ stats->bytes = le64_to_cpu(resp->byte_0);
+ bnxt_flow_stats_calc(&bp->tc_info, flow, stats);
+ } else {
+ netdev_info(bp->dev, "error rc=%d", rc);
+ }
+
+ mutex_unlock(&bp->hwrm_cmd_lock);
+ return rc;
+}
+
+static int bnxt_tc_put_l2_node(struct bnxt *bp,
+ struct bnxt_tc_flow_node *flow_node)
+{
+ struct bnxt_tc_l2_node *l2_node = flow_node->l2_node;
+ struct bnxt_tc_info *tc_info = &bp->tc_info;
+ int rc;
+
+ /* remove flow_node from the L2 shared flow list */
+ list_del(&flow_node->l2_list_node);
+ if (--l2_node->refcount == 0) {
+ rc = rhashtable_remove_fast(&tc_info->l2_table, &l2_node->node,
+ tc_info->l2_ht_params);
+ if (rc)
+ netdev_err(bp->dev,
+ "Error: %s: rhashtable_remove_fast: %d",
+ __func__, rc);
+ kfree_rcu(l2_node, rcu);
+ }
+ return 0;
+}
+
+static struct bnxt_tc_l2_node *
+bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
+ struct rhashtable_params ht_params,
+ struct bnxt_tc_l2_key *l2_key)
+{
+ struct bnxt_tc_l2_node *l2_node;
+ int rc;
+
+ l2_node = rhashtable_lookup_fast(l2_table, l2_key, ht_params);
+ if (!l2_node) {
+ l2_node = kzalloc(sizeof(*l2_node), GFP_KERNEL);
+ if (!l2_node)
+ return NULL;
+
+ l2_node->key = *l2_key;
+ rc = rhashtable_insert_fast(l2_table, &l2_node->node,
+ ht_params);
+ if (rc) {
+ kfree(l2_node);
+ netdev_err(bp->dev,
+ "Error: %s: rhashtable_insert_fast: %d",
+ __func__, rc);
+ return NULL;
+ }
+ INIT_LIST_HEAD(&l2_node->common_l2_flows);
+ }
+ return l2_node;
+}
+
+/* Get the ref_flow_handle for a flow by checking if there are any other
+ * flows that share the same L2 key as this flow.
+ */
+static int
+bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
+ struct bnxt_tc_flow_node *flow_node,
+ __le16 *ref_flow_handle)
+{
+ struct bnxt_tc_info *tc_info = &bp->tc_info;
+ struct bnxt_tc_flow_node *ref_flow_node;
+ struct bnxt_tc_l2_node *l2_node;
+
+ l2_node = bnxt_tc_get_l2_node(bp, &tc_info->l2_table,
+ tc_info->l2_ht_params,
+ &flow->l2_key);
+ if (!l2_node)
+ return -ENOMEM;
+
+ /* If any other flow is using this l2_node, use its flow_handle
+ * as the ref_flow_handle
+ */
+ if (l2_node->refcount > 0) {
+ ref_flow_node = list_first_entry(&l2_node->common_l2_flows,
+ struct bnxt_tc_flow_node,
+ l2_list_node);
+ *ref_flow_handle = ref_flow_node->flow_handle;
+ } else {
+ *ref_flow_handle = cpu_to_le16(0xffff);
+ }
+
+ /* Insert the l2_node into the flow_node so that subsequent flows
+ * with a matching l2 key can use the flow_handle of this flow
+ * as their ref_flow_handle
+ */
+ flow_node->l2_node = l2_node;
+ list_add(&flow_node->l2_list_node, &l2_node->common_l2_flows);
+ l2_node->refcount++;
+ return 0;
+}
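+
+/* For example: the first flow with a given L2 key is installed with
+ * ref_flow_handle = 0xffff (nothing to share yet); a second flow with a
+ * matching dmac/smac/vlan then passes the first flow's flow_handle as
+ * its ref_flow_handle so that FW can share the L2 match between them.
+ */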
+
+/* After the flow parsing is done, this routine is used for checking
+ * if there are any aspects of the flow that prevent it from being
+ * offloaded.
+ */
+static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
+{
+ /* If L4 ports are specified then ip_proto must be TCP or UDP */
+ if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) &&
+ (flow->l4_key.ip_proto != IPPROTO_TCP &&
+ flow->l4_key.ip_proto != IPPROTO_UDP)) {
+ netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports",
+ flow->l4_key.ip_proto);
+ return false;
+ }
+
+ return true;
+}
+
+static int __bnxt_tc_del_flow(struct bnxt *bp,
+ struct bnxt_tc_flow_node *flow_node)
+{
+ struct bnxt_tc_info *tc_info = &bp->tc_info;
+ int rc;
+
+ /* send HWRM cmd to free the flow-id */
+ bnxt_hwrm_cfa_flow_free(bp, flow_node->flow_handle);
+
+ mutex_lock(&tc_info->lock);
+
+ /* release reference to l2 node */
+ bnxt_tc_put_l2_node(bp, flow_node);
+
+ mutex_unlock(&tc_info->lock);
+
+ rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node,
+ tc_info->flow_ht_params);
+ if (rc)
+ netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d",
+ __func__, rc);
+
+ kfree_rcu(flow_node, rcu);
+ return 0;
+}
+
+/* Add a new flow or replace an existing flow.
+ * Notes on locking:
+ * There are essentially two critical sections here.
+ * 1. while adding a new flow
+ * a) lookup l2-key
+ * b) issue HWRM cmd and get flow_handle
+ * c) link l2-key with flow
+ * 2. while deleting a flow
+ * a) unlinking l2-key from flow
+ * A lock is needed to protect these two critical sections.
+ *
+ * The hash-tables are already protected by the rhashtable API.
+ */
+static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
+ struct tc_cls_flower_offload *tc_flow_cmd)
+{
+ struct bnxt_tc_flow_node *new_node, *old_node;
+ struct bnxt_tc_info *tc_info = &bp->tc_info;
+ struct bnxt_tc_flow *flow;
+ __le16 ref_flow_handle;
+ int rc;
+
+ /* allocate memory for the new flow and its node */
+ new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node) {
+ rc = -ENOMEM;
+ goto done;
+ }
+ new_node->cookie = tc_flow_cmd->cookie;
+ flow = &new_node->flow;
+
+ rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow);
+ if (rc)
+ goto free_node;
+ flow->src_fid = src_fid;
+
+ if (!bnxt_tc_can_offload(bp, flow)) {
+ rc = -ENOSPC;
+ goto free_node;
+ }
+
+ /* If a flow exists with the same cookie, delete it */
+ old_node = rhashtable_lookup_fast(&tc_info->flow_table,
+ &tc_flow_cmd->cookie,
+ tc_info->flow_ht_params);
+ if (old_node)
+ __bnxt_tc_del_flow(bp, old_node);
+
+ /* Check if the L2 part of the flow has been offloaded already.
+ * If so, bump up its refcount and get its reference handle.
+ */
+ mutex_lock(&tc_info->lock);
+ rc = bnxt_tc_get_ref_flow_handle(bp, flow, new_node, &ref_flow_handle);
+ if (rc)
+ goto unlock;
+
+ /* send HWRM cmd to alloc the flow */
+ rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
+ &new_node->flow_handle);
+ if (rc)
+ goto put_l2;
+
+ /* add new flow to flow-table */
+ rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node,
+ tc_info->flow_ht_params);
+ if (rc)
+ goto hwrm_flow_free;
+
+ mutex_unlock(&tc_info->lock);
+ return 0;
+
+hwrm_flow_free:
+ bnxt_hwrm_cfa_flow_free(bp, new_node->flow_handle);
+put_l2:
+ bnxt_tc_put_l2_node(bp, new_node);
+unlock:
+ mutex_unlock(&tc_info->lock);
+free_node:
+ kfree(new_node);
+done:
+ netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d",
+ __func__, tc_flow_cmd->cookie, rc);
+ return rc;
+}
+
+static int bnxt_tc_del_flow(struct bnxt *bp,
+ struct tc_cls_flower_offload *tc_flow_cmd)
+{
+ struct bnxt_tc_info *tc_info = &bp->tc_info;
+ struct bnxt_tc_flow_node *flow_node;
+
+ flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
+ &tc_flow_cmd->cookie,
+ tc_info->flow_ht_params);
+ if (!flow_node) {
+ netdev_info(bp->dev, "ERROR: no flow_node for cookie %lx",
+ tc_flow_cmd->cookie);
+ return -EINVAL;
+ }
+
+ return __bnxt_tc_del_flow(bp, flow_node);
+}
+
+static int bnxt_tc_get_flow_stats(struct bnxt *bp,
+ struct tc_cls_flower_offload *tc_flow_cmd)
+{
+ struct bnxt_tc_info *tc_info = &bp->tc_info;
+ struct bnxt_tc_flow_node *flow_node;
+ struct bnxt_tc_flow_stats stats;
+ int rc;
+
+ flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
+ &tc_flow_cmd->cookie,
+ tc_info->flow_ht_params);
+ if (!flow_node) {
+ netdev_info(bp->dev, "Error: no flow_node for cookie %lx",
+ tc_flow_cmd->cookie);
+ return -EINVAL;
+ }
+
+ rc = bnxt_hwrm_cfa_flow_stats_get(bp, flow_node->flow_handle,
+ &flow_node->flow, &stats);
+ if (rc)
+ return rc;
+
+ tcf_exts_stats_update(tc_flow_cmd->exts, stats.bytes, stats.packets, 0);
+ return 0;
+}
+
+int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ int rc = 0;
+
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ rc = bnxt_tc_add_flow(bp, src_fid, cls_flower);
+ break;
+
+ case TC_CLSFLOWER_DESTROY:
+ rc = bnxt_tc_del_flow(bp, cls_flower);
+ break;
+
+ case TC_CLSFLOWER_STATS:
+ rc = bnxt_tc_get_flow_stats(bp, cls_flower);
+ break;
+
+ default:
+ rc = -EOPNOTSUPP;
+ break;
+ }
+ return rc;
+}
+
+static const struct rhashtable_params bnxt_tc_flow_ht_params = {
+ .head_offset = offsetof(struct bnxt_tc_flow_node, node),
+ .key_offset = offsetof(struct bnxt_tc_flow_node, cookie),
+ .key_len = sizeof(((struct bnxt_tc_flow_node *)0)->cookie),
+ .automatic_shrinking = true
+};
+
+static const struct rhashtable_params bnxt_tc_l2_ht_params = {
+ .head_offset = offsetof(struct bnxt_tc_l2_node, node),
+ .key_offset = offsetof(struct bnxt_tc_l2_node, key),
+ .key_len = BNXT_TC_L2_KEY_LEN,
+ .automatic_shrinking = true
+};
+
+/* convert counter width in bits to a mask */
+#define mask(width) ((u64)~0 >> (64 - (width)))
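+/* e.g. mask(36) == 0xfffffffffULL: a 36-bit counter wraps at 2^36 */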
+
+int bnxt_init_tc(struct bnxt *bp)
+{
+ struct bnxt_tc_info *tc_info = &bp->tc_info;
+ int rc;
+
+ if (bp->hwrm_spec_code < 0x10800) {
+ netdev_warn(bp->dev,
+ "Firmware does not support TC flower offload.\n");
+ return -ENOTSUPP;
+ }
+ mutex_init(&tc_info->lock);
+
+ /* Counter widths are programmed by FW */
+ tc_info->bytes_mask = mask(36);
+ tc_info->packets_mask = mask(28);
+
+ tc_info->flow_ht_params = bnxt_tc_flow_ht_params;
+ rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params);
+ if (rc)
+ return rc;
+
+ tc_info->l2_ht_params = bnxt_tc_l2_ht_params;
+ rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params);
+ if (rc)
+ goto destroy_flow_table;
+
+ tc_info->enabled = true;
+ bp->dev->hw_features |= NETIF_F_HW_TC;
+ bp->dev->features |= NETIF_F_HW_TC;
+ return 0;
+
+destroy_flow_table:
+ rhashtable_destroy(&tc_info->flow_table);
+ return rc;
+}
+
+void bnxt_shutdown_tc(struct bnxt *bp)
+{
+ struct bnxt_tc_info *tc_info = &bp->tc_info;
+
+ if (!tc_info->enabled)
+ return;
+
+ rhashtable_destroy(&tc_info->flow_table);
+ rhashtable_destroy(&tc_info->l2_table);
+}
+
+#else
+#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
new file mode 100644
index 0000000..6c4c1ed
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h
@@ -0,0 +1,158 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_TC_H
+#define BNXT_TC_H
+
+#ifdef CONFIG_BNXT_FLOWER_OFFLOAD
+
+/* Structs used for storing the filter/actions of the TC command */
+struct bnxt_tc_l2_key {
+ u8 dmac[ETH_ALEN];
+ u8 smac[ETH_ALEN];
+ __be16 inner_vlan_tpid;
+ __be16 inner_vlan_tci;
+ __be16 ether_type;
+ u8 num_vlans;
+};
+
+struct bnxt_tc_l3_key {
+ union {
+ struct {
+ struct in_addr daddr;
+ struct in_addr saddr;
+ } ipv4;
+ struct {
+ struct in6_addr daddr;
+ struct in6_addr saddr;
+ } ipv6;
+ };
+};
+
+struct bnxt_tc_l4_key {
+ u8 ip_proto;
+ union {
+ struct {
+ __be16 sport;
+ __be16 dport;
+ } ports;
+ struct {
+ u8 type;
+ u8 code;
+ } icmp;
+ };
+};
+
+struct bnxt_tc_actions {
+ u32 flags;
+#define BNXT_TC_ACTION_FLAG_FWD BIT(0)
+#define BNXT_TC_ACTION_FLAG_FWD_VXLAN BIT(1)
+#define BNXT_TC_ACTION_FLAG_PUSH_VLAN BIT(3)
+#define BNXT_TC_ACTION_FLAG_POP_VLAN BIT(4)
+#define BNXT_TC_ACTION_FLAG_DROP BIT(5)
+
+ u16 dst_fid;
+ struct net_device *dst_dev;
+ __be16 push_vlan_tpid;
+ __be16 push_vlan_tci;
+};
+
+struct bnxt_tc_flow_stats {
+ u64 packets;
+ u64 bytes;
+};
+
+struct bnxt_tc_flow {
+ u32 flags;
+#define BNXT_TC_FLOW_FLAGS_ETH_ADDRS BIT(1)
+#define BNXT_TC_FLOW_FLAGS_IPV4_ADDRS BIT(2)
+#define BNXT_TC_FLOW_FLAGS_IPV6_ADDRS BIT(3)
+#define BNXT_TC_FLOW_FLAGS_PORTS BIT(4)
+#define BNXT_TC_FLOW_FLAGS_ICMP BIT(5)
+
+ /* flow applicable to pkts ingressing on this fid */
+ u16 src_fid;
+ struct bnxt_tc_l2_key l2_key;
+ struct bnxt_tc_l2_key l2_mask;
+ struct bnxt_tc_l3_key l3_key;
+ struct bnxt_tc_l3_key l3_mask;
+ struct bnxt_tc_l4_key l4_key;
+ struct bnxt_tc_l4_key l4_mask;
+
+ struct bnxt_tc_actions actions;
+
+ /* updated stats accounting for hw-counter wrap-around */
+ struct bnxt_tc_flow_stats stats;
+ /* previous snap-shot of stats */
+ struct bnxt_tc_flow_stats prev_stats;
+ unsigned long lastused; /* jiffies */
+};
+
+/* L2 hash table
+ * This struct is used for the L2 flow table.
+ * The L2 part of a flow is stored in a hash table.
+ * A flow that shares the same L2 key/mask with an
+ * already existing flow must refer to its flow handle.
+ */
+struct bnxt_tc_l2_node {
+ /* hash key: first 16b of key */
+#define BNXT_TC_L2_KEY_LEN 16
+ struct bnxt_tc_l2_key key;
+ struct rhash_head node;
+
+ /* a linked list of flows that share the same l2 key */
+ struct list_head common_l2_flows;
+
+ /* number of flows sharing the l2 key */
+ u16 refcount;
+
+ struct rcu_head rcu;
+};
+
+struct bnxt_tc_flow_node {
+ /* hash key: provided by TC */
+ unsigned long cookie;
+ struct rhash_head node;
+
+ struct bnxt_tc_flow flow;
+
+ __le16 flow_handle;
+
+ /* L2 node in l2 hashtable that shares flow's l2 key */
+ struct bnxt_tc_l2_node *l2_node;
+ /* for the shared_flows list maintained in l2_node */
+ struct list_head l2_list_node;
+
+ struct rcu_head rcu;
+};
+
+int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
+ struct tc_cls_flower_offload *cls_flower);
+int bnxt_init_tc(struct bnxt *bp);
+void bnxt_shutdown_tc(struct bnxt *bp);
+
+#else /* CONFIG_BNXT_FLOWER_OFFLOAD */
+
+static inline int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int bnxt_init_tc(struct bnxt *bp)
+{
+ return 0;
+}
+
+static inline void bnxt_shutdown_tc(struct bnxt *bp)
+{
+}
+#endif /* CONFIG_BNXT_FLOWER_OFFLOAD */
+#endif /* BNXT_TC_H */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
new file mode 100644
index 0000000..e75db04
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -0,0 +1,513 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2016-2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/jhash.h>
+#include <net/pkt_cls.h>
+
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_vfr.h"
+#include "bnxt_tc.h"
+
+#ifdef CONFIG_BNXT_SRIOV
+
+#define CFA_HANDLE_INVALID 0xffff
+#define VF_IDX_INVALID 0xffff
+
+static int hwrm_cfa_vfr_alloc(struct bnxt *bp, u16 vf_idx,
+ u16 *tx_cfa_action, u16 *rx_cfa_code)
+{
+ struct hwrm_cfa_vfr_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+ struct hwrm_cfa_vfr_alloc_input req = { 0 };
+ int rc;
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_ALLOC, -1, -1);
+ req.vf_id = cpu_to_le16(vf_idx);
+ sprintf(req.vfr_name, "vfr%d", vf_idx);
+
+ mutex_lock(&bp->hwrm_cmd_lock);
+ rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (!rc) {
+ *tx_cfa_action = le16_to_cpu(resp->tx_cfa_action);
+ *rx_cfa_code = le16_to_cpu(resp->rx_cfa_code);
+ netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x",
+ *tx_cfa_action, *rx_cfa_code);
+ } else {
+ netdev_info(bp->dev, "%s error rc=%d", __func__, rc);
+ }
+
+ mutex_unlock(&bp->hwrm_cmd_lock);
+ return rc;
+}
+
+static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx)
+{
+ struct hwrm_cfa_vfr_free_input req = { 0 };
+ int rc;
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_FREE, -1, -1);
+ sprintf(req.vfr_name, "vfr%d", vf_idx);
+
+ rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (rc)
+ netdev_info(bp->dev, "%s error rc=%d", __func__, rc);
+ return rc;
+}
+
+static int bnxt_vf_rep_open(struct net_device *dev)
+{
+ struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+ struct bnxt *bp = vf_rep->bp;
+
+ /* Enable link and TX only if the parent PF is open. */
+ if (netif_running(bp->dev)) {
+ netif_carrier_on(dev);
+ netif_tx_start_all_queues(dev);
+ }
+ return 0;
+}
+
+static int bnxt_vf_rep_close(struct net_device *dev)
+{
+ netif_carrier_off(dev);
+ netif_tx_disable(dev);
+
+ return 0;
+}
+
+static netdev_tx_t bnxt_vf_rep_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+ int rc, len = skb->len;
+
+ skb_dst_drop(skb);
+ dst_hold((struct dst_entry *)vf_rep->dst);
+ skb_dst_set(skb, (struct dst_entry *)vf_rep->dst);
+ skb->dev = vf_rep->dst->u.port_info.lower_dev;
+
+ rc = dev_queue_xmit(skb);
+ if (!rc) {
+ vf_rep->tx_stats.packets++;
+ vf_rep->tx_stats.bytes += len;
+ }
+ return rc;
+}
+
+static void
+bnxt_vf_rep_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+
+ stats->rx_packets = vf_rep->rx_stats.packets;
+ stats->rx_bytes = vf_rep->rx_stats.bytes;
+ stats->tx_packets = vf_rep->tx_stats.packets;
+ stats->tx_bytes = vf_rep->tx_stats.bytes;
+}
+
+static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+ struct bnxt *bp = vf_rep->bp;
+ int vf_fid = bp->pf.vf[vf_rep->vf_idx].fw_fid;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return bnxt_tc_setup_flower(bp, vf_fid, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code)
+{
+ u16 vf_idx;
+
+ if (cfa_code && bp->cfa_code_map && BNXT_PF(bp)) {
+ vf_idx = bp->cfa_code_map[cfa_code];
+ if (vf_idx != VF_IDX_INVALID)
+ return bp->vf_reps[vf_idx]->dev;
+ }
+ return NULL;
+}
+
+void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb)
+{
+ struct bnxt_vf_rep *vf_rep = netdev_priv(skb->dev);
+
+ vf_rep->rx_stats.bytes += skb->len;
+ vf_rep->rx_stats.packets++;
+
+ netif_receive_skb(skb);
+}
+
+static int bnxt_vf_rep_get_phys_port_name(struct net_device *dev, char *buf,
+ size_t len)
+{
+ struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+ struct pci_dev *pf_pdev = vf_rep->bp->pdev;
+ int rc;
+
+ rc = snprintf(buf, len, "pf%dvf%d", PCI_FUNC(pf_pdev->devfn),
+ vf_rep->vf_idx);
+ if (rc >= len)
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+static void bnxt_vf_rep_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
+ strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
+}
+
+static int bnxt_vf_rep_port_attr_get(struct net_device *dev,
+ struct switchdev_attr *attr)
+{
+ struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+
+ /* As only PORT_PARENT_ID is currently supported, use the common code
+ * between PF and VF-rep for now.
+ */
+ return bnxt_port_attr_get(vf_rep->bp, attr);
+}
+
+static const struct switchdev_ops bnxt_vf_rep_switchdev_ops = {
+ .switchdev_port_attr_get = bnxt_vf_rep_port_attr_get
+};
+
+static const struct ethtool_ops bnxt_vf_rep_ethtool_ops = {
+ .get_drvinfo = bnxt_vf_rep_get_drvinfo
+};
+
+static const struct net_device_ops bnxt_vf_rep_netdev_ops = {
+ .ndo_open = bnxt_vf_rep_open,
+ .ndo_stop = bnxt_vf_rep_close,
+ .ndo_start_xmit = bnxt_vf_rep_xmit,
+ .ndo_get_stats64 = bnxt_vf_rep_get_stats64,
+ .ndo_setup_tc = bnxt_vf_rep_setup_tc,
+ .ndo_get_phys_port_name = bnxt_vf_rep_get_phys_port_name
+};
+
+/* Called when the parent PF interface is closed:
+ * As the mode transition from SWITCHDEV to LEGACY
+ * happens under the rtnl_lock(), this routine is safe
+ * under the rtnl_lock()
+ */
+void bnxt_vf_reps_close(struct bnxt *bp)
+{
+ struct bnxt_vf_rep *vf_rep;
+ u16 num_vfs, i;
+
+ if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+ return;
+
+ num_vfs = pci_num_vf(bp->pdev);
+ for (i = 0; i < num_vfs; i++) {
+ vf_rep = bp->vf_reps[i];
+ if (netif_running(vf_rep->dev))
+ bnxt_vf_rep_close(vf_rep->dev);
+ }
+}
+
+/* Called when the parent PF interface is opened (re-opened):
+ * As the mode transition from SWITCHDEV to LEGACY
+ * happens under the rtnl_lock(), this routine is safe
+ * under the rtnl_lock()
+ */
+void bnxt_vf_reps_open(struct bnxt *bp)
+{
+ int i;
+
+ if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+ return;
+
+ for (i = 0; i < pci_num_vf(bp->pdev); i++)
+ bnxt_vf_rep_open(bp->vf_reps[i]->dev);
+}
+
+static void __bnxt_vf_reps_destroy(struct bnxt *bp)
+{
+ u16 num_vfs = pci_num_vf(bp->pdev);
+ struct bnxt_vf_rep *vf_rep;
+ int i;
+
+ for (i = 0; i < num_vfs; i++) {
+ vf_rep = bp->vf_reps[i];
+ if (vf_rep) {
+ dst_release((struct dst_entry *)vf_rep->dst);
+
+ if (vf_rep->tx_cfa_action != CFA_HANDLE_INVALID)
+ hwrm_cfa_vfr_free(bp, vf_rep->vf_idx);
+
+ if (vf_rep->dev) {
+ /* if register_netdev failed, then netdev_ops
+ * would have been set to NULL
+ */
+ if (vf_rep->dev->netdev_ops)
+ unregister_netdev(vf_rep->dev);
+ free_netdev(vf_rep->dev);
+ }
+ }
+ }
+
+ kfree(bp->vf_reps);
+ bp->vf_reps = NULL;
+}
+
+void bnxt_vf_reps_destroy(struct bnxt *bp)
+{
+ bool closed = false;
+
+ if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+ return;
+
+ if (!bp->vf_reps)
+ return;
+
+ /* Ensure that parent PF's and VF-reps' RX/TX has been quiesced
+ * before proceeding with VF-rep cleanup.
+ */
+ rtnl_lock();
+ if (netif_running(bp->dev)) {
+ bnxt_close_nic(bp, false, false);
+ closed = true;
+ }
+ /* un-publish cfa_code_map so that RX path can't see it anymore */
+ kfree(bp->cfa_code_map);
+ bp->cfa_code_map = NULL;
+ bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+
+ if (closed)
+ bnxt_open_nic(bp, false, false);
+ rtnl_unlock();
+
+ /* Need to call __bnxt_vf_reps_destroy() outside of rtnl_lock
+ * as unregister_netdev takes rtnl_lock
+ */
+ __bnxt_vf_reps_destroy(bp);
+}
+
+/* Use the OUI of the PF's permanent address and report the same MAC
+ * address for the same VF-rep each time.
+ */
+static void bnxt_vf_rep_eth_addr_gen(u8 *src_mac, u16 vf_idx, u8 *mac)
+{
+ u32 addr;
+
+ ether_addr_copy(mac, src_mac);
+
+ addr = jhash(src_mac, ETH_ALEN, 0) + vf_idx;
+ mac[3] = (u8)(addr & 0xFF);
+ mac[4] = (u8)((addr >> 8) & 0xFF);
+ mac[5] = (u8)((addr >> 16) & 0xFF);
+}
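+
+/* e.g. with a hypothetical PF MAC of 00:10:18:aa:bb:cc, every VF-rep gets
+ * an address of the form 00:10:18:xx:yy:zz, where xx:yy:zz is derived
+ * from jhash(PF MAC) + vf_idx and is therefore stable across reloads.
+ */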
+
+static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
+ struct net_device *dev)
+{
+ struct net_device *pf_dev = bp->dev;
+
+ dev->netdev_ops = &bnxt_vf_rep_netdev_ops;
+ dev->ethtool_ops = &bnxt_vf_rep_ethtool_ops;
+ SWITCHDEV_SET_OPS(dev, &bnxt_vf_rep_switchdev_ops);
+ /* Just inherit all the features of the parent PF as the VF-rep
+ * uses the RX/TX rings of the parent PF
+ */
+ dev->hw_features = pf_dev->hw_features;
+ dev->gso_partial_features = pf_dev->gso_partial_features;
+ dev->vlan_features = pf_dev->vlan_features;
+ dev->hw_enc_features = pf_dev->hw_enc_features;
+ dev->features |= pf_dev->features;
+ bnxt_vf_rep_eth_addr_gen(bp->pf.mac_addr, vf_rep->vf_idx,
+ dev->perm_addr);
+ ether_addr_copy(dev->dev_addr, dev->perm_addr);
+}
+
+static int bnxt_vf_reps_create(struct bnxt *bp)
+{
+ u16 *cfa_code_map = NULL, num_vfs = pci_num_vf(bp->pdev);
+ struct bnxt_vf_rep *vf_rep;
+ struct net_device *dev;
+ int rc, i;
+
+ bp->vf_reps = kcalloc(num_vfs, sizeof(vf_rep), GFP_KERNEL);
+ if (!bp->vf_reps)
+ return -ENOMEM;
+
+ /* storage for cfa_code to vf-idx mapping */
+ cfa_code_map = kmalloc(sizeof(*bp->cfa_code_map) * MAX_CFA_CODE,
+ GFP_KERNEL);
+ if (!cfa_code_map) {
+ rc = -ENOMEM;
+ goto err;
+ }
+ for (i = 0; i < MAX_CFA_CODE; i++)
+ cfa_code_map[i] = VF_IDX_INVALID;
+
+ for (i = 0; i < num_vfs; i++) {
+ dev = alloc_etherdev(sizeof(*vf_rep));
+ if (!dev) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ vf_rep = netdev_priv(dev);
+ bp->vf_reps[i] = vf_rep;
+ vf_rep->dev = dev;
+ vf_rep->bp = bp;
+ vf_rep->vf_idx = i;
+ vf_rep->tx_cfa_action = CFA_HANDLE_INVALID;
+
+ /* get cfa handles from FW */
+ rc = hwrm_cfa_vfr_alloc(bp, vf_rep->vf_idx,
+ &vf_rep->tx_cfa_action,
+ &vf_rep->rx_cfa_code);
+ if (rc) {
+ rc = -ENOLINK;
+ goto err;
+ }
+ cfa_code_map[vf_rep->rx_cfa_code] = vf_rep->vf_idx;
+
+ vf_rep->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
+ GFP_KERNEL);
+ if (!vf_rep->dst) {
+ rc = -ENOMEM;
+ goto err;
+ }
+ /* only cfa_action is needed to mux a packet while TXing */
+ vf_rep->dst->u.port_info.port_id = vf_rep->tx_cfa_action;
+ vf_rep->dst->u.port_info.lower_dev = bp->dev;
+
+ bnxt_vf_rep_netdev_init(bp, vf_rep, dev);
+ rc = register_netdev(dev);
+ if (rc) {
+ /* no need for unregister_netdev in cleanup */
+ dev->netdev_ops = NULL;
+ goto err;
+ }
+ }
+
+ /* publish cfa_code_map only after all VF-reps have been initialized */
+ bp->cfa_code_map = cfa_code_map;
+ bp->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+ netif_keep_dst(bp->dev);
+ return 0;
+
+err:
+ netdev_info(bp->dev, "%s error=%d", __func__, rc);
+ kfree(cfa_code_map);
+ __bnxt_vf_reps_destroy(bp);
+ return rc;
+}
+
+/* Devlink related routines */
+static int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+ struct bnxt *bp = bnxt_get_bp_from_dl(devlink);
+
+ *mode = bp->eswitch_mode;
+ return 0;
+}
+
+static int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode)
+{
+ struct bnxt *bp = bnxt_get_bp_from_dl(devlink);
+ int rc = 0;
+
+ mutex_lock(&bp->sriov_lock);
+ if (bp->eswitch_mode == mode) {
+ netdev_info(bp->dev, "already in %s eswitch mode",
+ mode == DEVLINK_ESWITCH_MODE_LEGACY ?
+ "legacy" : "switchdev");
+ rc = -EINVAL;
+ goto done;
+ }
+
+ switch (mode) {
+ case DEVLINK_ESWITCH_MODE_LEGACY:
+ bnxt_vf_reps_destroy(bp);
+ break;
+
+ case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+ if (pci_num_vf(bp->pdev) == 0) {
+ netdev_info(bp->dev,
+ "Enable VFs before setting switchdev mode");
+ rc = -EPERM;
+ goto done;
+ }
+ rc = bnxt_vf_reps_create(bp);
+ break;
+
+ default:
+ rc = -EINVAL;
+ goto done;
+ }
+done:
+ mutex_unlock(&bp->sriov_lock);
+ return rc;
+}
+
+static const struct devlink_ops bnxt_dl_ops = {
+ .eswitch_mode_set = bnxt_dl_eswitch_mode_set,
+ .eswitch_mode_get = bnxt_dl_eswitch_mode_get
+};
+
+int bnxt_dl_register(struct bnxt *bp)
+{
+ struct devlink *dl;
+ int rc;
+
+ if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV))
+ return 0;
+
+ if (bp->hwrm_spec_code < 0x10800) {
+ netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n");
+ return -ENOTSUPP;
+ }
+
+ dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
+ if (!dl) {
+ netdev_warn(bp->dev, "devlink_alloc failed");
+ return -ENOMEM;
+ }
+
+ bnxt_link_bp_to_dl(bp, dl);
+ bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+ rc = devlink_register(dl, &bp->pdev->dev);
+ if (rc) {
+ bnxt_link_bp_to_dl(bp, NULL);
+ devlink_free(dl);
+ netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+void bnxt_dl_unregister(struct bnxt *bp)
+{
+ struct devlink *dl = bp->dl;
+
+ if (!dl)
+ return;
+
+ devlink_unregister(dl);
+ devlink_free(dl);
+}
+
+#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
new file mode 100644
index 0000000..7787cd24
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
@@ -0,0 +1,89 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2016-2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_VFR_H
+#define BNXT_VFR_H
+
+#ifdef CONFIG_BNXT_SRIOV
+
+#define MAX_CFA_CODE 65536
+
+/* Struct to hold housekeeping info needed by devlink interface */
+struct bnxt_dl {
+ struct bnxt *bp; /* back ptr to the controlling dev */
+};
+
+static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl)
+{
+ return ((struct bnxt_dl *)devlink_priv(dl))->bp;
+}
+
+/* To clear devlink pointer from bp, pass NULL dl */
+static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl)
+{
+ bp->dl = dl;
+
+ /* add a back pointer in dl to bp */
+ if (dl) {
+ struct bnxt_dl *bp_dl = devlink_priv(dl);
+
+ bp_dl->bp = bp;
+ }
+}
+
+int bnxt_dl_register(struct bnxt *bp);
+void bnxt_dl_unregister(struct bnxt *bp);
+void bnxt_vf_reps_destroy(struct bnxt *bp);
+void bnxt_vf_reps_close(struct bnxt *bp);
+void bnxt_vf_reps_open(struct bnxt *bp);
+void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb);
+struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code);
+
+static inline u16 bnxt_vf_rep_get_fid(struct net_device *dev)
+{
+ struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+ struct bnxt *bp = vf_rep->bp;
+
+ return bp->pf.vf[vf_rep->vf_idx].fw_fid;
+}
+
+#else
+
+static inline int bnxt_dl_register(struct bnxt *bp)
+{
+ return 0;
+}
+
+static inline void bnxt_dl_unregister(struct bnxt *bp)
+{
+}
+
+static inline void bnxt_vf_reps_close(struct bnxt *bp)
+{
+}
+
+static inline void bnxt_vf_reps_open(struct bnxt *bp)
+{
+}
+
+static inline void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb)
+{
+}
+
+static inline struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code)
+{
+ return NULL;
+}
+
+static inline u16 bnxt_vf_rep_get_fid(struct net_device *dev)
+{
+ return 0;
+}
+#endif /* CONFIG_BNXT_SRIOV */
+#endif /* BNXT_VFR_H */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 3961a68..d8f0c83 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -169,8 +169,8 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
tc = netdev_get_num_tc(dev);
if (!tc)
tc = 1;
- rc = bnxt_reserve_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
- true, tc, tx_xdp);
+ rc = bnxt_check_rings(bp, bp->tx_nr_rings_per_tc, bp->rx_nr_rings,
+ true, tc, tx_xdp);
if (rc) {
netdev_warn(dev, "Unable to reserve enough TX rings to support XDP.\n");
return rc;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index fea3f9a..9cebca8 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -72,23 +72,42 @@
#define GENET_RDMA_REG_OFF (priv->hw_params->rdma_offset + \
TOTAL_DESC * DMA_DESC_SIZE)
+static inline void bcmgenet_writel(u32 value, void __iomem *offset)
+{
+ /* MIPS chips strapped for BE will automagically configure the
+ * peripheral registers for CPU-native byte order.
+ */
+ if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ __raw_writel(value, offset);
+ else
+ writel_relaxed(value, offset);
+}
+
+static inline u32 bcmgenet_readl(void __iomem *offset)
+{
+ if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ return __raw_readl(offset);
+ else
+ return readl_relaxed(offset);
+}
+
static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv,
void __iomem *d, u32 value)
{
- __raw_writel(value, d + DMA_DESC_LENGTH_STATUS);
+ bcmgenet_writel(value, d + DMA_DESC_LENGTH_STATUS);
}
static inline u32 dmadesc_get_length_status(struct bcmgenet_priv *priv,
void __iomem *d)
{
- return __raw_readl(d + DMA_DESC_LENGTH_STATUS);
+ return bcmgenet_readl(d + DMA_DESC_LENGTH_STATUS);
}
static inline void dmadesc_set_addr(struct bcmgenet_priv *priv,
void __iomem *d,
dma_addr_t addr)
{
- __raw_writel(lower_32_bits(addr), d + DMA_DESC_ADDRESS_LO);
+ bcmgenet_writel(lower_32_bits(addr), d + DMA_DESC_ADDRESS_LO);
/* Register writes to GISB bus can take couple hundred nanoseconds
* and are done for each packet, save these expensive writes unless
@@ -96,7 +115,7 @@ static inline void dmadesc_set_addr(struct bcmgenet_priv *priv,
*/
#ifdef CONFIG_PHYS_ADDR_T_64BIT
if (priv->hw_params->flags & GENET_HAS_40BITS)
- __raw_writel(upper_32_bits(addr), d + DMA_DESC_ADDRESS_HI);
+ bcmgenet_writel(upper_32_bits(addr), d + DMA_DESC_ADDRESS_HI);
#endif
}
@@ -113,7 +132,7 @@ static inline dma_addr_t dmadesc_get_addr(struct bcmgenet_priv *priv,
{
dma_addr_t addr;
- addr = __raw_readl(d + DMA_DESC_ADDRESS_LO);
+ addr = bcmgenet_readl(d + DMA_DESC_ADDRESS_LO);
/* Register writes to GISB bus can take couple hundred nanoseconds
* and are done for each packet, save these expensive writes unless
@@ -121,7 +140,7 @@ static inline dma_addr_t dmadesc_get_addr(struct bcmgenet_priv *priv,
*/
#ifdef CONFIG_PHYS_ADDR_T_64BIT
if (priv->hw_params->flags & GENET_HAS_40BITS)
- addr |= (u64)__raw_readl(d + DMA_DESC_ADDRESS_HI) << 32;
+ addr |= (u64)bcmgenet_readl(d + DMA_DESC_ADDRESS_HI) << 32;
#endif
return addr;
}
@@ -156,8 +175,8 @@ static inline u32 bcmgenet_tbuf_ctrl_get(struct bcmgenet_priv *priv)
if (GENET_IS_V1(priv))
return bcmgenet_rbuf_readl(priv, TBUF_CTRL_V1);
else
- return __raw_readl(priv->base +
- priv->hw_params->tbuf_offset + TBUF_CTRL);
+ return bcmgenet_readl(priv->base +
+ priv->hw_params->tbuf_offset + TBUF_CTRL);
}
static inline void bcmgenet_tbuf_ctrl_set(struct bcmgenet_priv *priv, u32 val)
@@ -165,7 +184,7 @@ static inline void bcmgenet_tbuf_ctrl_set(struct bcmgenet_priv *priv, u32 val)
if (GENET_IS_V1(priv))
bcmgenet_rbuf_writel(priv, val, TBUF_CTRL_V1);
else
- __raw_writel(val, priv->base +
+ bcmgenet_writel(val, priv->base +
priv->hw_params->tbuf_offset + TBUF_CTRL);
}
@@ -174,8 +193,8 @@ static inline u32 bcmgenet_bp_mc_get(struct bcmgenet_priv *priv)
if (GENET_IS_V1(priv))
return bcmgenet_rbuf_readl(priv, TBUF_BP_MC_V1);
else
- return __raw_readl(priv->base +
- priv->hw_params->tbuf_offset + TBUF_BP_MC);
+ return bcmgenet_readl(priv->base +
+ priv->hw_params->tbuf_offset + TBUF_BP_MC);
}
static inline void bcmgenet_bp_mc_set(struct bcmgenet_priv *priv, u32 val)
@@ -183,7 +202,7 @@ static inline void bcmgenet_bp_mc_set(struct bcmgenet_priv *priv, u32 val)
if (GENET_IS_V1(priv))
bcmgenet_rbuf_writel(priv, val, TBUF_BP_MC_V1);
else
- __raw_writel(val, priv->base +
+ bcmgenet_writel(val, priv->base +
priv->hw_params->tbuf_offset + TBUF_BP_MC);
}
@@ -326,28 +345,28 @@ static inline struct bcmgenet_priv *dev_to_priv(struct device *dev)
static inline u32 bcmgenet_tdma_readl(struct bcmgenet_priv *priv,
enum dma_reg r)
{
- return __raw_readl(priv->base + GENET_TDMA_REG_OFF +
- DMA_RINGS_SIZE + bcmgenet_dma_regs[r]);
+ return bcmgenet_readl(priv->base + GENET_TDMA_REG_OFF +
+ DMA_RINGS_SIZE + bcmgenet_dma_regs[r]);
}
static inline void bcmgenet_tdma_writel(struct bcmgenet_priv *priv,
u32 val, enum dma_reg r)
{
- __raw_writel(val, priv->base + GENET_TDMA_REG_OFF +
+ bcmgenet_writel(val, priv->base + GENET_TDMA_REG_OFF +
DMA_RINGS_SIZE + bcmgenet_dma_regs[r]);
}
static inline u32 bcmgenet_rdma_readl(struct bcmgenet_priv *priv,
enum dma_reg r)
{
- return __raw_readl(priv->base + GENET_RDMA_REG_OFF +
- DMA_RINGS_SIZE + bcmgenet_dma_regs[r]);
+ return bcmgenet_readl(priv->base + GENET_RDMA_REG_OFF +
+ DMA_RINGS_SIZE + bcmgenet_dma_regs[r]);
}
static inline void bcmgenet_rdma_writel(struct bcmgenet_priv *priv,
u32 val, enum dma_reg r)
{
- __raw_writel(val, priv->base + GENET_RDMA_REG_OFF +
+ bcmgenet_writel(val, priv->base + GENET_RDMA_REG_OFF +
DMA_RINGS_SIZE + bcmgenet_dma_regs[r]);
}
@@ -418,16 +437,16 @@ static inline u32 bcmgenet_tdma_ring_readl(struct bcmgenet_priv *priv,
unsigned int ring,
enum dma_ring_reg r)
{
- return __raw_readl(priv->base + GENET_TDMA_REG_OFF +
- (DMA_RING_SIZE * ring) +
- genet_dma_ring_regs[r]);
+ return bcmgenet_readl(priv->base + GENET_TDMA_REG_OFF +
+ (DMA_RING_SIZE * ring) +
+ genet_dma_ring_regs[r]);
}
static inline void bcmgenet_tdma_ring_writel(struct bcmgenet_priv *priv,
unsigned int ring, u32 val,
enum dma_ring_reg r)
{
- __raw_writel(val, priv->base + GENET_TDMA_REG_OFF +
+ bcmgenet_writel(val, priv->base + GENET_TDMA_REG_OFF +
(DMA_RING_SIZE * ring) +
genet_dma_ring_regs[r]);
}
@@ -436,16 +455,16 @@ static inline u32 bcmgenet_rdma_ring_readl(struct bcmgenet_priv *priv,
unsigned int ring,
enum dma_ring_reg r)
{
- return __raw_readl(priv->base + GENET_RDMA_REG_OFF +
- (DMA_RING_SIZE * ring) +
- genet_dma_ring_regs[r]);
+ return bcmgenet_readl(priv->base + GENET_RDMA_REG_OFF +
+ (DMA_RING_SIZE * ring) +
+ genet_dma_ring_regs[r]);
}
static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv,
unsigned int ring, u32 val,
enum dma_ring_reg r)
{
- __raw_writel(val, priv->base + GENET_RDMA_REG_OFF +
+ bcmgenet_writel(val, priv->base + GENET_RDMA_REG_OFF +
(DMA_RING_SIZE * ring) +
genet_dma_ring_regs[r]);
}
@@ -991,12 +1010,12 @@ static void bcmgenet_eee_enable_set(struct net_device *dev, bool enable)
bcmgenet_umac_writel(priv, reg, UMAC_EEE_CTRL);
/* Enable EEE and switch to a 27Mhz clock automatically */
- reg = __raw_readl(priv->base + off);
+ reg = bcmgenet_readl(priv->base + off);
if (enable)
reg |= TBUF_EEE_EN | TBUF_PM_EN;
else
reg &= ~(TBUF_EEE_EN | TBUF_PM_EN);
- __raw_writel(reg, priv->base + off);
+ bcmgenet_writel(reg, priv->base + off);
 /* Do the same thing for RBUF */
reg = bcmgenet_rbuf_readl(priv, RBUF_ENERGY_CTRL);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 3a34fdb..4c49d0b 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -657,6 +657,7 @@ struct bcmgenet_priv {
struct clk *clk;
struct platform_device *pdev;
+ struct platform_device *mii_pdev;
/* WOL */
struct clk *clk_wol;
@@ -671,12 +672,21 @@ struct bcmgenet_priv {
static inline u32 bcmgenet_##name##_readl(struct bcmgenet_priv *priv, \
u32 off) \
{ \
- return __raw_readl(priv->base + offset + off); \
+ /* MIPS chips strapped for BE will automagically configure the \
+ * peripheral registers for CPU-native byte order. \
+ */ \
+ if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) \
+ return __raw_readl(priv->base + offset + off); \
+ else \
+ return readl_relaxed(priv->base + offset + off); \
} \
static inline void bcmgenet_##name##_writel(struct bcmgenet_priv *priv, \
u32 val, u32 off) \
{ \
- __raw_writel(val, priv->base + offset + off); \
+ if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) \
+ __raw_writel(val, priv->base + offset + off); \
+ else \
+ writel_relaxed(val, priv->base + offset + off); \
}
GENET_IO_MACRO(ext, GENET_EXT_OFF);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 30cb97b..18f5723 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -24,62 +24,10 @@
#include <linux/of_net.h>
#include <linux/of_mdio.h>
#include <linux/platform_data/bcmgenet.h>
+#include <linux/platform_data/mdio-bcm-unimac.h>
#include "bcmgenet.h"
-/* read a value from the MII */
-static int bcmgenet_mii_read(struct mii_bus *bus, int phy_id, int location)
-{
- int ret;
- struct net_device *dev = bus->priv;
- struct bcmgenet_priv *priv = netdev_priv(dev);
- u32 reg;
-
- bcmgenet_umac_writel(priv, (MDIO_RD | (phy_id << MDIO_PMD_SHIFT) |
- (location << MDIO_REG_SHIFT)), UMAC_MDIO_CMD);
- /* Start MDIO transaction*/
- reg = bcmgenet_umac_readl(priv, UMAC_MDIO_CMD);
- reg |= MDIO_START_BUSY;
- bcmgenet_umac_writel(priv, reg, UMAC_MDIO_CMD);
- wait_event_timeout(priv->wq,
- !(bcmgenet_umac_readl(priv, UMAC_MDIO_CMD)
- & MDIO_START_BUSY),
- HZ / 100);
- ret = bcmgenet_umac_readl(priv, UMAC_MDIO_CMD);
-
- /* Some broken devices are known not to release the line during
- * turn-around, e.g: Broadcom BCM53125 external switches, so check for
- * that condition here and ignore the MDIO controller read failure
- * indication.
- */
- if (!(bus->phy_ignore_ta_mask & 1 << phy_id) && (ret & MDIO_READ_FAIL))
- return -EIO;
-
- return ret & 0xffff;
-}
-
-/* write a value to the MII */
-static int bcmgenet_mii_write(struct mii_bus *bus, int phy_id,
- int location, u16 val)
-{
- struct net_device *dev = bus->priv;
- struct bcmgenet_priv *priv = netdev_priv(dev);
- u32 reg;
-
- bcmgenet_umac_writel(priv, (MDIO_WR | (phy_id << MDIO_PMD_SHIFT) |
- (location << MDIO_REG_SHIFT) | (0xffff & val)),
- UMAC_MDIO_CMD);
- reg = bcmgenet_umac_readl(priv, UMAC_MDIO_CMD);
- reg |= MDIO_START_BUSY;
- bcmgenet_umac_writel(priv, reg, UMAC_MDIO_CMD);
- wait_event_timeout(priv->wq,
- !(bcmgenet_umac_readl(priv, UMAC_MDIO_CMD) &
- MDIO_START_BUSY),
- HZ / 100);
-
- return 0;
-}
-
/* setup netdev link state when PHY link status change and
* update UMAC and RGMII block when link up
*/
@@ -393,104 +341,121 @@ int bcmgenet_mii_probe(struct net_device *dev)
return 0;
}
-/* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with
- * their internal MDIO management controller making them fail to successfully
- * be read from or written to for the first transaction. We insert a dummy
- * BMSR read here to make sure that phy_get_device() and get_phy_id() can
- * correctly read the PHY MII_PHYSID1/2 registers and successfully register a
- * PHY device for this peripheral.
- *
- * Once the PHY driver is registered, we can workaround subsequent reads from
- * there (e.g: during system-wide power management).
- *
- * bus->reset is invoked before mdiobus_scan during mdiobus_register and is
- * therefore the right location to stick that workaround. Since we do not want
- * to read from non-existing PHYs, we either use bus->phy_mask or do a manual
- * Device Tree scan to limit the search area.
- */
-static int bcmgenet_mii_bus_reset(struct mii_bus *bus)
+static struct device_node *bcmgenet_mii_of_find_mdio(struct bcmgenet_priv *priv)
{
- struct net_device *dev = bus->priv;
- struct bcmgenet_priv *priv = netdev_priv(dev);
- struct device_node *np = priv->mdio_dn;
- struct device_node *child = NULL;
- u32 read_mask = 0;
- int addr = 0;
+ struct device_node *dn = priv->pdev->dev.of_node;
+ struct device *kdev = &priv->pdev->dev;
+ char *compat;
- if (!np) {
- read_mask = 1 << priv->phy_addr;
- } else {
- for_each_available_child_of_node(np, child) {
- addr = of_mdio_parse_addr(&dev->dev, child);
- if (addr < 0)
- continue;
+ compat = kasprintf(GFP_KERNEL, "brcm,genet-mdio-v%d", priv->version);
+ if (!compat)
+ return NULL;
- read_mask |= 1 << addr;
- }
+ priv->mdio_dn = of_find_compatible_node(dn, NULL, compat);
+ kfree(compat);
+ if (!priv->mdio_dn) {
+ dev_err(kdev, "unable to find MDIO bus node\n");
+ return NULL;
}
- for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
- if (read_mask & 1 << addr) {
- dev_dbg(&dev->dev, "Workaround for PHY @ %d\n", addr);
- mdiobus_read(bus, addr, MII_BMSR);
- }
+ return priv->mdio_dn;
+}
+
+static void bcmgenet_mii_pdata_init(struct bcmgenet_priv *priv,
+ struct unimac_mdio_pdata *ppd)
+{
+ struct device *kdev = &priv->pdev->dev;
+ struct bcmgenet_platform_data *pd = kdev->platform_data;
+
+ if (pd->phy_interface != PHY_INTERFACE_MODE_MOCA && pd->mdio_enabled) {
+ /*
+ * Internal or external PHY with MDIO access
+ */
+ if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR)
+ ppd->phy_mask = 1 << pd->phy_address;
+ else
+ ppd->phy_mask = 0;
}
+}
+static int bcmgenet_mii_wait(void *wait_func_data)
+{
+ struct bcmgenet_priv *priv = wait_func_data;
+
+ wait_event_timeout(priv->wq,
+ !(bcmgenet_umac_readl(priv, UMAC_MDIO_CMD)
+ & MDIO_START_BUSY),
+ HZ / 100);
return 0;
}
-static int bcmgenet_mii_alloc(struct bcmgenet_priv *priv)
+static int bcmgenet_mii_register(struct bcmgenet_priv *priv)
{
- struct mii_bus *bus;
+ struct platform_device *pdev = priv->pdev;
+ struct bcmgenet_platform_data *pdata = pdev->dev.platform_data;
+ struct device_node *dn = pdev->dev.of_node;
+ struct unimac_mdio_pdata ppd;
+ struct platform_device *ppdev;
+ struct resource *pres, res;
+ int id, ret;
+
+ pres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ memset(&res, 0, sizeof(res));
+ memset(&ppd, 0, sizeof(ppd));
+
+ ppd.wait_func = bcmgenet_mii_wait;
+ ppd.wait_func_data = priv;
+ ppd.bus_name = "bcmgenet MII bus";
+
+ /* Unimac MDIO bus controller starts at UniMAC offset + MDIO_CMD
+ * and is two 32-bit words long, 8 bytes total.
+ */
+ res.start = pres->start + GENET_UMAC_OFF + UMAC_MDIO_CMD;
+ res.end = res.start + 8;
+ res.flags = IORESOURCE_MEM;
- if (priv->mii_bus)
- return 0;
+ if (dn)
+ id = of_alias_get_id(dn, "eth");
+ else
+ id = pdev->id;
- priv->mii_bus = mdiobus_alloc();
- if (!priv->mii_bus) {
- pr_err("failed to allocate\n");
+ ppdev = platform_device_alloc(UNIMAC_MDIO_DRV_NAME, id);
+ if (!ppdev)
return -ENOMEM;
- }
- bus = priv->mii_bus;
- bus->priv = priv->dev;
- bus->name = "bcmgenet MII bus";
- bus->parent = &priv->pdev->dev;
- bus->read = bcmgenet_mii_read;
- bus->write = bcmgenet_mii_write;
- bus->reset = bcmgenet_mii_bus_reset;
- snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d",
- priv->pdev->name, priv->pdev->id);
+ /* Retain this platform_device pointer for later cleanup */
+ priv->mii_pdev = ppdev;
+ ppdev->dev.parent = &pdev->dev;
+ ppdev->dev.of_node = bcmgenet_mii_of_find_mdio(priv);
+ if (pdata)
+ bcmgenet_mii_pdata_init(priv, &ppd);
+
+ ret = platform_device_add_resources(ppdev, &res, 1);
+ if (ret)
+ goto out;
+
+ ret = platform_device_add_data(ppdev, &ppd, sizeof(ppd));
+ if (ret)
+ goto out;
+
+ ret = platform_device_add(ppdev);
+ if (ret)
+ goto out;
return 0;
+out:
+ platform_device_put(ppdev);
+ return ret;
}
static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
{
struct device_node *dn = priv->pdev->dev.of_node;
struct device *kdev = &priv->pdev->dev;
- struct phy_device *phydev = NULL;
- char *compat;
+ struct phy_device *phydev;
int phy_mode;
int ret;
- compat = kasprintf(GFP_KERNEL, "brcm,genet-mdio-v%d", priv->version);
- if (!compat)
- return -ENOMEM;
-
- priv->mdio_dn = of_find_compatible_node(dn, NULL, compat);
- kfree(compat);
- if (!priv->mdio_dn) {
- dev_err(kdev, "unable to find MDIO bus node\n");
- return -ENODEV;
- }
-
- ret = of_mdiobus_register(priv->mii_bus, priv->mdio_dn);
- if (ret) {
- dev_err(kdev, "failed to register MDIO bus\n");
- return ret;
- }
-
/* Fetch the PHY phandle */
priv->phy_dn = of_parse_phandle(dn, "phy-handle", 0);
@@ -537,33 +502,23 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv)
{
struct device *kdev = &priv->pdev->dev;
struct bcmgenet_platform_data *pd = kdev->platform_data;
- struct mii_bus *mdio = priv->mii_bus;
+ char phy_name[MII_BUS_ID_SIZE + 3];
+ char mdio_bus_id[MII_BUS_ID_SIZE];
struct phy_device *phydev;
- int ret;
+
+ snprintf(mdio_bus_id, MII_BUS_ID_SIZE, "%s-%d",
+ UNIMAC_MDIO_DRV_NAME, priv->pdev->id);
if (pd->phy_interface != PHY_INTERFACE_MODE_MOCA && pd->mdio_enabled) {
+ snprintf(phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT,
+ mdio_bus_id, pd->phy_address);
+
/*
* Internal or external PHY with MDIO access
*/
- if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR)
- mdio->phy_mask = ~(1 << pd->phy_address);
- else
- mdio->phy_mask = 0;
-
- ret = mdiobus_register(mdio);
- if (ret) {
- dev_err(kdev, "failed to register MDIO bus\n");
- return ret;
- }
-
- if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR)
- phydev = mdiobus_get_phy(mdio, pd->phy_address);
- else
- phydev = phy_find_first(mdio);
-
+ phydev = phy_attach(priv->dev, phy_name, pd->phy_interface);
if (!phydev) {
dev_err(kdev, "failed to register PHY device\n");
- mdiobus_unregister(mdio);
return -ENODEV;
}
} else {
@@ -609,10 +564,9 @@ static int bcmgenet_mii_bus_init(struct bcmgenet_priv *priv)
int bcmgenet_mii_init(struct net_device *dev)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
- struct device_node *dn = priv->pdev->dev.of_node;
int ret;
- ret = bcmgenet_mii_alloc(priv);
+ ret = bcmgenet_mii_register(priv);
if (ret)
return ret;
@@ -623,11 +577,7 @@ int bcmgenet_mii_init(struct net_device *dev)
return 0;
out:
- if (of_phy_is_fixed_link(dn))
- of_phy_deregister_fixed_link(dn);
- of_node_put(priv->phy_dn);
- mdiobus_unregister(priv->mii_bus);
- mdiobus_free(priv->mii_bus);
+ bcmgenet_mii_exit(dev);
return ret;
}
@@ -639,6 +589,6 @@ void bcmgenet_mii_exit(struct net_device *dev)
if (of_phy_is_fixed_link(dn))
of_phy_deregister_fixed_link(dn);
of_node_put(priv->phy_dn);
- mdiobus_unregister(priv->mii_bus);
- mdiobus_free(priv->mii_bus);
+ platform_device_unregister(priv->mii_pdev);
+ platform_device_put(priv->mii_pdev);
}
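The bcmmii.c rewrite above drops the driver-private MDIO read/write implementation in
favor of the shared UniMAC MDIO platform driver: bcmgenet_mii_register() allocates a
child platform device, hands it a slice of the parent's register window as a resource
plus platform data, and lets the child driver perform the actual bus accesses. A
condensed sketch of that hand-off pattern, with a hypothetical driver name and the
pdata treated as an opaque blob:

	#include <linux/platform_device.h>

	/* "example-mdio" and the pdata blob are placeholders. */
	static int example_spawn_mdio_child(struct platform_device *parent,
					    const void *pdata, size_t pdata_len,
					    resource_size_t offset,
					    resource_size_t size)
	{
		struct platform_device *child;
		struct resource *pres, res;
		int ret;

		pres = platform_get_resource(parent, IORESOURCE_MEM, 0);
		if (!pres)
			return -ENODEV;

		/* carve the child's registers out of the parent's window */
		memset(&res, 0, sizeof(res));
		res.start = pres->start + offset;
		res.end = res.start + size - 1;
		res.flags = IORESOURCE_MEM;

		child = platform_device_alloc("example-mdio", parent->id);
		if (!child)
			return -ENOMEM;
		child->dev.parent = &parent->dev;

		ret = platform_device_add_resources(child, &res, 1);
		if (!ret)
			ret = platform_device_add_data(child, pdata, pdata_len);
		if (!ret)
			ret = platform_device_add(child);
		if (ret)
			platform_device_put(child);
		return ret;
	}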
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 16a0f19..ecdef42 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -1367,15 +1367,11 @@ static int sbmac_initctx(struct sbmac_softc *s)
static void sbdma_uninitctx(struct sbmacdma *d)
{
- if (d->sbdma_dscrtable_unaligned) {
- kfree(d->sbdma_dscrtable_unaligned);
- d->sbdma_dscrtable_unaligned = d->sbdma_dscrtable = NULL;
- }
+ kfree(d->sbdma_dscrtable_unaligned);
+ d->sbdma_dscrtable_unaligned = d->sbdma_dscrtable = NULL;
- if (d->sbdma_ctxtable) {
- kfree(d->sbdma_ctxtable);
- d->sbdma_ctxtable = NULL;
- }
+ kfree(d->sbdma_ctxtable);
+ d->sbdma_ctxtable = NULL;
}
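The sbdma_uninitctx() simplification above leans on the fact that kfree(NULL) is a
no-op, so the removed NULL checks were redundant:

	kfree(p);	/* safe even when p is NULL */
	p = NULL;	/* keeps a later cleanup call from double-freeing */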
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index d600c41..af33dc1 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6587,7 +6587,7 @@ static void tg3_tx(struct tg3_napi *tnapi)
pkts_compl++;
bytes_compl += skb->len;
- dev_kfree_skb_any(skb);
+ dev_consume_skb_any(skb);
if (unlikely(tx_bug)) {
tg3_tx_recover(tp);
@@ -7829,7 +7829,7 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi,
}
}
- dev_kfree_skb_any(skb);
+ dev_consume_skb_any(skb);
*pskb = new_skb;
return ret;
}
@@ -7882,7 +7882,7 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
} while (segs);
tg3_tso_bug_end:
- dev_kfree_skb_any(skb);
+ dev_consume_skb_any(skb);
return NETDEV_TX_OK;
}
@@ -8543,7 +8543,7 @@ static void tg3_free_rings(struct tg3 *tp)
tg3_tx_skb_unmap(tnapi, i,
skb_shinfo(skb)->nr_frags - 1);
- dev_kfree_skb_any(skb);
+ dev_consume_skb_any(skb);
}
netdev_tx_reset_queue(netdev_get_tx_queue(tp->dev, j));
}
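The tg3 hunks swap dev_kfree_skb_any() for dev_consume_skb_any() on paths that retire
a successfully transmitted skb. Both variants free the buffer from any context (hard
IRQ, softirq, or process); the difference is diagnostic only, since consume_skb is not
reported as a drop to drop-monitoring tools, while kfree_skb is. The resulting
convention:

	/* TX completion: the packet was sent, not dropped */
	dev_consume_skb_any(skb);

	/* error path: the packet really was discarded */
	dev_kfree_skb_any(skb);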
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 26d2574..6df2cad 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -68,7 +68,7 @@
#define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1)))
#define GEM_MTU_MIN_SIZE ETH_MIN_MTU
-#define MACB_NETIF_LSO (NETIF_F_TSO | NETIF_F_UFO)
+#define MACB_NETIF_LSO NETIF_F_TSO
#define MACB_WOL_HAS_MAGIC_PACKET (0x1 << 0)
#define MACB_WOL_ENABLED (0x1 << 1)
diff --git a/drivers/net/ethernet/cadence/macb_pci.c b/drivers/net/ethernet/cadence/macb_pci.c
index 9906fda..248a8fc 100644
--- a/drivers/net/ethernet/cadence/macb_pci.c
+++ b/drivers/net/ethernet/cadence/macb_pci.c
@@ -128,7 +128,7 @@ static void macb_remove(struct pci_dev *pdev)
clk_unregister(plat_data->hclk);
}
-static struct pci_device_id dev_id_table[] = {
+static const struct pci_device_id dev_id_table[] = {
{ PCI_DEVICE(CDNS_VENDOR_ID, CDNS_DEVICE_ID), },
{ 0, }
};
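Constifying dev_id_table is safe because the PCI core only reads device ID tables;
making them const moves them into rodata. The conventional pairing with
MODULE_DEVICE_TABLE(), using placeholder IDs:

	static const struct pci_device_id example_ids[] = {
		{ PCI_DEVICE(0x1234, 0x5678), },	/* placeholder IDs */
		{ 0, }
	};
	MODULE_DEVICE_TABLE(pci, example_ids);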
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
index 67cca08..2220c77 100755
--- a/drivers/net/ethernet/cadence/macb_ptp.c
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -192,7 +192,7 @@ static int gem_ptp_enable(struct ptp_clock_info *ptp,
return -EOPNOTSUPP;
}
-static struct ptp_clock_info gem_ptp_caps_template = {
+static const struct ptp_clock_info gem_ptp_caps_template = {
.owner = THIS_MODULE,
.name = GEM_PTP_TIMER_NAME,
.max_adj = 0,
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
index 4b0ca9f..e8b2904 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
@@ -1150,14 +1150,50 @@ static void cn23xx_get_pcie_qlmport(struct octeon_device *oct)
oct->pcie_port);
}
-static void cn23xx_get_pf_num(struct octeon_device *oct)
+static int cn23xx_get_pf_num(struct octeon_device *oct)
{
u32 fdl_bit = 0;
+ u64 pkt0_in_ctl, d64;
+ int pfnum, mac, trs, ret;
+
+ ret = 0;
/** Read Function Dependency Link reg to get the function number */
- pci_read_config_dword(oct->pci_dev, CN23XX_PCIE_SRIOV_FDL, &fdl_bit);
- oct->pf_num = ((fdl_bit >> CN23XX_PCIE_SRIOV_FDL_BIT_POS) &
- CN23XX_PCIE_SRIOV_FDL_MASK);
+ if (pci_read_config_dword(oct->pci_dev, CN23XX_PCIE_SRIOV_FDL,
+ &fdl_bit) == 0) {
+ oct->pf_num = ((fdl_bit >> CN23XX_PCIE_SRIOV_FDL_BIT_POS) &
+ CN23XX_PCIE_SRIOV_FDL_MASK);
+ } else {
+ ret = -EINVAL;
+
+ /* Under some virtual environments, extended PCI regs are
+ * inaccessible, in which case the above read will have failed.
+ * In this case, read the PF number from the
+ * SLI_PKT0_INPUT_CONTROL reg (written by f/w)
+ */
+ pkt0_in_ctl = octeon_read_csr64(oct,
+ CN23XX_SLI_IQ_PKT_CONTROL64(0));
+ pfnum = (pkt0_in_ctl >> CN23XX_PKT_INPUT_CTL_PF_NUM_POS) &
+ CN23XX_PKT_INPUT_CTL_PF_NUM_MASK;
+ mac = (octeon_read_csr(oct, CN23XX_SLI_MAC_NUMBER)) & 0xff;
+
+ /* validate PF num by reading RINFO; f/w writes RINFO.trs == 1 */
+ d64 = octeon_read_csr64(oct,
+ CN23XX_SLI_PKT_MAC_RINFO64(mac, pfnum));
+ trs = (int)(d64 >> CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS) & 0xff;
+ if (trs == 1) {
+ dev_err(&oct->pci_dev->dev,
+ "OCTEON: error reading PCI cfg space pfnum, re-read %u\n",
+ pfnum);
+ oct->pf_num = pfnum;
+ ret = 0;
+ } else {
+ dev_err(&oct->pci_dev->dev,
+ "OCTEON: error reading PCI cfg space pfnum; could not ascertain PF number\n");
+ }
+ }
+
+ return ret;
}
static void cn23xx_setup_reg_address(struct octeon_device *oct)
@@ -1269,6 +1305,26 @@ static int cn23xx_sriov_config(struct octeon_device *oct)
int setup_cn23xx_octeon_pf_device(struct octeon_device *oct)
{
+ u32 data32;
+ u64 BAR0, BAR1;
+
+ pci_read_config_dword(oct->pci_dev, PCI_BASE_ADDRESS_0, &data32);
+ BAR0 = (u64)(data32 & ~0xf);
+ pci_read_config_dword(oct->pci_dev, PCI_BASE_ADDRESS_1, &data32);
+ BAR0 |= ((u64)data32 << 32);
+ pci_read_config_dword(oct->pci_dev, PCI_BASE_ADDRESS_2, &data32);
+ BAR1 = (u64)(data32 & ~0xf);
+ pci_read_config_dword(oct->pci_dev, PCI_BASE_ADDRESS_3, &data32);
+ BAR1 |= ((u64)data32 << 32);
+
+ if (!BAR0 || !BAR1) {
+ if (!BAR0)
+ dev_err(&oct->pci_dev->dev, "device BAR0 unassigned\n");
+ if (!BAR1)
+ dev_err(&oct->pci_dev->dev, "device BAR1 unassigned\n");
+ return 1;
+ }
+
if (octeon_map_pci_barx(oct, 0, 0))
return 1;
@@ -1279,7 +1335,8 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct)
return 1;
}
- cn23xx_get_pf_num(oct);
+ if (cn23xx_get_pf_num(oct) != 0)
+ return 1;
if (cn23xx_sriov_config(oct)) {
octeon_unmap_pci_barx(oct, 0);
@@ -1405,8 +1462,19 @@ int cn23xx_fw_loaded(struct octeon_device *oct)
{
u64 val;
- val = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1);
- return (val >> 1) & 1ULL;
+ /* If there's more than one active PF on this NIC, then that
+ * implies that the NIC firmware is loaded and running. This check
+ * prevents a rare false negative that might occur if we only relied
+ * on checking the SCR2_BIT_FW_LOADED flag. The false negative would
+ * happen if the PF driver sees SCR2_BIT_FW_LOADED as cleared even
+ * though the firmware was already loaded but still booting and has yet
+ * to set SCR2_BIT_FW_LOADED.
+ */
+ if (atomic_read(oct->adapter_refcount) > 1)
+ return 1;
+
+ val = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH2);
+ return (val >> SCR2_BIT_FW_LOADED) & 1ULL;
}
void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
index dee6046..2aba524 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
@@ -24,8 +24,6 @@
#include "cn23xx_pf_regs.h"
-#define LIO_CMD_WAIT_TM 100
-
/* Register address and configuration for a CN23XX devices.
* If device specific changes need to be made then add a struct to include
* device specific fields as shown in the commented section
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h
index 3f98c73..2d06097 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h
@@ -36,8 +36,6 @@ struct octeon_cn23xx_vf {
#define CN23XX_MAILBOX_MSGPARAM_SIZE 6
-#define MAX_VF_IP_OP_PENDING_PKT_COUNT 100
-
void cn23xx_vf_ask_pf_to_do_flr(struct octeon_device *oct);
int cn23xx_octeon_pfvf_handshake(struct octeon_device *oct);
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index adde774..23f6b60 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -165,9 +165,6 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
/* If command is successful, change the MTU. */
netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n",
netdev->mtu, nctrl->ncmd.s.param1);
- dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n",
- netdev->name, netdev->mtu,
- nctrl->ncmd.s.param1);
netdev->mtu = nctrl->ncmd.s.param1;
queue_delayed_work(lio->link_status_wq.wq,
&lio->link_status_wq.wk.work, 0);
@@ -275,6 +272,11 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
netif_info(lio, probe, lio->netdev, "Set RX/TX flow control parameters\n");
break;
+ case OCTNET_CMD_QUEUE_COUNT_CTL:
+ netif_info(lio, probe, lio->netdev, "Queue count updated to %d\n",
+ nctrl->ncmd.s.param1);
+ break;
+
default:
dev_err(&oct->pci_dev->dev, "%s Unknown cmd %d\n", __func__,
nctrl->ncmd.s.cmd);
@@ -364,3 +366,723 @@ void cleanup_rx_oom_poll_fn(struct net_device *netdev)
destroy_workqueue(lio->rxq_status_wq.wq);
}
}
+
+/* Runs in interrupt context. */
+static void lio_update_txq_status(struct octeon_device *oct, int iq_num)
+{
+ struct octeon_instr_queue *iq = oct->instr_queue[iq_num];
+ struct net_device *netdev;
+ struct lio *lio;
+
+ netdev = oct->props[iq->ifidx].netdev;
+
+ /* This is needed because the first IQ does not have
+ * a netdev associated with it.
+ */
+ if (!netdev)
+ return;
+
+ lio = GET_LIO(netdev);
+ if (netif_is_multiqueue(netdev)) {
+ if (__netif_subqueue_stopped(netdev, iq->q_index) &&
+ lio->linfo.link.s.link_up &&
+ (!octnet_iq_is_full(oct, iq_num))) {
+ netif_wake_subqueue(netdev, iq->q_index);
+ INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
+ tx_restart, 1);
+ }
+ } else if (netif_queue_stopped(netdev) &&
+ lio->linfo.link.s.link_up &&
+ (!octnet_iq_is_full(oct, lio->txq))) {
+ INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
+ tx_restart, 1);
+ netif_wake_queue(netdev);
+ }
+}
+
+/**
+ * \brief Setup output queue
+ * @param oct octeon device
+ * @param q_no which queue
+ * @param num_descs how many descriptors
+ * @param desc_size size of each descriptor
+ * @param app_ctx application context
+ */
+static int octeon_setup_droq(struct octeon_device *oct, int q_no, int num_descs,
+ int desc_size, void *app_ctx)
+{
+ int ret_val;
+
+ dev_dbg(&oct->pci_dev->dev, "Creating Droq: %d\n", q_no);
+ /* droq creation and local register settings. */
+ ret_val = octeon_create_droq(oct, q_no, num_descs, desc_size, app_ctx);
+ if (ret_val < 0)
+ return ret_val;
+
+ if (ret_val == 1) {
+ dev_dbg(&oct->pci_dev->dev, "Using default droq %d\n", q_no);
+ return 0;
+ }
+
+ /* Enable the droq queues */
+ octeon_set_droq_pkt_op(oct, q_no, 1);
+
+ /* Send Credit for Octeon Output queues. Credits are always
+ * sent after the output queue is enabled.
+ */
+ writel(oct->droq[q_no]->max_count, oct->droq[q_no]->pkts_credit_reg);
+
+ return ret_val;
+}
+
+/** Routine to push packets arriving on Octeon interface up to the network layer.
+ * @param oct_id - octeon device id.
+ * @param skbuff - skbuff struct to be passed to network layer.
+ * @param len - size of total data received.
+ * @param rh - Control header associated with the packet
+ * @param param - additional control data with the packet
+ * @param arg - farg registered in droq_ops
+ */
+static void
+liquidio_push_packet(u32 octeon_id __attribute__((unused)),
+ void *skbuff,
+ u32 len,
+ union octeon_rh *rh,
+ void *param,
+ void *arg)
+{
+ struct net_device *netdev = (struct net_device *)arg;
+ struct octeon_droq *droq =
+ container_of(param, struct octeon_droq, napi);
+ struct sk_buff *skb = (struct sk_buff *)skbuff;
+ struct skb_shared_hwtstamps *shhwtstamps;
+ struct napi_struct *napi = param;
+ u16 vtag = 0;
+ u32 r_dh_off;
+ u64 ns;
+
+ if (netdev) {
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ int packet_was_received;
+
+ /* Do not proceed if the interface is not in RUNNING state. */
+ if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) {
+ recv_buffer_free(skb);
+ droq->stats.rx_dropped++;
+ return;
+ }
+
+ skb->dev = netdev;
+
+ skb_record_rx_queue(skb, droq->q_no);
+ if (likely(len > MIN_SKB_SIZE)) {
+ struct octeon_skb_page_info *pg_info;
+ unsigned char *va;
+
+ pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ if (pg_info->page) {
+ /* For Paged allocation use the frags */
+ va = page_address(pg_info->page) +
+ pg_info->page_offset;
+ memcpy(skb->data, va, MIN_SKB_SIZE);
+ skb_put(skb, MIN_SKB_SIZE);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ pg_info->page,
+ pg_info->page_offset +
+ MIN_SKB_SIZE,
+ len - MIN_SKB_SIZE,
+ LIO_RXBUFFER_SZ);
+ }
+ } else {
+ struct octeon_skb_page_info *pg_info =
+ ((struct octeon_skb_page_info *)(skb->cb));
+ skb_copy_to_linear_data(skb, page_address(pg_info->page)
+ + pg_info->page_offset, len);
+ skb_put(skb, len);
+ put_page(pg_info->page);
+ }
+
+ r_dh_off = (rh->r_dh.len - 1) * BYTES_PER_DHLEN_UNIT;
+
+ if (oct->ptp_enable) {
+ if (rh->r_dh.has_hwtstamp) {
+ /* timestamp is included from the hardware at
+ * the beginning of the packet.
+ */
+ if (ifstate_check
+ (lio,
+ LIO_IFSTATE_RX_TIMESTAMP_ENABLED)) {
+ /* Nanoseconds are in the first 64-bits
+ * of the packet.
+ */
+ memcpy(&ns, (skb->data + r_dh_off),
+ sizeof(ns));
+ r_dh_off -= BYTES_PER_DHLEN_UNIT;
+ shhwtstamps = skb_hwtstamps(skb);
+ shhwtstamps->hwtstamp =
+ ns_to_ktime(ns +
+ lio->ptp_adjust);
+ }
+ }
+ }
+
+ if (rh->r_dh.has_hash) {
+ __be32 *hash_be = (__be32 *)(skb->data + r_dh_off);
+ u32 hash = be32_to_cpu(*hash_be);
+
+ skb_set_hash(skb, hash, PKT_HASH_TYPE_L4);
+ r_dh_off -= BYTES_PER_DHLEN_UNIT;
+ }
+
+ skb_pull(skb, rh->r_dh.len * BYTES_PER_DHLEN_UNIT);
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ if ((netdev->features & NETIF_F_RXCSUM) &&
+ (((rh->r_dh.encap_on) &&
+ (rh->r_dh.csum_verified & CNNIC_TUN_CSUM_VERIFIED)) ||
+ (!(rh->r_dh.encap_on) &&
+ (rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED))))
+ /* checksum has already been verified */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ else
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* Setting Encapsulation field on basis of status received
+ * from the firmware
+ */
+ if (rh->r_dh.encap_on) {
+ skb->encapsulation = 1;
+ skb->csum_level = 1;
+ droq->stats.rx_vxlan++;
+ }
+
+ /* inbound VLAN tag */
+ if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ rh->r_dh.vlan) {
+ u16 priority = rh->r_dh.priority;
+ u16 vid = rh->r_dh.vlan;
+
+ vtag = (priority << VLAN_PRIO_SHIFT) | vid;
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag);
+ }
+
+ packet_was_received = (napi_gro_receive(napi, skb) != GRO_DROP);
+
+ if (packet_was_received) {
+ droq->stats.rx_bytes_received += len;
+ droq->stats.rx_pkts_received++;
+ } else {
+ droq->stats.rx_dropped++;
+ netif_info(lio, rx_err, lio->netdev,
+ "droq:%d error rx_dropped:%llu\n",
+ droq->q_no, droq->stats.rx_dropped);
+ }
+
+ } else {
+ recv_buffer_free(skb);
+ }
+}
+
+/**
+ * \brief wrapper for calling napi_schedule
+ * @param param parameters to pass to napi_schedule
+ *
+ * Used when scheduling on different CPUs
+ */
+static void napi_schedule_wrapper(void *param)
+{
+ struct napi_struct *napi = param;
+
+ napi_schedule(napi);
+}
+
+/**
+ * \brief callback when receive interrupt occurs and we are in NAPI mode
+ * @param arg pointer to octeon output queue
+ */
+static void liquidio_napi_drv_callback(void *arg)
+{
+ struct octeon_device *oct;
+ struct octeon_droq *droq = arg;
+ int this_cpu = smp_processor_id();
+
+ oct = droq->oct_dev;
+
+ if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct) ||
+ droq->cpu_id == this_cpu) {
+ napi_schedule_irqoff(&droq->napi);
+ } else {
+ call_single_data_t *csd = &droq->csd;
+
+ csd->func = napi_schedule_wrapper;
+ csd->info = &droq->napi;
+ csd->flags = 0;
+
+ smp_call_function_single_async(droq->cpu_id, csd);
+ }
+}
+
+/**
+ * \brief Entry point for NAPI polling
+ * @param napi NAPI structure
+ * @param budget maximum number of items to process
+ */
+static int liquidio_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct octeon_instr_queue *iq;
+ struct octeon_device *oct;
+ struct octeon_droq *droq;
+ int tx_done = 0, iq_no;
+ int work_done;
+
+ droq = container_of(napi, struct octeon_droq, napi);
+ oct = droq->oct_dev;
+ iq_no = droq->q_no;
+
+ /* Handle Droq descriptors */
+ work_done = octeon_process_droq_poll_cmd(oct, droq->q_no,
+ POLL_EVENT_PROCESS_PKTS,
+ budget);
+
+ /* Flush the instruction queue */
+ iq = oct->instr_queue[iq_no];
+ if (iq) {
+ /* TODO: move this check to inside octeon_flush_iq,
+ * once check_db_timeout is removed
+ */
+ if (atomic_read(&iq->instr_pending))
+ /* Process iq buffers within the budget limits */
+ tx_done = octeon_flush_iq(oct, iq, budget);
+ else
+ tx_done = 1;
+ /* Update iq read-index rather than waiting for next interrupt.
+ * Return if tx_done is false.
+ */
+ /* sub-queue status update */
+ lio_update_txq_status(oct, iq_no);
+ } else {
+ dev_err(&oct->pci_dev->dev, "%s: iq (%d) num invalid\n",
+ __func__, iq_no);
+ }
+
+#define MAX_REG_CNT 2000000U
+ /* force enable interrupt if reg cnts are high to avoid wraparound */
+ if ((work_done < budget && tx_done) ||
+ (iq && iq->pkt_in_done >= MAX_REG_CNT) ||
+ (droq->pkt_count >= MAX_REG_CNT)) {
+ tx_done = 1;
+ napi_complete_done(napi, work_done);
+
+ octeon_process_droq_poll_cmd(droq->oct_dev, droq->q_no,
+ POLL_EVENT_ENABLE_INTR, 0);
+ return 0;
+ }
+
+ return (!tx_done) ? (budget) : (work_done);
+}
+
+/**
+ * \brief Setup input and output queues
+ * @param octeon_dev octeon device
+ * @param ifidx Interface index
+ *
+ * Note: Queues are with respect to the octeon device. Thus
+ * an input queue is for egress packets, and output queues
+ * are for ingress packets.
+ */
+int liquidio_setup_io_queues(struct octeon_device *octeon_dev, int ifidx,
+ u32 num_iqs, u32 num_oqs)
+{
+ struct octeon_droq_ops droq_ops;
+ struct net_device *netdev;
+ struct octeon_droq *droq;
+ struct napi_struct *napi;
+ int cpu_id_modulus;
+ int num_tx_descs;
+ struct lio *lio;
+ int retval = 0;
+ int q, q_no;
+ int cpu_id;
+
+ netdev = octeon_dev->props[ifidx].netdev;
+
+ lio = GET_LIO(netdev);
+
+ memset(&droq_ops, 0, sizeof(struct octeon_droq_ops));
+
+ droq_ops.fptr = liquidio_push_packet;
+ droq_ops.farg = netdev;
+
+ droq_ops.poll_mode = 1;
+ droq_ops.napi_fn = liquidio_napi_drv_callback;
+ cpu_id = 0;
+ cpu_id_modulus = num_present_cpus();
+
+ /* set up DROQs. */
+ for (q = 0; q < num_oqs; q++) {
+ q_no = lio->linfo.rxpciq[q].s.q_no;
+ dev_dbg(&octeon_dev->pci_dev->dev,
+ "%s index:%d linfo.rxpciq.s.q_no:%d\n",
+ __func__, q, q_no);
+ retval = octeon_setup_droq(
+ octeon_dev, q_no,
+ CFG_GET_NUM_RX_DESCS_NIC_IF(octeon_get_conf(octeon_dev),
+ lio->ifidx),
+ CFG_GET_NUM_RX_BUF_SIZE_NIC_IF(octeon_get_conf(octeon_dev),
+ lio->ifidx),
+ NULL);
+ if (retval) {
+ dev_err(&octeon_dev->pci_dev->dev,
+ "%s : Runtime DROQ(RxQ) creation failed.\n",
+ __func__);
+ return 1;
+ }
+
+ droq = octeon_dev->droq[q_no];
+ napi = &droq->napi;
+ dev_dbg(&octeon_dev->pci_dev->dev, "netif_napi_add netdev:%llx oct:%llx\n",
+ (u64)netdev, (u64)octeon_dev);
+ netif_napi_add(netdev, napi, liquidio_napi_poll, 64);
+
+ /* designate a CPU for this droq */
+ droq->cpu_id = cpu_id;
+ cpu_id++;
+ if (cpu_id >= cpu_id_modulus)
+ cpu_id = 0;
+
+ octeon_register_droq_ops(octeon_dev, q_no, &droq_ops);
+ }
+
+ if (OCTEON_CN23XX_PF(octeon_dev) || OCTEON_CN23XX_VF(octeon_dev)) {
+ /* 23XX PF/VF can send/recv control messages (via the first
+ * PF/VF-owned droq) from the firmware even if the ethX
+ * interface is down, so that's why poll_mode must be off
+ * for the first droq.
+ */
+ octeon_dev->droq[0]->ops.poll_mode = 0;
+ }
+
+ /* set up IQs. */
+ for (q = 0; q < num_iqs; q++) {
+ num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(
+ octeon_get_conf(octeon_dev), lio->ifidx);
+ retval = octeon_setup_iq(octeon_dev, ifidx, q,
+ lio->linfo.txpciq[q], num_tx_descs,
+ netdev_get_tx_queue(netdev, q));
+ if (retval) {
+ dev_err(&octeon_dev->pci_dev->dev,
+ " %s : Runtime IQ(TxQ) creation failed.\n",
+ __func__);
+ return 1;
+ }
+
+ /* XPS */
+ if (!OCTEON_CN23XX_VF(octeon_dev) && octeon_dev->msix_on &&
+ octeon_dev->ioq_vector) {
+ struct octeon_ioq_vector *ioq_vector;
+
+ ioq_vector = &octeon_dev->ioq_vector[q];
+ netif_set_xps_queue(netdev,
+ &ioq_vector->affinity_mask,
+ ioq_vector->iq_index);
+ }
+ }
+
+ return 0;
+}
+
+static
+int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
+{
+ struct octeon_device *oct = droq->oct_dev;
+ struct octeon_device_priv *oct_priv =
+ (struct octeon_device_priv *)oct->priv;
+
+ if (droq->ops.poll_mode) {
+ droq->ops.napi_fn(droq);
+ } else {
+ if (ret & MSIX_PO_INT) {
+ if (OCTEON_CN23XX_VF(oct))
+ dev_err(&oct->pci_dev->dev,
+ "should not come here should not get rx when poll mode = 0 for vf\n");
+ tasklet_schedule(&oct_priv->droq_tasklet);
+ return 1;
+ }
+ /* this will be flushed periodically by check iq db */
+ if (ret & MSIX_PI_INT)
+ return 0;
+ }
+
+ return 0;
+}
+
+irqreturn_t
+liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev)
+{
+ struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev;
+ struct octeon_device *oct = ioq_vector->oct_dev;
+ struct octeon_droq *droq = oct->droq[ioq_vector->droq_index];
+ u64 ret;
+
+ ret = oct->fn_list.msix_interrupt_handler(ioq_vector);
+
+ if (ret & MSIX_PO_INT || ret & MSIX_PI_INT)
+ liquidio_schedule_msix_droq_pkt_handler(droq, ret);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * \brief Droq packet processor scheduler
+ * @param oct octeon device
+ */
+static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct)
+{
+ struct octeon_device_priv *oct_priv =
+ (struct octeon_device_priv *)oct->priv;
+ struct octeon_droq *droq;
+ u64 oq_no;
+
+ if (oct->int_status & OCT_DEV_INTR_PKT_DATA) {
+ for (oq_no = 0; oq_no < MAX_OCTEON_OUTPUT_QUEUES(oct);
+ oq_no++) {
+ if (!(oct->droq_intr & BIT_ULL(oq_no)))
+ continue;
+
+ droq = oct->droq[oq_no];
+
+ if (droq->ops.poll_mode) {
+ droq->ops.napi_fn(droq);
+ oct_priv->napi_mask |= (1 << oq_no);
+ } else {
+ tasklet_schedule(&oct_priv->droq_tasklet);
+ }
+ }
+ }
+}
+
+/**
+ * \brief Interrupt handler for octeon
+ * @param irq unused
+ * @param dev octeon device
+ */
+static
+irqreturn_t liquidio_legacy_intr_handler(int irq __attribute__((unused)),
+ void *dev)
+{
+ struct octeon_device *oct = (struct octeon_device *)dev;
+ irqreturn_t ret;
+
+ /* Disable our interrupts for the duration of ISR */
+ oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
+
+ ret = oct->fn_list.process_interrupt_regs(oct);
+
+ if (ret == IRQ_HANDLED)
+ liquidio_schedule_droq_pkt_handlers(oct);
+
+ /* Re-enable our interrupts */
+ if (!(atomic_read(&oct->status) == OCT_DEV_IN_RESET))
+ oct->fn_list.enable_interrupt(oct, OCTEON_ALL_INTR);
+
+ return ret;
+}
+
+/**
+ * \brief Setup interrupt for octeon device
+ * @param oct octeon device
+ *
+ * Enable interrupt in Octeon device as given in the PCI interrupt mask.
+ */
+int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs)
+{
+ struct msix_entry *msix_entries;
+ char *queue_irq_names = NULL;
+ int i, num_interrupts = 0;
+ int num_alloc_ioq_vectors;
+ char *aux_irq_name = NULL;
+ int num_ioq_vectors;
+ int irqret, err;
+
+ oct->num_msix_irqs = num_ioqs;
+ if (oct->msix_on) {
+ if (OCTEON_CN23XX_PF(oct)) {
+ num_interrupts = MAX_IOQ_INTERRUPTS_PER_PF + 1;
+
+ /* one non ioq interrupt for handling
+ * sli_mac_pf_int_sum
+ */
+ oct->num_msix_irqs += 1;
+ } else if (OCTEON_CN23XX_VF(oct)) {
+ num_interrupts = MAX_IOQ_INTERRUPTS_PER_VF;
+ }
+
+ /* allocate storage for the names assigned to each irq */
+ oct->irq_name_storage =
+ kcalloc(num_interrupts, INTRNAMSIZ, GFP_KERNEL);
+ if (!oct->irq_name_storage) {
+ dev_err(&oct->pci_dev->dev, "Irq name storage alloc failed...\n");
+ return -ENOMEM;
+ }
+
+ queue_irq_names = oct->irq_name_storage;
+
+ if (OCTEON_CN23XX_PF(oct))
+ aux_irq_name = &queue_irq_names
+ [IRQ_NAME_OFF(MAX_IOQ_INTERRUPTS_PER_PF)];
+
+ oct->msix_entries = kcalloc(oct->num_msix_irqs,
+ sizeof(struct msix_entry),
+ GFP_KERNEL);
+ if (!oct->msix_entries) {
+ dev_err(&oct->pci_dev->dev, "Memory Alloc failed...\n");
+ kfree(oct->irq_name_storage);
+ oct->irq_name_storage = NULL;
+ return -ENOMEM;
+ }
+
+ msix_entries = (struct msix_entry *)oct->msix_entries;
+
+ /* Assumption is that PF MSI-X vectors start at pf_srn and run
+ * through trs, not from 0. If that changes, update this code.
+ */
+ if (OCTEON_CN23XX_PF(oct)) {
+ for (i = 0; i < oct->num_msix_irqs - 1; i++)
+ msix_entries[i].entry =
+ oct->sriov_info.pf_srn + i;
+
+ msix_entries[oct->num_msix_irqs - 1].entry =
+ oct->sriov_info.trs;
+ } else if (OCTEON_CN23XX_VF(oct)) {
+ for (i = 0; i < oct->num_msix_irqs; i++)
+ msix_entries[i].entry = i;
+ }
+ num_alloc_ioq_vectors = pci_enable_msix_range(
+ oct->pci_dev, msix_entries,
+ oct->num_msix_irqs,
+ oct->num_msix_irqs);
+ if (num_alloc_ioq_vectors < 0) {
+ dev_err(&oct->pci_dev->dev, "unable to Allocate MSI-X interrupts\n");
+ kfree(oct->msix_entries);
+ oct->msix_entries = NULL;
+ kfree(oct->irq_name_storage);
+ oct->irq_name_storage = NULL;
+ return num_alloc_ioq_vectors;
+ }
+
+ dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n");
+
+ num_ioq_vectors = oct->num_msix_irqs;
+ /** For PF, there is one non-ioq interrupt handler */
+ if (OCTEON_CN23XX_PF(oct)) {
+ num_ioq_vectors -= 1;
+
+ snprintf(aux_irq_name, INTRNAMSIZ,
+ "LiquidIO%u-pf%u-aux", oct->octeon_id,
+ oct->pf_num);
+ irqret = request_irq(
+ msix_entries[num_ioq_vectors].vector,
+ liquidio_legacy_intr_handler, 0,
+ aux_irq_name, oct);
+ if (irqret) {
+ dev_err(&oct->pci_dev->dev,
+ "Request_irq failed for MSIX interrupt Error: %d\n",
+ irqret);
+ pci_disable_msix(oct->pci_dev);
+ kfree(oct->msix_entries);
+ kfree(oct->irq_name_storage);
+ oct->irq_name_storage = NULL;
+ oct->msix_entries = NULL;
+ return irqret;
+ }
+ }
+ for (i = 0 ; i < num_ioq_vectors ; i++) {
+ if (OCTEON_CN23XX_PF(oct))
+ snprintf(&queue_irq_names[IRQ_NAME_OFF(i)],
+ INTRNAMSIZ, "LiquidIO%u-pf%u-rxtx-%u",
+ oct->octeon_id, oct->pf_num, i);
+
+ if (OCTEON_CN23XX_VF(oct))
+ snprintf(&queue_irq_names[IRQ_NAME_OFF(i)],
+ INTRNAMSIZ, "LiquidIO%u-vf%u-rxtx-%u",
+ oct->octeon_id, oct->vf_num, i);
+
+ irqret = request_irq(msix_entries[i].vector,
+ liquidio_msix_intr_handler, 0,
+ &queue_irq_names[IRQ_NAME_OFF(i)],
+ &oct->ioq_vector[i]);
+
+ if (irqret) {
+ dev_err(&oct->pci_dev->dev,
+ "Request_irq failed for MSIX interrupt Error: %d\n",
+ irqret);
+ /* Free the non-ioq irq vector here. */
+ free_irq(msix_entries[num_ioq_vectors].vector,
+ oct);
+
+ while (i) {
+ i--;
+ /** clearing affinity mask. */
+ irq_set_affinity_hint(
+ msix_entries[i].vector,
+ NULL);
+ free_irq(msix_entries[i].vector,
+ &oct->ioq_vector[i]);
+ }
+ pci_disable_msix(oct->pci_dev);
+ kfree(oct->msix_entries);
+ kfree(oct->irq_name_storage);
+ oct->irq_name_storage = NULL;
+ oct->msix_entries = NULL;
+ return irqret;
+ }
+ oct->ioq_vector[i].vector = msix_entries[i].vector;
+ /* assign the cpu mask for this msix interrupt vector */
+ irq_set_affinity_hint(msix_entries[i].vector,
+ &oct->ioq_vector[i].affinity_mask
+ );
+ }
+ dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: MSI-X enabled\n",
+ oct->octeon_id);
+ } else {
+ err = pci_enable_msi(oct->pci_dev);
+ if (err)
+ dev_warn(&oct->pci_dev->dev, "Reverting to legacy interrupts. Error: %d\n",
+ err);
+ else
+ oct->flags |= LIO_FLAG_MSI_ENABLED;
+
+ /* allocate storage for the names assigned to the irq */
+ oct->irq_name_storage = kcalloc(1, INTRNAMSIZ, GFP_KERNEL);
+ if (!oct->irq_name_storage)
+ return -ENOMEM;
+
+ queue_irq_names = oct->irq_name_storage;
+
+ if (OCTEON_CN23XX_PF(oct))
+ snprintf(&queue_irq_names[IRQ_NAME_OFF(0)], INTRNAMSIZ,
+ "LiquidIO%u-pf%u-rxtx-%u",
+ oct->octeon_id, oct->pf_num, 0);
+
+ if (OCTEON_CN23XX_VF(oct))
+ snprintf(&queue_irq_names[IRQ_NAME_OFF(0)], INTRNAMSIZ,
+ "LiquidIO%u-vf%u-rxtx-%u",
+ oct->octeon_id, oct->vf_num, 0);
+
+ irqret = request_irq(oct->pci_dev->irq,
+ liquidio_legacy_intr_handler,
+ IRQF_SHARED,
+ &queue_irq_names[IRQ_NAME_OFF(0)], oct);
+ if (irqret) {
+ if (oct->flags & LIO_FLAG_MSI_ENABLED)
+ pci_disable_msi(oct->pci_dev);
+ dev_err(&oct->pci_dev->dev, "Request IRQ failed with code: %d\n",
+ irqret);
+ kfree(oct->irq_name_storage);
+ oct->irq_name_storage = NULL;
+ return irqret;
+ }
+ }
+ return 0;
+}
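octeon_setup_interrupt() above follows the usual MSI-X bring-up sequence: populate the
msix_entry slots, allocate an exact vector count with pci_enable_msix_range() (min ==
max), then request_irq() each vector with a name taken from long-lived storage (which
is why the driver keeps oct->irq_name_storage around), and attach an affinity hint. A
stripped-down sketch of that sequence under hypothetical names; affinity hints are
omitted:

	#include <linux/interrupt.h>
	#include <linux/pci.h>

	#define EXAMPLE_NAMESZ	32	/* per-vector name length, illustrative */

	static int example_setup_msix(struct pci_dev *pdev,
				      struct msix_entry *entries,
				      char *names, int nvec,
				      irq_handler_t handler, void *arg)
	{
		int i, ret;

		for (i = 0; i < nvec; i++)
			entries[i].entry = i;

		/* min == max makes the allocation all-or-nothing */
		ret = pci_enable_msix_range(pdev, entries, nvec, nvec);
		if (ret < 0)
			return ret;

		for (i = 0; i < nvec; i++) {
			/* 'names' must stay allocated while the IRQ lives */
			snprintf(&names[i * EXAMPLE_NAMESZ], EXAMPLE_NAMESZ,
				 "example-rxtx-%d", i);
			ret = request_irq(entries[i].vector, handler, 0,
					  &names[i * EXAMPLE_NAMESZ], arg);
			if (ret)
				goto err;
		}
		return 0;

	err:
		while (i--)
			free_irq(entries[i].vector, arg);
		pci_disable_msix(pdev);
		return ret;
	}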
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index ebd353b..a63ddf0 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -31,6 +31,7 @@
#include "cn23xx_pf_device.h"
#include "cn23xx_vf_device.h"
+static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs);
static int octnet_get_link_stats(struct net_device *netdev);
struct oct_intrmod_context {
@@ -105,6 +106,7 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
"tx_total_sent",
"tx_total_fwd",
"tx_err_pko",
+ "tx_err_pki",
"tx_err_link",
"tx_err_drop",
@@ -299,6 +301,35 @@ lio_get_vf_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
strncpy(drvinfo->bus_info, pci_name(oct->pci_dev), 32);
}
+static int
+lio_send_queue_count_update(struct net_device *netdev, uint32_t num_queues)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ struct octnic_ctrl_pkt nctrl;
+ int ret = 0;
+
+ memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+ nctrl.ncmd.u64 = 0;
+ nctrl.ncmd.s.cmd = OCTNET_CMD_QUEUE_COUNT_CTL;
+ nctrl.ncmd.s.param1 = num_queues;
+ nctrl.ncmd.s.param2 = num_queues;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+ nctrl.wait_time = 100;
+ nctrl.netpndev = (u64)netdev;
+ nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+ if (ret < 0) {
+ dev_err(&oct->pci_dev->dev, "Failed to send Queue reset command (ret: 0x%x)\n",
+ ret);
+ return -1;
+ }
+
+ return 0;
+}
+
static void
lio_ethtool_get_channels(struct net_device *dev,
struct ethtool_channels *channel)
@@ -306,6 +337,7 @@ lio_ethtool_get_channels(struct net_device *dev,
struct lio *lio = GET_LIO(dev);
struct octeon_device *oct = lio->oct_dev;
u32 max_rx = 0, max_tx = 0, tx_count = 0, rx_count = 0;
+ u32 combined_count = 0, max_combined = 0;
if (OCTEON_CN6XXX(oct)) {
struct octeon_config *conf6x = CHIP_CONF(oct, cn6xxx);
@@ -315,22 +347,137 @@ lio_ethtool_get_channels(struct net_device *dev,
rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx);
tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx);
} else if (OCTEON_CN23XX_PF(oct)) {
-
- max_rx = oct->sriov_info.num_pf_rings;
- max_tx = oct->sriov_info.num_pf_rings;
- rx_count = lio->linfo.num_rxpciq;
- tx_count = lio->linfo.num_txpciq;
+ max_combined = lio->linfo.num_txpciq;
+ combined_count = oct->num_iqs;
} else if (OCTEON_CN23XX_VF(oct)) {
- max_tx = oct->sriov_info.rings_per_vf;
- max_rx = oct->sriov_info.rings_per_vf;
- rx_count = lio->linfo.num_rxpciq;
- tx_count = lio->linfo.num_txpciq;
+ u64 reg_val = 0ULL;
+ u64 ctrl = CN23XX_VF_SLI_IQ_PKT_CONTROL64(0);
+
+ reg_val = octeon_read_csr64(oct, ctrl);
+ reg_val = reg_val >> CN23XX_PKT_INPUT_CTL_RPVF_POS;
+ max_combined = reg_val & CN23XX_PKT_INPUT_CTL_RPVF_MASK;
+ combined_count = oct->num_iqs;
}
channel->max_rx = max_rx;
channel->max_tx = max_tx;
+ channel->max_combined = max_combined;
channel->rx_count = rx_count;
channel->tx_count = tx_count;
+ channel->combined_count = combined_count;
+}
+
+static int
+lio_irq_reallocate_irqs(struct octeon_device *oct, uint32_t num_ioqs)
+{
+ struct msix_entry *msix_entries;
+ int num_msix_irqs = 0;
+ int i;
+
+ if (!oct->msix_on)
+ return 0;
+
+ /* Disable the input and output queues now. No more packets will
+ * arrive from Octeon.
+ */
+ oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
+
+ if (oct->msix_on) {
+ if (OCTEON_CN23XX_PF(oct))
+ num_msix_irqs = oct->num_msix_irqs - 1;
+ else if (OCTEON_CN23XX_VF(oct))
+ num_msix_irqs = oct->num_msix_irqs;
+
+ msix_entries = (struct msix_entry *)oct->msix_entries;
+ for (i = 0; i < num_msix_irqs; i++) {
+ if (oct->ioq_vector[i].vector) {
+ /* clear the affinity_cpumask */
+ irq_set_affinity_hint(msix_entries[i].vector,
+ NULL);
+ free_irq(msix_entries[i].vector,
+ &oct->ioq_vector[i]);
+ oct->ioq_vector[i].vector = 0;
+ }
+ }
+
+ /* non-iov vector's argument is oct struct */
+ if (OCTEON_CN23XX_PF(oct))
+ free_irq(msix_entries[i].vector, oct);
+
+ pci_disable_msix(oct->pci_dev);
+ kfree(oct->msix_entries);
+ oct->msix_entries = NULL;
+ }
+
+ kfree(oct->irq_name_storage);
+ oct->irq_name_storage = NULL;
+ if (octeon_setup_interrupt(oct, num_ioqs)) {
+ dev_info(&oct->pci_dev->dev, "Setup interrupt failed\n");
+ return 1;
+ }
+
+ /* Enable Octeon device interrupts */
+ oct->fn_list.enable_interrupt(oct, OCTEON_ALL_INTR);
+
+ return 0;
+}
+
+static int
+lio_ethtool_set_channels(struct net_device *dev,
+ struct ethtool_channels *channel)
+{
+ u32 combined_count, max_combined;
+ struct lio *lio = GET_LIO(dev);
+ struct octeon_device *oct = lio->oct_dev;
+ int stopped = 0;
+
+ if (strcmp(oct->fw_info.liquidio_firmware_version, "1.6.1") < 0) {
+ dev_err(&oct->pci_dev->dev, "Minimum firmware version required is 1.6.1\n");
+ return -EINVAL;
+ }
+
+ if (!channel->combined_count || channel->other_count ||
+ channel->rx_count || channel->tx_count)
+ return -EINVAL;
+
+ combined_count = channel->combined_count;
+
+ if (OCTEON_CN23XX_PF(oct)) {
+ max_combined = channel->max_combined;
+ } else if (OCTEON_CN23XX_VF(oct)) {
+ u64 reg_val = 0ULL;
+ u64 ctrl = CN23XX_VF_SLI_IQ_PKT_CONTROL64(0);
+
+ reg_val = octeon_read_csr64(oct, ctrl);
+ reg_val = reg_val >> CN23XX_PKT_INPUT_CTL_RPVF_POS;
+ max_combined = reg_val & CN23XX_PKT_INPUT_CTL_RPVF_MASK;
+ } else {
+ return -EINVAL;
+ }
+
+ if (combined_count > max_combined || combined_count < 1)
+ return -EINVAL;
+
+ if (combined_count == oct->num_iqs)
+ return 0;
+
+ ifstate_set(lio, LIO_IFSTATE_RESETTING);
+
+ if (netif_running(dev)) {
+ dev->netdev_ops->ndo_stop(dev);
+ stopped = 1;
+ }
+
+ if (lio_reset_queues(dev, combined_count)) {
+ ifstate_reset(lio, LIO_IFSTATE_RESETTING);
+ return -EINVAL;
+ }
+
+ lio_irq_reallocate_irqs(oct, combined_count);
+ if (stopped)
+ dev->netdev_ops->ndo_open(dev);
+
+ ifstate_reset(lio, LIO_IFSTATE_RESETTING);
+
+ return 0;
}
static int lio_get_eeprom_len(struct net_device *netdev)
@@ -577,23 +724,18 @@ static int lio_set_phys_id(struct net_device *netdev,
break;
case ETHTOOL_ID_ON:
- if (oct->chip_id == OCTEON_CN66XX) {
+ if (oct->chip_id == OCTEON_CN66XX)
octnet_gpio_access(netdev, VITESSE_PHY_GPIO_CFG,
VITESSE_PHY_GPIO_HIGH);
-
- } else if (oct->chip_id == OCTEON_CN68XX) {
- return -EINVAL;
- } else {
+ else
return -EINVAL;
- }
+
break;
case ETHTOOL_ID_OFF:
if (oct->chip_id == OCTEON_CN66XX)
octnet_gpio_access(netdev, VITESSE_PHY_GPIO_CFG,
VITESSE_PHY_GPIO_LOW);
- else if (oct->chip_id == OCTEON_CN68XX)
- return -EINVAL;
else
return -EINVAL;
@@ -641,6 +783,9 @@ lio_ethtool_get_ringparam(struct net_device *netdev,
u32 tx_max_pending = 0, rx_max_pending = 0, tx_pending = 0,
rx_pending = 0;
+ if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
+ return;
+
if (OCTEON_CN6XXX(oct)) {
struct octeon_config *conf6x = CHIP_CONF(oct, cn6xxx);
@@ -648,33 +793,147 @@ lio_ethtool_get_ringparam(struct net_device *netdev,
rx_max_pending = CN6XXX_MAX_OQ_DESCRIPTORS;
rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf6x, lio->ifidx);
tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf6x, lio->ifidx);
- } else if (OCTEON_CN23XX_PF(oct)) {
- struct octeon_config *conf23 = CHIP_CONF(oct, cn23xx_pf);
-
+ } else if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
tx_max_pending = CN23XX_MAX_IQ_DESCRIPTORS;
rx_max_pending = CN23XX_MAX_OQ_DESCRIPTORS;
- rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf23, lio->ifidx);
- tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf23, lio->ifidx);
- }
-
- if (lio->mtu > OCTNET_DEFAULT_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) {
- ering->rx_pending = 0;
- ering->rx_max_pending = 0;
- ering->rx_mini_pending = 0;
- ering->rx_jumbo_pending = rx_pending;
- ering->rx_mini_max_pending = 0;
- ering->rx_jumbo_max_pending = rx_max_pending;
- } else {
- ering->rx_pending = rx_pending;
- ering->rx_max_pending = rx_max_pending;
- ering->rx_mini_pending = 0;
- ering->rx_jumbo_pending = 0;
- ering->rx_mini_max_pending = 0;
- ering->rx_jumbo_max_pending = 0;
+ rx_pending = oct->droq[0]->max_count;
+ tx_pending = oct->instr_queue[0]->max_count;
}
ering->tx_pending = tx_pending;
ering->tx_max_pending = tx_max_pending;
+ ering->rx_pending = rx_pending;
+ ering->rx_max_pending = rx_max_pending;
+ ering->rx_mini_pending = 0;
+ ering->rx_jumbo_pending = 0;
+ ering->rx_mini_max_pending = 0;
+ ering->rx_jumbo_max_pending = 0;
+}
+
+static int lio_reset_queues(struct net_device *netdev, uint32_t num_qs)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ struct napi_struct *napi, *n;
+ int i, update = 0;
+
+ if (wait_for_pending_requests(oct))
+ dev_err(&oct->pci_dev->dev, "There were pending requests\n");
+
+ if (lio_wait_for_instr_fetch(oct))
+ dev_err(&oct->pci_dev->dev, "IQ had pending instructions\n");
+
+ if (octeon_set_io_queues_off(oct)) {
+ dev_err(&oct->pci_dev->dev, "setting io queues off failed\n");
+ return -1;
+ }
+
+ /* Disable the input and output queues now. No more packets will
+ * arrive from Octeon.
+ */
+ oct->fn_list.disable_io_queues(oct);
+ /* Delete NAPI */
+ list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+ netif_napi_del(napi);
+
+ if (num_qs != oct->num_iqs) {
+ netif_set_real_num_rx_queues(netdev, num_qs);
+ netif_set_real_num_tx_queues(netdev, num_qs);
+ update = 1;
+ }
+
+ for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
+ if (!(oct->io_qmask.oq & BIT_ULL(i)))
+ continue;
+ octeon_delete_droq(oct, i);
+ }
+
+ for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
+ if (!(oct->io_qmask.iq & BIT_ULL(i)))
+ continue;
+ octeon_delete_instr_queue(oct, i);
+ }
+
+ if (oct->fn_list.setup_device_regs(oct)) {
+ dev_err(&oct->pci_dev->dev, "Failed to configure device registers\n");
+ return -1;
+ }
+
+ if (liquidio_setup_io_queues(oct, 0, num_qs, num_qs)) {
+ dev_err(&oct->pci_dev->dev, "IO queues initialization failed\n");
+ return -1;
+ }
+
+ /* Enable the input and output queues for this Octeon device */
+ if (oct->fn_list.enable_io_queues(oct)) {
+ dev_err(&oct->pci_dev->dev, "Failed to enable input/output queues");
+ return -1;
+ }
+
+ if (update && lio_send_queue_count_update(netdev, num_qs))
+ return -1;
+
+ return 0;
+}
+
+static int lio_ethtool_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ering)
+{
+ u32 rx_count, tx_count, rx_count_old, tx_count_old;
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ int stopped = 0;
+
+ if (!OCTEON_CN23XX_PF(oct) && !OCTEON_CN23XX_VF(oct))
+ return -EINVAL;
+
+ if (ering->rx_mini_pending || ering->rx_jumbo_pending)
+ return -EINVAL;
+
+ rx_count = clamp_t(u32, ering->rx_pending, CN23XX_MIN_OQ_DESCRIPTORS,
+ CN23XX_MAX_OQ_DESCRIPTORS);
+ tx_count = clamp_t(u32, ering->tx_pending, CN23XX_MIN_IQ_DESCRIPTORS,
+ CN23XX_MAX_IQ_DESCRIPTORS);
+
+ rx_count_old = oct->droq[0]->max_count;
+ tx_count_old = oct->instr_queue[0]->max_count;
+
+ if (rx_count == rx_count_old && tx_count == tx_count_old)
+ return 0;
+
+ ifstate_set(lio, LIO_IFSTATE_RESETTING);
+
+ if (netif_running(netdev)) {
+ netdev->netdev_ops->ndo_stop(netdev);
+ stopped = 1;
+ }
+
+ /* Change RX/TX DESCS count */
+ if (tx_count != tx_count_old)
+ CFG_SET_NUM_TX_DESCS_NIC_IF(octeon_get_conf(oct), lio->ifidx,
+ tx_count);
+ if (rx_count != rx_count_old)
+ CFG_SET_NUM_RX_DESCS_NIC_IF(octeon_get_conf(oct), lio->ifidx,
+ rx_count);
+
+ if (lio_reset_queues(netdev, lio->linfo.num_txpciq))
+ goto err_lio_reset_queues;
+
+ if (stopped)
+ netdev->netdev_ops->ndo_open(netdev);
+
+ ifstate_reset(lio, LIO_IFSTATE_RESETTING);
+
+ return 0;
+
+err_lio_reset_queues:
+ if (tx_count != tx_count_old)
+ CFG_SET_NUM_TX_DESCS_NIC_IF(octeon_get_conf(oct), lio->ifidx,
+ tx_count_old);
+ if (rx_count != rx_count_old)
+ CFG_SET_NUM_RX_DESCS_NIC_IF(octeon_get_conf(oct), lio->ifidx,
+ rx_count_old);
+ ifstate_reset(lio, LIO_IFSTATE_RESETTING);
+ return -EINVAL;
}
static u32 lio_get_msglevel(struct net_device *netdev)
@@ -795,6 +1054,9 @@ lio_get_ethtool_stats(struct net_device *netdev,
struct net_device_stats *netstats = &netdev->stats;
int i = 0, j;
+ if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
+ return;
+
netdev->netdev_ops->ndo_get_stats(netdev);
octnet_get_link_stats(netdev);
@@ -826,6 +1088,8 @@ lio_get_ethtool_stats(struct net_device *netdev,
data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_total_fwd);
/*per_core_stats[j].link_stats[i].fromhost.fw_err_pko */
data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_pko);
+ /*per_core_stats[j].link_stats[i].fromhost.fw_err_pki */
+ data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_pki);
/*per_core_stats[j].link_stats[i].fromhost.fw_err_link */
data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_link);
/*per_core_stats[cvmx_get_core_num()].link_stats[idx].fromhost.
@@ -1057,6 +1321,9 @@ static void lio_vf_get_ethtool_stats(struct net_device *netdev,
struct octeon_device *oct_dev = lio->oct_dev;
int i = 0, j, vj;
+ if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
+ return;
+
netdev->netdev_ops->ndo_get_stats(netdev);
/* sum of oct->droq[oq_no]->stats->rx_pkts_received */
data[i++] = CVM_CAST64(netstats->rx_packets);
@@ -1079,7 +1346,7 @@ static void lio_vf_get_ethtool_stats(struct net_device *netdev,
/* lio->link_changes */
data[i++] = CVM_CAST64(lio->link_changes);
- for (vj = 0; vj < lio->linfo.num_txpciq; vj++) {
+ for (vj = 0; vj < oct_dev->num_iqs; vj++) {
j = lio->linfo.txpciq[vj].s.q_no;
/* packets to network port */
@@ -1121,7 +1388,7 @@ static void lio_vf_get_ethtool_stats(struct net_device *netdev,
}
/* RX */
- for (vj = 0; vj < lio->linfo.num_rxpciq; vj++) {
+ for (vj = 0; vj < oct_dev->num_oqs; vj++) {
j = lio->linfo.rxpciq[vj].s.q_no;
/* packets send to TCP/IP network stack */
@@ -1568,6 +1835,7 @@ octnet_nic_stats_callback(struct octeon_device *oct_dev,
tstats->fw_total_sent = rsp_tstats->fw_total_sent;
tstats->fw_total_fwd = rsp_tstats->fw_total_fwd;
tstats->fw_err_pko = rsp_tstats->fw_err_pko;
+ tstats->fw_err_pki = rsp_tstats->fw_err_pki;
tstats->fw_err_link = rsp_tstats->fw_err_link;
tstats->fw_err_drop = rsp_tstats->fw_err_drop;
tstats->fw_tso = rsp_tstats->fw_tso;
@@ -2587,7 +2855,9 @@ static const struct ethtool_ops lio_ethtool_ops = {
.get_link = ethtool_op_get_link,
.get_drvinfo = lio_get_drvinfo,
.get_ringparam = lio_ethtool_get_ringparam,
+ .set_ringparam = lio_ethtool_set_ringparam,
.get_channels = lio_ethtool_get_channels,
+ .set_channels = lio_ethtool_set_channels,
.set_phys_id = lio_set_phys_id,
.get_eeprom_len = lio_get_eeprom_len,
.get_eeprom = lio_get_eeprom,
@@ -2612,7 +2882,9 @@ static const struct ethtool_ops lio_vf_ethtool_ops = {
.get_link = ethtool_op_get_link,
.get_drvinfo = lio_get_vf_drvinfo,
.get_ringparam = lio_ethtool_get_ringparam,
+ .set_ringparam = lio_ethtool_set_ringparam,
.get_channels = lio_ethtool_get_channels,
+ .set_channels = lio_ethtool_set_channels,
.get_strings = lio_vf_get_strings,
.get_ethtool_stats = lio_vf_get_ethtool_stats,
.get_regs_len = lio_get_regs_len,
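The new lio_ethtool_set_channels() exposes the rings as combined channels only, which
is the usual ethtool convention when RX and TX queues are paired per interrupt vector:
any request that sets rx_count, tx_count, or other_count is rejected, and only
combined_count within the hardware maximum is honored. A minimal sketch of that
validation shape, with example_max_queues() and example_reset_queues() as hypothetical
helpers:

	static int example_set_channels(struct net_device *dev,
					struct ethtool_channels *ch)
	{
		/* combined-only: any split rx/tx/other request is invalid */
		if (!ch->combined_count || ch->rx_count || ch->tx_count ||
		    ch->other_count)
			return -EINVAL;

		if (ch->combined_count > example_max_queues(dev))
			return -EINVAL;

		/* stop, resize the queues, then reopen if it was running */
		return example_reset_queues(dev, ch->combined_count);
	}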
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 120b6e537..e7f5494 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -39,10 +39,14 @@ MODULE_AUTHOR("Cavium Networks, <support@cavium.com>");
MODULE_DESCRIPTION("Cavium LiquidIO Intelligent Server Adapter Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(LIQUIDIO_VERSION);
-MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_210SV_NAME LIO_FW_NAME_SUFFIX);
-MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_210NV_NAME LIO_FW_NAME_SUFFIX);
-MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_410NV_NAME LIO_FW_NAME_SUFFIX);
-MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_23XX_NAME LIO_FW_NAME_SUFFIX);
+MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_210SV_NAME
+ "_" LIO_FW_NAME_TYPE_NIC LIO_FW_NAME_SUFFIX);
+MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_210NV_NAME
+ "_" LIO_FW_NAME_TYPE_NIC LIO_FW_NAME_SUFFIX);
+MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_410NV_NAME
+ "_" LIO_FW_NAME_TYPE_NIC LIO_FW_NAME_SUFFIX);
+MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_23XX_NAME
+ "_" LIO_FW_NAME_TYPE_NIC LIO_FW_NAME_SUFFIX);
static int ddr_timeout = 10000;
module_param(ddr_timeout, int, 0644);
@@ -55,11 +59,24 @@ static int debug = -1;
module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "NETIF_MSG debug bits");
-static char fw_type[LIO_MAX_FW_TYPE_LEN];
-module_param_string(fw_type, fw_type, sizeof(fw_type), 0000);
-MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded. Default \"nic\"");
+static char fw_type[LIO_MAX_FW_TYPE_LEN] = LIO_FW_NAME_TYPE_NIC;
+module_param_string(fw_type, fw_type, sizeof(fw_type), 0444);
+MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded. Default \"nic\". Use \"none\" to load firmware from flash.");
-static int ptp_enable = 1;
+static u32 console_bitmask;
+module_param(console_bitmask, int, 0644);
+MODULE_PARM_DESC(console_bitmask,
+ "Bitmask indicating which consoles have debug output redirected to syslog.");
+
+/**
+ * \brief determines if a given console has debug enabled.
+ * @param console console to check
+ * @returns 1 = enabled. 0 otherwise
+ */
+static int octeon_console_debug_enabled(u32 console)
+{
+ return (console_bitmask >> (console)) & 0x1;
+}
/* Polling interval for determining when NIC application is alive */
#define LIQUIDIO_STARTER_POLL_INTERVAL_MS 100
@@ -158,16 +175,13 @@ struct handshake {
int started_ok;
};
-struct octeon_device_priv {
- /** Tasklet structures for this device. */
- struct tasklet_struct droq_tasklet;
- unsigned long napi_mask;
-};
-
#ifdef CONFIG_PCI_IOV
static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs);
#endif
+static int octeon_dbg_console_print(struct octeon_device *oct, u32 console_num,
+ char *prefix, char *suffix);
+
static int octeon_device_init(struct octeon_device *);
static int liquidio_stop(struct net_device *netdev);
static void liquidio_remove(struct pci_dev *pdev);
@@ -256,32 +270,6 @@ static void force_io_queues_off(struct octeon_device *oct)
}
/**
- * \brief wait for all pending requests to complete
- * @param oct Pointer to Octeon device
- *
- * Called during shutdown sequence
- */
-static int wait_for_pending_requests(struct octeon_device *oct)
-{
- int i, pcount = 0;
-
- for (i = 0; i < 100; i++) {
- pcount =
- atomic_read(&oct->response_list
- [OCTEON_ORDERED_SC_LIST].pending_req_count);
- if (pcount)
- schedule_timeout_uninterruptible(HZ / 10);
- else
- break;
- }
-
- if (pcount)
- return 1;
-
- return 0;
-}
-
-/**
* \brief Cause device to go quiet so it can be safely removed/reset/etc
* @param oct Pointer to Octeon device
*/
@@ -572,7 +560,7 @@ static inline void txqs_wake(struct net_device *netdev)
for (i = 0; i < netdev->num_tx_queues; i++) {
int qno = lio->linfo.txpciq[i %
- (lio->linfo.num_txpciq)].s.q_no;
+ lio->oct_dev->num_iqs].s.q_no;
if (__netif_subqueue_stopped(netdev, i)) {
INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
@@ -652,7 +640,7 @@ static inline int check_txq_status(struct lio *lio)
/* check each sub-queue state */
for (q = 0; q < numqs; q++) {
iq = lio->linfo.txpciq[q %
- (lio->linfo.num_txpciq)].s.q_no;
+ lio->oct_dev->num_iqs].s.q_no;
if (octnet_iq_is_full(lio->oct_dev, iq))
continue;
if (__netif_subqueue_stopped(lio->netdev, q)) {
@@ -823,7 +811,8 @@ static void print_link_info(struct net_device *netdev)
{
struct lio *lio = GET_LIO(netdev);
- if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED) {
+ if (!ifstate_check(lio, LIO_IFSTATE_RESETTING) &&
+ ifstate_check(lio, LIO_IFSTATE_REGISTERED)) {
struct oct_link_info *linfo = &lio->linfo;
if (linfo->link.s.link_up) {
@@ -912,295 +901,6 @@ static inline void update_link_status(struct net_device *netdev,
}
}
-/* Runs in interrupt context. */
-static void update_txq_status(struct octeon_device *oct, int iq_num)
-{
- struct net_device *netdev;
- struct lio *lio;
- struct octeon_instr_queue *iq = oct->instr_queue[iq_num];
-
- netdev = oct->props[iq->ifidx].netdev;
-
- /* This is needed because the first IQ does not have
- * a netdev associated with it.
- */
- if (!netdev)
- return;
-
- lio = GET_LIO(netdev);
- if (netif_is_multiqueue(netdev)) {
- if (__netif_subqueue_stopped(netdev, iq->q_index) &&
- lio->linfo.link.s.link_up &&
- (!octnet_iq_is_full(oct, iq_num))) {
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
- tx_restart, 1);
- netif_wake_subqueue(netdev, iq->q_index);
- }
- } else if (netif_queue_stopped(netdev) &&
- lio->linfo.link.s.link_up &&
- (!octnet_iq_is_full(oct, lio->txq))) {
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev,
- lio->txq, tx_restart, 1);
- netif_wake_queue(netdev);
- }
-}
-
-static
-int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
-{
- struct octeon_device *oct = droq->oct_dev;
- struct octeon_device_priv *oct_priv =
- (struct octeon_device_priv *)oct->priv;
-
- if (droq->ops.poll_mode) {
- droq->ops.napi_fn(droq);
- } else {
- if (ret & MSIX_PO_INT) {
- tasklet_schedule(&oct_priv->droq_tasklet);
- return 1;
- }
- /* this will be flushed periodically by check iq db */
- if (ret & MSIX_PI_INT)
- return 0;
- }
- return 0;
-}
-
-/**
- * \brief Droq packet processor sceduler
- * @param oct octeon device
- */
-static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct)
-{
- struct octeon_device_priv *oct_priv =
- (struct octeon_device_priv *)oct->priv;
- u64 oq_no;
- struct octeon_droq *droq;
-
- if (oct->int_status & OCT_DEV_INTR_PKT_DATA) {
- for (oq_no = 0; oq_no < MAX_OCTEON_OUTPUT_QUEUES(oct);
- oq_no++) {
- if (!(oct->droq_intr & BIT_ULL(oq_no)))
- continue;
-
- droq = oct->droq[oq_no];
-
- if (droq->ops.poll_mode) {
- droq->ops.napi_fn(droq);
- oct_priv->napi_mask |= (1 << oq_no);
- } else {
- tasklet_schedule(&oct_priv->droq_tasklet);
- }
- }
- }
-}
-
-static irqreturn_t
-liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev)
-{
- u64 ret;
- struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev;
- struct octeon_device *oct = ioq_vector->oct_dev;
- struct octeon_droq *droq = oct->droq[ioq_vector->droq_index];
-
- ret = oct->fn_list.msix_interrupt_handler(ioq_vector);
-
- if ((ret & MSIX_PO_INT) || (ret & MSIX_PI_INT))
- liquidio_schedule_msix_droq_pkt_handler(droq, ret);
-
- return IRQ_HANDLED;
-}
-
-/**
- * \brief Interrupt handler for octeon
- * @param irq unused
- * @param dev octeon device
- */
-static
-irqreturn_t liquidio_legacy_intr_handler(int irq __attribute__((unused)),
- void *dev)
-{
- struct octeon_device *oct = (struct octeon_device *)dev;
- irqreturn_t ret;
-
- /* Disable our interrupts for the duration of ISR */
- oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
-
- ret = oct->fn_list.process_interrupt_regs(oct);
-
- if (ret == IRQ_HANDLED)
- liquidio_schedule_droq_pkt_handlers(oct);
-
- /* Re-enable our interrupts */
- if (!(atomic_read(&oct->status) == OCT_DEV_IN_RESET))
- oct->fn_list.enable_interrupt(oct, OCTEON_ALL_INTR);
-
- return ret;
-}
-
-/**
- * \brief Setup interrupt for octeon device
- * @param oct octeon device
- *
- * Enable interrupt in Octeon device as given in the PCI interrupt mask.
- */
-static int octeon_setup_interrupt(struct octeon_device *oct)
-{
- int irqret, err;
- struct msix_entry *msix_entries;
- int i;
- int num_ioq_vectors;
- int num_alloc_ioq_vectors;
- char *queue_irq_names = NULL;
- char *aux_irq_name = NULL;
-
- if (OCTEON_CN23XX_PF(oct) && oct->msix_on) {
- oct->num_msix_irqs = oct->sriov_info.num_pf_rings;
- /* one non ioq interrupt for handling sli_mac_pf_int_sum */
- oct->num_msix_irqs += 1;
-
- /* allocate storage for the names assigned to each irq */
- oct->irq_name_storage =
- kcalloc((MAX_IOQ_INTERRUPTS_PER_PF + 1), INTRNAMSIZ,
- GFP_KERNEL);
- if (!oct->irq_name_storage) {
- dev_err(&oct->pci_dev->dev, "Irq name storage alloc failed...\n");
- return -ENOMEM;
- }
-
- queue_irq_names = oct->irq_name_storage;
- aux_irq_name = &queue_irq_names
- [IRQ_NAME_OFF(MAX_IOQ_INTERRUPTS_PER_PF)];
-
- oct->msix_entries = kcalloc(
- oct->num_msix_irqs, sizeof(struct msix_entry), GFP_KERNEL);
- if (!oct->msix_entries) {
- dev_err(&oct->pci_dev->dev, "Memory Alloc failed...\n");
- kfree(oct->irq_name_storage);
- oct->irq_name_storage = NULL;
- return -ENOMEM;
- }
-
- msix_entries = (struct msix_entry *)oct->msix_entries;
- /* Assumption is that PF MSI-X vectors start at pf_srn (not at 0)
- * and the last one is trs; if not, change this code.
- */
- for (i = 0; i < oct->num_msix_irqs - 1; i++)
- msix_entries[i].entry = oct->sriov_info.pf_srn + i;
- msix_entries[oct->num_msix_irqs - 1].entry =
- oct->sriov_info.trs;
- num_alloc_ioq_vectors = pci_enable_msix_range(
- oct->pci_dev, msix_entries,
- oct->num_msix_irqs,
- oct->num_msix_irqs);
- if (num_alloc_ioq_vectors < 0) {
- dev_err(&oct->pci_dev->dev, "unable to Allocate MSI-X interrupts\n");
- kfree(oct->msix_entries);
- oct->msix_entries = NULL;
- kfree(oct->irq_name_storage);
- oct->irq_name_storage = NULL;
- return num_alloc_ioq_vectors;
- }
- dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n");
-
- num_ioq_vectors = oct->num_msix_irqs;
-
- /** For PF, there is one non-ioq interrupt handler */
- num_ioq_vectors -= 1;
-
- snprintf(aux_irq_name, INTRNAMSIZ,
- "LiquidIO%u-pf%u-aux", oct->octeon_id, oct->pf_num);
- irqret = request_irq(msix_entries[num_ioq_vectors].vector,
- liquidio_legacy_intr_handler, 0,
- aux_irq_name, oct);
- if (irqret) {
- dev_err(&oct->pci_dev->dev,
- "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n",
- irqret);
- pci_disable_msix(oct->pci_dev);
- kfree(oct->msix_entries);
- oct->msix_entries = NULL;
- kfree(oct->irq_name_storage);
- oct->irq_name_storage = NULL;
- return irqret;
- }
-
- for (i = 0; i < num_ioq_vectors; i++) {
- snprintf(&queue_irq_names[IRQ_NAME_OFF(i)], INTRNAMSIZ,
- "LiquidIO%u-pf%u-rxtx-%u",
- oct->octeon_id, oct->pf_num, i);
-
- irqret = request_irq(msix_entries[i].vector,
- liquidio_msix_intr_handler, 0,
- &queue_irq_names[IRQ_NAME_OFF(i)],
- &oct->ioq_vector[i]);
- if (irqret) {
- dev_err(&oct->pci_dev->dev,
- "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n",
- irqret);
- /* Freeing the non-ioq irq vector here. */
- free_irq(msix_entries[num_ioq_vectors].vector,
- oct);
-
- while (i) {
- i--;
- /** clearing affinity mask. */
- irq_set_affinity_hint(
- msix_entries[i].vector, NULL);
- free_irq(msix_entries[i].vector,
- &oct->ioq_vector[i]);
- }
- pci_disable_msix(oct->pci_dev);
- kfree(oct->msix_entries);
- oct->msix_entries = NULL;
- kfree(oct->irq_name_storage);
- oct->irq_name_storage = NULL;
- return irqret;
- }
- oct->ioq_vector[i].vector = msix_entries[i].vector;
- /* assign the cpu mask for this msix interrupt vector */
- irq_set_affinity_hint(
- msix_entries[i].vector,
- (&oct->ioq_vector[i].affinity_mask));
- }
- dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: MSI-X enabled\n",
- oct->octeon_id);
- } else {
- err = pci_enable_msi(oct->pci_dev);
- if (err)
- dev_warn(&oct->pci_dev->dev, "Reverting to legacy interrupts. Error: %d\n",
- err);
- else
- oct->flags |= LIO_FLAG_MSI_ENABLED;
-
- /* allocate storage for the names assigned to the irq */
- oct->irq_name_storage = kcalloc(1, INTRNAMSIZ, GFP_KERNEL);
- if (!oct->irq_name_storage)
- return -ENOMEM;
-
- queue_irq_names = oct->irq_name_storage;
-
- snprintf(&queue_irq_names[IRQ_NAME_OFF(0)], INTRNAMSIZ,
- "LiquidIO%u-pf%u-rxtx-%u",
- oct->octeon_id, oct->pf_num, 0);
-
- irqret = request_irq(oct->pci_dev->irq,
- liquidio_legacy_intr_handler,
- IRQF_SHARED,
- &queue_irq_names[IRQ_NAME_OFF(0)], oct);
- if (irqret) {
- if (oct->flags & LIO_FLAG_MSI_ENABLED)
- pci_disable_msi(oct->pci_dev);
- dev_err(&oct->pci_dev->dev, "Request IRQ failed with code: %d\n",
- irqret);
- kfree(oct->irq_name_storage);
- oct->irq_name_storage = NULL;
- return irqret;
- }
- }
- return 0;
-}
-
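The octeon_setup_interrupt() removed here (its VF twin goes away later in this patch) is presumably consolidated into shared code, since the callers below now pass a ring count; the underlying MSI-X bring-up pattern is unchanged. A condensed, self-contained sketch of that pattern, with mydev and mydev_msix_handler as hypothetical names:

#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/slab.h>

#define MYDEV_MAX_VEC 16

struct mydev {                          /* hypothetical driver state */
        struct msix_entry *entries;
        char irq_names[MYDEV_MAX_VEC][32];
};

static irqreturn_t mydev_msix_handler(int irq, void *data)
{
        return IRQ_HANDLED;             /* real handler work elided */
}

/* nvec must be <= MYDEV_MAX_VEC */
static int mydev_setup_msix(struct pci_dev *pdev, struct mydev *md, int nvec)
{
        int i, err;

        md->entries = kcalloc(nvec, sizeof(*md->entries), GFP_KERNEL);
        if (!md->entries)
                return -ENOMEM;
        for (i = 0; i < nvec; i++)
                md->entries[i].entry = i;

        /* min == max: all-or-nothing vector allocation */
        err = pci_enable_msix_range(pdev, md->entries, nvec, nvec);
        if (err < 0)
                goto free_entries;

        for (i = 0; i < nvec; i++) {
                snprintf(md->irq_names[i], sizeof(md->irq_names[i]),
                         "mydev-rxtx-%d", i);
                err = request_irq(md->entries[i].vector, mydev_msix_handler,
                                  0, md->irq_names[i], &md->entries[i]);
                if (err)
                        goto unwind;    /* free vectors requested so far */
        }
        return 0;

unwind:
        while (i--)
                free_irq(md->entries[i].vector, &md->entries[i]);
        pci_disable_msix(pdev);
free_entries:
        kfree(md->entries);
        md->entries = NULL;
        return err;
}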
static struct octeon_device *get_other_octeon_device(struct octeon_device *oct)
{
struct octeon_device *other_oct;
@@ -1344,6 +1044,13 @@ liquidio_probe(struct pci_dev *pdev,
if (pdev->device == OCTEON_CN23XX_PF_VID)
oct_dev->msix_on = LIO_FLAG_MSIX_ENABLED;
+ /* Enable PTP for 6XXX Device */
+ if (((pdev->device == OCTEON_CN66XX) ||
+ (pdev->device == OCTEON_CN68XX)))
+ oct_dev->ptp_enable = true;
+ else
+ oct_dev->ptp_enable = false;
+
dev_info(&pdev->dev, "Initializing device %x:%x.\n",
(u32)pdev->vendor, (u32)pdev->device);
@@ -1415,6 +1122,33 @@ static bool fw_type_is_none(void)
}
/**
+ * \brief PCI FLR for each Octeon device.
+ * @param oct octeon device
+ */
+static void octeon_pci_flr(struct octeon_device *oct)
+{
+ int rc;
+
+ pci_save_state(oct->pci_dev);
+
+ pci_cfg_access_lock(oct->pci_dev);
+
+ /* Quiesce the device completely */
+ pci_write_config_word(oct->pci_dev, PCI_COMMAND,
+ PCI_COMMAND_INTX_DISABLE);
+
+ rc = __pci_reset_function_locked(oct->pci_dev);
+
+ if (rc != 0)
+ dev_err(&oct->pci_dev->dev, "Error %d resetting PCI function %d\n",
+ rc, oct->pf_num);
+
+ pci_cfg_access_unlock(oct->pci_dev);
+
+ pci_restore_state(oct->pci_dev);
+}
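A note on the new octeon_pci_flr(): __pci_reset_function_locked() leaves serialization and config-space recovery to the caller, so the reset is bracketed by pci_cfg_access_lock()/unlock() and pci_save_state()/pci_restore_state(), with a PCI_COMMAND write first to quiesce the function. The bare pairing, as a sketch for a generic pdev:

/* Sketch: quiesce-and-FLR pairing for a generic PCI function. */
static void mydev_flr(struct pci_dev *pdev)
{
        int rc;

        pci_save_state(pdev);           /* reset scrambles config space */
        pci_cfg_access_lock(pdev);      /* block config access during reset */

        /* Mask INTx; clearing the other command bits stops decoding too. */
        pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

        rc = __pci_reset_function_locked(pdev);
        if (rc)
                dev_err(&pdev->dev, "FLR failed: %d\n", rc);

        pci_cfg_access_unlock(pdev);
        pci_restore_state(pdev);        /* bring config space back */
}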
+
+/**
* \brief Destroy resources associated with octeon device
* @param pdev PCI device structure
* @param ent unused
@@ -1474,11 +1208,15 @@ static void octeon_destroy_resources(struct octeon_device *oct)
if (oct->msix_on) {
msix_entries = (struct msix_entry *)oct->msix_entries;
for (i = 0; i < oct->num_msix_irqs - 1; i++) {
- /* clear the affinity_cpumask */
- irq_set_affinity_hint(msix_entries[i].vector,
- NULL);
- free_irq(msix_entries[i].vector,
- &oct->ioq_vector[i]);
+ if (oct->ioq_vector[i].vector) {
+ /* clear the affinity_cpumask */
+ irq_set_affinity_hint(
+ msix_entries[i].vector,
+ NULL);
+ free_irq(msix_entries[i].vector,
+ &oct->ioq_vector[i]);
+ oct->ioq_vector[i].vector = 0;
+ }
}
/* non-iov vector's argument is oct struct */
free_irq(msix_entries[i].vector, oct);
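The guard added above makes MSI-X teardown idempotent: each vector is recorded at request time and zeroed once freed, so re-entering octeon_destroy_resources() after a partially failed init cannot double-free an IRQ. The pattern in isolation, with mydev_free_vectors() as a hypothetical name:

/* Idempotent per-vector teardown: free only what was requested and
 * clear the bookkeeping so a repeat call is a no-op.
 */
static void mydev_free_vectors(struct octeon_ioq_vector *vec, int nvec)
{
        int i;

        for (i = 0; i < nvec; i++) {
                if (!vec[i].vector)     /* never requested or already freed */
                        continue;
                irq_set_affinity_hint(vec[i].vector, NULL);
                free_irq(vec[i].vector, &vec[i]);
                vec[i].vector = 0;
        }
}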
@@ -1558,14 +1296,16 @@ static void octeon_destroy_resources(struct octeon_device *oct)
case OCT_DEV_PCI_MAP_DONE:
refcount = octeon_deregister_device(oct);
- if (!fw_type_is_none()) {
- /* Soft reset the octeon device before exiting.
- * Implementation note: here, we reset the device
- * if it is a CN6XXX OR the last CN23XX device.
- */
- if (OCTEON_CN6XXX(oct) || !refcount)
- oct->fn_list.soft_reset(oct);
- }
+ /* Soft reset the octeon device before exiting.
+ * However, if fw was loaded from card (i.e. autoboot),
+ * perform an FLR instead.
+ * Implementation note: only soft-reset the device
+ * if it is a CN6XXX OR the LAST CN23XX device.
+ */
+ if (fw_type_is_none())
+ octeon_pci_flr(oct);
+ else if (OCTEON_CN6XXX(oct) || !refcount)
+ oct->fn_list.soft_reset(oct);
octeon_unmap_pci_barx(oct, 0);
octeon_unmap_pci_barx(oct, 1);
@@ -1698,15 +1438,6 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
if (atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING)
liquidio_stop(netdev);
- if (fw_type_is_none()) {
- struct octnic_ctrl_pkt nctrl;
-
- memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
- nctrl.ncmd.s.cmd = OCTNET_CMD_RESET_PF;
- nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
- octnet_send_nic_ctrl_pkt(oct, &nctrl);
- }
-
if (oct->props[lio->ifidx].napi_enabled == 1) {
list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
napi_disable(napi);
@@ -1717,6 +1448,10 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
oct->droq[0]->ops.poll_mode = 0;
}
+ /* Delete NAPI */
+ list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+ netif_napi_del(napi);
+
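The netif_napi_del() loop added here closes a lifecycle gap: napi_disable() only stops polling, while contexts registered with netif_napi_add() must also be deleted before the netdev goes away. The paired calls, assuming this kernel's four-argument netif_napi_add() as used in the driver's queue setup:

/* Setup side: register the poll context (weight 64), then enable it. */
netif_napi_add(netdev, &droq->napi, liquidio_napi_poll, 64);
napi_enable(&droq->napi);

/* Teardown side: quiesce polling first, then unhook from the netdev. */
napi_disable(&droq->napi);
netif_napi_del(&droq->napi);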
if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
unregister_netdev(netdev);
@@ -1754,7 +1489,7 @@ static int liquidio_stop_nic_module(struct octeon_device *oct)
for (i = 0; i < oct->ifcount; i++) {
lio = GET_LIO(oct->props[i].netdev);
- for (j = 0; j < lio->linfo.num_rxpciq; j++)
+ for (j = 0; j < oct->num_oqs; j++)
octeon_unregister_droq_ops(oct,
lio->linfo.rxpciq[j].s.q_no);
}
@@ -1825,6 +1560,13 @@ static int octeon_chip_specific_setup(struct octeon_device *oct)
case OCTEON_CN23XX_PCIID_PF:
oct->chip_id = OCTEON_CN23XX_PF_VID;
ret = setup_cn23xx_octeon_pf_device(oct);
+ if (ret)
+ break;
+#ifdef CONFIG_PCI_IOV
+ if (!ret)
+ pci_sriov_set_totalvfs(oct->pci_dev,
+ oct->sriov_info.max_vfs);
+#endif
s = "CN23XX";
break;
@@ -1889,7 +1631,7 @@ static inline int check_txq_state(struct lio *lio, struct sk_buff *skb)
if (netif_is_multiqueue(lio->netdev)) {
q = skb->queue_mapping;
- iq = lio->linfo.txpciq[(q % (lio->linfo.num_txpciq))].s.q_no;
+ iq = lio->linfo.txpciq[(q % lio->oct_dev->num_iqs)].s.q_no;
} else {
iq = lio->txq;
q = iq;
@@ -2192,11 +1934,6 @@ static int load_firmware(struct octeon_device *oct)
char fw_name[LIO_MAX_FW_FILENAME_LEN];
char *tmp_fw_type;
- if (fw_type_is_none()) {
- dev_info(&oct->pci_dev->dev, "Skipping firmware load\n");
- return ret;
- }
-
if (fw_type[0] == '\0')
tmp_fw_type = LIO_FW_NAME_TYPE_NIC;
else
@@ -2222,43 +1959,6 @@ static int load_firmware(struct octeon_device *oct)
}
/**
- * \brief Setup output queue
- * @param oct octeon device
- * @param q_no which queue
- * @param num_descs how many descriptors
- * @param desc_size size of each descriptor
- * @param app_ctx application context
- */
-static int octeon_setup_droq(struct octeon_device *oct, int q_no, int num_descs,
- int desc_size, void *app_ctx)
-{
- int ret_val = 0;
-
- dev_dbg(&oct->pci_dev->dev, "Creating Droq: %d\n", q_no);
- /* droq creation and local register settings. */
- ret_val = octeon_create_droq(oct, q_no, num_descs, desc_size, app_ctx);
- if (ret_val < 0)
- return ret_val;
-
- if (ret_val == 1) {
- dev_dbg(&oct->pci_dev->dev, "Using default droq %d\n", q_no);
- return 0;
- }
- /* tasklet creation for the droq */
-
- /* Enable the droq queues */
- octeon_set_droq_pkt_op(oct, q_no, 1);
-
- /* Send Credit for Octeon Output queues. Credits are always
- * sent after the output queue is enabled.
- */
- writel(oct->droq[q_no]->max_count,
- oct->droq[q_no]->pkts_credit_reg);
-
- return ret_val;
-}
-
-/**
* \brief Callback for getting interface configuration
* @param status status of request
* @param buf pointer to resp structure
@@ -2291,352 +1991,6 @@ static void if_cfg_callback(struct octeon_device *oct,
wake_up_interruptible(&ctx->wc);
}
-/** Routine to push packets arriving on Octeon interface up to the network layer.
- * @param oct_id - octeon device id.
- * @param skbuff - skbuff struct to be passed to network layer.
- * @param len - size of total data received.
- * @param rh - Control header associated with the packet
- * @param param - additional control data with the packet
- * @param arg - farg registered in droq_ops
- */
-static void
-liquidio_push_packet(u32 octeon_id __attribute__((unused)),
- void *skbuff,
- u32 len,
- union octeon_rh *rh,
- void *param,
- void *arg)
-{
- struct napi_struct *napi = param;
- struct sk_buff *skb = (struct sk_buff *)skbuff;
- struct skb_shared_hwtstamps *shhwtstamps;
- u64 ns;
- u16 vtag = 0;
- u32 r_dh_off;
- struct net_device *netdev = (struct net_device *)arg;
- struct octeon_droq *droq = container_of(param, struct octeon_droq,
- napi);
- if (netdev) {
- int packet_was_received;
- struct lio *lio = GET_LIO(netdev);
- struct octeon_device *oct = lio->oct_dev;
-
- /* Do not proceed if the interface is not in RUNNING state. */
- if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) {
- recv_buffer_free(skb);
- droq->stats.rx_dropped++;
- return;
- }
-
- skb->dev = netdev;
-
- skb_record_rx_queue(skb, droq->q_no);
- if (likely(len > MIN_SKB_SIZE)) {
- struct octeon_skb_page_info *pg_info;
- unsigned char *va;
-
- pg_info = ((struct octeon_skb_page_info *)(skb->cb));
- if (pg_info->page) {
- /* For Paged allocation use the frags */
- va = page_address(pg_info->page) +
- pg_info->page_offset;
- memcpy(skb->data, va, MIN_SKB_SIZE);
- skb_put(skb, MIN_SKB_SIZE);
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
- pg_info->page,
- pg_info->page_offset +
- MIN_SKB_SIZE,
- len - MIN_SKB_SIZE,
- LIO_RXBUFFER_SZ);
- }
- } else {
- struct octeon_skb_page_info *pg_info =
- ((struct octeon_skb_page_info *)(skb->cb));
- skb_copy_to_linear_data(skb, page_address(pg_info->page)
- + pg_info->page_offset, len);
- skb_put(skb, len);
- put_page(pg_info->page);
- }
-
- r_dh_off = (rh->r_dh.len - 1) * BYTES_PER_DHLEN_UNIT;
-
- if (((oct->chip_id == OCTEON_CN66XX) ||
- (oct->chip_id == OCTEON_CN68XX)) &&
- ptp_enable) {
- if (rh->r_dh.has_hwtstamp) {
- /* timestamp is included from the hardware at
- * the beginning of the packet.
- */
- if (ifstate_check
- (lio, LIO_IFSTATE_RX_TIMESTAMP_ENABLED)) {
- /* Nanoseconds are in the first 64-bits
- * of the packet.
- */
- memcpy(&ns, (skb->data + r_dh_off),
- sizeof(ns));
- r_dh_off -= BYTES_PER_DHLEN_UNIT;
- shhwtstamps = skb_hwtstamps(skb);
- shhwtstamps->hwtstamp =
- ns_to_ktime(ns +
- lio->ptp_adjust);
- }
- }
- }
-
- if (rh->r_dh.has_hash) {
- __be32 *hash_be = (__be32 *)(skb->data + r_dh_off);
- u32 hash = be32_to_cpu(*hash_be);
-
- skb_set_hash(skb, hash, PKT_HASH_TYPE_L4);
- r_dh_off -= BYTES_PER_DHLEN_UNIT;
- }
-
- skb_pull(skb, rh->r_dh.len * BYTES_PER_DHLEN_UNIT);
-
- skb->protocol = eth_type_trans(skb, skb->dev);
- if ((netdev->features & NETIF_F_RXCSUM) &&
- (((rh->r_dh.encap_on) &&
- (rh->r_dh.csum_verified & CNNIC_TUN_CSUM_VERIFIED)) ||
- (!(rh->r_dh.encap_on) &&
- (rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED))))
- /* checksum has already been verified */
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- else
- skb->ip_summed = CHECKSUM_NONE;
-
- /* Setting Encapsulation field on the basis of status received
- * from the firmware
- */
- if (rh->r_dh.encap_on) {
- skb->encapsulation = 1;
- skb->csum_level = 1;
- droq->stats.rx_vxlan++;
- }
-
- /* inbound VLAN tag */
- if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
- (rh->r_dh.vlan != 0)) {
- u16 vid = rh->r_dh.vlan;
- u16 priority = rh->r_dh.priority;
-
- vtag = priority << 13 | vid;
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag);
- }
-
- packet_was_received = napi_gro_receive(napi, skb) != GRO_DROP;
-
- if (packet_was_received) {
- droq->stats.rx_bytes_received += len;
- droq->stats.rx_pkts_received++;
- } else {
- droq->stats.rx_dropped++;
- netif_info(lio, rx_err, lio->netdev,
- "droq:%d error rx_dropped:%llu\n",
- droq->q_no, droq->stats.rx_dropped);
- }
-
- } else {
- recv_buffer_free(skb);
- }
-}
-
-/**
- * \brief wrapper for calling napi_schedule
- * @param param parameters to pass to napi_schedule
- *
- * Used when scheduling on different CPUs
- */
-static void napi_schedule_wrapper(void *param)
-{
- struct napi_struct *napi = param;
-
- napi_schedule(napi);
-}
-
-/**
- * \brief callback when receive interrupt occurs and we are in NAPI mode
- * @param arg pointer to octeon output queue
- */
-static void liquidio_napi_drv_callback(void *arg)
-{
- struct octeon_device *oct;
- struct octeon_droq *droq = arg;
- int this_cpu = smp_processor_id();
-
- oct = droq->oct_dev;
-
- if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) {
- napi_schedule_irqoff(&droq->napi);
- } else {
- call_single_data_t *csd = &droq->csd;
-
- csd->func = napi_schedule_wrapper;
- csd->info = &droq->napi;
- csd->flags = 0;
-
- smp_call_function_single_async(droq->cpu_id, csd);
- }
-}
-
-/**
- * \brief Entry point for NAPI polling
- * @param napi NAPI structure
- * @param budget maximum number of items to process
- */
-static int liquidio_napi_poll(struct napi_struct *napi, int budget)
-{
- struct octeon_droq *droq;
- int work_done;
- int tx_done = 0, iq_no;
- struct octeon_instr_queue *iq;
- struct octeon_device *oct;
-
- droq = container_of(napi, struct octeon_droq, napi);
- oct = droq->oct_dev;
- iq_no = droq->q_no;
- /* Handle Droq descriptors */
- work_done = octeon_process_droq_poll_cmd(oct, droq->q_no,
- POLL_EVENT_PROCESS_PKTS,
- budget);
-
- /* Flush the instruction queue */
- iq = oct->instr_queue[iq_no];
- if (iq) {
- if (atomic_read(&iq->instr_pending))
- /* Process iq buffers within the budget limits */
- tx_done = octeon_flush_iq(oct, iq, budget);
- else
- tx_done = 1;
- /* Update iq read-index rather than waiting for next interrupt.
- * Return back if tx_done is false.
- */
- update_txq_status(oct, iq_no);
- } else {
- dev_err(&oct->pci_dev->dev, "%s: iq (%d) num invalid\n",
- __func__, iq_no);
- }
-
- /* force enable interrupt if reg cnts are high to avoid wraparound */
- if ((work_done < budget && tx_done) ||
- (iq && iq->pkt_in_done >= MAX_REG_CNT) ||
- (droq->pkt_count >= MAX_REG_CNT)) {
- tx_done = 1;
- napi_complete_done(napi, work_done);
- octeon_process_droq_poll_cmd(droq->oct_dev, droq->q_no,
- POLL_EVENT_ENABLE_INTR, 0);
- return 0;
- }
-
- return (!tx_done) ? (budget) : (work_done);
-}
-
-/**
- * \brief Setup input and output queues
- * @param octeon_dev octeon device
- * @param ifidx Interface Index
- *
- * Note: Queues are with respect to the octeon device. Thus
- * an input queue is for egress packets, and output queues
- * are for ingress packets.
- */
-static inline int setup_io_queues(struct octeon_device *octeon_dev,
- int ifidx)
-{
- struct octeon_droq_ops droq_ops;
- struct net_device *netdev;
- static int cpu_id;
- static int cpu_id_modulus;
- struct octeon_droq *droq;
- struct napi_struct *napi;
- int q, q_no, retval = 0;
- struct lio *lio;
- int num_tx_descs;
-
- netdev = octeon_dev->props[ifidx].netdev;
-
- lio = GET_LIO(netdev);
-
- memset(&droq_ops, 0, sizeof(struct octeon_droq_ops));
-
- droq_ops.fptr = liquidio_push_packet;
- droq_ops.farg = (void *)netdev;
-
- droq_ops.poll_mode = 1;
- droq_ops.napi_fn = liquidio_napi_drv_callback;
- cpu_id = 0;
- cpu_id_modulus = num_present_cpus();
-
- /* set up DROQs. */
- for (q = 0; q < lio->linfo.num_rxpciq; q++) {
- q_no = lio->linfo.rxpciq[q].s.q_no;
- dev_dbg(&octeon_dev->pci_dev->dev,
- "setup_io_queues index:%d linfo.rxpciq.s.q_no:%d\n",
- q, q_no);
- retval = octeon_setup_droq(octeon_dev, q_no,
- CFG_GET_NUM_RX_DESCS_NIC_IF
- (octeon_get_conf(octeon_dev),
- lio->ifidx),
- CFG_GET_NUM_RX_BUF_SIZE_NIC_IF
- (octeon_get_conf(octeon_dev),
- lio->ifidx), NULL);
- if (retval) {
- dev_err(&octeon_dev->pci_dev->dev,
- "%s : Runtime DROQ(RxQ) creation failed.\n",
- __func__);
- return 1;
- }
-
- droq = octeon_dev->droq[q_no];
- napi = &droq->napi;
- dev_dbg(&octeon_dev->pci_dev->dev, "netif_napi_add netdev:%llx oct:%llx pf_num:%d\n",
- (u64)netdev, (u64)octeon_dev, octeon_dev->pf_num);
- netif_napi_add(netdev, napi, liquidio_napi_poll, 64);
-
- /* designate a CPU for this droq */
- droq->cpu_id = cpu_id;
- cpu_id++;
- if (cpu_id >= cpu_id_modulus)
- cpu_id = 0;
-
- octeon_register_droq_ops(octeon_dev, q_no, &droq_ops);
- }
-
- if (OCTEON_CN23XX_PF(octeon_dev)) {
- /* 23XX PF can receive control messages (via the first PF-owned
- * droq) from the firmware even if the ethX interface is down,
- * so that's why poll_mode must be off for the first droq.
- */
- octeon_dev->droq[0]->ops.poll_mode = 0;
- }
-
- /* set up IQs. */
- for (q = 0; q < lio->linfo.num_txpciq; q++) {
- num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(octeon_get_conf
- (octeon_dev),
- lio->ifidx);
- retval = octeon_setup_iq(octeon_dev, ifidx, q,
- lio->linfo.txpciq[q], num_tx_descs,
- netdev_get_tx_queue(netdev, q));
- if (retval) {
- dev_err(&octeon_dev->pci_dev->dev,
- " %s : Runtime IQ(TxQ) creation failed.\n",
- __func__);
- return 1;
- }
-
- if (octeon_dev->ioq_vector) {
- struct octeon_ioq_vector *ioq_vector;
-
- ioq_vector = &octeon_dev->ioq_vector[q];
- netif_set_xps_queue(netdev,
- &ioq_vector->affinity_mask,
- ioq_vector->iq_index);
- }
- }
-
- return 0;
-}
-
/**
* \brief Poll routine for checking transmit queue status
* @param work work_struct data structure
@@ -2707,8 +2061,7 @@ static int liquidio_open(struct net_device *netdev)
oct->droq[0]->ops.poll_mode = 1;
}
- if ((oct->chip_id == OCTEON_CN66XX || oct->chip_id == OCTEON_CN68XX) &&
- ptp_enable)
+ if (oct->ptp_enable)
oct_ptp_open(netdev);
ifstate_set(lio, LIO_IFSTATE_RUNNING);
@@ -2746,6 +2099,17 @@ static int liquidio_stop(struct net_device *netdev)
{
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct = lio->oct_dev;
+ struct napi_struct *napi, *n;
+
+ if (oct->props[lio->ifidx].napi_enabled) {
+ list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+ napi_disable(napi);
+
+ oct->props[lio->ifidx].napi_enabled = 0;
+
+ if (OCTEON_CN23XX_PF(oct))
+ oct->droq[0]->ops.poll_mode = 0;
+ }
ifstate_reset(lio, LIO_IFSTATE_RUNNING);
@@ -2916,7 +2280,10 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
oct = lio->oct_dev;
- for (i = 0; i < lio->linfo.num_txpciq; i++) {
+ if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
+ return stats;
+
+ for (i = 0; i < oct->num_iqs; i++) {
iq_no = lio->linfo.txpciq[i].s.q_no;
iq_stats = &oct->instr_queue[iq_no]->stats;
pkts += iq_stats->tx_done;
@@ -2932,7 +2299,7 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
drop = 0;
bytes = 0;
- for (i = 0; i < lio->linfo.num_rxpciq; i++) {
+ for (i = 0; i < oct->num_oqs; i++) {
oq_no = lio->linfo.rxpciq[i].s.q_no;
oq_stats = &oct->droq[oq_no]->stats;
pkts += oq_stats->rx_pkts_received;
@@ -3052,8 +2419,7 @@ static int liquidio_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCSHWTSTAMP:
- if ((lio->oct_dev->chip_id == OCTEON_CN66XX ||
- lio->oct_dev->chip_id == OCTEON_CN68XX) && ptp_enable)
+ if (lio->oct_dev->ptp_enable)
return hwtstamp_ioctl(netdev, ifr);
default:
return -EOPNOTSUPP;
@@ -4188,7 +3554,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
*/
lio->txq = lio->linfo.txpciq[0].s.q_no;
lio->rxq = lio->linfo.rxpciq[0].s.q_no;
- if (setup_io_queues(octeon_dev, i)) {
+ if (liquidio_setup_io_queues(octeon_dev, i,
+ lio->linfo.num_txpciq,
+ lio->linfo.num_rxpciq)) {
dev_err(&octeon_dev->pci_dev->dev, "I/O queues creation failed\n");
goto setup_nic_dev_fail;
}
@@ -4516,6 +3884,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
int j, ret;
int fw_loaded = 0;
char bootcmd[] = "\n";
+ char *dbg_enb = NULL;
struct octeon_device_priv *oct_priv =
(struct octeon_device_priv *)octeon_dev->priv;
atomic_set(&octeon_dev->status, OCT_DEV_BEGIN_STATE);
@@ -4548,18 +3917,16 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
octeon_dev->app_mode = CVM_DRV_INVALID_APP;
if (OCTEON_CN23XX_PF(octeon_dev)) {
- if (!cn23xx_fw_loaded(octeon_dev)) {
+ if (!cn23xx_fw_loaded(octeon_dev) && !fw_type_is_none()) {
fw_loaded = 0;
- if (!fw_type_is_none()) {
- /* Do a soft reset of the Octeon device. */
- if (octeon_dev->fn_list.soft_reset(octeon_dev))
- return 1;
- /* things might have changed */
- if (!cn23xx_fw_loaded(octeon_dev))
- fw_loaded = 0;
- else
- fw_loaded = 1;
- }
+ /* Do a soft reset of the Octeon device. */
+ if (octeon_dev->fn_list.soft_reset(octeon_dev))
+ return 1;
+ /* things might have changed */
+ if (!cn23xx_fw_loaded(octeon_dev))
+ fw_loaded = 0;
+ else
+ fw_loaded = 1;
} else {
fw_loaded = 1;
}
@@ -4666,7 +4033,8 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
/* Setup the interrupt handler and record the INT SUM register address
*/
- if (octeon_setup_interrupt(octeon_dev))
+ if (octeon_setup_interrupt(octeon_dev,
+ octeon_dev->sriov_info.num_pf_rings))
return 1;
/* Enable Octeon device interrupts */
@@ -4674,6 +4042,18 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
atomic_set(&octeon_dev->status, OCT_DEV_INTR_SET_DONE);
+ /* Send Credit for Octeon Output queues. Credits are always sent BEFORE
+ * the output queue is enabled.
+ * This ensures that we'll receive the f/w CORE DRV_ACTIVE message in
+ * case we've configured CN23XX_SLI_GBL_CONTROL[NOPTR_D] = 0.
+ * Otherwise, it is possible that the DRV_ACTIVE message will be sent
+ * before any credits have been issued, causing the ring to be reset
+ * (and the f/w appear to never have started).
+ */
+ for (j = 0; j < octeon_dev->num_oqs; j++)
+ writel(octeon_dev->droq[j]->max_count,
+ octeon_dev->droq[j]->pkts_credit_reg);
+
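Condensed, the bring-up order this hunk establishes (credits posted before the queues are enabled, per the comment above) is:

/* 1. Post credits to every DROQ; must precede enabling the queues so
 *    the firmware's first message finds ring space.
 */
for (j = 0; j < octeon_dev->num_oqs; j++)
        writel(octeon_dev->droq[j]->max_count,
               octeon_dev->droq[j]->pkts_credit_reg);

/* 2. Only then enable the input and output queues. */
ret = octeon_dev->fn_list.enable_io_queues(octeon_dev);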
/* Enable the input and output queues for this Octeon device */
ret = octeon_dev->fn_list.enable_io_queues(octeon_dev);
if (ret) {
@@ -4722,10 +4102,19 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
dev_err(&octeon_dev->pci_dev->dev, "Could not access board consoles\n");
return 1;
}
- ret = octeon_add_console(octeon_dev, 0);
+ /* If console debug enabled, specify empty string to use default
+ * enablement ELSE specify NULL string for 'disabled'.
+ */
+ dbg_enb = octeon_console_debug_enabled(0) ? "" : NULL;
+ ret = octeon_add_console(octeon_dev, 0, dbg_enb);
if (ret) {
dev_err(&octeon_dev->pci_dev->dev, "Could not access board console\n");
return 1;
+ } else if (octeon_console_debug_enabled(0)) {
+ /* If console was added AND we're logging console output
+ * then set our console print function.
+ */
+ octeon_dev->console[0].print = octeon_dbg_console_print;
}
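Taken together with the octeon_console.c hunks below, octeon_add_console() now accepts three debug-enablement modes through its new third argument; roughly:

octeon_add_console(oct, 0, NULL); /* console up, debug logging disabled */
octeon_add_console(oct, 0, "");   /* default: sends "setenv pci_console_active 1" */
octeon_add_console(oct, 0, "setenv pci_console_active 1"); /* explicit command */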
atomic_set(&octeon_dev->status, OCT_DEV_CONSOLE_INIT_DONE);
@@ -4736,12 +4125,6 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n");
return 1;
}
- /* set bit 1 of SLI_SCRATCH_1 to indicate that firmware is
- * loaded
- */
- if (OCTEON_CN23XX_PF(octeon_dev))
- octeon_write_csr64(octeon_dev, CN23XX_SLI_SCRATCH1,
- 2ULL);
}
handshake[octeon_dev->octeon_id].init_ok = 1;
@@ -4749,14 +4132,33 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
atomic_set(&octeon_dev->status, OCT_DEV_HOST_OK);
- /* Send Credit for Octeon Output queues. Credits are always sent after
- * the output queue is enabled.
- */
- for (j = 0; j < octeon_dev->num_oqs; j++)
- writel(octeon_dev->droq[j]->max_count,
- octeon_dev->droq[j]->pkts_credit_reg);
+ return 0;
+}
+
+/**
+ * \brief Debug console print function
+ * @param oct octeon device
+ * @param console_num console number
+ * @param prefix first portion of line to display
+ * @param suffix second portion of line to display
+ *
+ * The OCTEON debug console outputs entire lines (excluding '\n').
+ * Normally, the line will be passed in the 'prefix' parameter.
+ * However, due to buffering, it is possible for a line to be split into two
+ * parts, in which case they will be passed as the 'prefix' parameter and
+ * 'suffix' parameter.
+ */
+static int octeon_dbg_console_print(struct octeon_device *oct, u32 console_num,
+ char *prefix, char *suffix)
+{
+ if (prefix && suffix)
+ dev_info(&oct->pci_dev->dev, "%u: %s%s\n", console_num, prefix,
+ suffix);
+ else if (prefix)
+ dev_info(&oct->pci_dev->dev, "%u: %s\n", console_num, prefix);
+ else if (suffix)
+ dev_info(&oct->pci_dev->dev, "%u: %s\n", console_num, suffix);
- /* Packets can start arriving on the output queues from this point. */
return 0;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 9b24710..2e993ce 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -107,12 +107,6 @@ struct octnic_gather {
dma_addr_t sg_dma_ptr;
};
-struct octeon_device_priv {
- /* Tasklet structures for this device. */
- struct tasklet_struct droq_tasklet;
- unsigned long napi_mask;
-};
-
static int
liquidio_vf_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
static void liquidio_vf_remove(struct pci_dev *pdev);
@@ -123,7 +117,7 @@ static int lio_wait_for_oq_pkts(struct octeon_device *oct)
{
struct octeon_device_priv *oct_priv =
(struct octeon_device_priv *)oct->priv;
- int retry = MAX_VF_IP_OP_PENDING_PKT_COUNT;
+ int retry = MAX_IO_PENDING_PKT_COUNT;
int pkt_cnt = 0, pending_pkts;
int i;
@@ -148,32 +142,6 @@ static int lio_wait_for_oq_pkts(struct octeon_device *oct)
}
/**
- * \brief wait for all pending requests to complete
- * @param oct Pointer to Octeon device
- *
- * Called during shutdown sequence
- */
-static int wait_for_pending_requests(struct octeon_device *oct)
-{
- int i, pcount = 0;
-
- for (i = 0; i < MAX_VF_IP_OP_PENDING_PKT_COUNT; i++) {
- pcount = atomic_read(
- &oct->response_list[OCTEON_ORDERED_SC_LIST]
- .pending_req_count);
- if (pcount)
- schedule_timeout_uninterruptible(HZ / 10);
- else
- break;
- }
-
- if (pcount)
- return 1;
-
- return 0;
-}
-
-/**
* \brief Cause device to go quiet so it can be safely removed/reset/etc
* @param oct Pointer to Octeon device
*/
@@ -374,7 +342,7 @@ static void txqs_wake(struct net_device *netdev)
int i;
for (i = 0; i < netdev->num_tx_queues; i++) {
- int qno = lio->linfo.txpciq[i % (lio->linfo.num_txpciq)]
+ int qno = lio->linfo.txpciq[i % lio->oct_dev->num_iqs]
.s.q_no;
if (__netif_subqueue_stopped(netdev, i)) {
INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
@@ -574,7 +542,8 @@ static void print_link_info(struct net_device *netdev)
{
struct lio *lio = GET_LIO(netdev);
- if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED) {
+ if (!ifstate_check(lio, LIO_IFSTATE_RESETTING) &&
+ ifstate_check(lio, LIO_IFSTATE_REGISTERED)) {
struct oct_link_info *linfo = &lio->linfo;
if (linfo->link.s.link_up) {
@@ -661,6 +630,12 @@ static void update_link_status(struct net_device *netdev,
txqs_stop(netdev);
}
+ if (lio->linfo.link.s.mtu != netdev->max_mtu) {
+ dev_info(&oct->pci_dev->dev, "Max MTU Changed from %d to %d\n",
+ netdev->max_mtu, lio->linfo.link.s.mtu);
+ netdev->max_mtu = lio->linfo.link.s.mtu;
+ }
+
if (lio->linfo.link.s.mtu < netdev->mtu) {
dev_warn(&oct->pci_dev->dev,
"PF has changed the MTU for gmx port. Reducing the mtu from %d to %d\n",
@@ -673,167 +648,6 @@ static void update_link_status(struct net_device *netdev,
}
}
-static void update_txq_status(struct octeon_device *oct, int iq_num)
-{
- struct octeon_instr_queue *iq = oct->instr_queue[iq_num];
- struct net_device *netdev;
- struct lio *lio;
-
- netdev = oct->props[iq->ifidx].netdev;
- lio = GET_LIO(netdev);
- if (netif_is_multiqueue(netdev)) {
- if (__netif_subqueue_stopped(netdev, iq->q_index) &&
- lio->linfo.link.s.link_up &&
- (!octnet_iq_is_full(oct, iq_num))) {
- netif_wake_subqueue(netdev, iq->q_index);
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
- tx_restart, 1);
- }
- } else if (netif_queue_stopped(netdev) && lio->linfo.link.s.link_up &&
- (!octnet_iq_is_full(oct, lio->txq))) {
- INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev,
- lio->txq, tx_restart, 1);
- netif_wake_queue(netdev);
- }
-}
-
-static
-int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
-{
- struct octeon_device *oct = droq->oct_dev;
- struct octeon_device_priv *oct_priv =
- (struct octeon_device_priv *)oct->priv;
-
- if (droq->ops.poll_mode) {
- droq->ops.napi_fn(droq);
- } else {
- if (ret & MSIX_PO_INT) {
- dev_err(&oct->pci_dev->dev,
- "should not come here should not get rx when poll mode = 0 for vf\n");
- tasklet_schedule(&oct_priv->droq_tasklet);
- return 1;
- }
- /* this will be flushed periodically by check iq db */
- if (ret & MSIX_PI_INT)
- return 0;
- }
- return 0;
-}
-
-static irqreturn_t
-liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev)
-{
- struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev;
- struct octeon_device *oct = ioq_vector->oct_dev;
- struct octeon_droq *droq = oct->droq[ioq_vector->droq_index];
- u64 ret;
-
- ret = oct->fn_list.msix_interrupt_handler(ioq_vector);
-
- if ((ret & MSIX_PO_INT) || (ret & MSIX_PI_INT))
- liquidio_schedule_msix_droq_pkt_handler(droq, ret);
-
- return IRQ_HANDLED;
-}
-
-/**
- * \brief Setup interrupt for octeon device
- * @param oct octeon device
- *
- * Enable interrupt in Octeon device as given in the PCI interrupt mask.
- */
-static int octeon_setup_interrupt(struct octeon_device *oct)
-{
- struct msix_entry *msix_entries;
- char *queue_irq_names = NULL;
- int num_alloc_ioq_vectors;
- int num_ioq_vectors;
- int irqret;
- int i;
-
- if (oct->msix_on) {
- oct->num_msix_irqs = oct->sriov_info.rings_per_vf;
-
- /* allocate storage for the names assigned to each irq */
- oct->irq_name_storage =
- kcalloc(MAX_IOQ_INTERRUPTS_PER_VF, INTRNAMSIZ,
- GFP_KERNEL);
- if (!oct->irq_name_storage) {
- dev_err(&oct->pci_dev->dev, "Irq name storage alloc failed...\n");
- return -ENOMEM;
- }
-
- queue_irq_names = oct->irq_name_storage;
-
- oct->msix_entries = kcalloc(
- oct->num_msix_irqs, sizeof(struct msix_entry), GFP_KERNEL);
- if (!oct->msix_entries) {
- dev_err(&oct->pci_dev->dev, "Memory Alloc failed...\n");
- kfree(oct->irq_name_storage);
- oct->irq_name_storage = NULL;
- return -ENOMEM;
- }
-
- msix_entries = (struct msix_entry *)oct->msix_entries;
-
- for (i = 0; i < oct->num_msix_irqs; i++)
- msix_entries[i].entry = i;
- num_alloc_ioq_vectors = pci_enable_msix_range(
- oct->pci_dev, msix_entries,
- oct->num_msix_irqs,
- oct->num_msix_irqs);
- if (num_alloc_ioq_vectors < 0) {
- dev_err(&oct->pci_dev->dev, "unable to Allocate MSI-X interrupts\n");
- kfree(oct->msix_entries);
- oct->msix_entries = NULL;
- kfree(oct->irq_name_storage);
- oct->irq_name_storage = NULL;
- return num_alloc_ioq_vectors;
- }
- dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n");
-
- num_ioq_vectors = oct->num_msix_irqs;
-
- for (i = 0; i < num_ioq_vectors; i++) {
- snprintf(&queue_irq_names[IRQ_NAME_OFF(i)], INTRNAMSIZ,
- "LiquidIO%u-vf%u-rxtx-%u",
- oct->octeon_id, oct->vf_num, i);
-
- irqret = request_irq(msix_entries[i].vector,
- liquidio_msix_intr_handler, 0,
- &queue_irq_names[IRQ_NAME_OFF(i)],
- &oct->ioq_vector[i]);
- if (irqret) {
- dev_err(&oct->pci_dev->dev,
- "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n",
- irqret);
-
- while (i) {
- i--;
- irq_set_affinity_hint(
- msix_entries[i].vector, NULL);
- free_irq(msix_entries[i].vector,
- &oct->ioq_vector[i]);
- }
- pci_disable_msix(oct->pci_dev);
- kfree(oct->msix_entries);
- oct->msix_entries = NULL;
- kfree(oct->irq_name_storage);
- oct->irq_name_storage = NULL;
- return irqret;
- }
- oct->ioq_vector[i].vector = msix_entries[i].vector;
- /* assign the cpu mask for this msix interrupt vector */
- irq_set_affinity_hint(
- msix_entries[i].vector,
- (&oct->ioq_vector[i].affinity_mask));
- }
- dev_dbg(&oct->pci_dev->dev,
- "OCTEON[%d]: MSI-X enabled\n", oct->octeon_id);
- }
- return 0;
-}
-
/**
* \brief PCI probe handler
* @param pdev PCI device structure
@@ -942,10 +756,14 @@ static void octeon_destroy_resources(struct octeon_device *oct)
if (oct->msix_on) {
msix_entries = (struct msix_entry *)oct->msix_entries;
for (i = 0; i < oct->num_msix_irqs; i++) {
- irq_set_affinity_hint(msix_entries[i].vector,
- NULL);
- free_irq(msix_entries[i].vector,
- &oct->ioq_vector[i]);
+ if (oct->ioq_vector[i].vector) {
+ irq_set_affinity_hint(
+ msix_entries[i].vector,
+ NULL);
+ free_irq(msix_entries[i].vector,
+ &oct->ioq_vector[i]);
+ oct->ioq_vector[i].vector = 0;
+ }
}
pci_disable_msix(oct->pci_dev);
kfree(oct->msix_entries);
@@ -1137,6 +955,10 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
oct->droq[0]->ops.poll_mode = 0;
}
+ /* Delete NAPI */
+ list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+ netif_napi_del(napi);
+
if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
unregister_netdev(netdev);
@@ -1174,7 +996,7 @@ static int liquidio_stop_nic_module(struct octeon_device *oct)
for (i = 0; i < oct->ifcount; i++) {
lio = GET_LIO(oct->props[i].netdev);
- for (j = 0; j < lio->linfo.num_rxpciq; j++)
+ for (j = 0; j < oct->num_oqs; j++)
octeon_unregister_droq_ops(oct,
lio->linfo.rxpciq[j].s.q_no);
}
@@ -1262,7 +1084,7 @@ static int check_txq_state(struct lio *lio, struct sk_buff *skb)
if (netif_is_multiqueue(lio->netdev)) {
q = skb->queue_mapping;
- iq = lio->linfo.txpciq[(q % (lio->linfo.num_txpciq))].s.q_no;
+ iq = lio->linfo.txpciq[q % lio->oct_dev->num_iqs].s.q_no;
} else {
iq = lio->txq;
q = iq;
@@ -1391,41 +1213,6 @@ static void free_netsgbuf_with_resp(void *buf)
}
/**
- * \brief Setup output queue
- * @param oct octeon device
- * @param q_no which queue
- * @param num_descs how many descriptors
- * @param desc_size size of each descriptor
- * @param app_ctx application context
- */
-static int octeon_setup_droq(struct octeon_device *oct, int q_no, int num_descs,
- int desc_size, void *app_ctx)
-{
- int ret_val;
-
- dev_dbg(&oct->pci_dev->dev, "Creating Droq: %d\n", q_no);
- /* droq creation and local register settings. */
- ret_val = octeon_create_droq(oct, q_no, num_descs, desc_size, app_ctx);
- if (ret_val < 0)
- return ret_val;
-
- if (ret_val == 1) {
- dev_dbg(&oct->pci_dev->dev, "Using default droq %d\n", q_no);
- return 0;
- }
-
- /* Enable the droq queues */
- octeon_set_droq_pkt_op(oct, q_no, 1);
-
- /* Send Credit for Octeon Output queues. Credits are always
- * sent after the output queue is enabled.
- */
- writel(oct->droq[q_no]->max_count, oct->droq[q_no]->pkts_credit_reg);
-
- return ret_val;
-}
-
-/**
* \brief Callback for getting interface configuration
* @param status status of request
* @param buf pointer to resp structure
@@ -1457,290 +1244,6 @@ static void if_cfg_callback(struct octeon_device *oct,
wake_up_interruptible(&ctx->wc);
}
-/** Routine to push packets arriving on Octeon interface up to the network layer.
- * @param oct_id - octeon device id.
- * @param skbuff - skbuff struct to be passed to network layer.
- * @param len - size of total data received.
- * @param rh - Control header associated with the packet
- * @param param - additional control data with the packet
- * @param arg - farg registered in droq_ops
- */
-static void
-liquidio_push_packet(u32 octeon_id __attribute__((unused)),
- void *skbuff,
- u32 len,
- union octeon_rh *rh,
- void *param,
- void *arg)
-{
- struct napi_struct *napi = param;
- struct octeon_droq *droq =
- container_of(param, struct octeon_droq, napi);
- struct net_device *netdev = (struct net_device *)arg;
- struct sk_buff *skb = (struct sk_buff *)skbuff;
- u16 vtag = 0;
- u32 r_dh_off;
-
- if (netdev) {
- struct lio *lio = GET_LIO(netdev);
- int packet_was_received;
-
- /* Do not proceed if the interface is not in RUNNING state. */
- if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) {
- recv_buffer_free(skb);
- droq->stats.rx_dropped++;
- return;
- }
-
- skb->dev = netdev;
-
- skb_record_rx_queue(skb, droq->q_no);
- if (likely(len > MIN_SKB_SIZE)) {
- struct octeon_skb_page_info *pg_info;
- unsigned char *va;
-
- pg_info = ((struct octeon_skb_page_info *)(skb->cb));
- if (pg_info->page) {
- /* For Paged allocation use the frags */
- va = page_address(pg_info->page) +
- pg_info->page_offset;
- memcpy(skb->data, va, MIN_SKB_SIZE);
- skb_put(skb, MIN_SKB_SIZE);
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
- pg_info->page,
- pg_info->page_offset +
- MIN_SKB_SIZE,
- len - MIN_SKB_SIZE,
- LIO_RXBUFFER_SZ);
- }
- } else {
- struct octeon_skb_page_info *pg_info =
- ((struct octeon_skb_page_info *)(skb->cb));
- skb_copy_to_linear_data(skb,
- page_address(pg_info->page) +
- pg_info->page_offset, len);
- skb_put(skb, len);
- put_page(pg_info->page);
- }
-
- r_dh_off = (rh->r_dh.len - 1) * BYTES_PER_DHLEN_UNIT;
-
- if (rh->r_dh.has_hwtstamp)
- r_dh_off -= BYTES_PER_DHLEN_UNIT;
-
- if (rh->r_dh.has_hash) {
- __be32 *hash_be = (__be32 *)(skb->data + r_dh_off);
- u32 hash = be32_to_cpu(*hash_be);
-
- skb_set_hash(skb, hash, PKT_HASH_TYPE_L4);
- r_dh_off -= BYTES_PER_DHLEN_UNIT;
- }
-
- skb_pull(skb, rh->r_dh.len * BYTES_PER_DHLEN_UNIT);
- skb->protocol = eth_type_trans(skb, skb->dev);
-
- if ((netdev->features & NETIF_F_RXCSUM) &&
- (((rh->r_dh.encap_on) &&
- (rh->r_dh.csum_verified & CNNIC_TUN_CSUM_VERIFIED)) ||
- (!(rh->r_dh.encap_on) &&
- (rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED))))
- /* checksum has already been verified */
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- else
- skb->ip_summed = CHECKSUM_NONE;
-
- /* Setting Encapsulation field on the basis of status received
- * from the firmware
- */
- if (rh->r_dh.encap_on) {
- skb->encapsulation = 1;
- skb->csum_level = 1;
- droq->stats.rx_vxlan++;
- }
-
- /* inbound VLAN tag */
- if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
- rh->r_dh.vlan) {
- u16 priority = rh->r_dh.priority;
- u16 vid = rh->r_dh.vlan;
-
- vtag = (priority << VLAN_PRIO_SHIFT) | vid;
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag);
- }
-
- packet_was_received = (napi_gro_receive(napi, skb) != GRO_DROP);
-
- if (packet_was_received) {
- droq->stats.rx_bytes_received += len;
- droq->stats.rx_pkts_received++;
- } else {
- droq->stats.rx_dropped++;
- netif_info(lio, rx_err, lio->netdev,
- "droq:%d error rx_dropped:%llu\n",
- droq->q_no, droq->stats.rx_dropped);
- }
-
- } else {
- recv_buffer_free(skb);
- }
-}
-
-/**
- * \brief callback when receive interrupt occurs and we are in NAPI mode
- * @param arg pointer to octeon output queue
- */
-static void liquidio_vf_napi_drv_callback(void *arg)
-{
- struct octeon_droq *droq = arg;
-
- napi_schedule_irqoff(&droq->napi);
-}
-
-/**
- * \brief Entry point for NAPI polling
- * @param napi NAPI structure
- * @param budget maximum number of items to process
- */
-static int liquidio_napi_poll(struct napi_struct *napi, int budget)
-{
- struct octeon_instr_queue *iq;
- struct octeon_device *oct;
- struct octeon_droq *droq;
- int tx_done = 0, iq_no;
- int work_done;
-
- droq = container_of(napi, struct octeon_droq, napi);
- oct = droq->oct_dev;
- iq_no = droq->q_no;
-
- /* Handle Droq descriptors */
- work_done = octeon_process_droq_poll_cmd(oct, droq->q_no,
- POLL_EVENT_PROCESS_PKTS,
- budget);
-
- /* Flush the instruction queue */
- iq = oct->instr_queue[iq_no];
- if (iq) {
- if (atomic_read(&iq->instr_pending))
- /* Process iq buffers within the budget limits */
- tx_done = octeon_flush_iq(oct, iq, budget);
- else
- tx_done = 1;
-
- /* Update iq read-index rather than waiting for next interrupt.
- * Return back if tx_done is false.
- */
- update_txq_status(oct, iq_no);
- } else {
- dev_err(&oct->pci_dev->dev, "%s: iq (%d) num invalid\n",
- __func__, iq_no);
- }
-
- /* force enable interrupt if reg cnts are high to avoid wraparound */
- if ((work_done < budget && tx_done) ||
- (iq && iq->pkt_in_done >= MAX_REG_CNT) ||
- (droq->pkt_count >= MAX_REG_CNT)) {
- tx_done = 1;
- napi_complete_done(napi, work_done);
- octeon_process_droq_poll_cmd(droq->oct_dev, droq->q_no,
- POLL_EVENT_ENABLE_INTR, 0);
- return 0;
- }
-
- return (!tx_done) ? (budget) : (work_done);
-}
-
-/**
- * \brief Setup input and output queues
- * @param octeon_dev octeon device
- * @param ifidx Interface index
- *
- * Note: Queues are with respect to the octeon device. Thus
- * an input queue is for egress packets, and output queues
- * are for ingress packets.
- */
-static int setup_io_queues(struct octeon_device *octeon_dev, int ifidx)
-{
- struct octeon_droq_ops droq_ops;
- struct net_device *netdev;
- static int cpu_id_modulus;
- struct octeon_droq *droq;
- struct napi_struct *napi;
- static int cpu_id;
- int num_tx_descs;
- struct lio *lio;
- int retval = 0;
- int q, q_no;
-
- netdev = octeon_dev->props[ifidx].netdev;
-
- lio = GET_LIO(netdev);
-
- memset(&droq_ops, 0, sizeof(struct octeon_droq_ops));
-
- droq_ops.fptr = liquidio_push_packet;
- droq_ops.farg = netdev;
-
- droq_ops.poll_mode = 1;
- droq_ops.napi_fn = liquidio_vf_napi_drv_callback;
- cpu_id = 0;
- cpu_id_modulus = num_present_cpus();
-
- /* set up DROQs. */
- for (q = 0; q < lio->linfo.num_rxpciq; q++) {
- q_no = lio->linfo.rxpciq[q].s.q_no;
-
- retval = octeon_setup_droq(
- octeon_dev, q_no,
- CFG_GET_NUM_RX_DESCS_NIC_IF(octeon_get_conf(octeon_dev),
- lio->ifidx),
- CFG_GET_NUM_RX_BUF_SIZE_NIC_IF(octeon_get_conf(octeon_dev),
- lio->ifidx),
- NULL);
- if (retval) {
- dev_err(&octeon_dev->pci_dev->dev,
- "%s : Runtime DROQ(RxQ) creation failed.\n",
- __func__);
- return 1;
- }
-
- droq = octeon_dev->droq[q_no];
- napi = &droq->napi;
- netif_napi_add(netdev, napi, liquidio_napi_poll, 64);
-
- /* designate a CPU for this droq */
- droq->cpu_id = cpu_id;
- cpu_id++;
- if (cpu_id >= cpu_id_modulus)
- cpu_id = 0;
-
- octeon_register_droq_ops(octeon_dev, q_no, &droq_ops);
- }
-
- /* 23XX VF can send/recv control messages (via the first VF-owned
- * droq) from the firmware even if the ethX interface is down,
- * so that's why poll_mode must be off for the first droq.
- */
- octeon_dev->droq[0]->ops.poll_mode = 0;
-
- /* set up IQs. */
- for (q = 0; q < lio->linfo.num_txpciq; q++) {
- num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(
- octeon_get_conf(octeon_dev), lio->ifidx);
- retval = octeon_setup_iq(octeon_dev, ifidx, q,
- lio->linfo.txpciq[q], num_tx_descs,
- netdev_get_tx_queue(netdev, q));
- if (retval) {
- dev_err(&octeon_dev->pci_dev->dev,
- " %s : Runtime IQ(TxQ) creation failed.\n",
- __func__);
- return 1;
- }
- }
-
- return 0;
-}
-
/**
* \brief Net device open for LiquidIO
* @param netdev network device
@@ -1784,6 +1287,16 @@ static int liquidio_stop(struct net_device *netdev)
{
struct lio *lio = GET_LIO(netdev);
struct octeon_device *oct = lio->oct_dev;
+ struct napi_struct *napi, *n;
+
+ if (oct->props[lio->ifidx].napi_enabled) {
+ list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+ napi_disable(napi);
+
+ oct->props[lio->ifidx].napi_enabled = 0;
+
+ oct->droq[0]->ops.poll_mode = 0;
+ }
netif_info(lio, ifdown, lio->netdev, "Stopping interface!\n");
/* Inform that netif carrier is down */
@@ -1988,7 +1501,10 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
oct = lio->oct_dev;
- for (i = 0; i < lio->linfo.num_txpciq; i++) {
+ if (ifstate_check(lio, LIO_IFSTATE_RESETTING))
+ return stats;
+
+ for (i = 0; i < oct->num_iqs; i++) {
iq_no = lio->linfo.txpciq[i].s.q_no;
iq_stats = &oct->instr_queue[iq_no]->stats;
pkts += iq_stats->tx_done;
@@ -2004,7 +1520,7 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
drop = 0;
bytes = 0;
- for (i = 0; i < lio->linfo.num_rxpciq; i++) {
+ for (i = 0; i < oct->num_oqs; i++) {
oq_no = lio->linfo.rxpciq[i].s.q_no;
oq_stats = &oct->droq[oq_no]->stats;
pkts += oq_stats->rx_pkts_received;
@@ -2028,17 +1544,31 @@ static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
*/
static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
{
- struct lio *lio = GET_LIO(netdev);
- struct octeon_device *oct = lio->oct_dev;
+ struct octnic_ctrl_pkt nctrl;
+ struct octeon_device *oct;
+ struct lio *lio;
+ int ret = 0;
- lio->mtu = new_mtu;
+ lio = GET_LIO(netdev);
+ oct = lio->oct_dev;
+
+ memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
- netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n",
- netdev->mtu, new_mtu);
- dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n",
- netdev->name, netdev->mtu, new_mtu);
+ nctrl.ncmd.u64 = 0;
+ nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MTU;
+ nctrl.ncmd.s.param1 = new_mtu;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+ nctrl.wait_time = LIO_CMD_WAIT_TM;
+ nctrl.netpndev = (u64)netdev;
+ nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
- netdev->mtu = new_mtu;
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+ if (ret < 0) {
+ dev_err(&oct->pci_dev->dev, "Failed to set MTU\n");
+ return -EIO;
+ }
+
+ lio->mtu = new_mtu;
return 0;
}
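The rewritten liquidio_change_mtu() is an instance of the driver's general control-packet pattern: populate an octnic_ctrl_pkt, aim it at the interface's first IQ, and let the completion callback surface the result (the local lio->mtu is only updated after the send succeeds). The same shape, as a sketch where the command and param1 are placeholders for any OCTNET_CMD_* request:

struct octnic_ctrl_pkt nctrl;

memset(&nctrl, 0, sizeof(nctrl));
nctrl.ncmd.u64 = 0;
nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MTU;       /* any OCTNET_CMD_* */
nctrl.ncmd.s.param1 = new_value;                /* command-specific argument */
nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;      /* first IQ of the interface */
nctrl.wait_time = LIO_CMD_WAIT_TM;
nctrl.netpndev = (u64)netdev;
nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;

if (octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl) < 0)
        return -EIO;    /* send failed; the callback will not run */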
@@ -2959,7 +2489,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
/* Copy MAC Address to OS network device structure */
ether_addr_copy(netdev->dev_addr, mac);
- if (setup_io_queues(octeon_dev, i)) {
+ if (liquidio_setup_io_queues(octeon_dev, i,
+ lio->linfo.num_txpciq,
+ lio->linfo.num_rxpciq)) {
dev_err(&octeon_dev->pci_dev->dev, "I/O queues creation failed\n");
goto setup_nic_dev_fail;
}
@@ -3182,7 +2714,7 @@ static int octeon_device_init(struct octeon_device *oct)
LIQUIDIO_VERSION, oct->sriov_info.rings_per_vf);
/* Setup the interrupt handler and record the INT SUM register address*/
- if (octeon_setup_interrupt(oct))
+ if (octeon_setup_interrupt(oct, oct->sriov_info.rings_per_vf))
return 1;
atomic_set(&oct->status, OCT_DEV_INTR_SET_DONE);
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 231dd7f..3788c8c 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -27,7 +27,7 @@
#define LIQUIDIO_PACKAGE ""
#define LIQUIDIO_BASE_MAJOR_VERSION 1
-#define LIQUIDIO_BASE_MINOR_VERSION 5
+#define LIQUIDIO_BASE_MINOR_VERSION 6
#define LIQUIDIO_BASE_MICRO_VERSION 1
#define LIQUIDIO_BASE_VERSION __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \
__stringify(LIQUIDIO_BASE_MINOR_VERSION)
@@ -106,6 +106,7 @@ enum octeon_tag_type {
#define MAX_IOQ_INTERRUPTS_PER_PF (64 * 2)
#define MAX_IOQ_INTERRUPTS_PER_VF (8 * 2)
+#define SCR2_BIT_FW_LOADED 63
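SCR2_BIT_FW_LOADED pairs with the lio_main.c hunk above that drops the host-side write of 2ULL into CN23XX_SLI_SCRATCH1: the firmware-loaded indication moves to a single named bit in scratch register 2. A hedged sketch of testing it, assuming CN23XX_SLI_SCRATCH2 is the CN23XX scratch-2 CSR define:

/* Assumed: CN23XX_SLI_SCRATCH2 names the CN23XX scratch-2 CSR. */
u64 scr2 = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH2);

if (scr2 & BIT_ULL(SCR2_BIT_FW_LOADED))
        dev_dbg(&oct->pci_dev->dev, "firmware already loaded\n");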
static inline u32 incr_index(u32 index, u32 count, u32 max)
{
@@ -189,7 +190,6 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
#define OCTNET_CMD_Q 0
/* NIC Command types */
-#define OCTNET_CMD_RESET_PF 0x0
#define OCTNET_CMD_CHANGE_MTU 0x1
#define OCTNET_CMD_CHANGE_MACADDR 0x2
#define OCTNET_CMD_CHANGE_DEVFLAGS 0x3
@@ -226,6 +226,9 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
#define OCTNET_CMD_SET_UC_LIST 0x1b
#define OCTNET_CMD_SET_VF_LINKSTATE 0x1c
+
+#define OCTNET_CMD_QUEUE_COUNT_CTL 0x1f
+
#define OCTNET_CMD_VXLAN_PORT_ADD 0x0
#define OCTNET_CMD_VXLAN_PORT_DEL 0x1
#define OCTNET_CMD_RXCSUM_ENABLE 0x0
@@ -235,6 +238,8 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
#define OCTNET_CMD_VLAN_FILTER_ENABLE 0x1
#define OCTNET_CMD_VLAN_FILTER_DISABLE 0x0
+#define LIO_CMD_WAIT_TM 100
+
/* RX(packets coming from wire) Checksum verification flags */
/* TCP/UDP csum */
#define CNNIC_L4SUM_VERIFIED 0x1
@@ -768,6 +773,7 @@ struct nic_rx_stats {
/* firmware stats */
u64 fw_total_rcvd;
u64 fw_total_fwd;
+ u64 fw_total_fwd_bytes;
u64 fw_err_pko;
u64 fw_err_link;
u64 fw_err_drop;
@@ -814,6 +820,7 @@ struct nic_tx_stats {
u64 fw_tso; /* number of tso requests */
u64 fw_tso_fwd; /* number of packets segmented in tso */
u64 fw_tx_vxlan;
+ u64 fw_err_pki;
};
struct oct_link_stats {
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
index f229d79..63bd9c9 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
@@ -71,13 +71,17 @@
#define CN23XX_MAX_RINGS_PER_VF 8
#define CN23XX_MAX_INPUT_QUEUES CN23XX_MAX_RINGS_PER_PF
-#define CN23XX_MAX_IQ_DESCRIPTORS 512
+#define CN23XX_MAX_IQ_DESCRIPTORS 2048
+#define CN23XX_DEFAULT_IQ_DESCRIPTORS 512
+#define CN23XX_MIN_IQ_DESCRIPTORS 128
#define CN23XX_DB_MIN 1
#define CN23XX_DB_MAX 8
#define CN23XX_DB_TIMEOUT 1
#define CN23XX_MAX_OUTPUT_QUEUES CN23XX_MAX_RINGS_PER_PF
-#define CN23XX_MAX_OQ_DESCRIPTORS 512
+#define CN23XX_MAX_OQ_DESCRIPTORS 2048
+#define CN23XX_DEFAULT_OQ_DESCRIPTORS 512
+#define CN23XX_MIN_OQ_DESCRIPTORS 128
#define CN23XX_OQ_BUF_SIZE 1664
#define CN23XX_OQ_PKTSPER_INTR 128
/*#define CAVIUM_ONLY_CN23XX_RX_PERF*/
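With the maxima raised to 2048 and explicit minimum/default values introduced, ring sizes look intended to become tunable rather than fixed at 512; any requested count would then be clamped into the supported window. A sketch, assuming a requested num_descs:

/* Clamp a requested IQ ring size into the CN23XX-supported window. */
num_descs = clamp_t(u32, num_descs,
                    CN23XX_MIN_IQ_DESCRIPTORS,
                    CN23XX_MAX_IQ_DESCRIPTORS);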
@@ -163,6 +167,11 @@
((cfg)->misc.oct_link_query_interval)
#define CFG_GET_IS_SLI_BP_ON(cfg) ((cfg)->misc.enable_sli_oq_bp)
+#define CFG_SET_NUM_RX_DESCS_NIC_IF(cfg, idx, value) \
+ ((cfg)->nic_if_cfg[idx].num_rx_descs = value)
+#define CFG_SET_NUM_TX_DESCS_NIC_IF(cfg, idx, value) \
+ ((cfg)->nic_if_cfg[idx].num_tx_descs = value)
+
/* Max IOQs per OCTEON Link */
#define MAX_IOQS_PER_NICIF 64
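The new CFG_SET_NUM_*_DESCS_NIC_IF macros are write-side complements to the CFG_GET_* accessors used in the setup paths above, letting per-interface ring sizes be updated in the stored config. Usage sketch (1024 is an arbitrary value):

struct octeon_config *conf = octeon_get_conf(oct);

CFG_SET_NUM_TX_DESCS_NIC_IF(conf, lio->ifidx, 1024);
CFG_SET_NUM_RX_DESCS_NIC_IF(conf, lio->ifidx, 1024);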
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
index e08f760..ec3dd69 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
@@ -37,13 +37,6 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct,
u32 flags);
static int octeon_console_read(struct octeon_device *oct, u32 console_num,
char *buffer, u32 buf_size);
-static u32 console_bitmask;
-module_param(console_bitmask, int, 0644);
-MODULE_PARM_DESC(console_bitmask,
- "Bitmask indicating which consoles have debug output redirected to syslog.");
-
-#define MIN(a, b) min((a), (b))
-#define CAST_ULL(v) ((u64)(v))
#define BOOTLOADER_PCI_READ_BUFFER_DATA_ADDR 0x0006c008
#define BOOTLOADER_PCI_READ_BUFFER_LEN_ADDR 0x0006c004
@@ -139,16 +132,6 @@ struct octeon_pci_console_desc {
};
/**
- * \brief determines if a given console has debug enabled.
- * @param console console to check
- * @returns 1 = enabled. 0 otherwise
- */
-static int octeon_console_debug_enabled(u32 console)
-{
- return (console_bitmask >> (console)) & 0x1;
-}
-
-/**
* This function is the implementation of the get macros defined
* for individual structure members. The arguments are generated
* by the macros in order to read only the needed memory.
@@ -234,7 +217,7 @@ static int __cvmx_bootmem_check_version(struct octeon_device *oct,
(exact_match && major_version != exact_match)) {
dev_err(&oct->pci_dev->dev, "bootmem ver mismatch %d.%d addr:0x%llx\n",
major_version, minor_version,
- CAST_ULL(oct->bootmem_desc_addr));
+ (long long)oct->bootmem_desc_addr);
return -1;
} else {
return 0;
@@ -454,20 +437,31 @@ static void output_console_line(struct octeon_device *oct,
{
char *line;
s32 i;
+ size_t len;
line = console_buffer;
for (i = 0; i < bytes_read; i++) {
/* Output a line at a time, prefixed */
if (console_buffer[i] == '\n') {
console_buffer[i] = '\0';
- if (console->leftover[0]) {
- dev_info(&oct->pci_dev->dev, "%lu: %s%s\n",
- console_num, console->leftover,
- line);
+ /* We need to output 'line', prefaced by 'leftover'.
+ * However, it is possible we're being called to
+ * output 'leftover' by itself (in the case of nothing
+ * having been read from the console).
+ *
+ * To avoid duplication, check for this condition.
+ */
+ if (console->leftover[0] &&
+ (line != console->leftover)) {
+ if (console->print)
+ (*console->print)(oct, (u32)console_num,
+ console->leftover,
+ line);
console->leftover[0] = '\0';
} else {
- dev_info(&oct->pci_dev->dev, "%lu: %s\n",
- console_num, line);
+ if (console->print)
+ (*console->print)(oct, (u32)console_num,
+ line, NULL);
}
line = &console_buffer[i + 1];
}
@@ -476,13 +470,16 @@ static void output_console_line(struct octeon_device *oct,
/* Save off any leftovers */
if (line != &console_buffer[bytes_read]) {
console_buffer[bytes_read] = '\0';
- strcpy(console->leftover, line);
+ len = strlen(console->leftover);
+ strncpy(&console->leftover[len], line,
+ sizeof(console->leftover) - len);
}
}
static void check_console(struct work_struct *work)
{
s32 bytes_read, tries, total_read;
+ size_t len;
struct octeon_console *console;
struct cavium_wk *wk = (struct cavium_wk *)work;
struct octeon_device *oct = (struct octeon_device *)wk->ctxptr;
@@ -504,7 +501,7 @@ static void check_console(struct work_struct *work)
total_read += bytes_read;
if (console->waiting)
octeon_console_handle_result(oct, console_num);
- if (octeon_console_debug_enabled(console_num)) {
+ if (console->print) {
output_console_line(oct, console, console_num,
console_buffer, bytes_read);
}
@@ -519,10 +516,13 @@ static void check_console(struct work_struct *work)
/* If nothing is read after polling the console,
* output any leftover data
*/
- if (octeon_console_debug_enabled(console_num) &&
- (total_read == 0) && (console->leftover[0])) {
- dev_info(&oct->pci_dev->dev, "%u: %s\n",
- console_num, console->leftover);
+ if (console->print && (total_read == 0) &&
+ (console->leftover[0])) {
+ /* append '\n' as terminator for 'output_console_line' */
+ len = strlen(console->leftover);
+ console->leftover[len] = '\n';
+ output_console_line(oct, console, console_num,
+ console->leftover, (s32)(len + 1));
console->leftover[0] = '\0';
}
@@ -574,7 +574,84 @@ int octeon_init_consoles(struct octeon_device *oct)
return ret;
}
-int octeon_add_console(struct octeon_device *oct, u32 console_num)
+static void octeon_get_uboot_version(struct octeon_device *oct)
+{
+ s32 bytes_read, tries, total_read;
+ struct octeon_console *console;
+ u32 console_num = 0;
+ char *uboot_ver;
+ char *buf;
+ char *p;
+
+#define OCTEON_UBOOT_VER_BUF_SIZE 512
+ buf = kmalloc(OCTEON_UBOOT_VER_BUF_SIZE, GFP_KERNEL);
+ if (!buf)
+ return;
+
+ if (octeon_console_send_cmd(oct, "setenv stdout pci\n", 50)) {
+ kfree(buf);
+ return;
+ }
+
+ if (octeon_console_send_cmd(oct, "version\n", 1)) {
+ kfree(buf);
+ return;
+ }
+
+ console = &oct->console[console_num];
+ tries = 0;
+ total_read = 0;
+
+ do {
+ /* Take console output regardless of whether it will
+ * be logged
+ */
+ bytes_read =
+ octeon_console_read(oct,
+ console_num, buf + total_read,
+ OCTEON_UBOOT_VER_BUF_SIZE - 1 -
+ total_read);
+ if (bytes_read > 0) {
+ buf[bytes_read] = '\0';
+
+ total_read += bytes_read;
+ if (console->waiting)
+ octeon_console_handle_result(oct, console_num);
+ } else if (bytes_read < 0) {
+ dev_err(&oct->pci_dev->dev, "Error reading console %u, ret=%d\n",
+ console_num, bytes_read);
+ }
+
+ tries++;
+ } while ((bytes_read > 0) && (tries < 16));
+
+ /* If nothing is read after polling the console,
+ * output any leftover data
+ */
+ if ((total_read == 0) && (console->leftover[0])) {
+ dev_dbg(&oct->pci_dev->dev, "%u: %s\n",
+ console_num, console->leftover);
+ console->leftover[0] = '\0';
+ }
+
+ buf[OCTEON_UBOOT_VER_BUF_SIZE - 1] = '\0';
+
+ uboot_ver = strstr(buf, "U-Boot");
+ if (uboot_ver) {
+ p = strstr(uboot_ver, "mips");
+ if (p) {
+ p--;
+ *p = '\0';
+ dev_info(&oct->pci_dev->dev, "%s\n", uboot_ver);
+ }
+ }
+
+ kfree(buf);
+ octeon_console_send_cmd(oct, "setenv stdout serial\n", 50);
+}
+
+int octeon_add_console(struct octeon_device *oct, u32 console_num,
+ char *dbg_enb)
{
int ret = 0;
u32 delay;
@@ -610,17 +687,19 @@ int octeon_add_console(struct octeon_device *oct, u32 console_num)
work = &oct->console_poll_work[console_num].work;
+ octeon_get_uboot_version(oct);
+
INIT_DELAYED_WORK(work, check_console);
oct->console_poll_work[console_num].ctxptr = (void *)oct;
oct->console_poll_work[console_num].ctxul = console_num;
delay = OCTEON_CONSOLE_POLL_INTERVAL_MS;
schedule_delayed_work(work, msecs_to_jiffies(delay));
- if (octeon_console_debug_enabled(console_num)) {
- ret = octeon_console_send_cmd(oct,
- "setenv pci_console_active 1",
- 2000);
- }
+ /* an empty string means use default debug console enablement */
+ if (dbg_enb && !dbg_enb[0])
+ dbg_enb = "setenv pci_console_active 1";
+ if (dbg_enb)
+ ret = octeon_console_send_cmd(oct, dbg_enb, 2000);
console->active = 1;
}
@@ -704,7 +783,7 @@ static int octeon_console_read(struct octeon_device *oct, u32 console_num,
if (bytes_to_read <= 0)
return bytes_to_read;
- bytes_to_read = MIN(bytes_to_read, (s32)buf_size);
+ bytes_to_read = min_t(s32, bytes_to_read, buf_size);
/* Check to see if what we want to read is not contiguous, and limit
* ourselves to the contiguous block
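
The reworked output_console_line()/check_console() pair above is a line
buffer: complete '\n'-terminated lines are printed immediately, a trailing
partial line is parked in console->leftover, and a later idle poll flushes
the remainder by appending '\n' and re-running the splitter. A minimal
userspace sketch of the same scheme — feed() and the 512-byte buffer are
illustrative, nothing here is taken from the driver:

#include <stdio.h>
#include <string.h>

static char leftover[512];

/* Print each complete line in buf, prefixed by any parked remainder;
 * park a trailing partial line for the next call.
 */
static void feed(char *buf, size_t n)
{
	char *line = buf;
	size_t i, len;

	for (i = 0; i < n; i++) {
		if (buf[i] == '\n') {
			buf[i] = '\0';
			printf("%s%s\n", leftover, line);
			leftover[0] = '\0';
			line = &buf[i + 1];
		}
	}
	if (line != &buf[n]) {			/* partial line remains */
		buf[n] = '\0';			/* caller leaves room for NUL */
		len = strlen(leftover);
		strncpy(leftover + len, line, sizeof(leftover) - len - 1);
	}
}

int main(void)
{
	char a[32] = "first\nsecond part ";
	char b[32] = "continues\n";

	feed(a, strlen(a));	/* prints "first", parks "second part " */
	feed(b, strlen(b));	/* prints "second part continues" */
	return 0;
}
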
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 623e28c..29d53b1 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -418,7 +418,7 @@ static struct octeon_config default_cn23xx_conf = {
/** IQ attributes */
.iq = {
.max_iqs = CN23XX_CFG_IO_QUEUES,
- .pending_list_size = (CN23XX_MAX_IQ_DESCRIPTORS *
+ .pending_list_size = (CN23XX_DEFAULT_IQ_DESCRIPTORS *
CN23XX_CFG_IO_QUEUES),
.instr_type = OCTEON_64BYTE_INSTR,
.db_min = CN23XX_DB_MIN,
@@ -436,8 +436,8 @@ static struct octeon_config default_cn23xx_conf = {
},
.num_nic_ports = DEFAULT_NUM_NIC_PORTS_23XX,
- .num_def_rx_descs = CN23XX_MAX_OQ_DESCRIPTORS,
- .num_def_tx_descs = CN23XX_MAX_IQ_DESCRIPTORS,
+ .num_def_rx_descs = CN23XX_DEFAULT_OQ_DESCRIPTORS,
+ .num_def_tx_descs = CN23XX_DEFAULT_IQ_DESCRIPTORS,
.def_rx_buf_size = CN23XX_OQ_BUF_SIZE,
/* For ethernet interface 0: Port cfg Attributes */
@@ -455,10 +455,10 @@ static struct octeon_config default_cn23xx_conf = {
.num_rxqs = DEF_RXQS_PER_INTF,
/* Num of desc for rx rings */
- .num_rx_descs = CN23XX_MAX_OQ_DESCRIPTORS,
+ .num_rx_descs = CN23XX_DEFAULT_OQ_DESCRIPTORS,
/* Num of desc for tx rings */
- .num_tx_descs = CN23XX_MAX_IQ_DESCRIPTORS,
+ .num_tx_descs = CN23XX_DEFAULT_IQ_DESCRIPTORS,
/* SKB size. We need not change buf size even for Jumbo frames.
* Octeon can send jumbo frames in 4 consecutive descriptors,
@@ -484,10 +484,10 @@ static struct octeon_config default_cn23xx_conf = {
.num_rxqs = DEF_RXQS_PER_INTF,
/* Num of desc for rx rings */
- .num_rx_descs = CN23XX_MAX_OQ_DESCRIPTORS,
+ .num_rx_descs = CN23XX_DEFAULT_OQ_DESCRIPTORS,
/* Num of desc for tx rings */
- .num_tx_descs = CN23XX_MAX_IQ_DESCRIPTORS,
+ .num_tx_descs = CN23XX_DEFAULT_IQ_DESCRIPTORS,
/* SKB size. We need not change buf size even for Jumbo frames.
* Octeon can send jumbo frames in 4 consecutive descriptors,
@@ -528,9 +528,10 @@ static struct octeon_config_ptr {
};
static char oct_dev_state_str[OCT_DEV_STATES + 1][32] = {
- "BEGIN", "PCI-MAP-DONE", "DISPATCH-INIT-DONE",
+ "BEGIN", "PCI-ENABLE-DONE", "PCI-MAP-DONE", "DISPATCH-INIT-DONE",
"IQ-INIT-DONE", "SCBUFF-POOL-INIT-DONE", "RESPLIST-INIT-DONE",
- "DROQ-INIT-DONE", "IO-QUEUES-INIT-DONE", "CONSOLE-INIT-DONE",
+ "DROQ-INIT-DONE", "MBOX-SETUP-DONE", "MSIX-ALLOC-VECTOR-DONE",
+ "INTR-SET-DONE", "IO-QUEUES-INIT-DONE", "CONSOLE-INIT-DONE",
"HOST-READY", "CORE-READY", "RUNNING", "IN-RESET",
"INVALID"
};
@@ -876,11 +877,11 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
oct->num_iqs = 0;
- oct->instr_queue[0] = vmalloc_node(sizeof(*oct->instr_queue[0]),
+ oct->instr_queue[0] = vzalloc_node(sizeof(*oct->instr_queue[0]),
numa_node);
if (!oct->instr_queue[0])
oct->instr_queue[0] =
- vmalloc(sizeof(struct octeon_instr_queue));
+ vzalloc(sizeof(struct octeon_instr_queue));
if (!oct->instr_queue[0])
return 1;
memset(oct->instr_queue[0], 0, sizeof(struct octeon_instr_queue));
@@ -923,9 +924,9 @@ int octeon_setup_output_queues(struct octeon_device *oct)
desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_CONF(oct, cn23xx_vf));
}
oct->num_oqs = 0;
- oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node);
+ oct->droq[0] = vzalloc_node(sizeof(*oct->droq[0]), numa_node);
if (!oct->droq[0])
- oct->droq[0] = vmalloc(sizeof(*oct->droq[0]));
+ oct->droq[0] = vzalloc(sizeof(*oct->droq[0]));
if (!oct->droq[0])
return 1;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index c90ed48..894af19 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -22,6 +22,8 @@
#ifndef _OCTEON_DEVICE_H_
#define _OCTEON_DEVICE_H_
+#include <linux/interrupt.h>
+
/** PCI VendorId Device Id */
#define OCTEON_CN68XX_PCIID 0x91177d
#define OCTEON_CN66XX_PCIID 0x92177d
@@ -192,6 +194,8 @@ struct octeon_reg_list {
};
#define OCTEON_CONSOLE_MAX_READ_BYTES 512
+typedef int (*octeon_console_print_fn)(struct octeon_device *oct,
+ u32 num, char *pre, char *suf);
struct octeon_console {
u32 active;
u32 waiting;
@@ -199,6 +203,7 @@ struct octeon_console {
u32 buffer_size;
u64 input_base_addr;
u64 output_base_addr;
+ octeon_console_print_fn print;
char leftover[OCTEON_CONSOLE_MAX_READ_BYTES];
};
@@ -552,6 +557,7 @@ struct octeon_device {
} loc;
atomic_t *adapter_refcount; /* reference count of adapter */
+ bool ptp_enable;
};
#define OCT_DRV_ONLINE 1
@@ -565,6 +571,8 @@ struct octeon_device {
#define CHIP_CONF(oct, TYPE) \
(((struct octeon_ ## TYPE *)((oct)->chip))->conf)
+#define MAX_IO_PENDING_PKT_COUNT 100
+
/*------------------ Function Prototypes ----------------------*/
/** Initialize device list memory */
@@ -740,11 +748,17 @@ int octeon_init_consoles(struct octeon_device *oct);
/**
* Adds access to a console to the device.
*
- * @param oct which octeon to add to
- * @param console_num which console
+ * @param oct: which octeon to add to
+ * @param console_num: which console
+ * @param dbg_enb: ptr to debug enablement string, one of:
+ * * NULL for no debug output (i.e. disabled)
+ * * empty string enables debug output (via default method)
+ * * specific string to enable debug console output
+ *
* @return Zero on success, negative on failure.
*/
-int octeon_add_console(struct octeon_device *oct, u32 console_num);
+int octeon_add_console(struct octeon_device *oct, u32 console_num,
+ char *dbg_enb);
/** write or read from a console */
int octeon_console_write(struct octeon_device *oct, u32 console_num,
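
A hypothetical caller tying the two additions in this header together: the
per-console print hook and the three-way dbg_enb argument. my_console_print()
and the direct assignment below are illustrative only; the actual wiring in
the NIC driver is not part of this excerpt:

/* Matches octeon_console_print_fn; 'suf' may be NULL. */
static int my_console_print(struct octeon_device *oct, u32 num,
			    char *pre, char *suf)
{
	dev_info(&oct->pci_dev->dev, "%u: %s%s\n", num,
		 pre ? pre : "", suf ? suf : "");
	return 0;
}

static int my_attach_console(struct octeon_device *oct)
{
	/* Assumes octeon_init_consoles() has already run. */
	oct->console[0].print = my_console_print;

	/* dbg_enb: NULL disables debug output, "" enables it via the
	 * default "setenv pci_console_active 1", and any other string
	 * is sent verbatim as the enabling bootloader command.
	 */
	return octeon_add_console(oct, 0, "");
}
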
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 2e190de..9372d4c 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -145,6 +145,8 @@ octeon_droq_destroy_ring_buffers(struct octeon_device *oct,
for (i = 0; i < droq->max_count; i++) {
pg_info = &droq->recv_buf_list[i].pg_info;
+ if (!pg_info)
+ continue;
if (pg_info->dma)
lio_unmap_ring(oct->pci_dev,
@@ -207,6 +209,10 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no)
droq->desc_ring, droq->desc_ring_dma);
memset(droq, 0, OCT_DROQ_SIZE);
+ oct->io_qmask.oq &= ~(1ULL << q_no);
+ vfree(oct->droq[q_no]);
+ oct->droq[q_no] = NULL;
+ oct->num_oqs--;
return 0;
}
@@ -275,12 +281,12 @@ int octeon_init_droq(struct octeon_device *oct,
droq->max_count);
droq->recv_buf_list = (struct octeon_recv_buffer *)
- vmalloc_node(droq->max_count *
+ vzalloc_node(droq->max_count *
OCT_DROQ_RECVBUF_SIZE,
numa_node);
if (!droq->recv_buf_list)
droq->recv_buf_list = (struct octeon_recv_buffer *)
- vmalloc(droq->max_count *
+ vzalloc(droq->max_count *
OCT_DROQ_RECVBUF_SIZE);
if (!droq->recv_buf_list) {
dev_err(&oct->pci_dev->dev, "Output queue recv buf list alloc failed\n");
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index 7ccffbb..32ef3a7 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -35,6 +35,12 @@
#define DRV_NAME "LiquidIO"
+struct octeon_device_priv {
+ /** Tasklet structures for this device. */
+ struct tasklet_struct droq_tasklet;
+ unsigned long napi_mask;
+};
+
/** This structure is used by NIC driver to store information required
* to free the sk_buff when the packet has been fetched by Octeon.
* Bytes offset below assume worst-case of a 64-bit system.
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index ec8504b..9e36319 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -33,6 +33,7 @@
#define LIO_IFSTATE_REGISTERED 0x02
#define LIO_IFSTATE_RUNNING 0x04
#define LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08
+#define LIO_IFSTATE_RESETTING 0x10
struct oct_nic_stats_resp {
u64 rh;
@@ -166,6 +167,14 @@ void cleanup_rx_oom_poll_fn(struct net_device *netdev);
*/
void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr);
+int liquidio_setup_io_queues(struct octeon_device *octeon_dev, int ifidx,
+ u32 num_iqs, u32 num_oqs);
+
+irqreturn_t liquidio_msix_intr_handler(int irq __attribute__((unused)),
+ void *dev);
+
+int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs);
+
/**
* \brief Register ethtool operations
* @param netdev pointer to network device
@@ -448,4 +457,30 @@ static inline void ifstate_reset(struct lio *lio, int state_flag)
atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) & ~(state_flag)));
}
+/**
+ * \brief wait for all pending requests to complete
+ * @param oct Pointer to Octeon device
+ *
+ * Called during shutdown sequence
+ */
+static inline int wait_for_pending_requests(struct octeon_device *oct)
+{
+ int i, pcount = 0;
+
+ for (i = 0; i < MAX_IO_PENDING_PKT_COUNT; i++) {
+ pcount = atomic_read(
+ &oct->response_list[OCTEON_ORDERED_SC_LIST]
+ .pending_req_count);
+ if (pcount)
+ schedule_timeout_uninterruptible(HZ / 10);
+ else
+ break;
+ }
+
+ if (pcount)
+ return 1;
+
+ return 0;
+}
+
#endif
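
A sketch of how a shutdown path might use the new helper; the surrounding
teardown code is assumed rather than shown in this excerpt:

static void my_shutdown(struct octeon_device *oct)
{
	/* Drain ordered soft commands before tearing down queues.
	 * wait_for_pending_requests() polls up to
	 * MAX_IO_PENDING_PKT_COUNT times, sleeping 100 ms (HZ / 10)
	 * between checks, and returns nonzero on timeout.
	 */
	if (wait_for_pending_requests(oct))
		dev_err(&oct->pci_dev->dev,
			"There were pending requests\n");

	/* ...queue and interrupt teardown would follow here... */
}
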
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index 7b297f1..1e0fbce 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -77,13 +77,6 @@ int octeon_init_instr_queue(struct octeon_device *oct,
return 1;
}
- if (num_descs & (num_descs - 1)) {
- dev_err(&oct->pci_dev->dev,
- "Number of descriptors for instr queue %d not in power of 2.\n",
- iq_no);
- return 1;
- }
-
q_size = (u32)conf->instr_type * num_descs;
iq = oct->instr_queue[iq_no];
@@ -190,6 +183,10 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
q_size = iq->max_count * desc_size;
lio_dma_free(oct, (u32)q_size, iq->base_addr,
iq->base_addr_dma);
+ oct->io_qmask.iq &= ~(1ULL << iq_no);
+ vfree(oct->instr_queue[iq_no]);
+ oct->instr_queue[iq_no] = NULL;
+ oct->num_iqs--;
return 0;
}
return 1;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 5785852..67d1a32 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -277,7 +277,6 @@ struct snd_queue {
u16 xdp_free_cnt;
bool is_xdp;
-#define TSO_HEADER_SIZE 128
/* For TSO segment's header */
char *tso_hdrs;
dma_addr_t tso_hdrs_phys;
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 0bc6a4f..6a01536 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -793,7 +793,9 @@ static struct attribute *cxgb3_attrs[] = {
NULL
};
-static struct attribute_group cxgb3_attr_group = {.attrs = cxgb3_attrs };
+static const struct attribute_group cxgb3_attr_group = {
+ .attrs = cxgb3_attrs,
+};
static ssize_t tm_attr_show(struct device *d,
char *buf, int sched)
@@ -880,7 +882,9 @@ static struct attribute *offload_attrs[] = {
NULL
};
-static struct attribute_group offload_attr_group = {.attrs = offload_attrs };
+static const struct attribute_group offload_attr_group = {
+ .attrs = offload_attrs,
+};
/*
* Sends an sk_buff to an offload queue driver
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 09ea62e..ea72d2d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -104,13 +104,13 @@ enum dev_state {
DEV_STATE_ERR
};
-enum {
+enum cc_pause {
PAUSE_RX = 1 << 0,
PAUSE_TX = 1 << 1,
PAUSE_AUTONEG = 1 << 2
};
-enum {
+enum cc_fec {
FEC_AUTO = 1 << 0, /* IEEE 802.3 "automatic" */
FEC_RS = 1 << 1, /* Reed-Solomon */
FEC_BASER_RS = 1 << 2 /* BaseR/Reed-Solomon */
@@ -338,10 +338,12 @@ struct adapter_params {
unsigned int sf_nsec; /* # of flash sectors */
unsigned int sf_fw_start; /* start of FW image in flash */
- unsigned int fw_vers;
- unsigned int bs_vers; /* bootstrap version */
- unsigned int tp_vers;
- unsigned int er_vers; /* expansion ROM version */
+ unsigned int fw_vers; /* firmware version */
+ unsigned int bs_vers; /* bootstrap version */
+ unsigned int tp_vers; /* TP microcode version */
+ unsigned int er_vers; /* expansion ROM version */
+ unsigned int scfg_vers; /* Serial Configuration version */
+ unsigned int vpd_vers; /* VPD Version */
u8 api_vers[7];
unsigned short mtus[NMTUS];
@@ -364,6 +366,7 @@ struct adapter_params {
unsigned int max_ordird_qp; /* Max read depth per RDMA QP */
unsigned int max_ird_adapter; /* Max read depth per adapter */
bool fr_nsmr_tpte_wr_support; /* FW support for FR_NSMR_TPTE_WR */
+ u8 fw_caps_support; /* 32-bit Port Capabilities */
/* MPS Buffer Group Map[per Port]. Bit i is set if buffer group i is
* used by the Port
@@ -437,18 +440,34 @@ struct trace_params {
unsigned char port;
};
+/* Firmware Port Capabilities types. */
+
+typedef u16 fw_port_cap16_t; /* 16-bit Port Capabilities integral value */
+typedef u32 fw_port_cap32_t; /* 32-bit Port Capabilities integral value */
+
+enum fw_caps {
+ FW_CAPS_UNKNOWN = 0, /* 0'ed out initial state */
+ FW_CAPS16 = 1, /* old Firmware: 16-bit Port Capabilities */
+ FW_CAPS32 = 2, /* new Firmware: 32-bit Port Capabilities */
+};
+
struct link_config {
- unsigned short supported; /* link capabilities */
- unsigned short advertising; /* advertised capabilities */
- unsigned short lp_advertising; /* peer advertised capabilities */
- unsigned int requested_speed; /* speed user has requested */
- unsigned int speed; /* actual link speed */
- unsigned char requested_fc; /* flow control user has requested */
- unsigned char fc; /* actual link flow control */
- unsigned char auto_fec; /* Forward Error Correction: */
- unsigned char requested_fec; /* "automatic" (IEEE 802.3), */
- unsigned char fec; /* requested, and actual in use */
+ fw_port_cap32_t pcaps; /* link capabilities */
+ fw_port_cap32_t def_acaps; /* default advertised capabilities */
+ fw_port_cap32_t acaps; /* advertised capabilities */
+ fw_port_cap32_t lpacaps; /* peer advertised capabilities */
+
+ fw_port_cap32_t speed_caps; /* speed(s) user has requested */
+ unsigned int speed; /* actual link speed (Mb/s) */
+
+ enum cc_pause requested_fc; /* flow control user has requested */
+ enum cc_pause fc; /* actual link flow control */
+
+ enum cc_fec requested_fec; /* Forward Error Correction: */
+ enum cc_fec fec; /* requested and actual in use */
+
unsigned char autoneg; /* autonegotiating? */
+
unsigned char link_ok; /* link up? */
unsigned char link_down_rc; /* link down reason */
};
@@ -1404,10 +1423,15 @@ int t4_fw_upgrade(struct adapter *adap, unsigned int mbox,
int t4_fl_pkt_align(struct adapter *adap);
unsigned int t4_flash_cfg_addr(struct adapter *adapter);
int t4_check_fw_version(struct adapter *adap);
+int t4_load_cfg(struct adapter *adapter, const u8 *cfg_data, unsigned int size);
int t4_get_fw_version(struct adapter *adapter, u32 *vers);
int t4_get_bs_version(struct adapter *adapter, u32 *vers);
int t4_get_tp_version(struct adapter *adapter, u32 *vers);
int t4_get_exprom_version(struct adapter *adapter, u32 *vers);
+int t4_get_scfg_version(struct adapter *adapter, u32 *vers);
+int t4_get_vpd_version(struct adapter *adapter, u32 *vers);
+int t4_get_version_info(struct adapter *adapter);
+void t4_dump_version_info(struct adapter *adapter);
int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info,
const u8 *fw_data, unsigned int fw_size,
struct fw_hdr *card_fw, enum dev_state state, int *reset);
@@ -1573,6 +1597,8 @@ int t4_ofld_eq_free(struct adapter *adap, unsigned int mbox, unsigned int pf,
int t4_sge_ctxt_flush(struct adapter *adap, unsigned int mbox);
void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl);
int t4_update_port_info(struct port_info *pi);
+int t4_get_link_params(struct port_info *pi, unsigned int *link_okp,
+ unsigned int *speedp, unsigned int *mtup);
int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl);
void t4_db_full(struct adapter *adapter);
void t4_db_dropped(struct adapter *adapter);
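
With fw_caps_support in place, common code can branch once on the firmware's
capability flavour instead of sprinkling 16- vs 32-bit checks everywhere.
A minimal sketch, assuming the fwcaps16_to_caps32() helper added to t4_hw.c
later in this patch (normalize_caps() itself is hypothetical):

/* Normalize a firmware-supplied capability word to the 32-bit form
 * used by struct link_config, widening it first if the firmware only
 * speaks the old 16-bit Port Capabilities.
 */
static fw_port_cap32_t normalize_caps(struct adapter *adap, u32 raw_caps)
{
	if (adap->params.fw_caps_support == FW_CAPS16)
		return fwcaps16_to_caps32((fw_port_cap16_t)raw_caps);
	return (fw_port_cap32_t)raw_caps;
}
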
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 26eb00a45..a71af1e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -533,17 +533,23 @@ static int from_fw_port_mod_type(enum fw_port_type port_type,
static unsigned int speed_to_fw_caps(int speed)
{
if (speed == 100)
- return FW_PORT_CAP_SPEED_100M;
+ return FW_PORT_CAP32_SPEED_100M;
if (speed == 1000)
- return FW_PORT_CAP_SPEED_1G;
+ return FW_PORT_CAP32_SPEED_1G;
if (speed == 10000)
- return FW_PORT_CAP_SPEED_10G;
+ return FW_PORT_CAP32_SPEED_10G;
if (speed == 25000)
- return FW_PORT_CAP_SPEED_25G;
+ return FW_PORT_CAP32_SPEED_25G;
if (speed == 40000)
- return FW_PORT_CAP_SPEED_40G;
+ return FW_PORT_CAP32_SPEED_40G;
+ if (speed == 50000)
+ return FW_PORT_CAP32_SPEED_50G;
if (speed == 100000)
- return FW_PORT_CAP_SPEED_100G;
+ return FW_PORT_CAP32_SPEED_100G;
+ if (speed == 200000)
+ return FW_PORT_CAP32_SPEED_200G;
+ if (speed == 400000)
+ return FW_PORT_CAP32_SPEED_400G;
return 0;
}
@@ -560,12 +566,13 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
unsigned int fw_caps,
unsigned long *link_mode_mask)
{
- #define SET_LMM(__lmm_name) __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name \
- ## _BIT, link_mode_mask)
+ #define SET_LMM(__lmm_name) \
+ __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
+ link_mode_mask)
#define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
do { \
- if (fw_caps & FW_PORT_CAP_ ## __fw_name) \
+ if (fw_caps & FW_PORT_CAP32_ ## __fw_name) \
SET_LMM(__lmm_name); \
} while (0)
@@ -645,7 +652,10 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
case FW_PORT_TYPE_KR4_100G:
case FW_PORT_TYPE_CR4_QSFP:
SET_LMM(FIBRE);
- SET_LMM(100000baseCR4_Full);
+ FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
+ FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
+ FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
+ FW_CAPS_TO_LMM(SPEED_100G, 100000baseCR4_Full);
break;
default:
@@ -663,8 +673,7 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
/**
* lmm_to_fw_caps - translate ethtool Link Mode Mask to Firmware
* capabilities
- *
- * @link_mode_mask: ethtool Link Mode Mask
+ * @link_mode_mask: ethtool Link Mode Mask
*
* Translate ethtool Link Mode Mask into a Firmware Port capabilities
* value.
@@ -677,7 +686,7 @@ static unsigned int lmm_to_fw_caps(const unsigned long *link_mode_mask)
do { \
if (test_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
link_mode_mask)) \
- fw_caps |= FW_PORT_CAP_ ## __fw_name; \
+ fw_caps |= FW_PORT_CAP32_ ## __fw_name; \
} while (0)
LMM_TO_FW_CAPS(100baseT_Full, SPEED_100M);
@@ -685,6 +694,7 @@ static unsigned int lmm_to_fw_caps(const unsigned long *link_mode_mask)
LMM_TO_FW_CAPS(10000baseT_Full, SPEED_10G);
LMM_TO_FW_CAPS(40000baseSR4_Full, SPEED_40G);
LMM_TO_FW_CAPS(25000baseCR_Full, SPEED_25G);
+ LMM_TO_FW_CAPS(50000baseCR2_Full, SPEED_50G);
LMM_TO_FW_CAPS(100000baseCR4_Full, SPEED_100G);
#undef LMM_TO_FW_CAPS
@@ -698,10 +708,6 @@ static int get_link_ksettings(struct net_device *dev,
struct port_info *pi = netdev_priv(dev);
struct ethtool_link_settings *base = &link_ksettings->base;
- ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
- ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
- ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
-
/* For the nonce, the Firmware doesn't send up Port State changes
* when the Virtual Interface attached to the Port is down. So
* if it's down, let's grab any changes.
@@ -709,6 +715,10 @@ static int get_link_ksettings(struct net_device *dev,
if (!netif_running(dev))
(void)t4_update_port_info(pi);
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
+
base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type);
if (pi->mdio_addr >= 0) {
@@ -721,11 +731,11 @@ static int get_link_ksettings(struct net_device *dev,
base->mdio_support = 0;
}
- fw_caps_to_lmm(pi->port_type, pi->link_cfg.supported,
+ fw_caps_to_lmm(pi->port_type, pi->link_cfg.pcaps,
link_ksettings->link_modes.supported);
- fw_caps_to_lmm(pi->port_type, pi->link_cfg.advertising,
+ fw_caps_to_lmm(pi->port_type, pi->link_cfg.acaps,
link_ksettings->link_modes.advertising);
- fw_caps_to_lmm(pi->port_type, pi->link_cfg.lp_advertising,
+ fw_caps_to_lmm(pi->port_type, pi->link_cfg.lpacaps,
link_ksettings->link_modes.lp_advertising);
if (netif_carrier_ok(dev)) {
@@ -736,8 +746,24 @@ static int get_link_ksettings(struct net_device *dev,
base->duplex = DUPLEX_UNKNOWN;
}
+ if (pi->link_cfg.fc & PAUSE_RX) {
+ if (pi->link_cfg.fc & PAUSE_TX) {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising,
+ Pause);
+ } else {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising,
+ Asym_Pause);
+ }
+ } else if (pi->link_cfg.fc & PAUSE_TX) {
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising,
+ Asym_Pause);
+ }
+
base->autoneg = pi->link_cfg.autoneg;
- if (pi->link_cfg.supported & FW_PORT_CAP_ANEG)
+ if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
ethtool_link_ksettings_add_link_mode(link_ksettings,
supported, Autoneg);
if (pi->link_cfg.autoneg)
@@ -748,8 +774,7 @@ static int get_link_ksettings(struct net_device *dev,
}
static int set_link_ksettings(struct net_device *dev,
- const struct ethtool_link_ksettings
- *link_ksettings)
+ const struct ethtool_link_ksettings *link_ksettings)
{
struct port_info *pi = netdev_priv(dev);
struct link_config *lc = &pi->link_cfg;
@@ -762,12 +787,12 @@ static int set_link_ksettings(struct net_device *dev,
if (base->duplex != DUPLEX_FULL)
return -EINVAL;
- if (!(lc->supported & FW_PORT_CAP_ANEG)) {
+ if (!(lc->pcaps & FW_PORT_CAP32_ANEG)) {
/* PHY offers a single speed. See if that's what's
* being requested.
*/
if (base->autoneg == AUTONEG_DISABLE &&
- (lc->supported & speed_to_fw_caps(base->speed)))
+ (lc->pcaps & speed_to_fw_caps(base->speed)))
return 0;
return -EINVAL;
}
@@ -776,18 +801,17 @@ static int set_link_ksettings(struct net_device *dev,
if (base->autoneg == AUTONEG_DISABLE) {
fw_caps = speed_to_fw_caps(base->speed);
- if (!(lc->supported & fw_caps))
+ if (!(lc->pcaps & fw_caps))
return -EINVAL;
- lc->requested_speed = fw_caps;
- lc->advertising = 0;
+ lc->speed_caps = fw_caps;
+ lc->acaps = 0;
} else {
fw_caps =
- lmm_to_fw_caps(link_ksettings->link_modes.advertising);
-
- if (!(lc->supported & fw_caps))
+ lmm_to_fw_caps(link_ksettings->link_modes.advertising);
+ if (!(lc->pcaps & fw_caps))
return -EINVAL;
- lc->requested_speed = 0;
- lc->advertising = fw_caps | FW_PORT_CAP_ANEG;
+ lc->speed_caps = 0;
+ lc->acaps = fw_caps | FW_PORT_CAP32_ANEG;
}
lc->autoneg = base->autoneg;
@@ -801,6 +825,104 @@ static int set_link_ksettings(struct net_device *dev,
return ret;
}
+/* Translate the Firmware FEC value into the ethtool value. */
+static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec)
+{
+ unsigned int eth_fec = 0;
+
+ if (fw_fec & FW_PORT_CAP32_FEC_RS)
+ eth_fec |= ETHTOOL_FEC_RS;
+ if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
+ eth_fec |= ETHTOOL_FEC_BASER;
+
+ /* if nothing is set, then FEC is off */
+ if (!eth_fec)
+ eth_fec = ETHTOOL_FEC_OFF;
+
+ return eth_fec;
+}
+
+/* Translate Common Code FEC value into ethtool value. */
+static inline unsigned int cc_to_eth_fec(unsigned int cc_fec)
+{
+ unsigned int eth_fec = 0;
+
+ if (cc_fec & FEC_AUTO)
+ eth_fec |= ETHTOOL_FEC_AUTO;
+ if (cc_fec & FEC_RS)
+ eth_fec |= ETHTOOL_FEC_RS;
+ if (cc_fec & FEC_BASER_RS)
+ eth_fec |= ETHTOOL_FEC_BASER;
+
+ /* if nothing is set, then FEC is off */
+ if (!eth_fec)
+ eth_fec = ETHTOOL_FEC_OFF;
+
+ return eth_fec;
+}
+
+/* Translate ethtool FEC value into Common Code value. */
+static inline unsigned int eth_to_cc_fec(unsigned int eth_fec)
+{
+ unsigned int cc_fec = 0;
+
+ if (eth_fec & ETHTOOL_FEC_OFF)
+ return cc_fec;
+
+ if (eth_fec & ETHTOOL_FEC_AUTO)
+ cc_fec |= FEC_AUTO;
+ if (eth_fec & ETHTOOL_FEC_RS)
+ cc_fec |= FEC_RS;
+ if (eth_fec & ETHTOOL_FEC_BASER)
+ cc_fec |= FEC_BASER_RS;
+
+ return cc_fec;
+}
+
+static int get_fecparam(struct net_device *dev, struct ethtool_fecparam *fec)
+{
+ const struct port_info *pi = netdev_priv(dev);
+ const struct link_config *lc = &pi->link_cfg;
+
+ /* Translate the Firmware FEC Support into the ethtool value. We
+ * always support IEEE 802.3 "automatic" selection of Link FEC type if
+ * any FEC is supported.
+ */
+ fec->fec = fwcap_to_eth_fec(lc->pcaps);
+ if (fec->fec != ETHTOOL_FEC_OFF)
+ fec->fec |= ETHTOOL_FEC_AUTO;
+
+ /* Translate the current internal FEC parameters into the
+ * ethtool values.
+ */
+ fec->active_fec = cc_to_eth_fec(lc->fec);
+
+ return 0;
+}
+
+static int set_fecparam(struct net_device *dev, struct ethtool_fecparam *fec)
+{
+ struct port_info *pi = netdev_priv(dev);
+ struct link_config *lc = &pi->link_cfg;
+ struct link_config old_lc;
+ int ret;
+
+ /* Save old Link Configuration in case the L1 Configure below
+ * fails.
+ */
+ old_lc = *lc;
+
+ /* Try to perform the L1 Configure and return the result of that
+ * effort. If it fails, revert the attempted change.
+ */
+ lc->requested_fec = eth_to_cc_fec(fec->fec);
+ ret = t4_link_l1cfg(pi->adapter, pi->adapter->mbox,
+ pi->tx_chan, lc);
+ if (ret)
+ *lc = old_lc;
+ return ret;
+}
+
static void get_pauseparam(struct net_device *dev,
struct ethtool_pauseparam *epause)
{
@@ -819,7 +941,7 @@ static int set_pauseparam(struct net_device *dev,
if (epause->autoneg == AUTONEG_DISABLE)
lc->requested_fc = 0;
- else if (lc->supported & FW_PORT_CAP_ANEG)
+ else if (lc->pcaps & FW_PORT_CAP32_ANEG)
lc->requested_fc = PAUSE_AUTONEG;
else
return -EINVAL;
@@ -1255,6 +1377,8 @@ static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
static const struct ethtool_ops cxgb_ethtool_ops = {
.get_link_ksettings = get_link_ksettings,
.set_link_ksettings = set_link_ksettings,
+ .get_fecparam = get_fecparam,
+ .set_fecparam = set_fecparam,
.get_drvinfo = get_drvinfo,
.get_msglevel = get_msglevel,
.set_msglevel = set_msglevel,
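
The new get_fecparam()/set_fecparam() hooks are reached from userspace via
the ETHTOOL_GFECPARAM/ETHTOOL_SFECPARAM requests (ethtool --show-fec /
--set-fec; 4.14-era ethtool and uapi headers assumed). A self-contained
userspace sketch of the same flag translation, with the driver's Common Code
FEC bits mirrored locally for illustration:

#include <assert.h>
#include <linux/ethtool.h>	/* ETHTOOL_FEC_* (4.14+ uapi headers) */

/* Local mirror of the cc_fec bits from cxgb4.h, for illustration. */
enum { FEC_AUTO = 1 << 0, FEC_RS = 1 << 1, FEC_BASER_RS = 1 << 2 };

static unsigned int eth_to_cc_fec(unsigned int eth_fec)
{
	unsigned int cc_fec = 0;

	if (eth_fec & ETHTOOL_FEC_OFF)	/* OFF overrides the rest */
		return 0;
	if (eth_fec & ETHTOOL_FEC_AUTO)
		cc_fec |= FEC_AUTO;
	if (eth_fec & ETHTOOL_FEC_RS)
		cc_fec |= FEC_RS;
	if (eth_fec & ETHTOOL_FEC_BASER)
		cc_fec |= FEC_BASER_RS;
	return cc_fec;
}

int main(void)
{
	/* "ethtool --set-fec ethX encoding rs" arrives as FEC_RS alone */
	assert(eth_to_cc_fec(ETHTOOL_FEC_RS) == FEC_RS);
	/* OFF wins even when combined with other bits */
	assert(eth_to_cc_fec(ETHTOOL_FEC_OFF | ETHTOOL_FEC_RS) == 0);
	return 0;
}
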
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 33bb867..92d9d79 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -530,15 +530,22 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
FW_PORT_CMD_ACTION_G(ntohl(pcmd->action_to_len16));
if (cmd == FW_PORT_CMD &&
- action == FW_PORT_ACTION_GET_PORT_INFO) {
+ (action == FW_PORT_ACTION_GET_PORT_INFO ||
+ action == FW_PORT_ACTION_GET_PORT_INFO32)) {
int port = FW_PORT_CMD_PORTID_G(
be32_to_cpu(pcmd->op_to_portid));
- struct net_device *dev =
- q->adap->port[q->adap->chan_map[port]];
- int state_input = ((pcmd->u.info.dcbxdis_pkd &
- FW_PORT_CMD_DCBXDIS_F)
- ? CXGB4_DCB_INPUT_FW_DISABLED
- : CXGB4_DCB_INPUT_FW_ENABLED);
+ struct net_device *dev;
+ int dcbxdis, state_input;
+
+ dev = q->adap->port[q->adap->chan_map[port]];
+ dcbxdis = (action == FW_PORT_ACTION_GET_PORT_INFO
+ ? !!(pcmd->u.info.dcbxdis_pkd &
+ FW_PORT_CMD_DCBXDIS_F)
+ : !!(pcmd->u.info32.lstatus32_to_cbllen32 &
+ FW_PORT_CMD_DCBXDIS32_F));
+ state_input = (dcbxdis
+ ? CXGB4_DCB_INPUT_FW_DISABLED
+ : CXGB4_DCB_INPUT_FW_ENABLED);
cxgb4_dcb_state_fsm(dev, state_input);
}
@@ -2672,11 +2679,10 @@ static int cxgb_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
{
struct port_info *pi = netdev_priv(dev);
struct adapter *adap = pi->adapter;
- struct fw_port_cmd port_cmd, port_rpl;
- u32 link_status, speed = 0;
+ unsigned int link_ok, speed, mtu;
u32 fw_pfvf, fw_class;
int class_id = vf;
- int link_ok, ret;
+ int ret;
u16 pktsize;
if (vf >= adap->num_vfs)
@@ -2688,41 +2694,18 @@ static int cxgb_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
min_tx_rate, vf);
return -EINVAL;
}
- /* Retrieve link details for VF port */
- memset(&port_cmd, 0, sizeof(port_cmd));
- port_cmd.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
- FW_CMD_REQUEST_F |
- FW_CMD_READ_F |
- FW_PORT_CMD_PORTID_V(pi->port_id));
- port_cmd.action_to_len16 =
- cpu_to_be32(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_GET_PORT_INFO) |
- FW_LEN16(port_cmd));
- ret = t4_wr_mbox(adap, adap->mbox, &port_cmd, sizeof(port_cmd),
- &port_rpl);
+
+ ret = t4_get_link_params(pi, &link_ok, &speed, &mtu);
if (ret != FW_SUCCESS) {
dev_err(adap->pdev_dev,
- "Failed to get link status for VF %d\n", vf);
+ "Failed to get link information for VF %d\n", vf);
return -EINVAL;
}
- link_status = be32_to_cpu(port_rpl.u.info.lstatus_to_modtype);
- link_ok = (link_status & FW_PORT_CMD_LSTATUS_F) != 0;
+
if (!link_ok) {
dev_err(adap->pdev_dev, "Link down for VF %d\n", vf);
return -EINVAL;
}
- /* Determine link speed */
- if (link_status & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100M))
- speed = 100;
- else if (link_status & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_1G))
- speed = 1000;
- else if (link_status & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
- speed = 10000;
- else if (link_status & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
- speed = 25000;
- else if (link_status & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
- speed = 40000;
- else if (link_status & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
- speed = 100000;
if (max_tx_rate > speed) {
dev_err(adap->pdev_dev,
@@ -2730,7 +2713,8 @@ static int cxgb_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
max_tx_rate, vf, speed);
return -EINVAL;
}
- pktsize = be16_to_cpu(port_rpl.u.info.mtu);
+
+ pktsize = mtu;
/* subtract ethhdr size and 4 bytes crc since, f/w appends it */
pktsize = pktsize - sizeof(struct ethhdr) - 4;
/* subtract ipv4 hdr size, tcp hdr size to get typical IPv4 MSS size */
@@ -2741,7 +2725,7 @@ static int cxgb_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
SCHED_CLASS_MODE_CLASS,
SCHED_CLASS_RATEUNIT_BITS,
SCHED_CLASS_RATEMODE_ABS,
- pi->port_id, class_id, 0,
+ pi->tx_chan, class_id, 0,
max_tx_rate * 1000, 0, pktsize);
if (ret) {
dev_err(adap->pdev_dev, "Err %d for Traffic Class config\n",
@@ -2889,14 +2873,29 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
return err;
}
-static int cxgb_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *tc)
+static int cxgb_setup_tc_cls_u32(struct net_device *dev,
+ struct tc_cls_u32_offload *cls_u32)
{
- struct port_info *pi = netdev2pinfo(dev);
- struct adapter *adap = netdev2adap(dev);
+ if (!is_classid_clsact_ingress(cls_u32->common.classid) ||
+ cls_u32->common.chain_index)
+ return -EOPNOTSUPP;
- if (chain_index)
+ switch (cls_u32->command) {
+ case TC_CLSU32_NEW_KNODE:
+ case TC_CLSU32_REPLACE_KNODE:
+ return cxgb4_config_knode(dev, cls_u32);
+ case TC_CLSU32_DELETE_KNODE:
+ return cxgb4_delete_knode(dev, cls_u32);
+ default:
return -EOPNOTSUPP;
+ }
+}
+
+static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct port_info *pi = netdev2pinfo(dev);
+ struct adapter *adap = netdev2adap(dev);
if (!(adap->flags & FULL_INIT_DONE)) {
dev_err(adap->pdev_dev,
@@ -2905,20 +2904,12 @@ static int cxgb_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
return -EINVAL;
}
- if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
- tc->type == TC_SETUP_CLSU32) {
- switch (tc->cls_u32->command) {
- case TC_CLSU32_NEW_KNODE:
- case TC_CLSU32_REPLACE_KNODE:
- return cxgb4_config_knode(dev, proto, tc->cls_u32);
- case TC_CLSU32_DELETE_KNODE:
- return cxgb4_delete_knode(dev, proto, tc->cls_u32);
- default:
- return -EOPNOTSUPP;
- }
+ switch (type) {
+ case TC_SETUP_CLSU32:
+ return cxgb_setup_tc_cls_u32(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
}
-
- return -EOPNOTSUPP;
}
static netdev_features_t cxgb_fix_features(struct net_device *dev,
@@ -3610,11 +3601,8 @@ static int adap_init0(struct adapter *adap)
* later reporting and B. to warn if the currently loaded firmware
* is excessively mismatched relative to the driver.)
*/
- t4_get_fw_version(adap, &adap->params.fw_vers);
- t4_get_bs_version(adap, &adap->params.bs_vers);
- t4_get_tp_version(adap, &adap->params.tp_vers);
- t4_get_exprom_version(adap, &adap->params.er_vers);
+ t4_get_version_info(adap);
ret = t4_check_fw_version(adap);
/* If firmware is too old (not supported by driver) force an update. */
if (ret)
@@ -4204,8 +4192,9 @@ static inline bool is_x_10g_port(const struct link_config *lc)
{
unsigned int speeds, high_speeds;
- speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
- high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
+ speeds = FW_PORT_CAP32_SPEED_V(FW_PORT_CAP32_SPEED_G(lc->pcaps));
+ high_speeds = speeds &
+ ~(FW_PORT_CAP32_SPEED_100M | FW_PORT_CAP32_SPEED_1G);
return high_speeds != 0;
}
@@ -4560,56 +4549,8 @@ static void cxgb4_check_pcie_caps(struct adapter *adap)
/* Dump basic information about the adapter */
static void print_adapter_info(struct adapter *adapter)
{
- /* Device information */
- dev_info(adapter->pdev_dev, "Chelsio %s rev %d\n",
- adapter->params.vpd.id,
- CHELSIO_CHIP_RELEASE(adapter->params.chip));
- dev_info(adapter->pdev_dev, "S/N: %s, P/N: %s\n",
- adapter->params.vpd.sn, adapter->params.vpd.pn);
-
- /* Firmware Version */
- if (!adapter->params.fw_vers)
- dev_warn(adapter->pdev_dev, "No firmware loaded\n");
- else
- dev_info(adapter->pdev_dev, "Firmware version: %u.%u.%u.%u\n",
- FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers),
- FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers),
- FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers),
- FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers));
-
- /* Bootstrap Firmware Version. (Some adapters don't have Bootstrap
- * Firmware, so dev_info() is more appropriate here.)
- */
- if (!adapter->params.bs_vers)
- dev_info(adapter->pdev_dev, "No bootstrap loaded\n");
- else
- dev_info(adapter->pdev_dev, "Bootstrap version: %u.%u.%u.%u\n",
- FW_HDR_FW_VER_MAJOR_G(adapter->params.bs_vers),
- FW_HDR_FW_VER_MINOR_G(adapter->params.bs_vers),
- FW_HDR_FW_VER_MICRO_G(adapter->params.bs_vers),
- FW_HDR_FW_VER_BUILD_G(adapter->params.bs_vers));
-
- /* TP Microcode Version */
- if (!adapter->params.tp_vers)
- dev_warn(adapter->pdev_dev, "No TP Microcode loaded\n");
- else
- dev_info(adapter->pdev_dev,
- "TP Microcode version: %u.%u.%u.%u\n",
- FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers),
- FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers),
- FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers),
- FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers));
-
- /* Expansion ROM version */
- if (!adapter->params.er_vers)
- dev_info(adapter->pdev_dev, "No Expansion ROM loaded\n");
- else
- dev_info(adapter->pdev_dev,
- "Expansion ROM version: %u.%u.%u.%u\n",
- FW_HDR_FW_VER_MAJOR_G(adapter->params.er_vers),
- FW_HDR_FW_VER_MINOR_G(adapter->params.er_vers),
- FW_HDR_FW_VER_MICRO_G(adapter->params.er_vers),
- FW_HDR_FW_VER_BUILD_G(adapter->params.er_vers));
+ /* Hardware/Firmware/etc. Version/Revision IDs */
+ t4_dump_version_info(adapter);
/* Software/Hardware configuration */
dev_info(adapter->pdev_dev, "Configuration: %sNIC %s, %s capable\n",
@@ -4634,18 +4575,24 @@ static void print_port_info(const struct net_device *dev)
else if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_8_0GB)
spd = " 8 GT/s";
- if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100M)
+ if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_100M)
bufp += sprintf(bufp, "100M/");
- if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_1G)
+ if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_1G)
bufp += sprintf(bufp, "1G/");
- if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
+ if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_10G)
bufp += sprintf(bufp, "10G/");
- if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G)
+ if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_25G)
bufp += sprintf(bufp, "25G/");
- if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G)
+ if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_40G)
bufp += sprintf(bufp, "40G/");
- if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G)
+ if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_50G)
+ bufp += sprintf(bufp, "50G/");
+ if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_100G)
bufp += sprintf(bufp, "100G/");
+ if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_200G)
+ bufp += sprintf(bufp, "200G/");
+ if (pi->link_cfg.pcaps & FW_PORT_CAP32_SPEED_400G)
+ bufp += sprintf(bufp, "400G/");
if (bufp != buf)
--bufp;
sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type));
@@ -4751,10 +4698,11 @@ static int config_mgmt_dev(struct pci_dev *pdev)
pi = netdev_priv(netdev);
pi->adapter = adap;
- pi->port_id = adap->pf % adap->params.nports;
+ pi->tx_chan = adap->pf % adap->params.nports;
SET_NETDEV_DEV(netdev, &pdev->dev);
adap->port[0] = netdev;
+ pi->port_id = 0;
err = register_netdev(adap->port[0]);
if (err) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index ef06ce8..48970ba 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -96,7 +96,7 @@ static int fill_action_fields(struct adapter *adap,
LIST_HEAD(actions);
exts = cls->knode.exts;
- if (tc_no_actions(exts))
+ if (!tcf_exts_has_actions(exts))
return -EINVAL;
tcf_exts_to_list(exts, &actions);
@@ -146,11 +146,11 @@ static int fill_action_fields(struct adapter *adap,
return 0;
}
-int cxgb4_config_knode(struct net_device *dev, __be16 protocol,
- struct tc_cls_u32_offload *cls)
+int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
{
const struct cxgb4_match_field *start, *link_start = NULL;
struct adapter *adapter = netdev2adap(dev);
+ __be16 protocol = cls->common.protocol;
struct ch_filter_specification fs;
struct cxgb4_tc_u32_table *t;
struct cxgb4_link *link;
@@ -338,8 +338,7 @@ out:
return ret;
}
-int cxgb4_delete_knode(struct net_device *dev, __be16 protocol,
- struct tc_cls_u32_offload *cls)
+int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
{
struct adapter *adapter = netdev2adap(dev);
unsigned int filter_id, max_tids, i, j;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
index 021261a..70a07b7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
@@ -44,10 +44,8 @@ static inline bool can_tc_u32_offload(struct net_device *dev)
return (dev->features & NETIF_F_HW_TC) && adap->tc_u32 ? true : false;
}
-int cxgb4_config_knode(struct net_device *dev, __be16 protocol,
- struct tc_cls_u32_offload *cls);
-int cxgb4_delete_knode(struct net_device *dev, __be16 protocol,
- struct tc_cls_u32_offload *cls);
+int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls);
+int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls);
void cxgb4_cleanup_tc_u32(struct adapter *adapter);
struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c
index 02acff7..9148abb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c
@@ -533,10 +533,10 @@ struct sched_table *t4_init_sched(unsigned int sched_size)
void t4_cleanup_sched(struct adapter *adap)
{
struct sched_table *s;
- unsigned int i;
+ unsigned int j, i;
- for_each_port(adap, i) {
- struct port_info *pi = netdev2pinfo(adap->port[i]);
+ for_each_port(adap, j) {
+ struct port_info *pi = netdev2pinfo(adap->port[j]);
s = pi->sched_tbl;
for (i = 0; i < s->sched_size; i++) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 0293b41..b65ce26 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -913,7 +913,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0xd010, 0xd03c,
0xdfc0, 0xdfe0,
0xe000, 0xea7c,
- 0xf000, 0x11190,
+ 0xf000, 0x11110,
+ 0x11118, 0x11190,
0x19040, 0x1906c,
0x19078, 0x19080,
0x1908c, 0x190e4,
@@ -1439,8 +1440,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x1ff00, 0x1ff84,
0x1ffc0, 0x1ffc8,
0x30000, 0x30030,
- 0x30038, 0x30038,
- 0x30040, 0x30040,
0x30100, 0x30144,
0x30190, 0x301a0,
0x301a8, 0x301b8,
@@ -1551,8 +1550,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x33c3c, 0x33c50,
0x33cf0, 0x33cfc,
0x34000, 0x34030,
- 0x34038, 0x34038,
- 0x34040, 0x34040,
0x34100, 0x34144,
0x34190, 0x341a0,
0x341a8, 0x341b8,
@@ -1663,8 +1660,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x37c3c, 0x37c50,
0x37cf0, 0x37cfc,
0x38000, 0x38030,
- 0x38038, 0x38038,
- 0x38040, 0x38040,
0x38100, 0x38144,
0x38190, 0x381a0,
0x381a8, 0x381b8,
@@ -1775,8 +1770,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x3bc3c, 0x3bc50,
0x3bcf0, 0x3bcfc,
0x3c000, 0x3c030,
- 0x3c038, 0x3c038,
- 0x3c040, 0x3c040,
0x3c100, 0x3c144,
0x3c190, 0x3c1a0,
0x3c1a8, 0x3c1b8,
@@ -2040,12 +2033,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x1190, 0x1194,
0x11a0, 0x11a4,
0x11b0, 0x11b4,
- 0x11fc, 0x1258,
- 0x1280, 0x12d4,
- 0x12d9, 0x12d9,
- 0x12de, 0x12de,
- 0x12e3, 0x12e3,
- 0x12e8, 0x133c,
+ 0x11fc, 0x1274,
+ 0x1280, 0x133c,
0x1800, 0x18fc,
0x3000, 0x302c,
0x3060, 0x30b0,
@@ -2076,6 +2065,9 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x5ea0, 0x5eb0,
0x5ec0, 0x5ec0,
0x5ec8, 0x5ed0,
+ 0x5ee0, 0x5ee0,
+ 0x5ef0, 0x5ef0,
+ 0x5f00, 0x5f00,
0x6000, 0x6020,
0x6028, 0x6040,
0x6058, 0x609c,
@@ -2133,6 +2125,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0xd300, 0xd31c,
0xdfc0, 0xdfe0,
0xe000, 0xf008,
+ 0xf010, 0xf018,
+ 0xf020, 0xf028,
0x11000, 0x11014,
0x11048, 0x1106c,
0x11074, 0x11088,
@@ -2256,13 +2250,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x1ff00, 0x1ff84,
0x1ffc0, 0x1ffc8,
0x30000, 0x30030,
- 0x30038, 0x30038,
- 0x30040, 0x30040,
- 0x30048, 0x30048,
- 0x30050, 0x30050,
- 0x3005c, 0x30060,
- 0x30068, 0x30068,
- 0x30070, 0x30070,
0x30100, 0x30168,
0x30190, 0x301a0,
0x301a8, 0x301b8,
@@ -2325,13 +2312,12 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x326a8, 0x326a8,
0x326ec, 0x326ec,
0x32a00, 0x32abc,
- 0x32b00, 0x32b38,
+ 0x32b00, 0x32b18,
+ 0x32b20, 0x32b38,
0x32b40, 0x32b58,
0x32b60, 0x32b78,
0x32c00, 0x32c00,
0x32c08, 0x32c3c,
- 0x32e00, 0x32e2c,
- 0x32f00, 0x32f2c,
0x33000, 0x3302c,
0x33034, 0x33050,
0x33058, 0x33058,
@@ -2396,13 +2382,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x33c38, 0x33c50,
0x33cf0, 0x33cfc,
0x34000, 0x34030,
- 0x34038, 0x34038,
- 0x34040, 0x34040,
- 0x34048, 0x34048,
- 0x34050, 0x34050,
- 0x3405c, 0x34060,
- 0x34068, 0x34068,
- 0x34070, 0x34070,
0x34100, 0x34168,
0x34190, 0x341a0,
0x341a8, 0x341b8,
@@ -2465,13 +2444,12 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x366a8, 0x366a8,
0x366ec, 0x366ec,
0x36a00, 0x36abc,
- 0x36b00, 0x36b38,
+ 0x36b00, 0x36b18,
+ 0x36b20, 0x36b38,
0x36b40, 0x36b58,
0x36b60, 0x36b78,
0x36c00, 0x36c00,
0x36c08, 0x36c3c,
- 0x36e00, 0x36e2c,
- 0x36f00, 0x36f2c,
0x37000, 0x3702c,
0x37034, 0x37050,
0x37058, 0x37058,
@@ -2545,8 +2523,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
0x40280, 0x40280,
0x40304, 0x40304,
0x40330, 0x4033c,
- 0x41304, 0x413b8,
- 0x413c0, 0x413c8,
+ 0x41304, 0x413c8,
0x413d0, 0x413dc,
0x413f0, 0x413f0,
0x41400, 0x4140c,
@@ -3100,6 +3077,179 @@ int t4_get_exprom_version(struct adapter *adap, u32 *vers)
}
/**
+ * t4_get_vpd_version - return the VPD version
+ * @adapter: the adapter
+ * @vers: where to place the version
+ *
+ * Reads the VPD via the Firmware interface (thus this can only be called
+ * once we're ready to issue Firmware commands). The format of the
+ * VPD version is adapter specific. Returns 0 on success, an error on
+ * failure.
+ *
+ * Note that early versions of the Firmware didn't include the ability
+ * to retrieve the VPD version, so we zero out the return-value parameter
+ * in that case to avoid leaving it with garbage in it.
+ *
+ * Also note that the Firmware will return its cached copy of the VPD
+ * Revision ID, not the actual Revision ID as written in the Serial
+ * EEPROM. This is only an issue if a new VPD has been written and the
+ * Firmware/Chip haven't yet gone through a RESET sequence. So it's best
+ * to defer calling this routine till after a FW_RESET_CMD has been issued
+ * if the Host Driver will be performing a full adapter initialization.
+ */
+int t4_get_vpd_version(struct adapter *adapter, u32 *vers)
+{
+ u32 vpdrev_param;
+ int ret;
+
+ vpdrev_param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_VPDREV));
+ ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
+ 1, &vpdrev_param, vers);
+ if (ret)
+ *vers = 0;
+ return ret;
+}
+
+/**
+ * t4_get_scfg_version - return the Serial Configuration version
+ * @adapter: the adapter
+ * @vers: where to place the version
+ *
+ * Reads the Serial Configuration Version via the Firmware interface
+ * (thus this can only be called once we're ready to issue Firmware
+ * commands). The format of the Serial Configuration version is
+ * adapter specific. Returns 0 on success, an error on failure.
+ *
+ * Note that early versions of the Firmware didn't include the ability
+ * to retrieve the Serial Configuration version, so we zero out the
+ * return-value parameter in that case to avoid leaving it with
+ * garbage in it.
+ *
+ * Also note that the Firmware will return its cached copy of the Serial
+ * Initialization Revision ID, not the actual Revision ID as written in
+ * the Serial EEPROM. This is only an issue if a new VPD has been written
+ * and the Firmware/Chip haven't yet gone through a RESET sequence. So
+ * it's best to defer calling this routine till after a FW_RESET_CMD has
+ * been issued if the Host Driver will be performing a full adapter
+ * initialization.
+ */
+int t4_get_scfg_version(struct adapter *adapter, u32 *vers)
+{
+ u32 scfgrev_param;
+ int ret;
+
+ scfgrev_param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_SCFGREV));
+ ret = t4_query_params(adapter, adapter->mbox, adapter->pf, 0,
+ 1, &scfgrev_param, vers);
+ if (ret)
+ *vers = 0;
+ return ret;
+}
+
+/**
+ * t4_get_version_info - extract various chip/firmware version information
+ * @adapter: the adapter
+ *
+ * Reads various chip/firmware version numbers and stores them into the
+ * adapter's Adapter Parameters structure. If any of the efforts fails,
+ * the first failure will be returned, but all of the version numbers
+ * will be read.
+ */
+int t4_get_version_info(struct adapter *adapter)
+{
+ int ret = 0;
+
+ #define FIRST_RET(__getvinfo) \
+ do { \
+ int __ret = __getvinfo; \
+ if (__ret && !ret) \
+ ret = __ret; \
+ } while (0)
+
+ FIRST_RET(t4_get_fw_version(adapter, &adapter->params.fw_vers));
+ FIRST_RET(t4_get_bs_version(adapter, &adapter->params.bs_vers));
+ FIRST_RET(t4_get_tp_version(adapter, &adapter->params.tp_vers));
+ FIRST_RET(t4_get_exprom_version(adapter, &adapter->params.er_vers));
+ FIRST_RET(t4_get_scfg_version(adapter, &adapter->params.scfg_vers));
+ FIRST_RET(t4_get_vpd_version(adapter, &adapter->params.vpd_vers));
+
+ #undef FIRST_RET
+ return ret;
+}
+
+/**
+ * t4_dump_version_info - dump all of the adapter configuration IDs
+ * @adapter: the adapter
+ *
+ * Dumps all of the various bits of adapter configuration version/revision
+ * IDs information. This is typically called at some point after
+ * t4_get_version_info() has been called.
+ */
+void t4_dump_version_info(struct adapter *adapter)
+{
+ /* Device information */
+ dev_info(adapter->pdev_dev, "Chelsio %s rev %d\n",
+ adapter->params.vpd.id,
+ CHELSIO_CHIP_RELEASE(adapter->params.chip));
+ dev_info(adapter->pdev_dev, "S/N: %s, P/N: %s\n",
+ adapter->params.vpd.sn, adapter->params.vpd.pn);
+
+ /* Firmware Version */
+ if (!adapter->params.fw_vers)
+ dev_warn(adapter->pdev_dev, "No firmware loaded\n");
+ else
+ dev_info(adapter->pdev_dev, "Firmware version: %u.%u.%u.%u\n",
+ FW_HDR_FW_VER_MAJOR_G(adapter->params.fw_vers),
+ FW_HDR_FW_VER_MINOR_G(adapter->params.fw_vers),
+ FW_HDR_FW_VER_MICRO_G(adapter->params.fw_vers),
+ FW_HDR_FW_VER_BUILD_G(adapter->params.fw_vers));
+
+ /* Bootstrap Firmware Version. (Some adapters don't have Bootstrap
+ * Firmware, so dev_info() is more appropriate here.)
+ */
+ if (!adapter->params.bs_vers)
+ dev_info(adapter->pdev_dev, "No bootstrap loaded\n");
+ else
+ dev_info(adapter->pdev_dev, "Bootstrap version: %u.%u.%u.%u\n",
+ FW_HDR_FW_VER_MAJOR_G(adapter->params.bs_vers),
+ FW_HDR_FW_VER_MINOR_G(adapter->params.bs_vers),
+ FW_HDR_FW_VER_MICRO_G(adapter->params.bs_vers),
+ FW_HDR_FW_VER_BUILD_G(adapter->params.bs_vers));
+
+ /* TP Microcode Version */
+ if (!adapter->params.tp_vers)
+ dev_warn(adapter->pdev_dev, "No TP Microcode loaded\n");
+ else
+ dev_info(adapter->pdev_dev,
+ "TP Microcode version: %u.%u.%u.%u\n",
+ FW_HDR_FW_VER_MAJOR_G(adapter->params.tp_vers),
+ FW_HDR_FW_VER_MINOR_G(adapter->params.tp_vers),
+ FW_HDR_FW_VER_MICRO_G(adapter->params.tp_vers),
+ FW_HDR_FW_VER_BUILD_G(adapter->params.tp_vers));
+
+ /* Expansion ROM version */
+ if (!adapter->params.er_vers)
+ dev_info(adapter->pdev_dev, "No Expansion ROM loaded\n");
+ else
+ dev_info(adapter->pdev_dev,
+ "Expansion ROM version: %u.%u.%u.%u\n",
+ FW_HDR_FW_VER_MAJOR_G(adapter->params.er_vers),
+ FW_HDR_FW_VER_MINOR_G(adapter->params.er_vers),
+ FW_HDR_FW_VER_MICRO_G(adapter->params.er_vers),
+ FW_HDR_FW_VER_BUILD_G(adapter->params.er_vers));
+
+ /* Serial Configuration version */
+ dev_info(adapter->pdev_dev, "Serial Configuration version: %#x\n",
+ adapter->params.scfg_vers);
+
+ /* VPD Version */
+ dev_info(adapter->pdev_dev, "VPD version: %#x\n",
+ adapter->params.vpd_vers);
+}
+
+/**
* t4_check_fw_version - check if the FW is supported with this driver
* @adap: the adapter
*
@@ -3685,16 +3835,143 @@ void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf)
}
}
-#define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
- FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
- FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
- FW_PORT_CAP_ANEG)
+#define ADVERT_MASK (FW_PORT_CAP32_SPEED_V(FW_PORT_CAP32_SPEED_M) | \
+ FW_PORT_CAP32_ANEG)
+
+/**
+ * fwcaps16_to_caps32 - convert 16-bit Port Capabilities to 32-bits
+ * @caps16: a 16-bit Port Capabilities value
+ *
+ * Returns the equivalent 32-bit Port Capabilities value.
+ */
+static fw_port_cap32_t fwcaps16_to_caps32(fw_port_cap16_t caps16)
+{
+ fw_port_cap32_t caps32 = 0;
+
+ #define CAP16_TO_CAP32(__cap) \
+ do { \
+ if (caps16 & FW_PORT_CAP_##__cap) \
+ caps32 |= FW_PORT_CAP32_##__cap; \
+ } while (0)
+
+ CAP16_TO_CAP32(SPEED_100M);
+ CAP16_TO_CAP32(SPEED_1G);
+ CAP16_TO_CAP32(SPEED_25G);
+ CAP16_TO_CAP32(SPEED_10G);
+ CAP16_TO_CAP32(SPEED_40G);
+ CAP16_TO_CAP32(SPEED_100G);
+ CAP16_TO_CAP32(FC_RX);
+ CAP16_TO_CAP32(FC_TX);
+ CAP16_TO_CAP32(ANEG);
+ CAP16_TO_CAP32(MDIX);
+ CAP16_TO_CAP32(MDIAUTO);
+ CAP16_TO_CAP32(FEC_RS);
+ CAP16_TO_CAP32(FEC_BASER_RS);
+ CAP16_TO_CAP32(802_3_PAUSE);
+ CAP16_TO_CAP32(802_3_ASM_DIR);
+
+ #undef CAP16_TO_CAP32
+
+ return caps32;
+}
+
+/**
+ * fwcaps32_to_caps16 - convert 32-bit Port Capabilities to 16-bits
+ * @caps32: a 32-bit Port Capabilities value
+ *
+ * Returns the equivalent 16-bit Port Capabilities value. Note that
+ * not all 32-bit Port Capabilities can be represented in the 16-bit
+ * Port Capabilities and some fields/values may not make it.
+ */
+static fw_port_cap16_t fwcaps32_to_caps16(fw_port_cap32_t caps32)
+{
+ fw_port_cap16_t caps16 = 0;
+
+ #define CAP32_TO_CAP16(__cap) \
+ do { \
+ if (caps32 & FW_PORT_CAP32_##__cap) \
+ caps16 |= FW_PORT_CAP_##__cap; \
+ } while (0)
+
+ CAP32_TO_CAP16(SPEED_100M);
+ CAP32_TO_CAP16(SPEED_1G);
+ CAP32_TO_CAP16(SPEED_10G);
+ CAP32_TO_CAP16(SPEED_25G);
+ CAP32_TO_CAP16(SPEED_40G);
+ CAP32_TO_CAP16(SPEED_100G);
+ CAP32_TO_CAP16(FC_RX);
+ CAP32_TO_CAP16(FC_TX);
+ CAP32_TO_CAP16(802_3_PAUSE);
+ CAP32_TO_CAP16(802_3_ASM_DIR);
+ CAP32_TO_CAP16(ANEG);
+ CAP32_TO_CAP16(MDIX);
+ CAP32_TO_CAP16(MDIAUTO);
+ CAP32_TO_CAP16(FEC_RS);
+ CAP32_TO_CAP16(FEC_BASER_RS);
+
+ #undef CAP32_TO_CAP16
+
+ return caps16;
+}
+
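
One property of the pair above worth spelling out: the 32-to-16 direction is
lossy. FW_PORT_CAP32_SPEED_50G (like the 200G/400G bits) has no entry in the
CAP32_TO_CAP16 list, so a round trip through both helpers silently drops it;
round_trip() below is a hypothetical illustration, not part of the patch:

static fw_port_cap32_t round_trip(fw_port_cap32_t caps32)
{
	return fwcaps16_to_caps32(fwcaps32_to_caps16(caps32));
}

/* round_trip(FW_PORT_CAP32_SPEED_50G | FW_PORT_CAP32_SPEED_100G)
 *	== FW_PORT_CAP32_SPEED_100G
 */
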
+/* Translate Firmware Port Capabilities Pause specification to Common Code */
+static inline enum cc_pause fwcap_to_cc_pause(fw_port_cap32_t fw_pause)
+{
+ enum cc_pause cc_pause = 0;
+
+ if (fw_pause & FW_PORT_CAP32_FC_RX)
+ cc_pause |= PAUSE_RX;
+ if (fw_pause & FW_PORT_CAP32_FC_TX)
+ cc_pause |= PAUSE_TX;
+
+ return cc_pause;
+}
+
+/* Translate Common Code Pause specification into Firmware Port Capabilities */
+static inline fw_port_cap32_t cc_to_fwcap_pause(enum cc_pause cc_pause)
+{
+ fw_port_cap32_t fw_pause = 0;
+
+ if (cc_pause & PAUSE_RX)
+ fw_pause |= FW_PORT_CAP32_FC_RX;
+ if (cc_pause & PAUSE_TX)
+ fw_pause |= FW_PORT_CAP32_FC_TX;
+
+ return fw_pause;
+}
+
+/* Translate Firmware Forward Error Correction specification to Common Code */
+static inline enum cc_fec fwcap_to_cc_fec(fw_port_cap32_t fw_fec)
+{
+ enum cc_fec cc_fec = 0;
+
+ if (fw_fec & FW_PORT_CAP32_FEC_RS)
+ cc_fec |= FEC_RS;
+ if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
+ cc_fec |= FEC_BASER_RS;
+
+ return cc_fec;
+}
+
+/* Translate Common Code Forward Error Correction specification to Firmware */
+static inline fw_port_cap32_t cc_to_fwcap_fec(enum cc_fec cc_fec)
+{
+ fw_port_cap32_t fw_fec = 0;
+
+ if (cc_fec & FEC_RS)
+ fw_fec |= FW_PORT_CAP32_FEC_RS;
+ if (cc_fec & FEC_BASER_RS)
+ fw_fec |= FW_PORT_CAP32_FEC_BASER_RS;
+
+ return fw_fec;
+}
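
/* Illustrative sketch, not part of the patch: the four translators above
 * are inverses on the bits they know about, which is what t4_link_l1cfg()
 * below relies on when converting the driver's requested Pause/FEC settings
 * into Firmware Port Capabilities.  Note that FEC_AUTO has no Firmware
 * equivalent (cc_to_fwcap_fec() maps it to 0) and is resolved to a concrete
 * FEC mode before conversion.  A round-trip check might look like:
 */
static inline void pause_fec_round_trip_example(void)
{
	enum cc_pause cc = PAUSE_RX | PAUSE_TX;
	enum cc_fec fec = FEC_RS;

	/* Both round trips are identity mappings on these flags */
	WARN_ON(fwcap_to_cc_pause(cc_to_fwcap_pause(cc)) != cc);
	WARN_ON(fwcap_to_cc_fec(cc_to_fwcap_fec(fec)) != fec);
}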
/**
* t4_link_l1cfg - apply link configuration to MAC/PHY
- * @phy: the PHY to setup
- * @mac: the MAC to setup
- * @lc: the requested link configuration
+ * @adapter: the adapter
+ * @mbox: the Firmware Mailbox to use
+ * @port: the Port ID
+ * @lc: the Port's Link Configuration
*
* Set up a port's MAC and PHY according to a desired link configuration.
* - If the PHY can auto-negotiate first decide what to advertise, then
@@ -3703,47 +3980,64 @@ void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf)
* - If auto-negotiation is off set the MAC to the proper speed/duplex/FC,
* otherwise do it later based on the outcome of auto-negotiation.
*/
-int t4_link_l1cfg(struct adapter *adap, unsigned int mbox, unsigned int port,
- struct link_config *lc)
+int t4_link_l1cfg(struct adapter *adapter, unsigned int mbox,
+ unsigned int port, struct link_config *lc)
{
- struct fw_port_cmd c;
- unsigned int mdi = FW_PORT_CAP_MDI_V(FW_PORT_CAP_MDI_AUTO);
- unsigned int fc = 0, fec = 0, fw_fec = 0;
+ unsigned int fw_caps = adapter->params.fw_caps_support;
+ struct fw_port_cmd cmd;
+ unsigned int fw_mdi = FW_PORT_CAP32_MDI_V(FW_PORT_CAP32_MDI_AUTO);
+ fw_port_cap32_t fw_fc, cc_fec, fw_fec, rcap;
lc->link_ok = 0;
- if (lc->requested_fc & PAUSE_RX)
- fc |= FW_PORT_CAP_FC_RX;
- if (lc->requested_fc & PAUSE_TX)
- fc |= FW_PORT_CAP_FC_TX;
-
- fec = lc->requested_fec & FEC_AUTO ? lc->auto_fec : lc->requested_fec;
-
- if (fec & FEC_RS)
- fw_fec |= FW_PORT_CAP_FEC_RS;
- if (fec & FEC_BASER_RS)
- fw_fec |= FW_PORT_CAP_FEC_BASER_RS;
- memset(&c, 0, sizeof(c));
- c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
- FW_CMD_REQUEST_F | FW_CMD_EXEC_F |
- FW_PORT_CMD_PORTID_V(port));
- c.action_to_len16 =
- cpu_to_be32(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) |
- FW_LEN16(c));
+ /* Convert driver coding of Pause Frame Flow Control settings into the
+ * Firmware's API.
+ */
+ fw_fc = cc_to_fwcap_pause(lc->requested_fc);
+
+ /* Convert Common Code Forward Error Correction settings into the
+ * Firmware's API. If the current Requested FEC has "Automatic"
+ * (IEEE 802.3) specified, then we use whatever the Firmware
+ * sent us as part of its IEEE 802.3-based interpretation of
+ * the Transceiver Module EEPROM FEC parameters. Otherwise we
+ * use whatever is in the current Requested FEC settings.
+ */
+ if (lc->requested_fec & FEC_AUTO)
+ cc_fec = fwcap_to_cc_fec(lc->def_acaps);
+ else
+ cc_fec = lc->requested_fec;
+ fw_fec = cc_to_fwcap_fec(cc_fec);
- if (!(lc->supported & FW_PORT_CAP_ANEG)) {
- c.u.l1cfg.rcap = cpu_to_be32((lc->supported & ADVERT_MASK) |
- fc | fw_fec);
- lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
+ /* Figure out what our Requested Port Capabilities are going to be.
+ */
+ if (!(lc->pcaps & FW_PORT_CAP32_ANEG)) {
+ rcap = (lc->pcaps & ADVERT_MASK) | fw_fc | fw_fec;
+ lc->fc = lc->requested_fc & ~PAUSE_AUTONEG;
+ lc->fec = cc_fec;
} else if (lc->autoneg == AUTONEG_DISABLE) {
- c.u.l1cfg.rcap = cpu_to_be32(lc->requested_speed | fc |
- fw_fec | mdi);
- lc->fc = lc->requested_fc & (PAUSE_RX | PAUSE_TX);
- } else
- c.u.l1cfg.rcap = cpu_to_be32(lc->advertising | fc |
- fw_fec | mdi);
+ rcap = lc->speed_caps | fw_fc | fw_fec | fw_mdi;
+ lc->fc = lc->requested_fc & ~PAUSE_AUTONEG;
+ lc->fec = cc_fec;
+ } else {
+ rcap = lc->acaps | fw_fc | fw_fec | fw_mdi;
+ }
- return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
+ /* And send that on to the Firmware ...
+ */
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
+ FW_CMD_REQUEST_F | FW_CMD_EXEC_F |
+ FW_PORT_CMD_PORTID_V(port));
+ cmd.action_to_len16 =
+ cpu_to_be32(FW_PORT_CMD_ACTION_V(fw_caps == FW_CAPS16
+ ? FW_PORT_ACTION_L1_CFG
+ : FW_PORT_ACTION_L1_CFG32) |
+ FW_LEN16(cmd));
+ if (fw_caps == FW_CAPS16)
+ cmd.u.l1cfg.rcap = cpu_to_be32(fwcaps32_to_caps16(rcap));
+ else
+ cmd.u.l1cfg32.rcap32 = cpu_to_be32(rcap);
+ return t4_wr_mbox(adapter, mbox, &cmd, sizeof(cmd), NULL);
}
/**
@@ -3765,7 +4059,7 @@ int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port)
c.action_to_len16 =
cpu_to_be32(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_L1_CFG) |
FW_LEN16(c));
- c.u.l1cfg.rcap = cpu_to_be32(FW_PORT_CAP_ANEG);
+ c.u.l1cfg.rcap = cpu_to_be32(FW_PORT_CAP32_ANEG);
return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL);
}
@@ -4254,6 +4548,18 @@ static void mps_intr_handler(struct adapter *adapter)
{ FRMERR_F, "MPS Tx framing error", -1, 1 },
{ 0 }
};
+ static const struct intr_info t6_mps_tx_intr_info[] = {
+ { TPFIFO_V(TPFIFO_M), "MPS Tx TP FIFO parity error", -1, 1 },
+ { NCSIFIFO_F, "MPS Tx NC-SI FIFO parity error", -1, 1 },
+ { TXDATAFIFO_V(TXDATAFIFO_M), "MPS Tx data FIFO parity error",
+ -1, 1 },
+ { TXDESCFIFO_V(TXDESCFIFO_M), "MPS Tx desc FIFO parity error",
+ -1, 1 },
+ /* MPS Tx Bubble is normal for T6 */
+ { SECNTERR_F, "MPS Tx SOP/EOP error", -1, 1 },
+ { FRMERR_F, "MPS Tx framing error", -1, 1 },
+ { 0 }
+ };
static const struct intr_info mps_trc_intr_info[] = {
{ FILTMEM_V(FILTMEM_M), "MPS TRC filter parity error", -1, 1 },
{ PKTFIFO_V(PKTFIFO_M), "MPS TRC packet FIFO parity error",
@@ -4285,7 +4591,9 @@ static void mps_intr_handler(struct adapter *adapter)
fat = t4_handle_intr_status(adapter, MPS_RX_PERR_INT_CAUSE_A,
mps_rx_intr_info) +
t4_handle_intr_status(adapter, MPS_TX_INT_CAUSE_A,
- mps_tx_intr_info) +
+ is_t6(adapter->params.chip)
+ ? t6_mps_tx_intr_info
+ : mps_tx_intr_info) +
t4_handle_intr_status(adapter, MPS_TRC_INT_CAUSE_A,
mps_trc_intr_info) +
t4_handle_intr_status(adapter, MPS_STAT_PERR_INT_CAUSE_SRAM_A,
@@ -5693,10 +6001,8 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p)
p->tx_ppp7 = GET_STAT(TX_PORT_PPP7);
if (CHELSIO_CHIP_VERSION(adap->params.chip) >= CHELSIO_T5) {
- if (stat_ctl & COUNTPAUSESTATTX_F) {
- p->tx_frames -= p->tx_pause;
- p->tx_octets -= p->tx_pause * 64;
- }
+ if (stat_ctl & COUNTPAUSESTATTX_F)
+ p->tx_frames_64 -= p->tx_pause;
if (stat_ctl & COUNTPAUSEMCTX_F)
p->tx_mcast_frames -= p->tx_pause;
}
@@ -5729,10 +6035,8 @@ void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p)
p->rx_ppp7 = GET_STAT(RX_PORT_PPP7);
if (CHELSIO_CHIP_VERSION(adap->params.chip) >= CHELSIO_T5) {
- if (stat_ctl & COUNTPAUSESTATRX_F) {
- p->rx_frames -= p->rx_pause;
- p->rx_octets -= p->rx_pause * 64;
- }
+ if (stat_ctl & COUNTPAUSESTATRX_F)
+ p->rx_frames_64 -= p->rx_pause;
if (stat_ctl & COUNTPAUSEMCRX_F)
p->rx_mcast_frames -= p->rx_pause;
}
@@ -6449,6 +6753,17 @@ int t4_fw_upgrade(struct adapter *adap, unsigned int mbox,
goto out;
/*
+ * If there was a Firmware Configuration File stored in FLASH,
+ * there's a good chance that it won't be compatible with the new
+ * Firmware. In order to prevent difficult-to-diagnose adapter
+ * initialization issues, we clear out the Firmware Configuration File
+ * portion of the FLASH. The user will need to re-FLASH a new
+ * Firmware Configuration File which is compatible with the new
+ * Firmware if that's desired.
+ */
+ (void)t4_load_cfg(adap, NULL, 0);
+
+ /*
* Older versions of the firmware don't understand the new
* PCIE_FW.HALT flag and so won't know to perform a RESET when they
* restart. So for newly loaded older firmware we'll have to do the
@@ -7471,6 +7786,98 @@ static const char *t4_link_down_rc_str(unsigned char link_down_rc)
}
/**
+ * fwcap_to_speed - return the highest speed in the Port Capabilities
+ * @caps: a 32-bit Port Capabilities value
+ *
+ * Returns the highest speed set in the Port Capabilities, in Mb/s.
+ */
+static unsigned int fwcap_to_speed(fw_port_cap32_t caps)
+{
+ #define TEST_SPEED_RETURN(__caps_speed, __speed) \
+ do { \
+ if (caps & FW_PORT_CAP32_SPEED_##__caps_speed) \
+ return __speed; \
+ } while (0)
+
+ TEST_SPEED_RETURN(400G, 400000);
+ TEST_SPEED_RETURN(200G, 200000);
+ TEST_SPEED_RETURN(100G, 100000);
+ TEST_SPEED_RETURN(50G, 50000);
+ TEST_SPEED_RETURN(40G, 40000);
+ TEST_SPEED_RETURN(25G, 25000);
+ TEST_SPEED_RETURN(10G, 10000);
+ TEST_SPEED_RETURN(1G, 1000);
+ TEST_SPEED_RETURN(100M, 100);
+
+ #undef TEST_SPEED_RETURN
+
+ return 0;
+}
+
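/* Illustrative note, not part of the patch: fwcap_to_speed() tests the
 * speed bits from fastest to slowest, so a capability word advertising
 * several speeds reports the fastest one, e.g.:
 *
 *	fwcap_to_speed(FW_PORT_CAP32_SPEED_1G |
 *		       FW_PORT_CAP32_SPEED_10G |
 *		       FW_PORT_CAP32_SPEED_25G) == 25000	(Mb/s)
 */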
+/**
+ * fwcap_to_fwspeed - return highest speed in Port Capabilities
+ * @acaps: advertised Port Capabilities
+ *
+ * Returns the highest speed capability flagged in the advertised Port
+ * Capabilities: either the fastest speed from the list of supported
+ * speeds, or whatever single speed the user has selected via ethtool.
+ */
+static fw_port_cap32_t fwcap_to_fwspeed(fw_port_cap32_t acaps)
+{
+ #define TEST_SPEED_RETURN(__caps_speed) \
+ do { \
+ if (acaps & FW_PORT_CAP32_SPEED_##__caps_speed) \
+ return FW_PORT_CAP32_SPEED_##__caps_speed; \
+ } while (0)
+
+ TEST_SPEED_RETURN(400G);
+ TEST_SPEED_RETURN(200G);
+ TEST_SPEED_RETURN(100G);
+ TEST_SPEED_RETURN(50G);
+ TEST_SPEED_RETURN(40G);
+ TEST_SPEED_RETURN(25G);
+ TEST_SPEED_RETURN(10G);
+ TEST_SPEED_RETURN(1G);
+ TEST_SPEED_RETURN(100M);
+
+ #undef TEST_SPEED_RETURN
+
+ return 0;
+}
+
+/**
+ * lstatus_to_fwcap - translate old lstatus to 32-bit Port Capabilities
+ * @lstatus: old FW_PORT_ACTION_GET_PORT_INFO lstatus value
+ *
+ * Translates old FW_PORT_ACTION_GET_PORT_INFO lstatus field into new
+ * 32-bit Port Capabilities value.
+ */
+static fw_port_cap32_t lstatus_to_fwcap(u32 lstatus)
+{
+ fw_port_cap32_t linkattr = 0;
+
+ /* Unfortunately the format of the Link Status in the old
+ * 16-bit Port Information message isn't the same as the
+ * 16-bit Port Capabilities bitfield used everywhere else ...
+ */
+ if (lstatus & FW_PORT_CMD_RXPAUSE_F)
+ linkattr |= FW_PORT_CAP32_FC_RX;
+ if (lstatus & FW_PORT_CMD_TXPAUSE_F)
+ linkattr |= FW_PORT_CAP32_FC_TX;
+ if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100M))
+ linkattr |= FW_PORT_CAP32_SPEED_100M;
+ if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_1G))
+ linkattr |= FW_PORT_CAP32_SPEED_1G;
+ if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
+ linkattr |= FW_PORT_CAP32_SPEED_10G;
+ if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
+ linkattr |= FW_PORT_CAP32_SPEED_25G;
+ if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
+ linkattr |= FW_PORT_CAP32_SPEED_40G;
+ if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
+ linkattr |= FW_PORT_CAP32_SPEED_100G;
+
+ return linkattr;
+}
+
+/**
* t4_handle_get_port_info - process a FW reply message
* @pi: the port info
* @rpl: start of the FW message
@@ -7479,56 +7886,123 @@ static const char *t4_link_down_rc_str(unsigned char link_down_rc)
*/
void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
{
- const struct fw_port_cmd *p = (const void *)rpl;
- struct adapter *adap = pi->adapter;
-
- /* link/module state change message */
- int speed = 0, fc = 0;
- struct link_config *lc;
- u32 stat = be32_to_cpu(p->u.info.lstatus_to_modtype);
- int link_ok = (stat & FW_PORT_CMD_LSTATUS_F) != 0;
- u32 mod = FW_PORT_CMD_MODTYPE_G(stat);
-
- if (stat & FW_PORT_CMD_RXPAUSE_F)
- fc |= PAUSE_RX;
- if (stat & FW_PORT_CMD_TXPAUSE_F)
- fc |= PAUSE_TX;
- if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100M))
- speed = 100;
- else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_1G))
- speed = 1000;
- else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
- speed = 10000;
- else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
- speed = 25000;
- else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
- speed = 40000;
- else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
- speed = 100000;
-
- lc = &pi->link_cfg;
-
- if (mod != pi->mod_type) {
- pi->mod_type = mod;
- t4_os_portmod_changed(adap, pi->port_id);
+ const struct fw_port_cmd *cmd = (const void *)rpl;
+ int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
+ struct adapter *adapter = pi->adapter;
+ struct link_config *lc = &pi->link_cfg;
+ int link_ok, linkdnrc;
+ enum fw_port_type port_type;
+ enum fw_port_module_type mod_type;
+ unsigned int speed, fc, fec;
+ fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
+
+ /* Extract the various fields from the Port Information message.
+ */
+ switch (action) {
+ case FW_PORT_ACTION_GET_PORT_INFO: {
+ u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype);
+
+ link_ok = (lstatus & FW_PORT_CMD_LSTATUS_F) != 0;
+ linkdnrc = FW_PORT_CMD_LINKDNRC_G(lstatus);
+ port_type = FW_PORT_CMD_PTYPE_G(lstatus);
+ mod_type = FW_PORT_CMD_MODTYPE_G(lstatus);
+ pcaps = fwcaps16_to_caps32(be16_to_cpu(cmd->u.info.pcap));
+ acaps = fwcaps16_to_caps32(be16_to_cpu(cmd->u.info.acap));
+ lpacaps = fwcaps16_to_caps32(be16_to_cpu(cmd->u.info.lpacap));
+ linkattr = lstatus_to_fwcap(lstatus);
+ break;
}
+
+ case FW_PORT_ACTION_GET_PORT_INFO32: {
+ u32 lstatus32;
+
+ lstatus32 = be32_to_cpu(cmd->u.info32.lstatus32_to_cbllen32);
+ link_ok = (lstatus32 & FW_PORT_CMD_LSTATUS32_F) != 0;
+ linkdnrc = FW_PORT_CMD_LINKDNRC32_G(lstatus32);
+ port_type = FW_PORT_CMD_PORTTYPE32_G(lstatus32);
+ mod_type = FW_PORT_CMD_MODTYPE32_G(lstatus32);
+ pcaps = be32_to_cpu(cmd->u.info32.pcaps32);
+ acaps = be32_to_cpu(cmd->u.info32.acaps32);
+ lpacaps = be32_to_cpu(cmd->u.info32.lpacaps32);
+ linkattr = be32_to_cpu(cmd->u.info32.linkattr32);
+ break;
+ }
+
+ default:
+ dev_err(adapter->pdev_dev, "Handle Port Information: Bad Command/Action %#x\n",
+ be32_to_cpu(cmd->action_to_len16));
+ return;
+ }
+
+ fec = fwcap_to_cc_fec(acaps);
+ fc = fwcap_to_cc_pause(linkattr);
+ speed = fwcap_to_speed(linkattr);
+
+ if (mod_type != pi->mod_type) {
+ /* With the newer SFP28 and QSFP28 Transceiver Module Types,
+ * various fundamental Port Capabilities which used to be
+ * immutable can now change radically. We can now have
+ * Speeds, Auto-Negotiation, Forward Error Correction, etc.,
+ * all change based on what Transceiver Module is inserted.
+ * So we need to record the Physical "Port" Capabilities on
+ * every Transceiver Module change.
+ */
+ lc->pcaps = pcaps;
+
+ /* When a new Transceiver Module is inserted, the Firmware
+ * will examine its i2c EEPROM to determine its type and
+ * general operating parameters including things like Forward
+ * Error Correction, etc. Various IEEE 802.3 standards dictate
+ * how to interpret these i2c values to determine default
+ * "automatic" settings. We record these for future use when
+ * the user explicitly requests these standards-based values.
+ */
+ lc->def_acaps = acaps;
+
+ /* Some versions of the early T6 Firmware "cheated" when
+ * handling different Transceiver Modules by changing the
+ * underlying Port Type reported to the Host Drivers. As
+ * such we need to capture whatever Port Type the Firmware
+ * sends us and record it in case it's different from what we
+ * were told earlier. Unfortunately, since Firmware is
+ * forever, we'll need to keep this code here forever, but in
+ * later T6 Firmware it should just be an assignment of the
+ * same value already recorded.
+ */
+ pi->port_type = port_type;
+
+ pi->mod_type = mod_type;
+ t4_os_portmod_changed(adapter, pi->port_id);
+ }
+
if (link_ok != lc->link_ok || speed != lc->speed ||
- fc != lc->fc) { /* something changed */
+ fc != lc->fc || fec != lc->fec) { /* something changed */
if (!link_ok && lc->link_ok) {
- unsigned char rc = FW_PORT_CMD_LINKDNRC_G(stat);
-
- lc->link_down_rc = rc;
- dev_warn(adap->pdev_dev,
- "Port %d link down, reason: %s\n",
- pi->port_id, t4_link_down_rc_str(rc));
+ lc->link_down_rc = linkdnrc;
+ dev_warn(adapter->pdev_dev, "Port %d link down, reason: %s\n",
+ pi->tx_chan, t4_link_down_rc_str(linkdnrc));
}
lc->link_ok = link_ok;
lc->speed = speed;
lc->fc = fc;
- lc->supported = be16_to_cpu(p->u.info.pcap);
- lc->lp_advertising = be16_to_cpu(p->u.info.lpacap);
+ lc->fec = fec;
- t4_os_link_changed(adap, pi->port_id, link_ok);
+ lc->lpacaps = lpacaps;
+ lc->acaps = acaps & ADVERT_MASK;
+
+ if (lc->acaps & FW_PORT_CAP32_ANEG) {
+ lc->autoneg = AUTONEG_ENABLE;
+ } else {
+ /* When Autoneg is disabled, the user needs to set a
+ * single speed.
+ * Similar to cxgb4_ethtool.c: set_link_ksettings
+ */
+ lc->acaps = 0;
+ lc->speed_caps = fwcap_to_fwspeed(acaps);
+ lc->autoneg = AUTONEG_DISABLE;
+ }
+
+ t4_os_link_changed(adapter, pi->port_id, link_ok);
}
}
@@ -7542,15 +8016,18 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
*/
int t4_update_port_info(struct port_info *pi)
{
+ unsigned int fw_caps = pi->adapter->params.fw_caps_support;
struct fw_port_cmd port_cmd;
int ret;
memset(&port_cmd, 0, sizeof(port_cmd));
port_cmd.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
FW_CMD_REQUEST_F | FW_CMD_READ_F |
- FW_PORT_CMD_PORTID_V(pi->port_id));
+ FW_PORT_CMD_PORTID_V(pi->tx_chan));
port_cmd.action_to_len16 = cpu_to_be32(
- FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_GET_PORT_INFO) |
+ FW_PORT_CMD_ACTION_V(fw_caps == FW_CAPS16
+ ? FW_PORT_ACTION_GET_PORT_INFO
+ : FW_PORT_ACTION_GET_PORT_INFO32) |
FW_LEN16(port_cmd));
ret = t4_wr_mbox(pi->adapter, pi->adapter->mbox,
&port_cmd, sizeof(port_cmd), &port_cmd);
@@ -7562,6 +8039,65 @@ int t4_update_port_info(struct port_info *pi)
}
/**
+ * t4_get_link_params - retrieve basic link parameters for given port
+ * @pi: the port
+ * @link_okp: where to return the link up/down state
+ * @speedp: where to return the link speed (Mb/s)
+ * @mtup: where to return the MTU
+ *
+ * Retrieves basic link parameters for a specified port: link up/down,
+ * speed (Mb/s), and MTU. Returns 0 on success, a negative error code
+ * on failure.
+ */
+int t4_get_link_params(struct port_info *pi, unsigned int *link_okp,
+ unsigned int *speedp, unsigned int *mtup)
+{
+ unsigned int fw_caps = pi->adapter->params.fw_caps_support;
+ struct fw_port_cmd port_cmd;
+ unsigned int action, link_ok, speed, mtu;
+ fw_port_cap32_t linkattr;
+ int ret;
+
+ memset(&port_cmd, 0, sizeof(port_cmd));
+ port_cmd.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
+ FW_CMD_REQUEST_F | FW_CMD_READ_F |
+ FW_PORT_CMD_PORTID_V(pi->tx_chan));
+ action = (fw_caps == FW_CAPS16
+ ? FW_PORT_ACTION_GET_PORT_INFO
+ : FW_PORT_ACTION_GET_PORT_INFO32);
+ port_cmd.action_to_len16 = cpu_to_be32(
+ FW_PORT_CMD_ACTION_V(action) |
+ FW_LEN16(port_cmd));
+ ret = t4_wr_mbox(pi->adapter, pi->adapter->mbox,
+ &port_cmd, sizeof(port_cmd), &port_cmd);
+ if (ret)
+ return ret;
+
+ if (action == FW_PORT_ACTION_GET_PORT_INFO) {
+ u32 lstatus = be32_to_cpu(port_cmd.u.info.lstatus_to_modtype);
+
+ link_ok = !!(lstatus & FW_PORT_CMD_LSTATUS_F);
+ linkattr = lstatus_to_fwcap(lstatus);
+ mtu = be16_to_cpu(port_cmd.u.info.mtu);
+ } else {
+ u32 lstatus32 =
+ be32_to_cpu(port_cmd.u.info32.lstatus32_to_cbllen32);
+
+ link_ok = !!(lstatus32 & FW_PORT_CMD_LSTATUS32_F);
+ linkattr = be32_to_cpu(port_cmd.u.info32.linkattr32);
+ mtu = FW_PORT_CMD_MTU32_G(
+ be32_to_cpu(port_cmd.u.info32.auxlinfo32_mtu32));
+ }
+ speed = fwcap_to_speed(linkattr);
+
+ *link_okp = link_ok;
+ *speedp = speed;
+ *mtup = mtu;
+
+ return 0;
+}
+
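/* Illustrative sketch, not part of the patch: a hypothetical caller of
 * t4_get_link_params() might look like this (report_link() is an invention
 * of this example, not a driver function):
 */
static void report_link(struct port_info *pi)
{
	unsigned int link_ok, speed, mtu;

	if (t4_get_link_params(pi, &link_ok, &speed, &mtu) == 0)
		dev_info(pi->adapter->pdev_dev,
			 "link %s, %u Mb/s, MTU %u\n",
			 link_ok ? "up" : "down", speed, mtu);
}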
+/**
* t4_handle_fw_rpl - process a FW reply message
* @adap: the adapter
* @rpl: start of the FW message
@@ -7581,7 +8117,9 @@ int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl)
unsigned int action =
FW_PORT_CMD_ACTION_G(be32_to_cpu(p->action_to_len16));
- if (opcode == FW_PORT_CMD && action == FW_PORT_ACTION_GET_PORT_INFO) {
+ if (opcode == FW_PORT_CMD &&
+ (action == FW_PORT_ACTION_GET_PORT_INFO ||
+ action == FW_PORT_ACTION_GET_PORT_INFO32)) {
int i;
int chan = FW_PORT_CMD_PORTID_G(be32_to_cpu(p->op_to_portid));
struct port_info *pi = NULL;
@@ -7594,7 +8132,8 @@ int t4_handle_fw_rpl(struct adapter *adap, const __be64 *rpl)
t4_handle_get_port_info(pi, rpl);
} else {
- dev_warn(adap->pdev_dev, "Unknown firmware reply %d\n", opcode);
+ dev_warn(adap->pdev_dev, "Unknown firmware reply %d\n",
+ opcode);
return -EINVAL;
}
return 0;
@@ -7613,38 +8152,35 @@ static void get_pci_mode(struct adapter *adapter, struct pci_params *p)
/**
* init_link_config - initialize a link's SW state
- * @lc: structure holding the link state
- * @caps: link capabilities
+ * @lc: pointer to structure holding the link state
+ * @pcaps: link Port Capabilities
+ * @acaps: link current Advertised Port Capabilities
*
* Initializes the SW state maintained for each link, including the link's
* capabilities and default speed/flow-control/autonegotiation settings.
*/
-static void init_link_config(struct link_config *lc, unsigned int pcaps,
- unsigned int acaps)
+static void init_link_config(struct link_config *lc, fw_port_cap32_t pcaps,
+ fw_port_cap32_t acaps)
{
- lc->supported = pcaps;
- lc->lp_advertising = 0;
- lc->requested_speed = 0;
+ lc->pcaps = pcaps;
+ lc->def_acaps = acaps;
+ lc->lpacaps = 0;
+ lc->speed_caps = 0;
lc->speed = 0;
lc->requested_fc = lc->fc = PAUSE_RX | PAUSE_TX;
- lc->auto_fec = 0;
/* For Forward Error Control, we default to whatever the Firmware
* tells us the Link is currently advertising.
*/
- if (acaps & FW_PORT_CAP_FEC_RS)
- lc->auto_fec |= FEC_RS;
- if (acaps & FW_PORT_CAP_FEC_BASER_RS)
- lc->auto_fec |= FEC_BASER_RS;
lc->requested_fec = FEC_AUTO;
- lc->fec = lc->auto_fec;
+ lc->fec = fwcap_to_cc_fec(lc->def_acaps);
- if (lc->supported & FW_PORT_CAP_ANEG) {
- lc->advertising = lc->supported & ADVERT_MASK;
+ if (lc->pcaps & FW_PORT_CAP32_ANEG) {
+ lc->acaps = lc->pcaps & ADVERT_MASK;
lc->autoneg = AUTONEG_ENABLE;
lc->requested_fc |= PAUSE_AUTONEG;
} else {
- lc->advertising = 0;
+ lc->acaps = 0;
lc->autoneg = AUTONEG_DISABLE;
}
}
@@ -8169,7 +8705,7 @@ int t4_init_rss_mode(struct adapter *adap, int mbox)
}
/**
- * t4_init_portinfo - allocate a virtual interface amd initialize port_info
+ * t4_init_portinfo - allocate a virtual interface and initialize port_info
* @pi: the port_info
* @mbox: mailbox to use for the FW command
* @port: physical port associated with the VI
@@ -8185,21 +8721,67 @@ int t4_init_rss_mode(struct adapter *adap, int mbox)
int t4_init_portinfo(struct port_info *pi, int mbox,
int port, int pf, int vf, u8 mac[])
{
- int ret;
- struct fw_port_cmd c;
+ struct adapter *adapter = pi->adapter;
+ unsigned int fw_caps = adapter->params.fw_caps_support;
+ struct fw_port_cmd cmd;
unsigned int rss_size;
+ enum fw_port_type port_type;
+ int mdio_addr;
+ fw_port_cap32_t pcaps, acaps;
+ int ret;
- memset(&c, 0, sizeof(c));
- c.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
- FW_CMD_REQUEST_F | FW_CMD_READ_F |
- FW_PORT_CMD_PORTID_V(port));
- c.action_to_len16 = cpu_to_be32(
- FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_GET_PORT_INFO) |
- FW_LEN16(c));
- ret = t4_wr_mbox(pi->adapter, mbox, &c, sizeof(c), &c);
+ /* If we haven't yet determined whether we're talking to Firmware
+ * which knows the new 32-bit Port Capabilities, it's time to find
+ * out now. This will also tell new Firmware to send us Port Status
+ * Updates using the new 32-bit Port Capabilities version of the
+ * Port Information message.
+ */
+ if (fw_caps == FW_CAPS_UNKNOWN) {
+ u32 param, val;
+
+ param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_PORT_CAPS32));
+ val = 1;
+ ret = t4_set_params(adapter, mbox, pf, vf, 1, &param, &val);
+ fw_caps = (ret == 0 ? FW_CAPS32 : FW_CAPS16);
+ adapter->params.fw_caps_support = fw_caps;
+ }
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
+ FW_CMD_REQUEST_F | FW_CMD_READ_F |
+ FW_PORT_CMD_PORTID_V(port));
+ cmd.action_to_len16 = cpu_to_be32(
+ FW_PORT_CMD_ACTION_V(fw_caps == FW_CAPS16
+ ? FW_PORT_ACTION_GET_PORT_INFO
+ : FW_PORT_ACTION_GET_PORT_INFO32) |
+ FW_LEN16(cmd));
+ ret = t4_wr_mbox(pi->adapter, mbox, &cmd, sizeof(cmd), &cmd);
if (ret)
return ret;
+ /* Extract the various fields from the Port Information message.
+ */
+ if (fw_caps == FW_CAPS16) {
+ u32 lstatus = be32_to_cpu(cmd.u.info.lstatus_to_modtype);
+
+ port_type = FW_PORT_CMD_PTYPE_G(lstatus);
+ mdio_addr = ((lstatus & FW_PORT_CMD_MDIOCAP_F)
+ ? FW_PORT_CMD_MDIOADDR_G(lstatus)
+ : -1);
+ pcaps = fwcaps16_to_caps32(be16_to_cpu(cmd.u.info.pcap));
+ acaps = fwcaps16_to_caps32(be16_to_cpu(cmd.u.info.acap));
+ } else {
+ u32 lstatus32 = be32_to_cpu(cmd.u.info32.lstatus32_to_cbllen32);
+
+ port_type = FW_PORT_CMD_PORTTYPE32_G(lstatus32);
+ mdio_addr = ((lstatus32 & FW_PORT_CMD_MDIOCAP32_F)
+ ? FW_PORT_CMD_MDIOADDR32_G(lstatus32)
+ : -1);
+ pcaps = be32_to_cpu(cmd.u.info32.pcaps32);
+ acaps = be32_to_cpu(cmd.u.info32.acaps32);
+ }
+
ret = t4_alloc_vi(pi->adapter, mbox, port, pf, vf, 1, mac, &rss_size);
if (ret < 0)
return ret;
@@ -8209,14 +8791,11 @@ int t4_init_portinfo(struct port_info *pi, int mbox,
pi->lport = port;
pi->rss_size = rss_size;
- ret = be32_to_cpu(c.u.info.lstatus_to_modtype);
- pi->mdio_addr = (ret & FW_PORT_CMD_MDIOCAP_F) ?
- FW_PORT_CMD_MDIOADDR_G(ret) : -1;
- pi->port_type = FW_PORT_CMD_PTYPE_G(ret);
+ pi->port_type = port_type;
+ pi->mdio_addr = mdio_addr;
pi->mod_type = FW_PORT_MOD_TYPE_NA;
- init_link_config(&pi->link_cfg, be16_to_cpu(c.u.info.pcap),
- be16_to_cpu(c.u.info.acap));
+ init_link_config(&pi->link_cfg, pcaps, acaps);
return 0;
}
@@ -8664,6 +9243,65 @@ void t4_idma_monitor(struct adapter *adapter,
}
/**
+ * t4_load_cfg - download config file
+ * @adap: the adapter
+ * @cfg_data: the cfg text file to write
+ * @size: text file size
+ *
+ * Write the supplied config text file to the card's serial flash.
+ */
+int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
+{
+ int ret, i, n, cfg_addr;
+ unsigned int addr;
+ unsigned int flash_cfg_start_sec;
+ unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec;
+
+ cfg_addr = t4_flash_cfg_addr(adap);
+ if (cfg_addr < 0)
+ return cfg_addr;
+
+ addr = cfg_addr;
+ flash_cfg_start_sec = addr / SF_SEC_SIZE;
+
+ if (size > FLASH_CFG_MAX_SIZE) {
+ dev_err(adap->pdev_dev, "cfg file too large, max is %u bytes\n",
+ FLASH_CFG_MAX_SIZE);
+ return -EFBIG;
+ }
+
+ i = DIV_ROUND_UP(FLASH_CFG_MAX_SIZE, /* # of sectors spanned */
+ sf_sec_size);
+ ret = t4_flash_erase_sectors(adap, flash_cfg_start_sec,
+ flash_cfg_start_sec + i - 1);
+ /* If size == 0 then we're simply erasing the FLASH sectors associated
+ * with the on-adapter Firmware Configuration File.
+ */
+ if (ret || size == 0)
+ goto out;
+
+ /* this will write to the flash up to SF_PAGE_SIZE at a time */
+ for (i = 0; i < size; i += SF_PAGE_SIZE) {
+ if ((size - i) < SF_PAGE_SIZE)
+ n = size - i;
+ else
+ n = SF_PAGE_SIZE;
+ ret = t4_write_flash(adap, addr, n, cfg_data);
+ if (ret)
+ goto out;
+
+ addr += SF_PAGE_SIZE;
+ cfg_data += SF_PAGE_SIZE;
+ }
+
+out:
+ if (ret)
+ dev_err(adap->pdev_dev, "config file %s failed %d\n",
+ (size == 0 ? "clear" : "download"), ret);
+ return ret;
+}
+
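/* Illustrative note, not part of the patch: t4_load_cfg() doubles as an
 * erase primitive.  Calling it with a NULL buffer and size 0 erases the
 * Firmware Configuration File sectors without writing anything back, which
 * is how t4_fw_upgrade() above invalidates a possibly incompatible config
 * file after flashing new Firmware:
 *
 *	(void)t4_load_cfg(adap, NULL, 0);
 */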
+/**
* t4_set_vf_mac - Set MAC address for the specified VF
* @adapter: The adapter
* @vf: one of the VFs instantiated by the specified PF
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 0ebed64..ca2756d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1124,6 +1124,8 @@ enum fw_params_param_dev {
FW_PARAMS_PARAM_DEV_MAXIRD_ADAPTER = 0x14, /* max supported adap IRD */
FW_PARAMS_PARAM_DEV_ULPTX_MEMWRITE_DSGL = 0x17,
FW_PARAMS_PARAM_DEV_FWCACHE = 0x18,
+ FW_PARAMS_PARAM_DEV_SCFGREV = 0x1A,
+ FW_PARAMS_PARAM_DEV_VPDREV = 0x1B,
FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C,
FW_PARAMS_PARAM_DEV_MPSBGMAP = 0x1E,
};
@@ -1171,7 +1173,8 @@ enum fw_params_param_pfvf {
FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_END = 0x2E,
FW_PARAMS_PARAM_PFVF_ETHOFLD_END = 0x30,
FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP = 0x31,
- FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x32
+ FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x32,
+ FW_PARAMS_PARAM_PFVF_PORT_CAPS32 = 0x3A,
};
/*
@@ -2254,6 +2257,7 @@ struct fw_acl_vlan_cmd {
#define FW_ACL_VLAN_CMD_FM_S 6
#define FW_ACL_VLAN_CMD_FM_V(x) ((x) << FW_ACL_VLAN_CMD_FM_S)
+/* old 16-bit port capabilities bitmap (fw_port_cap16_t) */
enum fw_port_cap {
FW_PORT_CAP_SPEED_100M = 0x0001,
FW_PORT_CAP_SPEED_1G = 0x0002,
@@ -2289,6 +2293,84 @@ enum fw_port_mdi {
#define FW_PORT_CAP_MDI_S 9
#define FW_PORT_CAP_MDI_V(x) ((x) << FW_PORT_CAP_MDI_S)
+/* new 32-bit port capabilities bitmap (fw_port_cap32_t) */
+#define FW_PORT_CAP32_SPEED_100M 0x00000001UL
+#define FW_PORT_CAP32_SPEED_1G 0x00000002UL
+#define FW_PORT_CAP32_SPEED_10G 0x00000004UL
+#define FW_PORT_CAP32_SPEED_25G 0x00000008UL
+#define FW_PORT_CAP32_SPEED_40G 0x00000010UL
+#define FW_PORT_CAP32_SPEED_50G 0x00000020UL
+#define FW_PORT_CAP32_SPEED_100G 0x00000040UL
+#define FW_PORT_CAP32_SPEED_200G 0x00000080UL
+#define FW_PORT_CAP32_SPEED_400G 0x00000100UL
+#define FW_PORT_CAP32_SPEED_RESERVED1 0x00000200UL
+#define FW_PORT_CAP32_SPEED_RESERVED2 0x00000400UL
+#define FW_PORT_CAP32_SPEED_RESERVED3 0x00000800UL
+#define FW_PORT_CAP32_RESERVED1 0x0000f000UL
+#define FW_PORT_CAP32_FC_RX 0x00010000UL
+#define FW_PORT_CAP32_FC_TX 0x00020000UL
+#define FW_PORT_CAP32_802_3_PAUSE 0x00040000UL
+#define FW_PORT_CAP32_802_3_ASM_DIR 0x00080000UL
+#define FW_PORT_CAP32_ANEG 0x00100000UL
+#define FW_PORT_CAP32_MDIX 0x00200000UL
+#define FW_PORT_CAP32_MDIAUTO 0x00400000UL
+#define FW_PORT_CAP32_FEC_RS 0x00800000UL
+#define FW_PORT_CAP32_FEC_BASER_RS 0x01000000UL
+#define FW_PORT_CAP32_FEC_RESERVED1 0x02000000UL
+#define FW_PORT_CAP32_FEC_RESERVED2 0x04000000UL
+#define FW_PORT_CAP32_FEC_RESERVED3 0x08000000UL
+#define FW_PORT_CAP32_RESERVED2 0xf0000000UL
+
+#define FW_PORT_CAP32_SPEED_S 0
+#define FW_PORT_CAP32_SPEED_M 0xfff
+#define FW_PORT_CAP32_SPEED_V(x) ((x) << FW_PORT_CAP32_SPEED_S)
+#define FW_PORT_CAP32_SPEED_G(x) \
+ (((x) >> FW_PORT_CAP32_SPEED_S) & FW_PORT_CAP32_SPEED_M)
+
+#define FW_PORT_CAP32_FC_S 16
+#define FW_PORT_CAP32_FC_M 0x3
+#define FW_PORT_CAP32_FC_V(x) ((x) << FW_PORT_CAP32_FC_S)
+#define FW_PORT_CAP32_FC_G(x) \
+ (((x) >> FW_PORT_CAP32_FC_S) & FW_PORT_CAP32_FC_M)
+
+#define FW_PORT_CAP32_802_3_S 18
+#define FW_PORT_CAP32_802_3_M 0x3
+#define FW_PORT_CAP32_802_3_V(x) ((x) << FW_PORT_CAP32_802_3_S)
+#define FW_PORT_CAP32_802_3_G(x) \
+ (((x) >> FW_PORT_CAP32_802_3_S) & FW_PORT_CAP32_802_3_M)
+
+#define FW_PORT_CAP32_ANEG_S 20
+#define FW_PORT_CAP32_ANEG_M 0x1
+#define FW_PORT_CAP32_ANEG_V(x) ((x) << FW_PORT_CAP32_ANEG_S)
+#define FW_PORT_CAP32_ANEG_G(x) \
+ (((x) >> FW_PORT_CAP32_ANEG_S) & FW_PORT_CAP32_ANEG_M)
+
+enum fw_port_mdi32 {
+ FW_PORT_CAP32_MDI_UNCHANGED,
+ FW_PORT_CAP32_MDI_AUTO,
+ FW_PORT_CAP32_MDI_F_STRAIGHT,
+ FW_PORT_CAP32_MDI_F_CROSSOVER
+};
+
+#define FW_PORT_CAP32_MDI_S 21
+#define FW_PORT_CAP32_MDI_M 3
+#define FW_PORT_CAP32_MDI_V(x) ((x) << FW_PORT_CAP32_MDI_S)
+#define FW_PORT_CAP32_MDI_G(x) \
+ (((x) >> FW_PORT_CAP32_MDI_S) & FW_PORT_CAP32_MDI_M)
+
+#define FW_PORT_CAP32_FEC_S 23
+#define FW_PORT_CAP32_FEC_M 0x1f
+#define FW_PORT_CAP32_FEC_V(x) ((x) << FW_PORT_CAP32_FEC_S)
+#define FW_PORT_CAP32_FEC_G(x) \
+ (((x) >> FW_PORT_CAP32_FEC_S) & FW_PORT_CAP32_FEC_M)
+
+/* macros to isolate various 32-bit Port Capabilities sub-fields */
+#define CAP32_SPEED(__cap32) \
+ (FW_PORT_CAP32_SPEED_V(FW_PORT_CAP32_SPEED_M) & __cap32)
+
+#define CAP32_FEC(__cap32) \
+ (FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M) & __cap32)
+
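/* Illustrative sketch, not part of the patch: the _S/_M/_V/_G macros above
 * follow the usual shift/mask pattern, so replacing one sub-field of a
 * 32-bit capability word looks like this (cap32_replace_speed() is an
 * invention of this example, and the fw_port_cap32_t typedef from the
 * driver headers is assumed to be in scope):
 */
static inline fw_port_cap32_t cap32_replace_speed(fw_port_cap32_t caps,
						  unsigned int speed_bits)
{
	/* Clear the 12-bit speed field, then install the new speed bits */
	caps &= ~FW_PORT_CAP32_SPEED_V(FW_PORT_CAP32_SPEED_M);
	caps |= FW_PORT_CAP32_SPEED_V(speed_bits & FW_PORT_CAP32_SPEED_M);
	return caps;
}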
enum fw_port_action {
FW_PORT_ACTION_L1_CFG = 0x0001,
FW_PORT_ACTION_L2_CFG = 0x0002,
@@ -2298,6 +2380,8 @@ enum fw_port_action {
FW_PORT_ACTION_DCB_READ_TRANS = 0x0006,
FW_PORT_ACTION_DCB_READ_RECV = 0x0007,
FW_PORT_ACTION_DCB_READ_DET = 0x0008,
+ FW_PORT_ACTION_L1_CFG32 = 0x0009,
+ FW_PORT_ACTION_GET_PORT_INFO32 = 0x000a,
FW_PORT_ACTION_LOW_PWR_TO_NORMAL = 0x0010,
FW_PORT_ACTION_L1_LOW_PWR_EN = 0x0011,
FW_PORT_ACTION_L2_WOL_MODE_EN = 0x0012,
@@ -2445,6 +2529,18 @@ struct fw_port_cmd {
__be64 r12;
} control;
} dcb;
+ struct fw_port_l1cfg32 {
+ __be32 rcap32;
+ __be32 r;
+ } l1cfg32;
+ struct fw_port_info32 {
+ __be32 lstatus32_to_cbllen32;
+ __be32 auxlinfo32_mtu32;
+ __be32 linkattr32;
+ __be32 pcaps32;
+ __be32 acaps32;
+ __be32 lpacaps32;
+ } info32;
} u;
};
@@ -2553,6 +2649,85 @@ struct fw_port_cmd {
#define FW_PORT_CMD_DCB_VERSION_G(x) \
(((x) >> FW_PORT_CMD_DCB_VERSION_S) & FW_PORT_CMD_DCB_VERSION_M)
+#define FW_PORT_CMD_LSTATUS32_S 31
+#define FW_PORT_CMD_LSTATUS32_M 0x1
+#define FW_PORT_CMD_LSTATUS32_V(x) ((x) << FW_PORT_CMD_LSTATUS32_S)
+#define FW_PORT_CMD_LSTATUS32_G(x) \
+ (((x) >> FW_PORT_CMD_LSTATUS32_S) & FW_PORT_CMD_LSTATUS32_M)
+#define FW_PORT_CMD_LSTATUS32_F FW_PORT_CMD_LSTATUS32_V(1U)
+
+#define FW_PORT_CMD_LINKDNRC32_S 28
+#define FW_PORT_CMD_LINKDNRC32_M 0x7
+#define FW_PORT_CMD_LINKDNRC32_V(x) ((x) << FW_PORT_CMD_LINKDNRC32_S)
+#define FW_PORT_CMD_LINKDNRC32_G(x) \
+ (((x) >> FW_PORT_CMD_LINKDNRC32_S) & FW_PORT_CMD_LINKDNRC32_M)
+
+#define FW_PORT_CMD_DCBXDIS32_S 27
+#define FW_PORT_CMD_DCBXDIS32_M 0x1
+#define FW_PORT_CMD_DCBXDIS32_V(x) ((x) << FW_PORT_CMD_DCBXDIS32_S)
+#define FW_PORT_CMD_DCBXDIS32_G(x) \
+ (((x) >> FW_PORT_CMD_DCBXDIS32_S) & FW_PORT_CMD_DCBXDIS32_M)
+#define FW_PORT_CMD_DCBXDIS32_F FW_PORT_CMD_DCBXDIS32_V(1U)
+
+#define FW_PORT_CMD_MDIOCAP32_S 26
+#define FW_PORT_CMD_MDIOCAP32_M 0x1
+#define FW_PORT_CMD_MDIOCAP32_V(x) ((x) << FW_PORT_CMD_MDIOCAP32_S)
+#define FW_PORT_CMD_MDIOCAP32_G(x) \
+ (((x) >> FW_PORT_CMD_MDIOCAP32_S) & FW_PORT_CMD_MDIOCAP32_M)
+#define FW_PORT_CMD_MDIOCAP32_F FW_PORT_CMD_MDIOCAP32_V(1U)
+
+#define FW_PORT_CMD_MDIOADDR32_S 21
+#define FW_PORT_CMD_MDIOADDR32_M 0x1f
+#define FW_PORT_CMD_MDIOADDR32_V(x) ((x) << FW_PORT_CMD_MDIOADDR32_S)
+#define FW_PORT_CMD_MDIOADDR32_G(x) \
+ (((x) >> FW_PORT_CMD_MDIOADDR32_S) & FW_PORT_CMD_MDIOADDR32_M)
+
+#define FW_PORT_CMD_PORTTYPE32_S 13
+#define FW_PORT_CMD_PORTTYPE32_M 0xff
+#define FW_PORT_CMD_PORTTYPE32_V(x) ((x) << FW_PORT_CMD_PORTTYPE32_S)
+#define FW_PORT_CMD_PORTTYPE32_G(x) \
+ (((x) >> FW_PORT_CMD_PORTTYPE32_S) & FW_PORT_CMD_PORTTYPE32_M)
+
+#define FW_PORT_CMD_MODTYPE32_S 8
+#define FW_PORT_CMD_MODTYPE32_M 0x1f
+#define FW_PORT_CMD_MODTYPE32_V(x) ((x) << FW_PORT_CMD_MODTYPE32_S)
+#define FW_PORT_CMD_MODTYPE32_G(x) \
+ (((x) >> FW_PORT_CMD_MODTYPE32_S) & FW_PORT_CMD_MODTYPE32_M)
+
+#define FW_PORT_CMD_CBLLEN32_S 0
+#define FW_PORT_CMD_CBLLEN32_M 0xff
+#define FW_PORT_CMD_CBLLEN32_V(x) ((x) << FW_PORT_CMD_CBLLEN32_S)
+#define FW_PORT_CMD_CBLLEN32_G(x) \
+ (((x) >> FW_PORT_CMD_CBLLEN32_S) & FW_PORT_CMD_CBLLEN32_M)
+
+#define FW_PORT_CMD_AUXLINFO32_S 24
+#define FW_PORT_CMD_AUXLINFO32_M 0xff
+#define FW_PORT_CMD_AUXLINFO32_V(x) ((x) << FW_PORT_CMD_AUXLINFO32_S)
+#define FW_PORT_CMD_AUXLINFO32_G(x) \
+ (((x) >> FW_PORT_CMD_AUXLINFO32_S) & FW_PORT_CMD_AUXLINFO32_M)
+
+#define FW_PORT_AUXLINFO32_KX4_S 2
+#define FW_PORT_AUXLINFO32_KX4_M 0x1
+#define FW_PORT_AUXLINFO32_KX4_V(x) \
+ ((x) << FW_PORT_AUXLINFO32_KX4_S)
+#define FW_PORT_AUXLINFO32_KX4_G(x) \
+ (((x) >> FW_PORT_AUXLINFO32_KX4_S) & FW_PORT_AUXLINFO32_KX4_M)
+#define FW_PORT_AUXLINFO32_KX4_F FW_PORT_AUXLINFO32_KX4_V(1U)
+
+#define FW_PORT_AUXLINFO32_KR_S 1
+#define FW_PORT_AUXLINFO32_KR_M 0x1
+#define FW_PORT_AUXLINFO32_KR_V(x) \
+ ((x) << FW_PORT_AUXLINFO32_KR_S)
+#define FW_PORT_AUXLINFO32_KR_G(x) \
+ (((x) >> FW_PORT_AUXLINFO32_KR_S) & FW_PORT_AUXLINFO32_KR_M)
+#define FW_PORT_AUXLINFO32_KR_F FW_PORT_AUXLINFO32_KR_V(1U)
+
+#define FW_PORT_CMD_MTU32_S 0
+#define FW_PORT_CMD_MTU32_M 0xffff
+#define FW_PORT_CMD_MTU32_V(x) ((x) << FW_PORT_CMD_MTU32_S)
+#define FW_PORT_CMD_MTU32_G(x) \
+ (((x) >> FW_PORT_CMD_MTU32_S) & FW_PORT_CMD_MTU32_M)
+
enum fw_port_type {
FW_PORT_TYPE_FIBER_XFI,
FW_PORT_TYPE_FIBER_XAUI,
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 2b85b87..8996ebb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -182,7 +182,7 @@ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
break;
}
- switch (pi->link_cfg.fc) {
+ switch ((int)pi->link_cfg.fc) {
case PAUSE_RX:
fc = "RX";
break;
@@ -191,7 +191,7 @@ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
fc = "TX";
break;
- case PAUSE_RX|PAUSE_TX:
+ case PAUSE_RX | PAUSE_TX:
fc = "RX/TX";
break;
@@ -1213,7 +1213,11 @@ static int from_fw_port_mod_type(enum fw_port_type port_type,
} else if (port_type == FW_PORT_TYPE_SFP ||
port_type == FW_PORT_TYPE_QSFP_10G ||
port_type == FW_PORT_TYPE_QSA ||
- port_type == FW_PORT_TYPE_QSFP) {
+ port_type == FW_PORT_TYPE_QSFP ||
+ port_type == FW_PORT_TYPE_CR4_QSFP ||
+ port_type == FW_PORT_TYPE_CR_QSFP ||
+ port_type == FW_PORT_TYPE_CR2_QSFP ||
+ port_type == FW_PORT_TYPE_SFP28) {
if (mod_type == FW_PORT_MOD_TYPE_LR ||
mod_type == FW_PORT_MOD_TYPE_SR ||
mod_type == FW_PORT_MOD_TYPE_ER ||
@@ -1224,6 +1228,9 @@ static int from_fw_port_mod_type(enum fw_port_type port_type,
return PORT_DA;
else
return PORT_OTHER;
+ } else if (port_type == FW_PORT_TYPE_KR4_100G ||
+ port_type == FW_PORT_TYPE_KR_SFP28) {
+ return PORT_NONE;
}
return PORT_OTHER;
@@ -1242,12 +1249,13 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
unsigned int fw_caps,
unsigned long *link_mode_mask)
{
- #define SET_LMM(__lmm_name) __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name\
- ## _BIT, link_mode_mask)
+ #define SET_LMM(__lmm_name) \
+ __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
+ link_mode_mask)
#define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
do { \
- if (fw_caps & FW_PORT_CAP_ ## __fw_name) \
+ if (fw_caps & FW_PORT_CAP32_ ## __fw_name) \
SET_LMM(__lmm_name); \
} while (0)
@@ -1310,6 +1318,16 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
SET_LMM(25000baseCR_Full);
break;
+ case FW_PORT_TYPE_KR_SFP28:
+ SET_LMM(Backplane);
+ SET_LMM(25000baseKR_Full);
+ break;
+
+ case FW_PORT_TYPE_CR2_QSFP:
+ SET_LMM(FIBRE);
+ SET_LMM(50000baseSR2_Full);
+ break;
+
case FW_PORT_TYPE_KR4_100G:
case FW_PORT_TYPE_CR4_QSFP:
SET_LMM(FIBRE);
@@ -1329,12 +1347,18 @@ static void fw_caps_to_lmm(enum fw_port_type port_type,
}
static int cxgb4vf_get_link_ksettings(struct net_device *dev,
- struct ethtool_link_ksettings
- *link_ksettings)
+ struct ethtool_link_ksettings *link_ksettings)
{
- const struct port_info *pi = netdev_priv(dev);
+ struct port_info *pi = netdev_priv(dev);
struct ethtool_link_settings *base = &link_ksettings->base;
+ /* For now, the Firmware doesn't send up Port State changes
+ * when the Virtual Interface attached to the Port is down, so
+ * if it's down, grab any changes ourselves.
+ */
+ if (!netif_running(dev))
+ (void)t4vf_update_port_info(pi);
+
ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
@@ -1351,11 +1375,11 @@ static int cxgb4vf_get_link_ksettings(struct net_device *dev,
base->mdio_support = 0;
}
- fw_caps_to_lmm(pi->port_type, pi->link_cfg.supported,
+ fw_caps_to_lmm(pi->port_type, pi->link_cfg.pcaps,
link_ksettings->link_modes.supported);
- fw_caps_to_lmm(pi->port_type, pi->link_cfg.advertising,
+ fw_caps_to_lmm(pi->port_type, pi->link_cfg.acaps,
link_ksettings->link_modes.advertising);
- fw_caps_to_lmm(pi->port_type, pi->link_cfg.lp_advertising,
+ fw_caps_to_lmm(pi->port_type, pi->link_cfg.lpacaps,
link_ksettings->link_modes.lp_advertising);
if (netif_carrier_ok(dev)) {
@@ -1367,7 +1391,7 @@ static int cxgb4vf_get_link_ksettings(struct net_device *dev,
}
base->autoneg = pi->link_cfg.autoneg;
- if (pi->link_cfg.supported & FW_PORT_CAP_ANEG)
+ if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
ethtool_link_ksettings_add_link_mode(link_ksettings,
supported, Autoneg);
if (pi->link_cfg.autoneg)
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
index b3903fe..9cf9c56 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
@@ -104,24 +104,62 @@ struct t4vf_port_stats {
/*
* Per-"port" (Virtual Interface) link configuration ...
*/
-struct link_config {
- unsigned int supported; /* link capabilities */
- unsigned int advertising; /* advertised capabilities */
- unsigned short lp_advertising; /* peer advertised capabilities */
- unsigned int requested_speed; /* speed user has requested */
- unsigned int speed; /* actual link speed */
- unsigned char requested_fc; /* flow control user has requested */
- unsigned char fc; /* actual link flow control */
- unsigned char autoneg; /* autonegotiating? */
- unsigned char link_ok; /* link up? */
+typedef u16 fw_port_cap16_t; /* 16-bit Port Capabilities integral value */
+typedef u32 fw_port_cap32_t; /* 32-bit Port Capabilities integral value */
+
+enum fw_caps {
+ FW_CAPS_UNKNOWN = 0, /* 0'ed out initial state */
+ FW_CAPS16 = 1, /* old Firmware: 16-bit Port Capabilities */
+ FW_CAPS32 = 2, /* new Firmware: 32-bit Port Capabilities */
};
-enum {
- PAUSE_RX = 1 << 0,
- PAUSE_TX = 1 << 1,
- PAUSE_AUTONEG = 1 << 2
+enum cc_pause {
+ PAUSE_RX = 1 << 0,
+ PAUSE_TX = 1 << 1,
+ PAUSE_AUTONEG = 1 << 2
+};
+
+enum cc_fec {
+ FEC_AUTO = 1 << 0, /* IEEE 802.3 "automatic" */
+ FEC_RS = 1 << 1, /* Reed-Solomon */
+ FEC_BASER_RS = 1 << 2, /* BaseR/Reed-Solomon */
+};
+
+struct link_config {
+ fw_port_cap32_t pcaps; /* link capabilities */
+ fw_port_cap32_t acaps; /* advertised capabilities */
+ fw_port_cap32_t lpacaps; /* peer advertised capabilities */
+
+ fw_port_cap32_t speed_caps; /* speed(s) user has requested */
+ u32 speed; /* actual link speed */
+
+ enum cc_pause requested_fc; /* flow control user has requested */
+ enum cc_pause fc; /* actual link flow control */
+
+ enum cc_fec auto_fec; /* Forward Error Correction: */
+ enum cc_fec requested_fec; /* "automatic" (IEEE 802.3), */
+ enum cc_fec fec; /* requested, and actual in use */
+
+ unsigned char autoneg; /* autonegotiating? */
+
+ unsigned char link_ok; /* link up? */
+ unsigned char link_down_rc; /* link down reason */
};
+/* Return true if the Link Configuration supports "High Speeds" (those greater
+ * than 1Gb/s).
+ */
+static inline bool is_x_10g_port(const struct link_config *lc)
+{
+ fw_port_cap32_t speeds, high_speeds;
+
+ speeds = FW_PORT_CAP32_SPEED_V(FW_PORT_CAP32_SPEED_G(lc->pcaps));
+ high_speeds =
+ speeds & ~(FW_PORT_CAP32_SPEED_100M | FW_PORT_CAP32_SPEED_1G);
+
+ return high_speeds != 0;
+}
+
/*
* General device parameters ...
*/
@@ -227,6 +265,7 @@ struct adapter_params {
struct arch_specific_params arch; /* chip specific params */
enum chip_type chip; /* chip code */
u8 nports; /* # of Ethernet "ports" */
+ u8 fw_caps_support; /* 32-bit Port Capabilities */
};
/* Firmware Mailbox Command/Reply log. All values are in Host-Endian format.
@@ -266,24 +305,6 @@ static inline struct mbox_cmd *mbox_cmd_log_entry(struct mbox_cmd_log *log,
#define for_each_port(adapter, iter) \
for (iter = 0; iter < (adapter)->params.nports; iter++)
-static inline bool is_10g_port(const struct link_config *lc)
-{
- return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
-}
-
-/* Return true if the Link Configuration supports "High Speeds" (those greater
- * than 1Gb/s).
- */
-static inline bool is_x_10g_port(const struct link_config *lc)
-{
- unsigned int speeds, high_speeds;
-
- speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
- high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
-
- return high_speeds != 0;
-}
-
static inline unsigned int core_ticks_per_usec(const struct adapter *adapter)
{
return adapter->params.vpd.cclk / 1000;
@@ -387,6 +408,7 @@ int t4vf_iq_free(struct adapter *, unsigned int, unsigned int, unsigned int,
unsigned int);
int t4vf_eth_eq_free(struct adapter *, unsigned int);
+int t4vf_update_port_info(struct port_info *pi);
int t4vf_handle_fw_rpl(struct adapter *, const __be64 *);
int t4vf_prep_adapter(struct adapter *);
int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index e98248f..a8d9496 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -313,32 +313,130 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size,
return ret;
}
-#define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
- FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
- FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
- FW_PORT_CAP_ANEG)
+#define ADVERT_MASK (FW_PORT_CAP32_SPEED_V(FW_PORT_CAP32_SPEED_M) | \
+ FW_PORT_CAP32_ANEG)
/**
+ * fwcaps16_to_caps32 - convert 16-bit Port Capabilities to 32-bits
+ * @caps16: a 16-bit Port Capabilities value
+ *
+ * Returns the equivalent 32-bit Port Capabilities value.
+ */
+static fw_port_cap32_t fwcaps16_to_caps32(fw_port_cap16_t caps16)
+{
+ fw_port_cap32_t caps32 = 0;
+
+ #define CAP16_TO_CAP32(__cap) \
+ do { \
+ if (caps16 & FW_PORT_CAP_##__cap) \
+ caps32 |= FW_PORT_CAP32_##__cap; \
+ } while (0)
+
+ CAP16_TO_CAP32(SPEED_100M);
+ CAP16_TO_CAP32(SPEED_1G);
+ CAP16_TO_CAP32(SPEED_25G);
+ CAP16_TO_CAP32(SPEED_10G);
+ CAP16_TO_CAP32(SPEED_40G);
+ CAP16_TO_CAP32(SPEED_100G);
+ CAP16_TO_CAP32(FC_RX);
+ CAP16_TO_CAP32(FC_TX);
+ CAP16_TO_CAP32(ANEG);
+ CAP16_TO_CAP32(MDIX);
+ CAP16_TO_CAP32(MDIAUTO);
+ CAP16_TO_CAP32(FEC_RS);
+ CAP16_TO_CAP32(FEC_BASER_RS);
+ CAP16_TO_CAP32(802_3_PAUSE);
+ CAP16_TO_CAP32(802_3_ASM_DIR);
+
+ #undef CAP16_TO_CAP32
+
+ return caps32;
+}
+
+/* Translate Firmware Pause specification to Common Code */
+static inline enum cc_pause fwcap_to_cc_pause(fw_port_cap32_t fw_pause)
+{
+ enum cc_pause cc_pause = 0;
+
+ if (fw_pause & FW_PORT_CAP32_FC_RX)
+ cc_pause |= PAUSE_RX;
+ if (fw_pause & FW_PORT_CAP32_FC_TX)
+ cc_pause |= PAUSE_TX;
+
+ return cc_pause;
+}
+
+/* Translate Firmware Forward Error Correction specification to Common Code */
+static inline enum cc_fec fwcap_to_cc_fec(fw_port_cap32_t fw_fec)
+{
+ enum cc_fec cc_fec = 0;
+
+ if (fw_fec & FW_PORT_CAP32_FEC_RS)
+ cc_fec |= FEC_RS;
+ if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
+ cc_fec |= FEC_BASER_RS;
+
+ return cc_fec;
+}
+
+/**
+ * fwcap_to_speed - return the highest speed in the Port Capabilities
+ * @caps: a 32-bit Port Capabilities value
+ *
+ * Returns the highest speed set in the Port Capabilities, in Mb/s.
+ */
+static unsigned int fwcap_to_speed(fw_port_cap32_t caps)
+{
+ #define TEST_SPEED_RETURN(__caps_speed, __speed) \
+ do { \
+ if (caps & FW_PORT_CAP32_SPEED_##__caps_speed) \
+ return __speed; \
+ } while (0)
+
+ TEST_SPEED_RETURN(400G, 400000);
+ TEST_SPEED_RETURN(200G, 200000);
+ TEST_SPEED_RETURN(100G, 100000);
+ TEST_SPEED_RETURN(50G, 50000);
+ TEST_SPEED_RETURN(40G, 40000);
+ TEST_SPEED_RETURN(25G, 25000);
+ TEST_SPEED_RETURN(10G, 10000);
+ TEST_SPEED_RETURN(1G, 1000);
+ TEST_SPEED_RETURN(100M, 100);
+
+ #undef TEST_SPEED_RETURN
+
+ return 0;
+}
+
+/*
* init_link_config - initialize a link's SW state
* @lc: structure holding the link state
- * @caps: link capabilities
+ * @pcaps: link Port Capabilities
+ * @acaps: link current Advertised Port Capabilities
*
* Initializes the SW state maintained for each link, including the link's
* capabilities and default speed/flow-control/autonegotiation settings.
*/
-static void init_link_config(struct link_config *lc, unsigned int caps)
+static void init_link_config(struct link_config *lc,
+ fw_port_cap32_t pcaps,
+ fw_port_cap32_t acaps)
{
- lc->supported = caps;
- lc->lp_advertising = 0;
- lc->requested_speed = 0;
+ lc->pcaps = pcaps;
+ lc->lpacaps = 0;
+ lc->speed_caps = 0;
lc->speed = 0;
lc->requested_fc = lc->fc = PAUSE_RX | PAUSE_TX;
- if (lc->supported & FW_PORT_CAP_ANEG) {
- lc->advertising = lc->supported & ADVERT_MASK;
+
+ /* For Forward Error Correction, we default to whatever the Firmware
+ * tells us the Link is currently advertising.
+ */
+ lc->auto_fec = fwcap_to_cc_fec(acaps);
+ lc->requested_fec = FEC_AUTO;
+ lc->fec = lc->auto_fec;
+
+ if (lc->pcaps & FW_PORT_CAP32_ANEG) {
+ lc->acaps = acaps & ADVERT_MASK;
lc->autoneg = AUTONEG_ENABLE;
lc->requested_fc |= PAUSE_AUTONEG;
} else {
- lc->advertising = 0;
+ lc->acaps = 0;
lc->autoneg = AUTONEG_DISABLE;
}
}
@@ -351,9 +449,30 @@ static void init_link_config(struct link_config *lc, unsigned int caps)
int t4vf_port_init(struct adapter *adapter, int pidx)
{
struct port_info *pi = adap2pinfo(adapter, pidx);
+ unsigned int fw_caps = adapter->params.fw_caps_support;
struct fw_vi_cmd vi_cmd, vi_rpl;
struct fw_port_cmd port_cmd, port_rpl;
- int v;
+ enum fw_port_type port_type;
+ int mdio_addr;
+ fw_port_cap32_t pcaps, acaps;
+ int ret;
+
+ /* If we haven't yet determined whether we're talking to Firmware
+ * which knows the new 32-bit Port Capabilities, it's time to find
+ * out now. This will also tell new Firmware to send us Port Status
+ * Updates using the new 32-bit Port Capabilities version of the
+ * Port Information message.
+ */
+ if (fw_caps == FW_CAPS_UNKNOWN) {
+ u32 param, val;
+
+ param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_PORT_CAPS32));
+ val = 1;
+ ret = t4vf_set_params(adapter, 1, &param, &val);
+ fw_caps = (ret == 0 ? FW_CAPS32 : FW_CAPS16);
+ adapter->params.fw_caps_support = fw_caps;
+ }
/*
* Execute a VI Read command to get our Virtual Interface information
@@ -365,9 +484,9 @@ int t4vf_port_init(struct adapter *adapter, int pidx)
FW_CMD_READ_F);
vi_cmd.alloc_to_len16 = cpu_to_be32(FW_LEN16(vi_cmd));
vi_cmd.type_viid = cpu_to_be16(FW_VI_CMD_VIID_V(pi->viid));
- v = t4vf_wr_mbox(adapter, &vi_cmd, sizeof(vi_cmd), &vi_rpl);
- if (v)
- return v;
+ ret = t4vf_wr_mbox(adapter, &vi_cmd, sizeof(vi_cmd), &vi_rpl);
+ if (ret != FW_SUCCESS)
+ return ret;
BUG_ON(pi->port_id != FW_VI_CMD_PORTID_G(vi_rpl.portid_pkd));
pi->rss_size = FW_VI_CMD_RSSSIZE_G(be16_to_cpu(vi_rpl.rsssize_pkd));
@@ -385,21 +504,42 @@ int t4vf_port_init(struct adapter *adapter, int pidx)
FW_CMD_REQUEST_F |
FW_CMD_READ_F |
FW_PORT_CMD_PORTID_V(pi->port_id));
- port_cmd.action_to_len16 =
- cpu_to_be32(FW_PORT_CMD_ACTION_V(FW_PORT_ACTION_GET_PORT_INFO) |
- FW_LEN16(port_cmd));
- v = t4vf_wr_mbox(adapter, &port_cmd, sizeof(port_cmd), &port_rpl);
- if (v)
- return v;
+ port_cmd.action_to_len16 = cpu_to_be32(
+ FW_PORT_CMD_ACTION_V(fw_caps == FW_CAPS16
+ ? FW_PORT_ACTION_GET_PORT_INFO
+ : FW_PORT_ACTION_GET_PORT_INFO32) |
+ FW_LEN16(port_cmd));
+ ret = t4vf_wr_mbox(adapter, &port_cmd, sizeof(port_cmd), &port_rpl);
+ if (ret != FW_SUCCESS)
+ return ret;
- v = be32_to_cpu(port_rpl.u.info.lstatus_to_modtype);
- pi->mdio_addr = (v & FW_PORT_CMD_MDIOCAP_F) ?
- FW_PORT_CMD_MDIOADDR_G(v) : -1;
- pi->port_type = FW_PORT_CMD_PTYPE_G(v);
- pi->mod_type = FW_PORT_MOD_TYPE_NA;
+ /* Extract the various fields from the Port Information message. */
+ if (fw_caps == FW_CAPS16) {
+ u32 lstatus = be32_to_cpu(port_rpl.u.info.lstatus_to_modtype);
- init_link_config(&pi->link_cfg, be16_to_cpu(port_rpl.u.info.pcap));
+ port_type = FW_PORT_CMD_PTYPE_G(lstatus);
+ mdio_addr = ((lstatus & FW_PORT_CMD_MDIOCAP_F)
+ ? FW_PORT_CMD_MDIOADDR_G(lstatus)
+ : -1);
+ pcaps = fwcaps16_to_caps32(be16_to_cpu(port_rpl.u.info.pcap));
+ acaps = fwcaps16_to_caps32(be16_to_cpu(port_rpl.u.info.acap));
+ } else {
+ u32 lstatus32 =
+ be32_to_cpu(port_rpl.u.info32.lstatus32_to_cbllen32);
+
+ port_type = FW_PORT_CMD_PORTTYPE32_G(lstatus32);
+ mdio_addr = ((lstatus32 & FW_PORT_CMD_MDIOCAP32_F)
+ ? FW_PORT_CMD_MDIOADDR32_G(lstatus32)
+ : -1);
+ pcaps = be32_to_cpu(port_rpl.u.info32.pcaps32);
+ acaps = be32_to_cpu(port_rpl.u.info32.acaps32);
+ }
+ pi->port_type = port_type;
+ pi->mdio_addr = mdio_addr;
+ pi->mod_type = FW_PORT_MOD_TYPE_NA;
+
+ init_link_config(&pi->link_cfg, pcaps, acaps);
return 0;
}
@@ -1667,6 +1807,202 @@ int t4vf_eth_eq_free(struct adapter *adapter, unsigned int eqid)
}
/**
+ * t4vf_link_down_rc_str - return a string for a Link Down Reason Code
+ * @link_down_rc: Link Down Reason Code
+ *
+ * Returns a string representation of the Link Down Reason Code.
+ */
+const char *t4vf_link_down_rc_str(unsigned char link_down_rc)
+{
+ static const char * const reason[] = {
+ "Link Down",
+ "Remote Fault",
+ "Auto-negotiation Failure",
+ "Reserved",
+ "Insufficient Airflow",
+ "Unable To Determine Reason",
+ "No RX Signal Detected",
+ "Reserved",
+ };
+
+ if (link_down_rc >= ARRAY_SIZE(reason))
+ return "Bad Reason Code";
+
+ return reason[link_down_rc];
+}
+
+/**
+ * t4vf_handle_get_port_info - process a FW reply message
+ * @pi: the port info
+ * @cmd: the FW Port Information command/reply to process
+ *
+ * Processes a GET_PORT_INFO FW reply message.
+ */
+void t4vf_handle_get_port_info(struct port_info *pi,
+ const struct fw_port_cmd *cmd)
+{
+ int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
+ struct adapter *adapter = pi->adapter;
+ struct link_config *lc = &pi->link_cfg;
+ int link_ok, linkdnrc;
+ enum fw_port_type port_type;
+ enum fw_port_module_type mod_type;
+ unsigned int speed, fc, fec;
+ fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
+
+ /* Extract the various fields from the Port Information message. */
+ switch (action) {
+ case FW_PORT_ACTION_GET_PORT_INFO: {
+ u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype);
+
+ link_ok = (lstatus & FW_PORT_CMD_LSTATUS_F) != 0;
+ linkdnrc = FW_PORT_CMD_LINKDNRC_G(lstatus);
+ port_type = FW_PORT_CMD_PTYPE_G(lstatus);
+ mod_type = FW_PORT_CMD_MODTYPE_G(lstatus);
+ pcaps = fwcaps16_to_caps32(be16_to_cpu(cmd->u.info.pcap));
+ acaps = fwcaps16_to_caps32(be16_to_cpu(cmd->u.info.acap));
+ lpacaps = fwcaps16_to_caps32(be16_to_cpu(cmd->u.info.lpacap));
+
+ /* Unfortunately the format of the Link Status in the old
+ * 16-bit Port Information message isn't the same as the
+ * 16-bit Port Capabilities bitfield used everywhere else ...
+ */
+ linkattr = 0;
+ if (lstatus & FW_PORT_CMD_RXPAUSE_F)
+ linkattr |= FW_PORT_CAP32_FC_RX;
+ if (lstatus & FW_PORT_CMD_TXPAUSE_F)
+ linkattr |= FW_PORT_CAP32_FC_TX;
+ if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100M))
+ linkattr |= FW_PORT_CAP32_SPEED_100M;
+ if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_1G))
+ linkattr |= FW_PORT_CAP32_SPEED_1G;
+ if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
+ linkattr |= FW_PORT_CAP32_SPEED_10G;
+ if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
+ linkattr |= FW_PORT_CAP32_SPEED_25G;
+ if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
+ linkattr |= FW_PORT_CAP32_SPEED_40G;
+ if (lstatus & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
+ linkattr |= FW_PORT_CAP32_SPEED_100G;
+
+ break;
+ }
+
+ case FW_PORT_ACTION_GET_PORT_INFO32: {
+ u32 lstatus32;
+
+ lstatus32 = be32_to_cpu(cmd->u.info32.lstatus32_to_cbllen32);
+ link_ok = (lstatus32 & FW_PORT_CMD_LSTATUS32_F) != 0;
+ linkdnrc = FW_PORT_CMD_LINKDNRC32_G(lstatus32);
+ port_type = FW_PORT_CMD_PORTTYPE32_G(lstatus32);
+ mod_type = FW_PORT_CMD_MODTYPE32_G(lstatus32);
+ pcaps = be32_to_cpu(cmd->u.info32.pcaps32);
+ acaps = be32_to_cpu(cmd->u.info32.acaps32);
+ lpacaps = be32_to_cpu(cmd->u.info32.lpacaps32);
+ linkattr = be32_to_cpu(cmd->u.info32.linkattr32);
+ break;
+ }
+
+ default:
+ dev_err(adapter->pdev_dev, "Handle Port Information: Bad Command/Action %#x\n",
+ be32_to_cpu(cmd->action_to_len16));
+ return;
+ }
+
+ fec = fwcap_to_cc_fec(acaps);
+ fc = fwcap_to_cc_pause(linkattr);
+ speed = fwcap_to_speed(linkattr);
+
+ if (mod_type != pi->mod_type) {
+ /* When a new Transceiver Module is inserted, the Firmware
+ * will examine any Forward Error Correction parameters
+ * present in the Transceiver Module i2c EEPROM and determine
+ * the supported and recommended FEC settings from those,
+ * based on IEEE 802.3 standards. We always record the
+ * IEEE 802.3 recommended "automatic" settings.
+ */
+ lc->auto_fec = fec;
+
+ /* Some versions of the early T6 Firmware "cheated" when
+ * handling different Transceiver Modules by changing the
+ * underlying Port Type reported to the Host Drivers. As
+ * such we need to capture whatever Port Type the Firmware
+ * sends us and record it in case it's different from what we
+ * were told earlier. Unfortunately, since Firmware is
+ * forever, we'll need to keep this code here forever, but in
+ * later T6 Firmware it should just be an assignment of the
+ * same value already recorded.
+ */
+ pi->port_type = port_type;
+
+ pi->mod_type = mod_type;
+ t4vf_os_portmod_changed(adapter, pi->pidx);
+ }
+
+ if (link_ok != lc->link_ok || speed != lc->speed ||
+ fc != lc->fc || fec != lc->fec) { /* something changed */
+ if (!link_ok && lc->link_ok) {
+ lc->link_down_rc = linkdnrc;
+ dev_warn(adapter->pdev_dev, "Port %d link down, reason: %s\n",
+ pi->port_id, t4vf_link_down_rc_str(linkdnrc));
+ }
+ lc->link_ok = link_ok;
+ lc->speed = speed;
+ lc->fc = fc;
+ lc->fec = fec;
+
+ lc->pcaps = pcaps;
+ lc->lpacaps = lpacaps;
+ lc->acaps = acaps & ADVERT_MASK;
+
+ if (lc->acaps & FW_PORT_CAP32_ANEG) {
+ lc->autoneg = AUTONEG_ENABLE;
+ } else {
+ /* When Autoneg is disabled, the user needs to set a
+ * single speed.
+ * Similar to cxgb4_ethtool.c: set_link_ksettings
+ */
+ lc->acaps = 0;
+ lc->speed_caps = fwcap_to_speed(acaps);
+ lc->autoneg = AUTONEG_DISABLE;
+ }
+
+ t4vf_os_link_changed(adapter, pi->pidx, link_ok);
+ }
+}
+
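
For reference, the 16-to-32-bit capability translation performed by fwcaps16_to_caps32() above is a bit-for-bit mapping from the legacy FW_PORT_CAP_* flags onto their FW_PORT_CAP32_* equivalents. A minimal sketch of that pattern (a hedged illustration, not the exact driver helper; only representative bits are shown):

	static u32 caps16_to_caps32_sketch(u16 caps16)
	{
		u32 caps32 = 0;

	/* Map each legacy 16-bit capability bit onto its 32-bit twin. */
	#define CAP16_TO_CAP32(__cap) do { \
		if (caps16 & FW_PORT_CAP_##__cap) \
			caps32 |= FW_PORT_CAP32_##__cap; \
	} while (0)

		CAP16_TO_CAP32(SPEED_100M);
		CAP16_TO_CAP32(SPEED_1G);
		CAP16_TO_CAP32(SPEED_10G);
		CAP16_TO_CAP32(SPEED_25G);
		CAP16_TO_CAP32(SPEED_40G);
		CAP16_TO_CAP32(SPEED_100G);
		CAP16_TO_CAP32(FC_RX);
		CAP16_TO_CAP32(FC_TX);
		CAP16_TO_CAP32(ANEG);

	#undef CAP16_TO_CAP32

		return caps32;
	}
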
+/**
+ * t4vf_update_port_info - retrieve and update port information if changed
+ * @pi: the port_info
+ *
+ * We issue a Get Port Information Command to the Firmware and, if
+ * successful, we check to see if anything is different from what we
+ * last recorded and update things accordingly.
+ */
+int t4vf_update_port_info(struct port_info *pi)
+{
+ unsigned int fw_caps = pi->adapter->params.fw_caps_support;
+ struct fw_port_cmd port_cmd;
+ int ret;
+
+ memset(&port_cmd, 0, sizeof(port_cmd));
+ port_cmd.op_to_portid = cpu_to_be32(FW_CMD_OP_V(FW_PORT_CMD) |
+ FW_CMD_REQUEST_F | FW_CMD_READ_F |
+ FW_PORT_CMD_PORTID_V(pi->port_id));
+ port_cmd.action_to_len16 = cpu_to_be32(
+ FW_PORT_CMD_ACTION_V(fw_caps == FW_CAPS16
+ ? FW_PORT_ACTION_GET_PORT_INFO
+ : FW_PORT_ACTION_GET_PORT_INFO32) |
+ FW_LEN16(port_cmd));
+ ret = t4vf_wr_mbox(pi->adapter, &port_cmd, sizeof(port_cmd),
+ &port_cmd);
+ if (ret)
+ return ret;
+ t4vf_handle_get_port_info(pi, &port_cmd);
+ return 0;
+}
+
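
Both the 16-bit and 32-bit paths above reduce the link attribute word to a scalar speed via fwcap_to_speed(). A hedged sketch of that reduction, assuming the FW_PORT_CAP32_SPEED_* macros used earlier in this patch (the highest set speed bit wins):

	static unsigned int fwcap_to_speed_sketch(u32 caps)
	{
		/* Scan from fastest to slowest; the first match decides. */
		if (caps & FW_PORT_CAP32_SPEED_100G)
			return 100000;
		if (caps & FW_PORT_CAP32_SPEED_40G)
			return 40000;
		if (caps & FW_PORT_CAP32_SPEED_25G)
			return 25000;
		if (caps & FW_PORT_CAP32_SPEED_10G)
			return 10000;
		if (caps & FW_PORT_CAP32_SPEED_1G)
			return 1000;
		if (caps & FW_PORT_CAP32_SPEED_100M)
			return 100;
		return 0;
	}
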
+/**
* t4vf_handle_fw_rpl - process a firmware reply message
* @adapter: the adapter
* @rpl: start of the firmware message
@@ -1685,15 +2021,12 @@ int t4vf_handle_fw_rpl(struct adapter *adapter, const __be64 *rpl)
*/
const struct fw_port_cmd *port_cmd =
(const struct fw_port_cmd *)rpl;
- u32 stat, mod;
- int action, port_id, link_ok, speed, fc, pidx;
-
- /*
- * Extract various fields from port status change message.
- */
- action = FW_PORT_CMD_ACTION_G(
+ int action = FW_PORT_CMD_ACTION_G(
be32_to_cpu(port_cmd->action_to_len16));
- if (action != FW_PORT_ACTION_GET_PORT_INFO) {
+ int port_id, pidx;
+
+ if (action != FW_PORT_ACTION_GET_PORT_INFO &&
+ action != FW_PORT_ACTION_GET_PORT_INFO32) {
dev_err(adapter->pdev_dev,
"Unknown firmware PORT reply action %x\n",
action);
@@ -1702,61 +2035,12 @@ int t4vf_handle_fw_rpl(struct adapter *adapter, const __be64 *rpl)
port_id = FW_PORT_CMD_PORTID_G(
be32_to_cpu(port_cmd->op_to_portid));
-
- stat = be32_to_cpu(port_cmd->u.info.lstatus_to_modtype);
- link_ok = (stat & FW_PORT_CMD_LSTATUS_F) != 0;
- speed = 0;
- fc = 0;
- if (stat & FW_PORT_CMD_RXPAUSE_F)
- fc |= PAUSE_RX;
- if (stat & FW_PORT_CMD_TXPAUSE_F)
- fc |= PAUSE_TX;
- if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100M))
- speed = 100;
- else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_1G))
- speed = 1000;
- else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
- speed = 10000;
- else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
- speed = 25000;
- else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
- speed = 40000;
- else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
- speed = 100000;
-
- /*
- * Scan all of our "ports" (Virtual Interfaces) looking for
- * those bound to the physical port which has changed. If
- * our recorded state doesn't match the current state,
- * signal that change to the OS code.
- */
for_each_port(adapter, pidx) {
struct port_info *pi = adap2pinfo(adapter, pidx);
- struct link_config *lc;
if (pi->port_id != port_id)
continue;
-
- lc = &pi->link_cfg;
-
- mod = FW_PORT_CMD_MODTYPE_G(stat);
- if (mod != pi->mod_type) {
- pi->mod_type = mod;
- t4vf_os_portmod_changed(adapter, pidx);
- }
-
- if (link_ok != lc->link_ok || speed != lc->speed ||
- fc != lc->fc) {
- /* something changed */
- lc->link_ok = link_ok;
- lc->speed = speed;
- lc->fc = fc;
- lc->supported =
- be16_to_cpu(port_cmd->u.info.pcap);
- lc->lp_advertising =
- be16_to_cpu(port_cmd->u.info.lpacap);
- t4vf_os_link_changed(adapter, pidx, link_ok);
- }
+ t4vf_handle_get_port_info(pi, port_cmd);
}
break;
}
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index 47be501..0affee9 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -2094,7 +2094,7 @@ static int de4x5_eisa_remove(struct device *device)
return 0;
}
-static struct eisa_device_id de4x5_eisa_ids[] = {
+static const struct eisa_device_id de4x5_eisa_ids[] = {
{ "DEC4250", 0 }, /* 0 is the board name index... */
{ "" }
};
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index 17e566a8..84394b4 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -1303,7 +1303,6 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
0x00, 'L', 'i', 'n', 'u', 'x'
};
static int last_irq;
- static int multiport_cnt; /* For four-port boards w/one EEPROM */
int i, irq;
unsigned short sum;
unsigned char *ee_data;
@@ -1557,7 +1556,6 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
} else if (ee_data[0] == 0xff && ee_data[1] == 0xff &&
ee_data[2] == 0) {
sa_offset = 2; /* Grrr, damn Matrox boards. */
- multiport_cnt = 4;
}
#ifdef CONFIG_MIPS_COBALT
if ((pdev->bus->number == 0) &&
diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index 4ee042c..1b79a6d 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c
@@ -73,7 +73,7 @@
#define ETHERCAT_MASTER_ID 0x14
-static struct pci_device_id ids[] = {
+static const struct pci_device_id ids[] = {
{ PCI_DEVICE(0x15ec, 0x5000), },
{ 0, }
};
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c
index 2b62841..05989aa 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.c
+++ b/drivers/net/ethernet/emulex/benet/be_roce.c
@@ -139,10 +139,7 @@ int be_roce_register_driver(struct ocrdma_driver *drv)
}
ocrdma_drv = drv;
list_for_each_entry(dev, &be_adapter_list, entry) {
- struct net_device *netdev;
-
_be_roce_dev_add(dev);
- netdev = dev->netdev;
}
mutex_unlock(&be_adapter_list_lock);
return 0;
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 59da7ac..9ed8e4b 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1623,6 +1623,8 @@ static const struct net_device_ops ftgmac100_netdev_ops = {
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = ftgmac100_poll_controller,
#endif
+ .ndo_vlan_rx_add_vid = ncsi_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = ncsi_vlan_rx_kill_vid,
};
static int ftgmac100_setup_mdio(struct net_device *netdev)
@@ -1837,6 +1839,9 @@ static int ftgmac100_probe(struct platform_device *pdev)
NETIF_F_GRO | NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_TX;
+ if (priv->use_ncsi)
+ netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
/* AST2400 doesn't have working HW checksum generation */
if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac")))
netdev->hw_features &= ~NETIF_F_HW_CSUM;
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 757b873..4225806 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -158,7 +158,7 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");
#define DPAA_RX_PRIV_DATA_SIZE (u16)(DPAA_TX_PRIV_DATA_SIZE + \
dpaa_rx_extra_headroom)
-#define DPAA_ETH_RX_QUEUES 128
+#define DPAA_ETH_PCD_RXQ_NUM 128
#define DPAA_ENQUEUE_RETRIES 100000
@@ -169,6 +169,7 @@ struct fm_port_fqs {
struct dpaa_fq *tx_errq;
struct dpaa_fq *rx_defq;
struct dpaa_fq *rx_errq;
+ struct dpaa_fq *rx_pcdq;
};
/* All the dpa bps in use at any moment */
@@ -235,7 +236,7 @@ static int dpaa_netdev_init(struct net_device *net_dev,
net_dev->max_mtu = dpaa_get_max_mtu();
net_dev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
- NETIF_F_LLTX);
+ NETIF_F_LLTX | NETIF_F_RXHASH);
net_dev->hw_features |= NETIF_F_SG | NETIF_F_HIGHDMA;
/* The kernel enables GSO automatically if we declare NETIF_F_SG.
@@ -342,18 +343,19 @@ static void dpaa_get_stats64(struct net_device *net_dev,
}
}
-static int dpaa_setup_tc(struct net_device *net_dev, u32 handle,
- u32 chain_index, __be16 proto, struct tc_to_netdev *tc)
+static int dpaa_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
+ void *type_data)
{
struct dpaa_priv *priv = netdev_priv(net_dev);
+ struct tc_mqprio_qopt *mqprio = type_data;
u8 num_tc;
int i;
- if (tc->type != TC_SETUP_MQPRIO)
- return -EINVAL;
+ if (type != TC_SETUP_MQPRIO)
+ return -EOPNOTSUPP;
- tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
- num_tc = tc->mqprio->num_tc;
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ num_tc = mqprio->num_tc;
if (num_tc == priv->num_tc)
return 0;
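
The hunk above follows the tree-wide ndo_setup_tc() conversion: the old (handle, chain_index, proto, tc) arguments collapse into a (type, type_data) pair, and unsupported offload types now return -EOPNOTSUPP rather than -EINVAL. A minimal sketch of the new callback shape (illustrative only, hardware programming elided):

	static int sketch_setup_tc(struct net_device *dev,
				   enum tc_setup_type type, void *type_data)
	{
		struct tc_mqprio_qopt *mqprio = type_data;

		if (type != TC_SETUP_MQPRIO)
			return -EOPNOTSUPP;	/* not a supported offload type */

		mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
		/* ...program mqprio->num_tc traffic classes into hardware... */
		return 0;
	}
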
@@ -398,8 +400,8 @@ static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev)
of_dev = of_find_device_by_node(mac_node);
if (!of_dev) {
- dev_err(dpaa_dev, "of_find_device_by_node(%s) failed\n",
- mac_node->full_name);
+ dev_err(dpaa_dev, "of_find_device_by_node(%pOF) failed\n",
+ mac_node);
of_node_put(mac_node);
return ERR_PTR(-EINVAL);
}
@@ -627,6 +629,7 @@ static inline void dpaa_assign_wq(struct dpaa_fq *fq, int idx)
fq->wq = 5;
break;
case FQ_TYPE_RX_DEFAULT:
+ case FQ_TYPE_RX_PCD:
fq->wq = 6;
break;
case FQ_TYPE_TX:
@@ -687,6 +690,7 @@ static int dpaa_alloc_all_fqs(struct device *dev, struct list_head *list,
struct fm_port_fqs *port_fqs)
{
struct dpaa_fq *dpaa_fq;
+ u32 fq_base, fq_base_aligned, i;
dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_RX_ERROR);
if (!dpaa_fq)
@@ -700,6 +704,26 @@ static int dpaa_alloc_all_fqs(struct device *dev, struct list_head *list,
port_fqs->rx_defq = &dpaa_fq[0];
+ /* the PCD FQID range needs to be aligned for correct operation */
+ if (qman_alloc_fqid_range(&fq_base, 2 * DPAA_ETH_PCD_RXQ_NUM))
+ goto fq_alloc_failed;
+
+ fq_base_aligned = ALIGN(fq_base, DPAA_ETH_PCD_RXQ_NUM);
+
+ for (i = fq_base; i < fq_base_aligned; i++)
+ qman_release_fqid(i);
+
+ for (i = fq_base_aligned + DPAA_ETH_PCD_RXQ_NUM;
+ i < (fq_base + 2 * DPAA_ETH_PCD_RXQ_NUM); i++)
+ qman_release_fqid(i);
+
+ dpaa_fq = dpaa_fq_alloc(dev, fq_base_aligned, DPAA_ETH_PCD_RXQ_NUM,
+ list, FQ_TYPE_RX_PCD);
+ if (!dpaa_fq)
+ goto fq_alloc_failed;
+
+ port_fqs->rx_pcdq = &dpaa_fq[0];
+
if (!dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX_CONF_MQ))
goto fq_alloc_failed;
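
The allocation above relies on a simple trick to get an aligned ID range from an allocator that makes no alignment guarantee: over-allocate twice the needed count, keep the naturally aligned window, and release the IDs on either side. Generalized (hypothetical helper names standing in for qman_alloc_fqid_range()/qman_release_fqid(), shown only to illustrate the pattern):

	extern int my_alloc_range(u32 *base, u32 count);	/* hypothetical */
	extern void my_release(u32 id);				/* hypothetical */

	/* Obtain an n-aligned block of n IDs (n a power of two). */
	static int alloc_aligned_ids(u32 *base_out, u32 n)
	{
		u32 base, aligned, i;

		if (my_alloc_range(&base, 2 * n))
			return -ENOMEM;

		aligned = ALIGN(base, n);

		for (i = base; i < aligned; i++)		/* trim the front */
			my_release(i);
		for (i = aligned + n; i < base + 2 * n; i++)	/* trim the tail */
			my_release(i);

		*base_out = aligned;
		return 0;
	}
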
@@ -869,13 +893,14 @@ static void dpaa_fq_setup(struct dpaa_priv *priv,
const struct dpaa_fq_cbs *fq_cbs,
struct fman_port *tx_port)
{
- int egress_cnt = 0, conf_cnt = 0, num_portals = 0, cpu;
+ int egress_cnt = 0, conf_cnt = 0, num_portals = 0, portal_cnt = 0, cpu;
const cpumask_t *affine_cpus = qman_affine_cpus();
- u16 portals[NR_CPUS];
+ u16 channels[NR_CPUS];
struct dpaa_fq *fq;
for_each_cpu(cpu, affine_cpus)
- portals[num_portals++] = qman_affine_channel(cpu);
+ channels[num_portals++] = qman_affine_channel(cpu);
+
if (num_portals == 0)
dev_err(priv->net_dev->dev.parent,
"No Qman software (affine) channels found");
@@ -889,6 +914,12 @@ static void dpaa_fq_setup(struct dpaa_priv *priv,
case FQ_TYPE_RX_ERROR:
dpaa_setup_ingress(priv, fq, &fq_cbs->rx_errq);
break;
+ case FQ_TYPE_RX_PCD:
+ if (!num_portals)
+ continue;
+ dpaa_setup_ingress(priv, fq, &fq_cbs->rx_defq);
+ fq->channel = channels[portal_cnt++ % num_portals];
+ break;
case FQ_TYPE_TX:
dpaa_setup_egress(priv, fq, tx_port,
&fq_cbs->egress_ern);
@@ -1038,7 +1069,8 @@ static int dpaa_fq_init(struct dpaa_fq *dpaa_fq, bool td_enable)
/* Put all the ingress queues in our "ingress CGR". */
if (priv->use_ingress_cgr &&
(dpaa_fq->fq_type == FQ_TYPE_RX_DEFAULT ||
- dpaa_fq->fq_type == FQ_TYPE_RX_ERROR)) {
+ dpaa_fq->fq_type == FQ_TYPE_RX_ERROR ||
+ dpaa_fq->fq_type == FQ_TYPE_RX_PCD)) {
initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_CGID);
initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_CGE);
initfq.fqd.cgid = (u8)priv->ingress_cgr.cgrid;
@@ -1169,7 +1201,7 @@ static int dpaa_eth_init_tx_port(struct fman_port *port, struct dpaa_fq *errq,
static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
size_t count, struct dpaa_fq *errq,
- struct dpaa_fq *defq,
+ struct dpaa_fq *defq, struct dpaa_fq *pcdq,
struct dpaa_buffer_layout *buf_layout)
{
struct fman_buffer_prefix_content buf_prefix_content;
@@ -1189,6 +1221,10 @@ static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
rx_p = &params.specific_params.rx_params;
rx_p->err_fqid = errq->fqid;
rx_p->dflt_fqid = defq->fqid;
+ if (pcdq) {
+ rx_p->pcd_base_fqid = pcdq->fqid;
+ rx_p->pcd_fqs_count = DPAA_ETH_PCD_RXQ_NUM;
+ }
count = min(ARRAY_SIZE(rx_p->ext_buf_pools.ext_buf_pool), count);
rx_p->ext_buf_pools.num_of_pools_used = (u8)count;
@@ -1233,7 +1269,8 @@ static int dpaa_eth_init_ports(struct mac_device *mac_dev,
return err;
err = dpaa_eth_init_rx_port(rxport, bps, count, port_fqs->rx_errq,
- port_fqs->rx_defq, &buf_layout[RX]);
+ port_fqs->rx_defq, port_fqs->rx_pcdq,
+ &buf_layout[RX]);
return err;
}
@@ -2200,12 +2237,13 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
dma_addr_t addr = qm_fd_addr(fd);
enum qm_fd_format fd_format;
struct net_device *net_dev;
- u32 fd_status;
+ u32 fd_status, hash_offset;
struct dpaa_bp *dpaa_bp;
struct dpaa_priv *priv;
unsigned int skb_len;
struct sk_buff *skb;
int *count_ptr;
+ void *vaddr;
fd_status = be32_to_cpu(fd->status);
fd_format = qm_fd_get_format(fd);
@@ -2251,7 +2289,8 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
dma_unmap_single(dpaa_bp->dev, addr, dpaa_bp->size, DMA_FROM_DEVICE);
/* prefetch the first 64 bytes of the frame or the SGT start */
- prefetch(phys_to_virt(addr) + qm_fd_get_offset(fd));
+ vaddr = phys_to_virt(addr);
+ prefetch(vaddr + qm_fd_get_offset(fd));
fd_format = qm_fd_get_format(fd);
/* The only FD types that we may receive are contig and S/G */
@@ -2272,6 +2311,18 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
skb->protocol = eth_type_trans(skb, net_dev);
+ if (net_dev->features & NETIF_F_RXHASH && priv->keygen_in_use &&
+ !fman_port_get_hash_result_offset(priv->mac_dev->port[RX],
+ &hash_offset)) {
+ enum pkt_hash_types type;
+
+ /* if L4 exists, it was used in the hash generation */
+ type = be32_to_cpu(fd->status) & FM_FD_STAT_L4CV ?
+ PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3;
+ skb_set_hash(skb, be32_to_cpu(*(u32 *)(vaddr + hash_offset)),
+ type);
+ }
+
skb_len = skb->len;
if (unlikely(netif_receive_skb(skb) == NET_RX_DROP))
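
The RXHASH block above boils down to a standard pattern: read the hardware-computed hash from the frame annotation and tell the stack whether L4 fields contributed, so RPS/RFS can trust the hash type. Reduced to its essentials (a sketch, not driver code):

	#include <linux/skbuff.h>

	static void sketch_set_rx_hash(struct sk_buff *skb, u32 hw_hash,
				       bool l4_used)
	{
		/* L4 hashes cover ports as well as addresses; L3 covers only IPs. */
		skb_set_hash(skb, hw_hash,
			     l4_used ? PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
	}
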
@@ -2510,6 +2561,9 @@ static struct dpaa_bp *dpaa_bp_alloc(struct device *dev)
dpaa_bp->bpid = FSL_DPAA_BPID_INV;
dpaa_bp->percpu_count = devm_alloc_percpu(dev, *dpaa_bp->percpu_count);
+ if (!dpaa_bp->percpu_count)
+ return ERR_PTR(-ENOMEM);
+
dpaa_bp->config_count = FSL_DPAA_ETH_MAX_BUF_COUNT;
dpaa_bp->seed_cb = dpaa_bp_seed;
@@ -2737,6 +2791,9 @@ static int dpaa_eth_probe(struct platform_device *pdev)
if (err)
goto init_ports_failed;
+ /* Rx traffic distribution based on keygen hashing defaults to on */
+ priv->keygen_in_use = true;
+
priv->percpu_priv = devm_alloc_percpu(dev, *priv->percpu_priv);
if (!priv->percpu_priv) {
dev_err(dev, "devm_alloc_percpu() failed\n");
@@ -2829,7 +2886,7 @@ static int dpaa_remove(struct platform_device *pdev)
return err;
}
-static struct platform_device_id dpaa_devtype[] = {
+static const struct platform_device_id dpaa_devtype[] = {
{
.name = "dpaa-ethernet",
.driver_data = 0,
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
index 9941a78..bd94220 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
@@ -52,6 +52,7 @@
enum dpaa_fq_type {
FQ_TYPE_RX_DEFAULT = 1, /* Rx Default FQs */
FQ_TYPE_RX_ERROR, /* Rx Error FQs */
+ FQ_TYPE_RX_PCD, /* Rx Parse Classify Distribute FQs */
FQ_TYPE_TX, /* "Real" Tx FQs */
FQ_TYPE_TX_CONFIRM, /* Tx default Conf FQ (actually an Rx FQ) */
FQ_TYPE_TX_CONF_MQ, /* Tx conf FQs (one for each Tx FQ) */
@@ -158,6 +159,7 @@ struct dpaa_priv {
struct list_head dpaa_fq_list;
u8 num_tc;
+ bool keygen_in_use;
u32 msg_enable; /* net_device message level */
struct {
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c
index ec75d1c..0d9b185 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c
@@ -71,6 +71,9 @@ static ssize_t dpaa_eth_show_fqids(struct device *dev,
case FQ_TYPE_RX_ERROR:
str = "Rx error";
break;
+ case FQ_TYPE_RX_PCD:
+ str = "Rx PCD";
+ break;
case FQ_TYPE_TX_CONFIRM:
str = "Tx default confirmation";
break;
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index aad825088..faea674 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -399,6 +399,122 @@ static void dpaa_get_strings(struct net_device *net_dev, u32 stringset,
memcpy(strings, dpaa_stats_global, size);
}
+static int dpaa_get_hash_opts(struct net_device *dev,
+ struct ethtool_rxnfc *cmd)
+{
+ struct dpaa_priv *priv = netdev_priv(dev);
+
+ cmd->data = 0;
+
+ switch (cmd->flow_type) {
+ case TCP_V4_FLOW:
+ case TCP_V6_FLOW:
+ case UDP_V4_FLOW:
+ case UDP_V6_FLOW:
+ if (priv->keygen_in_use)
+ cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* Fall through */
+ case IPV4_FLOW:
+ case IPV6_FLOW:
+ case SCTP_V4_FLOW:
+ case SCTP_V6_FLOW:
+ case AH_ESP_V4_FLOW:
+ case AH_ESP_V6_FLOW:
+ case AH_V4_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V4_FLOW:
+ case ESP_V6_FLOW:
+ if (priv->keygen_in_use)
+ cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+ break;
+ default:
+ cmd->data = 0;
+ break;
+ }
+
+ return 0;
+}
+
+static int dpaa_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+ u32 *unused)
+{
+ int ret = -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXFH:
+ ret = dpaa_get_hash_opts(dev, cmd);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void dpaa_set_hash(struct net_device *net_dev, bool enable)
+{
+ struct mac_device *mac_dev;
+ struct fman_port *rxport;
+ struct dpaa_priv *priv;
+
+ priv = netdev_priv(net_dev);
+ mac_dev = priv->mac_dev;
+ rxport = mac_dev->port[0];
+
+ fman_port_use_kg_hash(rxport, enable);
+ priv->keygen_in_use = enable;
+}
+
+static int dpaa_set_hash_opts(struct net_device *dev,
+ struct ethtool_rxnfc *nfc)
+{
+ int ret = -EINVAL;
+
+ /* we support hashing on IPv4/v6 src/dest IP and L4 src/dest port */
+ if (nfc->data &
+ ~(RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3))
+ return -EINVAL;
+
+ switch (nfc->flow_type) {
+ case TCP_V4_FLOW:
+ case TCP_V6_FLOW:
+ case UDP_V4_FLOW:
+ case UDP_V6_FLOW:
+ case IPV4_FLOW:
+ case IPV6_FLOW:
+ case SCTP_V4_FLOW:
+ case SCTP_V6_FLOW:
+ case AH_ESP_V4_FLOW:
+ case AH_ESP_V6_FLOW:
+ case AH_V4_FLOW:
+ case AH_V6_FLOW:
+ case ESP_V4_FLOW:
+ case ESP_V6_FLOW:
+ dpaa_set_hash(dev, !!nfc->data);
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int dpaa_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ int ret = -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXFH:
+ ret = dpaa_set_hash_opts(dev, cmd);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
const struct ethtool_ops dpaa_ethtool_ops = {
.get_drvinfo = dpaa_get_drvinfo,
.get_msglevel = dpaa_get_msglevel,
@@ -412,4 +528,6 @@ const struct ethtool_ops dpaa_ethtool_ops = {
.get_strings = dpaa_get_strings,
.get_link_ksettings = dpaa_get_link_ksettings,
.set_link_ksettings = dpaa_set_link_ksettings,
+ .get_rxnfc = dpaa_get_rxnfc,
+ .set_rxnfc = dpaa_set_rxnfc,
};
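
With get_rxnfc/set_rxnfc wired up, the hashing toggle is reachable from userspace through the standard SIOCETHTOOL path (ethtool -N <if> rx-flow-hash ...). A hedged userspace sketch of the ETHTOOL_SRXFH request that ends up in dpaa_set_rxnfc():

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/ethtool.h>
	#include <linux/sockios.h>
	#include <net/if.h>

	/* fd is an AF_INET socket; enable hashing on TCP/IPv4 4-tuples. */
	static int set_tcp4_hash(int fd, const char *ifname, int enable)
	{
		struct ethtool_rxnfc nfc;
		struct ifreq ifr;

		memset(&nfc, 0, sizeof(nfc));
		nfc.cmd = ETHTOOL_SRXFH;
		nfc.flow_type = TCP_V4_FLOW;
		nfc.data = enable ? (RXH_IP_SRC | RXH_IP_DST |
				     RXH_L4_B_0_1 | RXH_L4_B_2_3) : 0;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
		ifr.ifr_data = (void *)&nfc;

		return ioctl(fd, SIOCETHTOOL, &ifr);
	}
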
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index a6e323f..56f56d6 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -173,10 +173,12 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
#endif /* CONFIG_M5272 */
/* The FEC stores dest/src/type/vlan, data, and checksum for receive packets.
+ *
+ * 2048 byte skbufs are allocated. However, alignment requirements
+ * vary between FEC variants. Worst case is 64, so round down by 64
+ * (giving a 1984 byte maximum).
*/
-#define PKT_MAXBUF_SIZE 1522
+#define PKT_MAXBUF_SIZE (round_down(2048 - 64, 64))
#define PKT_MINBUF_SIZE 64
-#define PKT_MAXBLR_SIZE 1536
/* FEC receive acceleration */
#define FEC_RACC_IPDIS (1 << 1)
@@ -224,7 +226,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
#define COPYBREAK_DEFAULT 256
-#define TSO_HEADER_SIZE 128
/* Max number of allowed TCP segments for software TSO */
#define FEC_MAX_TSO_SEGS 100
#define FEC_MAX_SKB_DESCS (FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
@@ -851,7 +852,7 @@ static void fec_enet_enable_ring(struct net_device *ndev)
for (i = 0; i < fep->num_rx_queues; i++) {
rxq = fep->rx_queue[i];
writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i));
- writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i));
+ writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i));
/* enable DMA1/2 */
if (i)
@@ -1904,8 +1905,10 @@ static int fec_enet_mii_probe(struct net_device *ndev)
phy_dev = of_phy_connect(ndev, fep->phy_node,
&fec_enet_adjust_link, 0,
fep->phy_interface);
- if (!phy_dev)
+ if (!phy_dev) {
+ netdev_err(ndev, "Unable to connect to phy\n");
return -ENODEV;
+ }
} else {
/* check for attached phy */
for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) {
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index aa8cf5d2..6d7269d 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -960,8 +960,8 @@ static int mpc52xx_fec_probe(struct platform_device *op)
/* We're done ! */
platform_set_drvdata(op, ndev);
- netdev_info(ndev, "%s MAC %pM\n",
- op->dev.of_node->full_name, ndev->dev_addr);
+ netdev_info(ndev, "%pOF MAC %pM\n",
+ op->dev.of_node, ndev->dev_addr);
return 0;
diff --git a/drivers/net/ethernet/freescale/fman/Makefile b/drivers/net/ethernet/freescale/fman/Makefile
index 6049177..2c38119 100644
--- a/drivers/net/ethernet/freescale/fman/Makefile
+++ b/drivers/net/ethernet/freescale/fman/Makefile
@@ -4,6 +4,6 @@ obj-$(CONFIG_FSL_FMAN) += fsl_fman.o
obj-$(CONFIG_FSL_FMAN) += fsl_fman_port.o
obj-$(CONFIG_FSL_FMAN) += fsl_mac.o
-fsl_fman-objs := fman_muram.o fman.o fman_sp.o
+fsl_fman-objs := fman_muram.o fman.o fman_sp.o fman_keygen.o
fsl_fman_port-objs := fman_port.o
fsl_mac-objs:= mac.o fman_dtsec.o fman_memac.o fman_tgec.o
diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index 4aefe24..9530405 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -32,9 +32,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include "fman.h"
-#include "fman_muram.h"
-
#include <linux/fsl/guts.h>
#include <linux/slab.h>
#include <linux/delay.h>
@@ -46,6 +43,10 @@
#include <linux/interrupt.h>
#include <linux/libfdt_env.h>
+#include "fman.h"
+#include "fman_muram.h"
+#include "fman_keygen.h"
+
/* General defines */
#define FMAN_LIODN_TBL 64 /* size of LIODN table */
#define MAX_NUM_OF_MACS 10
@@ -56,6 +57,7 @@
/* Modules registers offsets */
#define BMI_OFFSET 0x00080000
#define QMI_OFFSET 0x00080400
+#define KG_OFFSET 0x000C1000
#define DMA_OFFSET 0x000C2000
#define FPM_OFFSET 0x000C3000
#define IMEM_OFFSET 0x000C4000
@@ -564,80 +566,6 @@ struct fman_cfg {
u32 qmi_def_tnums_thresh;
};
-/* Structure that holds information received from device tree */
-struct fman_dts_params {
- void __iomem *base_addr; /* FMan virtual address */
- struct resource *res; /* FMan memory resource */
- u8 id; /* FMan ID */
-
- int err_irq; /* FMan Error IRQ */
-
- u16 clk_freq; /* FMan clock freq (In Mhz) */
-
- u32 qman_channel_base; /* QMan channels base */
- u32 num_of_qman_channels; /* Number of QMan channels */
-
- struct resource muram_res; /* MURAM resource */
-};
-
-/** fman_exceptions_cb
- * fman - Pointer to FMan
- * exception - The exception.
- *
- * Exceptions user callback routine, will be called upon an exception
- * passing the exception identification.
- *
- * Return: irq status
- */
-typedef irqreturn_t (fman_exceptions_cb)(struct fman *fman,
- enum fman_exceptions exception);
-
-/** fman_bus_error_cb
- * fman - Pointer to FMan
- * port_id - Port id
- * addr - Address that caused the error
- * tnum - Owner of error
- * liodn - Logical IO device number
- *
- * Bus error user callback routine, will be called upon bus error,
- * passing parameters describing the errors and the owner.
- *
- * Return: IRQ status
- */
-typedef irqreturn_t (fman_bus_error_cb)(struct fman *fman, u8 port_id,
- u64 addr, u8 tnum, u16 liodn);
-
-struct fman {
- struct device *dev;
- void __iomem *base_addr;
- struct fman_intr_src intr_mng[FMAN_EV_CNT];
-
- struct fman_fpm_regs __iomem *fpm_regs;
- struct fman_bmi_regs __iomem *bmi_regs;
- struct fman_qmi_regs __iomem *qmi_regs;
- struct fman_dma_regs __iomem *dma_regs;
- struct fman_hwp_regs __iomem *hwp_regs;
- fman_exceptions_cb *exception_cb;
- fman_bus_error_cb *bus_error_cb;
- /* Spinlock for FMan use */
- spinlock_t spinlock;
- struct fman_state_struct *state;
-
- struct fman_cfg *cfg;
- struct muram_info *muram;
- /* cam section in muram */
- unsigned long cam_offset;
- size_t cam_size;
- /* Fifo in MURAM */
- unsigned long fifo_offset;
- size_t fifo_size;
-
- u32 liodn_base[64];
- u32 liodn_offset[64];
-
- struct fman_dts_params dts_params;
-};
-
static irqreturn_t fman_exceptions(struct fman *fman,
enum fman_exceptions exception)
{
@@ -1811,6 +1739,7 @@ static int fman_config(struct fman *fman)
fman->qmi_regs = base_addr + QMI_OFFSET;
fman->dma_regs = base_addr + DMA_OFFSET;
fman->hwp_regs = base_addr + HWP_OFFSET;
+ fman->kg_regs = base_addr + KG_OFFSET;
fman->base_addr = base_addr;
spin_lock_init(&fman->spinlock);
@@ -1925,8 +1854,8 @@ static int fman_reset(struct fman *fman)
guts_regs = of_iomap(guts_node, 0);
if (!guts_regs) {
- dev_err(fman->dev, "%s: Couldn't map %s regs\n",
- __func__, guts_node->full_name);
+ dev_err(fman->dev, "%s: Couldn't map %pOF regs\n",
+ __func__, guts_node);
goto guts_regs;
}
#define FMAN1_ALL_MACS_MASK 0xFCC00000
@@ -2083,6 +2012,11 @@ static int fman_init(struct fman *fman)
/* Init HW Parser */
hwp_init(fman->hwp_regs);
+ /* Init KeyGen */
+ fman->keygen = keygen_init(fman->kg_regs);
+ if (!fman->keygen)
+ return -EINVAL;
+
err = enable(fman, cfg);
if (err != 0)
return err;
@@ -2434,15 +2368,21 @@ u32 fman_get_qman_channel_id(struct fman *fman, u32 port_id)
int i;
if (fman->state->rev_info.major >= 6) {
- u32 port_ids[] = {0x30, 0x31, 0x28, 0x29, 0x2a, 0x2b,
- 0x2c, 0x2d, 0x2, 0x3, 0x4, 0x5, 0x7, 0x7};
+ static const u32 port_ids[] = {
+ 0x30, 0x31, 0x28, 0x29, 0x2a, 0x2b,
+ 0x2c, 0x2d, 0x2, 0x3, 0x4, 0x5, 0x7, 0x7
+ };
+
for (i = 0; i < fman->state->num_of_qman_channels; i++) {
if (port_ids[i] == port_id)
break;
}
} else {
- u32 port_ids[] = {0x30, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x1,
- 0x2, 0x3, 0x4, 0x5, 0x7, 0x7};
+ static const u32 port_ids[] = {
+ 0x30, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x1,
+ 0x2, 0x3, 0x4, 0x5, 0x7, 0x7
+ };
+
for (i = 0; i < fman->state->num_of_qman_channels; i++) {
if (port_ids[i] == port_id)
break;
@@ -2780,8 +2720,8 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
err = of_property_read_u32(fm_node, "cell-index", &val);
if (err) {
- dev_err(&of_dev->dev, "%s: failed to read cell-index for %s\n",
- __func__, fm_node->full_name);
+ dev_err(&of_dev->dev, "%s: failed to read cell-index for %pOF\n",
+ __func__, fm_node);
goto fman_node_put;
}
fman->dts_params.id = (u8)val;
@@ -2834,8 +2774,8 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
err = of_property_read_u32_array(fm_node, "fsl,qman-channel-range",
&range[0], 2);
if (err) {
- dev_err(&of_dev->dev, "%s: failed to read fsl,qman-channel-range for %s\n",
- __func__, fm_node->full_name);
+ dev_err(&of_dev->dev, "%s: failed to read fsl,qman-channel-range for %pOF\n",
+ __func__, fm_node);
goto fman_node_put;
}
fman->dts_params.qman_channel_base = range[0];
diff --git a/drivers/net/ethernet/freescale/fman/fman.h b/drivers/net/ethernet/freescale/fman/fman.h
index f53e147..bfa02e0 100644
--- a/drivers/net/ethernet/freescale/fman/fman.h
+++ b/drivers/net/ethernet/freescale/fman/fman.h
@@ -34,6 +34,8 @@
#define __FM_H
#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
/* FM Frame descriptor macros */
/* Frame queue Context Override */
@@ -274,6 +276,81 @@ struct fman_intr_src {
void *src_handle;
};
+/** fman_exceptions_cb
+ * fman - Pointer to FMan
+ * exception - The exception.
+ *
+ * Exception user callback routine; will be called upon an exception,
+ * passing the exception identification.
+ *
+ * Return: IRQ status
+ */
+typedef irqreturn_t (fman_exceptions_cb)(struct fman *fman,
+ enum fman_exceptions exception);
+
+/** fman_bus_error_cb
+ * fman - Pointer to FMan
+ * port_id - Port id
+ * addr - Address that caused the error
+ * tnum - Owner of error
+ * liodn - Logical IO device number
+ *
+ * Bus error user callback routine, will be called upon bus error,
+ * passing parameters describing the errors and the owner.
+ *
+ * Return: IRQ status
+ */
+typedef irqreturn_t (fman_bus_error_cb)(struct fman *fman, u8 port_id,
+ u64 addr, u8 tnum, u16 liodn);
+
+/* Structure that holds information received from device tree */
+struct fman_dts_params {
+ void __iomem *base_addr; /* FMan virtual address */
+ struct resource *res; /* FMan memory resource */
+ u8 id; /* FMan ID */
+
+ int err_irq; /* FMan Error IRQ */
+
+ u16 clk_freq; /* FMan clock freq (in MHz) */
+
+ u32 qman_channel_base; /* QMan channels base */
+ u32 num_of_qman_channels; /* Number of QMan channels */
+
+ struct resource muram_res; /* MURAM resource */
+};
+
+struct fman {
+ struct device *dev;
+ void __iomem *base_addr;
+ struct fman_intr_src intr_mng[FMAN_EV_CNT];
+
+ struct fman_fpm_regs __iomem *fpm_regs;
+ struct fman_bmi_regs __iomem *bmi_regs;
+ struct fman_qmi_regs __iomem *qmi_regs;
+ struct fman_dma_regs __iomem *dma_regs;
+ struct fman_hwp_regs __iomem *hwp_regs;
+ struct fman_kg_regs __iomem *kg_regs;
+ fman_exceptions_cb *exception_cb;
+ fman_bus_error_cb *bus_error_cb;
+ /* Spinlock for FMan use */
+ spinlock_t spinlock;
+ struct fman_state_struct *state;
+
+ struct fman_cfg *cfg;
+ struct muram_info *muram;
+ struct fman_keygen *keygen;
+ /* cam section in muram */
+ unsigned long cam_offset;
+ size_t cam_size;
+ /* Fifo in MURAM */
+ unsigned long fifo_offset;
+ size_t fifo_size;
+
+ u32 liodn_base[64];
+ u32 liodn_offset[64];
+
+ struct fman_dts_params dts_params;
+};
+
/* Structure for port-FM communication during fman_port_init. */
struct fman_port_init_params {
u8 port_id; /* port Id */
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
index 98bba10..ea43b49 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
@@ -123,7 +123,7 @@
#define DTSEC_ECNTRL_R100M 0x00000008
#define DTSEC_ECNTRL_QSGMIIM 0x00000001
-#define DTSEC_TCTRL_GTS 0x00000020
+#define TCTRL_GTS 0x00000020
#define RCTRL_PAL_MASK 0x001f0000
#define RCTRL_PAL_SHIFT 16
@@ -863,6 +863,52 @@ int dtsec_cfg_pad_and_crc(struct fman_mac *dtsec, bool new_val)
return 0;
}
+static void graceful_start(struct fman_mac *dtsec, enum comm_mode mode)
+{
+ struct dtsec_regs __iomem *regs = dtsec->regs;
+
+ if (mode & COMM_MODE_TX)
+ iowrite32be(ioread32be(&regs->tctrl) &
+ ~TCTRL_GTS, &regs->tctrl);
+ if (mode & COMM_MODE_RX)
+ iowrite32be(ioread32be(&regs->rctrl) &
+ ~RCTRL_GRS, &regs->rctrl);
+}
+
+static void graceful_stop(struct fman_mac *dtsec, enum comm_mode mode)
+{
+ struct dtsec_regs __iomem *regs = dtsec->regs;
+ u32 tmp;
+
+ /* Graceful stop - Assert the graceful Rx stop bit */
+ if (mode & COMM_MODE_RX) {
+ tmp = ioread32be(&regs->rctrl) | RCTRL_GRS;
+ iowrite32be(tmp, &regs->rctrl);
+
+ if (dtsec->fm_rev_info.major == 2) {
+ /* Workaround for dTSEC Errata A002 */
+ usleep_range(100, 200);
+ } else {
+ /* Workaround for dTSEC Errata A004839 */
+ usleep_range(10, 50);
+ }
+ }
+
+ /* Graceful stop - Assert the graceful Tx stop bit */
+ if (mode & COMM_MODE_TX) {
+ if (dtsec->fm_rev_info.major == 2) {
+ /* dTSEC Errata A004: Do not use TCTRL[GTS]=1 */
+ pr_debug("GTS not supported due to DTSEC_A004 Errata.\n");
+ } else {
+ tmp = ioread32be(&regs->tctrl) | TCTRL_GTS;
+ iowrite32be(tmp, &regs->tctrl);
+
+ /* Workaround for dTSEC Errata A0012, A0014 */
+ usleep_range(10, 50);
+ }
+ }
+}
+
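
Every MAC-touching helper below repeats the same dance around these two functions: derive which directions are currently live from the GRS/GTS bits, quiesce exactly those, reconfigure, then resume. The recurring shape, extracted for clarity (illustrative; each real caller inlines it):

	static void reconfigure_quiesced(struct fman_mac *dtsec)
	{
		struct dtsec_regs __iomem *regs = dtsec->regs;
		enum comm_mode mode = COMM_MODE_NONE;

		if ((ioread32be(&regs->rctrl) & RCTRL_GRS) == 0)
			mode |= COMM_MODE_RX;	/* Rx is currently running */
		if ((ioread32be(&regs->tctrl) & TCTRL_GTS) == 0)
			mode |= COMM_MODE_TX;	/* Tx is currently running */

		graceful_stop(dtsec, mode);
		/* ...safely modify MAC registers here... */
		graceful_start(dtsec, mode);
	}
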
int dtsec_enable(struct fman_mac *dtsec, enum comm_mode mode)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
@@ -880,13 +926,8 @@ int dtsec_enable(struct fman_mac *dtsec, enum comm_mode mode)
iowrite32be(tmp, &regs->maccfg1);
- /* Graceful start - clear the graceful receive stop bit */
- if (mode & COMM_MODE_TX)
- iowrite32be(ioread32be(&regs->tctrl) & ~DTSEC_TCTRL_GTS,
- &regs->tctrl);
- if (mode & COMM_MODE_RX)
- iowrite32be(ioread32be(&regs->rctrl) & ~RCTRL_GRS,
- &regs->rctrl);
+ /* Graceful start - clear the graceful Rx/Tx stop bit */
+ graceful_start(dtsec, mode);
return 0;
}
@@ -899,23 +940,8 @@ int dtsec_disable(struct fman_mac *dtsec, enum comm_mode mode)
if (!is_init_done(dtsec->dtsec_drv_param))
return -EINVAL;
- /* Gracefull stop - Assert the graceful transmit stop bit */
- if (mode & COMM_MODE_RX) {
- tmp = ioread32be(&regs->rctrl) | RCTRL_GRS;
- iowrite32be(tmp, &regs->rctrl);
-
- if (dtsec->fm_rev_info.major == 2)
- usleep_range(100, 200);
- else
- udelay(10);
- }
-
- if (mode & COMM_MODE_TX) {
- if (dtsec->fm_rev_info.major == 2)
- pr_debug("GTS not supported due to DTSEC_A004 errata.\n");
- else
- pr_debug("GTS not supported due to DTSEC_A0014 errata.\n");
- }
+ /* Graceful stop - Assert the graceful Rx/Tx stop bit */
+ graceful_stop(dtsec, mode);
tmp = ioread32be(&regs->maccfg1);
if (mode & COMM_MODE_RX)
@@ -933,11 +959,19 @@ int dtsec_set_tx_pause_frames(struct fman_mac *dtsec,
u16 pause_time, u16 __maybe_unused thresh_time)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
+ enum comm_mode mode = COMM_MODE_NONE;
u32 ptv = 0;
if (!is_init_done(dtsec->dtsec_drv_param))
return -EINVAL;
+ if ((ioread32be(&regs->rctrl) & RCTRL_GRS) == 0)
+ mode |= COMM_MODE_RX;
+ if ((ioread32be(&regs->tctrl) & TCTRL_GTS) == 0)
+ mode |= COMM_MODE_TX;
+
+ graceful_stop(dtsec, mode);
+
if (pause_time) {
/* FM_BAD_TX_TS_IN_B_2_B_ERRATA_DTSEC_A003 Errata workaround */
if (dtsec->fm_rev_info.major == 2 && pause_time <= 320) {
@@ -958,17 +992,27 @@ int dtsec_set_tx_pause_frames(struct fman_mac *dtsec,
iowrite32be(ioread32be(&regs->maccfg1) & ~MACCFG1_TX_FLOW,
&regs->maccfg1);
+ graceful_start(dtsec, mode);
+
return 0;
}
int dtsec_accept_rx_pause_frames(struct fman_mac *dtsec, bool en)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
+ enum comm_mode mode = COMM_MODE_NONE;
u32 tmp;
if (!is_init_done(dtsec->dtsec_drv_param))
return -EINVAL;
+ if ((ioread32be(&regs->rctrl) & RCTRL_GRS) == 0)
+ mode |= COMM_MODE_RX;
+ if ((ioread32be(&regs->tctrl) & TCTRL_GTS) == 0)
+ mode |= COMM_MODE_TX;
+
+ graceful_stop(dtsec, mode);
+
tmp = ioread32be(&regs->maccfg1);
if (en)
tmp |= MACCFG1_RX_FLOW;
@@ -976,20 +1020,34 @@ int dtsec_accept_rx_pause_frames(struct fman_mac *dtsec, bool en)
tmp &= ~MACCFG1_RX_FLOW;
iowrite32be(tmp, &regs->maccfg1);
+ graceful_start(dtsec, mode);
+
return 0;
}
int dtsec_modify_mac_address(struct fman_mac *dtsec, enet_addr_t *enet_addr)
{
+ struct dtsec_regs __iomem *regs = dtsec->regs;
+ enum comm_mode mode = COMM_MODE_NONE;
+
if (!is_init_done(dtsec->dtsec_drv_param))
return -EINVAL;
+ if ((ioread32be(&regs->rctrl) & RCTRL_GRS) == 0)
+ mode |= COMM_MODE_RX;
+ if ((ioread32be(&regs->tctrl) & TCTRL_GTS) == 0)
+ mode |= COMM_MODE_TX;
+
+ graceful_stop(dtsec, mode);
+
/* Initialize MAC Station Address registers (1 & 2)
* Station address has to be swapped (big endian to little endian)
*/
dtsec->addr = ENET_ADDR_TO_UINT64(*enet_addr);
set_mac_address(dtsec->regs, (u8 *)(*enet_addr));
+ graceful_start(dtsec, mode);
+
return 0;
}
@@ -1162,11 +1220,19 @@ int dtsec_set_promiscuous(struct fman_mac *dtsec, bool new_val)
int dtsec_adjust_link(struct fman_mac *dtsec, u16 speed)
{
struct dtsec_regs __iomem *regs = dtsec->regs;
+ enum comm_mode mode = COMM_MODE_NONE;
u32 tmp;
if (!is_init_done(dtsec->dtsec_drv_param))
return -EINVAL;
+ if ((ioread32be(&regs->rctrl) & RCTRL_GRS) == 0)
+ mode |= COMM_MODE_RX;
+ if ((ioread32be(&regs->tctrl) & TCTRL_GTS) == 0)
+ mode |= COMM_MODE_TX;
+
+ graceful_stop(dtsec, mode);
+
tmp = ioread32be(&regs->maccfg2);
/* Full Duplex */
@@ -1186,6 +1252,8 @@ int dtsec_adjust_link(struct fman_mac *dtsec, u16 speed)
tmp &= ~DTSEC_ECNTRL_R100M;
iowrite32be(tmp, &regs->ecntrl);
+ graceful_start(dtsec, mode);
+
return 0;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_keygen.c b/drivers/net/ethernet/freescale/fman/fman_keygen.c
new file mode 100644
index 0000000..f54da3c
--- /dev/null
+++ b/drivers/net/ethernet/freescale/fman/fman_keygen.c
@@ -0,0 +1,783 @@
+/*
+ * Copyright 2017 NXP
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of NXP nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NXP ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL NXP BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+
+#include "fman_keygen.h"
+
+/* Maximum number of HW Ports */
+#define FMAN_MAX_NUM_OF_HW_PORTS 64
+
+/* Maximum number of KeyGen Schemes */
+#define FM_KG_MAX_NUM_OF_SCHEMES 32
+
+/* Number of KeyGen Generic Extract Command Registers */
+#define FM_KG_NUM_OF_GENERIC_REGS 8
+
+/* Dummy port ID */
+#define DUMMY_PORT_ID 0
+
+/* Select Scheme Value Register */
+#define KG_SCH_DEF_USE_KGSE_DV_0 2
+#define KG_SCH_DEF_USE_KGSE_DV_1 3
+
+/* Registers Shifting values */
+#define FM_KG_KGAR_NUM_SHIFT 16
+#define KG_SCH_DEF_L4_PORT_SHIFT 8
+#define KG_SCH_DEF_IP_ADDR_SHIFT 18
+#define KG_SCH_HASH_CONFIG_SHIFT_SHIFT 24
+
+/* KeyGen Registers bit field masks: */
+
+/* Enable bit field mask for KeyGen General Configuration Register */
+#define FM_KG_KGGCR_EN 0x80000000
+
+/* KeyGen Global Registers bit field masks */
+#define FM_KG_KGAR_GO 0x80000000
+#define FM_KG_KGAR_READ 0x40000000
+#define FM_KG_KGAR_WRITE 0x00000000
+#define FM_KG_KGAR_SEL_SCHEME_ENTRY 0x00000000
+#define FM_KG_KGAR_SCM_WSEL_UPDATE_CNT 0x00008000
+
+#define FM_KG_KGAR_ERR 0x20000000
+#define FM_KG_KGAR_SEL_CLS_PLAN_ENTRY 0x01000000
+#define FM_KG_KGAR_SEL_PORT_ENTRY 0x02000000
+#define FM_KG_KGAR_SEL_PORT_WSEL_SP 0x00008000
+#define FM_KG_KGAR_SEL_PORT_WSEL_CPP 0x00004000
+
+/* Error events exceptions */
+#define FM_EX_KG_DOUBLE_ECC 0x80000000
+#define FM_EX_KG_KEYSIZE_OVERFLOW 0x40000000
+
+/* Scheme Registers bit field masks */
+#define KG_SCH_MODE_EN 0x80000000
+#define KG_SCH_VSP_NO_KSP_EN 0x80000000
+#define KG_SCH_HASH_CONFIG_SYM 0x40000000
+
+/* Known Protocol field codes */
+#define KG_SCH_KN_PORT_ID 0x80000000
+#define KG_SCH_KN_MACDST 0x40000000
+#define KG_SCH_KN_MACSRC 0x20000000
+#define KG_SCH_KN_TCI1 0x10000000
+#define KG_SCH_KN_TCI2 0x08000000
+#define KG_SCH_KN_ETYPE 0x04000000
+#define KG_SCH_KN_PPPSID 0x02000000
+#define KG_SCH_KN_PPPID 0x01000000
+#define KG_SCH_KN_MPLS1 0x00800000
+#define KG_SCH_KN_MPLS2 0x00400000
+#define KG_SCH_KN_MPLS_LAST 0x00200000
+#define KG_SCH_KN_IPSRC1 0x00100000
+#define KG_SCH_KN_IPDST1 0x00080000
+#define KG_SCH_KN_PTYPE1 0x00040000
+#define KG_SCH_KN_IPTOS_TC1 0x00020000
+#define KG_SCH_KN_IPV6FL1 0x00010000
+#define KG_SCH_KN_IPSRC2 0x00008000
+#define KG_SCH_KN_IPDST2 0x00004000
+#define KG_SCH_KN_PTYPE2 0x00002000
+#define KG_SCH_KN_IPTOS_TC2 0x00001000
+#define KG_SCH_KN_IPV6FL2 0x00000800
+#define KG_SCH_KN_GREPTYPE 0x00000400
+#define KG_SCH_KN_IPSEC_SPI 0x00000200
+#define KG_SCH_KN_IPSEC_NH 0x00000100
+#define KG_SCH_KN_IPPID 0x00000080
+#define KG_SCH_KN_L4PSRC 0x00000004
+#define KG_SCH_KN_L4PDST 0x00000002
+#define KG_SCH_KN_TFLG 0x00000001
+
+/* NIA values */
+#define NIA_ENG_BMI 0x00500000
+#define NIA_BMI_AC_ENQ_FRAME 0x00000002
+#define ENQUEUE_KG_DFLT_NIA (NIA_ENG_BMI | NIA_BMI_AC_ENQ_FRAME)
+
+/* Hard-coded configuration:
+ * These values are hard-coded into the KeyGen configuration
+ * and replace user selections in this hard-coded version
+ */
+
+/* Hash distribution shift */
+#define DEFAULT_HASH_DIST_FQID_SHIFT 0
+
+/* Hash shift */
+#define DEFAULT_HASH_SHIFT 0
+
+/* Symmetric hash usage:
+ * Warning:
+ * - the value for symmetric hash usage must be in accordance with the
+ * hash key defined below
+ * - according to tests performed, spreading does not work if symmetric
+ * hash is set to true
+ * So ultimately the symmetric hash functionality should always be disabled:
+ */
+#define DEFAULT_SYMMETRIC_HASH false
+
+/* Hash Key extraction fields: */
+#define DEFAULT_HASH_KEY_EXTRACT_FIELDS \
+ (KG_SCH_KN_IPSRC1 | KG_SCH_KN_IPDST1 | \
+ KG_SCH_KN_L4PSRC | KG_SCH_KN_L4PDST)
+
+/* Default values to be used as hash key in case IPv4 or L4 (TCP, UDP)
+ * don't exist in the frame
+ */
+/* Default IPv4 address */
+#define DEFAULT_HASH_KEY_IPv4_ADDR 0x0A0A0A0A
+/* Default L4 port */
+#define DEFAULT_HASH_KEY_L4_PORT 0x0B0B0B0B
+
+/* KeyGen Memory Mapped Registers: */
+
+/* Scheme Configuration RAM Registers */
+struct fman_kg_scheme_regs {
+ u32 kgse_mode; /* 0x100: MODE */
+ u32 kgse_ekfc; /* 0x104: Extract Known Fields Command */
+ u32 kgse_ekdv; /* 0x108: Extract Known Default Value */
+ u32 kgse_bmch; /* 0x10C: Bit Mask Command High */
+ u32 kgse_bmcl; /* 0x110: Bit Mask Command Low */
+ u32 kgse_fqb; /* 0x114: Frame Queue Base */
+ u32 kgse_hc; /* 0x118: Hash Command */
+ u32 kgse_ppc; /* 0x11C: Policer Profile Command */
+ u32 kgse_gec[FM_KG_NUM_OF_GENERIC_REGS];
+ /* 0x120: Generic Extract Command */
+ u32 kgse_spc;
+ /* 0x140: KeyGen Scheme Entry Statistic Packet Counter */
+ u32 kgse_dv0; /* 0x144: KeyGen Scheme Entry Default Value 0 */
+ u32 kgse_dv1; /* 0x148: KeyGen Scheme Entry Default Value 1 */
+ u32 kgse_ccbs;
+ /* 0x14C: KeyGen Scheme Entry Coarse Classification Bit */
+ u32 kgse_mv; /* 0x150: KeyGen Scheme Entry Match vector */
+ u32 kgse_om; /* 0x154: KeyGen Scheme Entry Operation Mode bits */
+ u32 kgse_vsp;
+ /* 0x158: KeyGen Scheme Entry Virtual Storage Profile */
+};
+
+/* Port Partition Configuration Registers */
+struct fman_kg_pe_regs {
+ u32 fmkg_pe_sp; /* 0x100: KeyGen Port entry Scheme Partition */
+ u32 fmkg_pe_cpp;
+ /* 0x104: KeyGen Port Entry Classification Plan Partition */
+};
+
+/* General Configuration and Status Registers
+ * Global Statistic Counters
+ * KeyGen Global Registers
+ */
+struct fman_kg_regs {
+ u32 fmkg_gcr; /* 0x000: KeyGen General Configuration Register */
+ u32 res004; /* 0x004: Reserved */
+ u32 res008; /* 0x008: Reserved */
+ u32 fmkg_eer; /* 0x00C: KeyGen Error Event Register */
+ u32 fmkg_eeer; /* 0x010: KeyGen Error Event Enable Register */
+ u32 res014; /* 0x014: Reserved */
+ u32 res018; /* 0x018: Reserved */
+ u32 fmkg_seer; /* 0x01C: KeyGen Scheme Error Event Register */
+ u32 fmkg_seeer; /* 0x020: KeyGen Scheme Error Event Enable Register */
+ u32 fmkg_gsr; /* 0x024: KeyGen Global Status Register */
+ u32 fmkg_tpc; /* 0x028: Total Packet Counter Register */
+ u32 fmkg_serc; /* 0x02C: Soft Error Capture Register */
+ u32 res030[4]; /* 0x030: Reserved */
+ u32 fmkg_fdor; /* 0x034: Frame Data Offset Register */
+ u32 fmkg_gdv0r; /* 0x038: Global Default Value Register 0 */
+ u32 fmkg_gdv1r; /* 0x03C: Global Default Value Register 1 */
+ u32 res04c[6]; /* 0x040: Reserved */
+ u32 fmkg_feer; /* 0x044: Force Error Event Register */
+ u32 res068[38]; /* 0x048: Reserved */
+ union {
+ u32 fmkg_indirect[63]; /* 0x100: Indirect Access Registers */
+ struct fman_kg_scheme_regs fmkg_sch; /* Scheme Registers */
+ struct fman_kg_pe_regs fmkg_pe; /* Port Partition Registers */
+ };
+ u32 fmkg_ar; /* 0x1FC: KeyGen Action Register */
+};
+
+/* KeyGen Scheme data */
+struct keygen_scheme {
+ bool used; /* Specifies if this scheme is used */
+ u8 hw_port_id;
+ /* Hardware port ID
+ * scheme sharing between multiple ports is not
+ * currently supported, so only one port ID is
+ * bound to a scheme
+ */
+ u32 base_fqid;
+ /* Base FQID:
+ * Must be between 1 and 2^24-1
+ * If hash is used and an even distribution is
+ * expected according to hash_fqid_count,
+ * base_fqid must be aligned to hash_fqid_count
+ */
+ u32 hash_fqid_count;
+ /* FQ range for hash distribution:
+ * Must be a power of 2
+ * Represents the range of queues for spreading
+ */
+ bool use_hashing; /* Usage of Hashing and spreading over FQ */
+ bool symmetric_hash; /* Symmetric Hash option usage */
+ u8 hash_shift;
+ /* Hash result right shift.
+ * Selects 24 bits out of the 64-bit hash result:
+ * 0 means use the 24 LSBs, otherwise
+ * use the 24 LSBs after shifting right
+ */
+ u32 match_vector; /* Match Vector */
+};
+
+/* KeyGen driver data */
+struct fman_keygen {
+ struct keygen_scheme schemes[FM_KG_MAX_NUM_OF_SCHEMES];
+ /* Array of schemes */
+ struct fman_kg_regs __iomem *keygen_regs; /* KeyGen registers */
+};
+
+/* keygen_write_ar_wait
+ *
+ * Write Action Register with specified value, wait for GO bit field to be
+ * idle and then read the error
+ *
+ * regs: KeyGen registers
+ * fmkg_ar: Action Register value
+ *
+ * Return: Zero for success or error code in case of failure
+ */
+static int keygen_write_ar_wait(struct fman_kg_regs __iomem *regs, u32 fmkg_ar)
+{
+ iowrite32be(fmkg_ar, &regs->fmkg_ar);
+
+ /* Wait for GO bit field to be idle */
+ while (fmkg_ar & FM_KG_KGAR_GO)
+ fmkg_ar = ioread32be(&regs->fmkg_ar);
+
+ if (fmkg_ar & FM_KG_KGAR_ERR)
+ return -EINVAL;
+
+ return 0;
+}
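
keygen_write_ar_wait() polls the GO bit with no upper bound, which is fine while the hardware is healthy but spins forever if it wedges. A defensive variant one might consider (a sketch, not part of this patch; the bound is arbitrary):

	static int keygen_write_ar_bounded(struct fman_kg_regs __iomem *regs,
					   u32 fmkg_ar)
	{
		int spins = 1000000;	/* arbitrary illustrative bound */

		iowrite32be(fmkg_ar, &regs->fmkg_ar);

		/* Poll GO until the hardware clears it or we give up */
		while ((fmkg_ar & FM_KG_KGAR_GO) && --spins)
			fmkg_ar = ioread32be(&regs->fmkg_ar);

		if (!spins || (fmkg_ar & FM_KG_KGAR_ERR))
			return -EINVAL;

		return 0;
	}
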
+
+/* build_ar_scheme
+ *
+ * Build Action Register value for scheme settings
+ *
+ * scheme_id: Scheme ID
+ * update_counter: update scheme counter
+ * write: true for action to write the scheme or false for read action
+ *
+ * Return: AR value
+ */
+static u32 build_ar_scheme(u8 scheme_id, bool update_counter, bool write)
+{
+ u32 rw = (u32)(write ? FM_KG_KGAR_WRITE : FM_KG_KGAR_READ);
+
+ return (u32)(FM_KG_KGAR_GO |
+ rw |
+ FM_KG_KGAR_SEL_SCHEME_ENTRY |
+ DUMMY_PORT_ID |
+ ((u32)scheme_id << FM_KG_KGAR_NUM_SHIFT) |
+ (update_counter ? FM_KG_KGAR_SCM_WSEL_UPDATE_CNT : 0));
+}
+
+/* build_ar_bind_scheme
+ *
+ * Build Action Register value for port binding to schemes
+ *
+ * hwport_id: HW Port ID
+ * write: true for action to write the bind or false for read action
+ *
+ * Return: AR value
+ */
+static u32 build_ar_bind_scheme(u8 hwport_id, bool write)
+{
+ u32 rw = write ? (u32)FM_KG_KGAR_WRITE : (u32)FM_KG_KGAR_READ;
+
+ return (u32)(FM_KG_KGAR_GO |
+ rw |
+ FM_KG_KGAR_SEL_PORT_ENTRY |
+ hwport_id |
+ FM_KG_KGAR_SEL_PORT_WSEL_SP);
+}
+
+/* keygen_write_sp
+ *
+ * Write Scheme Partition Register with specified value
+ *
+ * regs: KeyGen Registers
+ * sp: Scheme Partition register value
+ * add: true to add a scheme partition or false to clear
+ *
+ * Return: none
+ */
+static void keygen_write_sp(struct fman_kg_regs __iomem *regs, u32 sp, bool add)
+{
+ u32 tmp;
+
+ tmp = ioread32be(&regs->fmkg_pe.fmkg_pe_sp);
+
+ if (add)
+ tmp |= sp;
+ else
+ tmp &= ~sp;
+
+ iowrite32be(tmp, &regs->fmkg_pe.fmkg_pe_sp);
+}
+
+/* build_ar_bind_cls_plan
+ *
+ * Build Action Register value for Classification Plan
+ *
+ * hwport_id: HW Port ID
+ * write: true for action to write the CP or false for read action
+ *
+ * Return: AR value
+ */
+static u32 build_ar_bind_cls_plan(u8 hwport_id, bool write)
+{
+ u32 rw = write ? (u32)FM_KG_KGAR_WRITE : (u32)FM_KG_KGAR_READ;
+
+ return (u32)(FM_KG_KGAR_GO |
+ rw |
+ FM_KG_KGAR_SEL_PORT_ENTRY |
+ hwport_id |
+ FM_KG_KGAR_SEL_PORT_WSEL_CPP);
+}
+
+/* keygen_write_cpp
+ *
+ * Write Classification Plan Partition Register with specified value
+ *
+ * regs: KeyGen Registers
+ * cpp: CPP register value
+ *
+ * Return: none
+ */
+static void keygen_write_cpp(struct fman_kg_regs __iomem *regs, u32 cpp)
+{
+ iowrite32be(cpp, &regs->fmkg_pe.fmkg_pe_cpp);
+}
+
+/* keygen_write_scheme
+ *
+ * Write all Schemes Registers with specified values
+ *
+ * regs: KeyGen Registers
+ * scheme_id: Scheme ID
+ * scheme_regs: Scheme registers values desired to be written
+ * update_counter: update scheme counter
+ *
+ * Return: Zero for success or error code in case of failure
+ */
+static int keygen_write_scheme(struct fman_kg_regs __iomem *regs, u8 scheme_id,
+ struct fman_kg_scheme_regs *scheme_regs,
+ bool update_counter)
+{
+ u32 ar_reg;
+ int err, i;
+
+ /* Write indirect scheme registers */
+ iowrite32be(scheme_regs->kgse_mode, &regs->fmkg_sch.kgse_mode);
+ iowrite32be(scheme_regs->kgse_ekfc, &regs->fmkg_sch.kgse_ekfc);
+ iowrite32be(scheme_regs->kgse_ekdv, &regs->fmkg_sch.kgse_ekdv);
+ iowrite32be(scheme_regs->kgse_bmch, &regs->fmkg_sch.kgse_bmch);
+ iowrite32be(scheme_regs->kgse_bmcl, &regs->fmkg_sch.kgse_bmcl);
+ iowrite32be(scheme_regs->kgse_fqb, &regs->fmkg_sch.kgse_fqb);
+ iowrite32be(scheme_regs->kgse_hc, &regs->fmkg_sch.kgse_hc);
+ iowrite32be(scheme_regs->kgse_ppc, &regs->fmkg_sch.kgse_ppc);
+ iowrite32be(scheme_regs->kgse_spc, &regs->fmkg_sch.kgse_spc);
+ iowrite32be(scheme_regs->kgse_dv0, &regs->fmkg_sch.kgse_dv0);
+ iowrite32be(scheme_regs->kgse_dv1, &regs->fmkg_sch.kgse_dv1);
+ iowrite32be(scheme_regs->kgse_ccbs, &regs->fmkg_sch.kgse_ccbs);
+ iowrite32be(scheme_regs->kgse_mv, &regs->fmkg_sch.kgse_mv);
+ iowrite32be(scheme_regs->kgse_om, &regs->fmkg_sch.kgse_om);
+ iowrite32be(scheme_regs->kgse_vsp, &regs->fmkg_sch.kgse_vsp);
+
+ for (i = 0 ; i < FM_KG_NUM_OF_GENERIC_REGS ; i++)
+ iowrite32be(scheme_regs->kgse_gec[i],
+ &regs->fmkg_sch.kgse_gec[i]);
+
+ /* Write AR (Action register) */
+ ar_reg = build_ar_scheme(scheme_id, update_counter, true);
+ err = keygen_write_ar_wait(regs, ar_reg);
+ if (err != 0) {
+ pr_err("Writing Action Register failed\n");
+ return err;
+ }
+
+ return err;
+}
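
The scheme RAM is programmed indirectly: values are staged in the memory-mapped shadow window and then committed with a single Action Register write. Condensed to the underlying pattern (hypothetical helper, using the fmkg_indirect window declared above):

	static int indirect_write_sketch(struct fman_kg_regs __iomem *regs,
					 const u32 *vals, int n, u32 commit_cmd)
	{
		int i;

		for (i = 0; i < n; i++)		/* stage into the shadow window */
			iowrite32be(vals[i], &regs->fmkg_indirect[i]);

		/* a single AR write commits the staged entry */
		return keygen_write_ar_wait(regs, commit_cmd);
	}
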
+
+/* get_free_scheme_id
+ *
+ * Find the first free scheme available to be used
+ *
+ * keygen: KeyGen handle
+ * scheme_id: pointer to scheme id
+ *
+ * Return: 0 on success, -EINVAL when there are no available free schemes
+ */
+static int get_free_scheme_id(struct fman_keygen *keygen, u8 *scheme_id)
+{
+ u8 i;
+
+ for (i = 0; i < FM_KG_MAX_NUM_OF_SCHEMES; i++)
+ if (!keygen->schemes[i].used) {
+ *scheme_id = i;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/* get_scheme
+ *
+ * Provides the scheme for specified ID
+ *
+ * keygen: KeyGen handle
+ * scheme_id: Scheme ID
+ *
+ * Return: handle to required scheme
+ */
+static struct keygen_scheme *get_scheme(struct fman_keygen *keygen,
+ u8 scheme_id)
+{
+ if (scheme_id >= FM_KG_MAX_NUM_OF_SCHEMES)
+ return NULL;
+ return &keygen->schemes[scheme_id];
+}
+
+/* keygen_bind_port_to_schemes
+ *
+ * Bind the port to schemes
+ *
+ * keygen: KeyGen handle
+ * scheme_id: id of the scheme to bind to
+ * bind: true to bind the port or false to unbind it
+ *
+ * Return: Zero for success or error code in case of failure
+ */
+static int keygen_bind_port_to_schemes(struct fman_keygen *keygen,
+ u8 scheme_id,
+ bool bind)
+{
+ struct fman_kg_regs __iomem *keygen_regs = keygen->keygen_regs;
+ struct keygen_scheme *scheme;
+ u32 ar_reg;
+ u32 schemes_vector = 0;
+ int err;
+
+ scheme = get_scheme(keygen, scheme_id);
+ if (!scheme) {
+ pr_err("Requested Scheme does not exist\n");
+ return -EINVAL;
+ }
+ if (!scheme->used) {
+ pr_err("Cannot bind port to an invalid scheme\n");
+ return -EINVAL;
+ }
+
+ schemes_vector |= 1 << (31 - scheme_id);
+
+ ar_reg = build_ar_bind_scheme(scheme->hw_port_id, false);
+ err = keygen_write_ar_wait(keygen_regs, ar_reg);
+ if (err != 0) {
+ pr_err("Reading Action Register failed\n");
+ return err;
+ }
+
+ keygen_write_sp(keygen_regs, schemes_vector, bind);
+
+ ar_reg = build_ar_bind_scheme(scheme->hw_port_id, true);
+ err = keygen_write_ar_wait(keygen_regs, ar_reg);
+ if (err != 0) {
+ pr_err("Writing Action Register failed\n");
+ return err;
+ }
+
+ return 0;
+}
+
+/* keygen_scheme_setup
+ *
+ * Setup the scheme according to required configuration
+ *
+ * keygen: KeyGen handle
+ * scheme_id: scheme ID
+ * enable: true to enable scheme or false to disable it
+ *
+ * Return: Zero for success or error code in case of failure
+ */
+static int keygen_scheme_setup(struct fman_keygen *keygen, u8 scheme_id,
+ bool enable)
+{
+ struct fman_kg_regs __iomem *keygen_regs = keygen->keygen_regs;
+ struct fman_kg_scheme_regs scheme_regs;
+ struct keygen_scheme *scheme;
+ u32 tmp_reg;
+ int err;
+
+ scheme = get_scheme(keygen, scheme_id);
+ if (!scheme) {
+ pr_err("Requested Scheme does not exist\n");
+ return -EINVAL;
+ }
+ if (enable && scheme->used) {
+ pr_err("The requested Scheme is already used\n");
+ return -EINVAL;
+ }
+
+ /* Clear scheme registers */
+ memset(&scheme_regs, 0, sizeof(struct fman_kg_scheme_regs));
+
+ /* Setup all scheme registers: */
+ tmp_reg = 0;
+
+ if (enable) {
+ /* Enable Scheme */
+ tmp_reg |= KG_SCH_MODE_EN;
+ /* Enqueue frame NIA */
+ tmp_reg |= ENQUEUE_KG_DFLT_NIA;
+ }
+
+ scheme_regs.kgse_mode = tmp_reg;
+
+ scheme_regs.kgse_mv = scheme->match_vector;
+
+ /* Scheme doesn't override the Storage Profile:
+ * valid only for DPAA_VERSION >= 11
+ */
+ scheme_regs.kgse_vsp = KG_SCH_VSP_NO_KSP_EN;
+
+ /* Configure Hard-Coded Rx Hashing: */
+
+ if (scheme->use_hashing) {
+ /* configure kgse_ekfc */
+ scheme_regs.kgse_ekfc = DEFAULT_HASH_KEY_EXTRACT_FIELDS;
+
+ /* configure kgse_ekdv */
+ tmp_reg = 0;
+ tmp_reg |= (KG_SCH_DEF_USE_KGSE_DV_0 <<
+ KG_SCH_DEF_IP_ADDR_SHIFT);
+ tmp_reg |= (KG_SCH_DEF_USE_KGSE_DV_1 <<
+ KG_SCH_DEF_L4_PORT_SHIFT);
+ scheme_regs.kgse_ekdv = tmp_reg;
+
+ /* configure kgse_dv0 */
+ scheme_regs.kgse_dv0 = DEFAULT_HASH_KEY_IPv4_ADDR;
+ /* configure kgse_dv1 */
+ scheme_regs.kgse_dv1 = DEFAULT_HASH_KEY_L4_PORT;
+
+ /* configure kgse_hc */
+ tmp_reg = 0;
+ tmp_reg |= ((scheme->hash_fqid_count - 1) <<
+ DEFAULT_HASH_DIST_FQID_SHIFT);
+ tmp_reg |= scheme->hash_shift << KG_SCH_HASH_CONFIG_SHIFT_SHIFT;
+
+ if (scheme->symmetric_hash) {
+ /* Normally the extraction key should be verified to
+ * comply with symmetric hash.
+ * But because extraction is hard-coded, we are sure
+ * the key is symmetric
+ */
+ tmp_reg |= KG_SCH_HASH_CONFIG_SYM;
+ }
+ scheme_regs.kgse_hc = tmp_reg;
+ } else {
+ scheme_regs.kgse_ekfc = 0;
+ scheme_regs.kgse_hc = 0;
+ scheme_regs.kgse_ekdv = 0;
+ scheme_regs.kgse_dv0 = 0;
+ scheme_regs.kgse_dv1 = 0;
+ }
+
+ /* configure kgse_fqb: Scheme FQID base */
+ tmp_reg = 0;
+ tmp_reg |= scheme->base_fqid;
+ scheme_regs.kgse_fqb = tmp_reg;
+
+ /* features not used by hard-coded configuration */
+ scheme_regs.kgse_bmch = 0;
+ scheme_regs.kgse_bmcl = 0;
+ scheme_regs.kgse_spc = 0;
+
+ /* Write scheme registers */
+ err = keygen_write_scheme(keygen_regs, scheme_id, &scheme_regs, true);
+ if (err != 0) {
+ pr_err("Writing scheme registers failed\n");
+ return err;
+ }
+
+ /* Update used field for Scheme */
+ scheme->used = enable;
+
+ return 0;
+}
+
+/* keygen_init
+ *
+ * KeyGen initialization:
+ * Initializes and enables KeyGen: allocates driver memory, sets up
+ * registers, clears port bindings and invalidates all schemes
+ *
+ * keygen_regs: KeyGen registers base address
+ *
+ * Return: Handle to KeyGen driver
+ */
+struct fman_keygen *keygen_init(struct fman_kg_regs __iomem *keygen_regs)
+{
+ struct fman_keygen *keygen;
+ u32 ar;
+ int i;
+
+ /* Allocate memory for KeyGen driver */
+ keygen = kzalloc(sizeof(*keygen), GFP_KERNEL);
+ if (!keygen)
+ return NULL;
+
+ keygen->keygen_regs = keygen_regs;
+
+ /* KeyGen initialization (for Master partition):
+ * Setup KeyGen registers
+ */
+ iowrite32be(ENQUEUE_KG_DFLT_NIA, &keygen_regs->fmkg_gcr);
+
+ iowrite32be(FM_EX_KG_DOUBLE_ECC | FM_EX_KG_KEYSIZE_OVERFLOW,
+ &keygen_regs->fmkg_eer);
+
+ iowrite32be(0, &keygen_regs->fmkg_fdor);
+ iowrite32be(0, &keygen_regs->fmkg_gdv0r);
+ iowrite32be(0, &keygen_regs->fmkg_gdv1r);
+
+ /* Clear binding between ports to schemes and classification plans
+ * so that all ports are not bound to any scheme/classification plan
+ */
+ for (i = 0; i < FMAN_MAX_NUM_OF_HW_PORTS; i++) {
+ /* Clear all pe sp schemes registers */
+ keygen_write_sp(keygen_regs, 0xffffffff, false);
+ ar = build_ar_bind_scheme(i, true);
+ keygen_write_ar_wait(keygen_regs, ar);
+
+ /* Clear all pe cpp classification plans registers */
+ keygen_write_cpp(keygen_regs, 0);
+ ar = build_ar_bind_cls_plan(i, true);
+ keygen_write_ar_wait(keygen_regs, ar);
+ }
+
+ /* Enable all scheme interrupts */
+ iowrite32be(0xFFFFFFFF, &keygen_regs->fmkg_seer);
+ iowrite32be(0xFFFFFFFF, &keygen_regs->fmkg_seeer);
+
+ /* Enable KeyGen */
+ iowrite32be(ioread32be(&keygen_regs->fmkg_gcr) | FM_KG_KGGCR_EN,
+ &keygen_regs->fmkg_gcr);
+
+ return keygen;
+}
+EXPORT_SYMBOL(keygen_init);
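A hypothetical caller (the real one lives in fman.c, outside this hunk; variable names here are illustrative) needs no more than:

    struct fman_keygen *kg;

    kg = keygen_init(keygen_regs);	/* keygen_regs: ioremapped base */
    if (!kg)
    	return -ENOMEM;	/* keygen_init() only fails on allocation, as seen above */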
+
+/* keygen_port_hashing_init
+ *
+ * Initializes a port for Rx Hashing with specified configuration parameters
+ *
+ * keygen: KeyGen handle
+ * hw_port_id: HW Port ID
+ * hash_base_fqid: Hashing Base FQID used for spreading
+ * hash_size: Hashing size
+ *
+ * Return: Zero for success or error code in case of failure
+ */
+int keygen_port_hashing_init(struct fman_keygen *keygen, u8 hw_port_id,
+ u32 hash_base_fqid, u32 hash_size)
+{
+ struct keygen_scheme *scheme;
+ u8 scheme_id;
+ int err;
+
+ /* Validate Scheme configuration parameters */
+ if (hash_base_fqid == 0 || (hash_base_fqid & ~0x00FFFFFF)) {
+ pr_err("Base FQID must be between 1 and 2^24-1\n");
+ return -EINVAL;
+ }
+ if (hash_size == 0 || (hash_size & (hash_size - 1)) != 0) {
+ pr_err("Hash size must be power of two\n");
+ return -EINVAL;
+ }
+
+ /* Find a free scheme */
+ err = get_free_scheme_id(keygen, &scheme_id);
+ if (err) {
+ pr_err("The maximum number of available Schemes has been exceeded\n");
+ return -EINVAL;
+ }
+
+ /* Create and configure Hard-Coded Scheme: */
+
+ scheme = get_scheme(keygen, scheme_id);
+ if (!scheme) {
+ pr_err("Requested Scheme does not exist\n");
+ return -EINVAL;
+ }
+ if (scheme->used) {
+ pr_err("The requested Scheme is already used\n");
+ return -EINVAL;
+ }
+
+ /* Clear all scheme fields because the scheme may have been
+ * previously used
+ */
+ memset(scheme, 0, sizeof(struct keygen_scheme));
+
+ /* Setup scheme: */
+ scheme->hw_port_id = hw_port_id;
+ scheme->use_hashing = true;
+ scheme->base_fqid = hash_base_fqid;
+ scheme->hash_fqid_count = hash_size;
+ scheme->symmetric_hash = DEFAULT_SYMMETRIC_HASH;
+ scheme->hashShift = DEFAULT_HASH_SHIFT;
+
+ /* All Schemes in hard-coded configuration
+ * are Indirect Schemes
+ */
+ scheme->match_vector = 0;
+
+ err = keygen_scheme_setup(keygen, scheme_id, true);
+ if (err != 0) {
+ pr_err("Scheme setup failed\n");
+ return err;
+ }
+
+ /* Bind Rx port to Scheme */
+ err = keygen_bind_port_to_schemes(keygen, scheme_id, true);
+ if (err != 0) {
+ pr_err("Binding port to schemes failed\n");
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(keygen_port_hashing_init);
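A worked instance of the two validation checks in this function: hash_size = 64 passes, since 64 & 63 == 0; hash_size = 48 fails, since 48 & 47 == 32; and hash_size = 0 is rejected explicitly, because 0 & (0 - 1) would otherwise also be zero.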
diff --git a/drivers/net/ethernet/freescale/fman/fman_keygen.h b/drivers/net/ethernet/freescale/fman/fman_keygen.h
new file mode 100644
index 0000000..c4640de
--- /dev/null
+++ b/drivers/net/ethernet/freescale/fman/fman_keygen.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2017 NXP
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of NXP nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NXP ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL NXP BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __KEYGEN_H
+#define __KEYGEN_H
+
+#include <linux/io.h>
+
+struct fman_keygen;
+struct fman_kg_regs;
+
+struct fman_keygen *keygen_init(struct fman_kg_regs __iomem *keygen_regs);
+
+int keygen_port_hashing_init(struct fman_keygen *keygen, u8 hw_port_id,
+ u32 hash_base_fqid, u32 hash_size);
+
+#endif /* __KEYGEN_H */
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c
index 57bf44f..1789b20 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.c
+++ b/drivers/net/ethernet/freescale/fman/fman_port.c
@@ -32,10 +32,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include "fman_port.h"
-#include "fman.h"
-#include "fman_sp.h"
-
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/module.h>
@@ -45,6 +41,11 @@
#include <linux/delay.h>
#include <linux/libfdt_env.h>
+#include "fman.h"
+#include "fman_port.h"
+#include "fman_sp.h"
+#include "fman_keygen.h"
+
/* Queue ID */
#define DFLT_FQ_ID 0x00FFFFFF
@@ -184,6 +185,7 @@
#define NIA_ENG_QMI_ENQ 0x00540000
#define NIA_ENG_QMI_DEQ 0x00580000
#define NIA_ENG_HWP 0x00440000
+#define NIA_ENG_HWK 0x00480000
#define NIA_BMI_AC_ENQ_FRAME 0x00000002
#define NIA_BMI_AC_TX_RELEASE 0x000002C0
#define NIA_BMI_AC_RELEASE 0x000000C0
@@ -394,6 +396,8 @@ struct fman_port_bpools {
struct fman_port_cfg {
u32 dflt_fqid;
u32 err_fqid;
+ u32 pcd_base_fqid;
+ u32 pcd_fqs_count;
u8 deq_sp;
bool deq_high_priority;
enum fman_port_deq_type deq_type;
@@ -1271,6 +1275,10 @@ static void set_rx_dflt_cfg(struct fman_port *port,
port_params->specific_params.rx_params.err_fqid;
port->cfg->dflt_fqid =
port_params->specific_params.rx_params.dflt_fqid;
+ port->cfg->pcd_base_fqid =
+ port_params->specific_params.rx_params.pcd_base_fqid;
+ port->cfg->pcd_fqs_count =
+ port_params->specific_params.rx_params.pcd_fqs_count;
}
static void set_tx_dflt_cfg(struct fman_port *port,
@@ -1398,6 +1406,24 @@ err_port_cfg:
EXPORT_SYMBOL(fman_port_config);
/**
+ * fman_port_use_kg_hash
+ * port: A pointer to a FM Port module.
+ * enable: true to hash Rx traffic via the KeyGen, false to bypass it
+ * Sets the HW KeyGen or the BMI as the HW Parser next engine, enabling
+ * or bypassing the KeyGen hashing of Rx traffic
+ */
+void fman_port_use_kg_hash(struct fman_port *port, bool enable)
+{
+ if (enable)
+ /* After the Parser frames go to KeyGen */
+ iowrite32be(NIA_ENG_HWK, &port->bmi_regs->rx.fmbm_rfpne);
+ else
+ /* After the Parser frames go to BMI */
+ iowrite32be(NIA_ENG_BMI | NIA_BMI_AC_ENQ_FRAME,
+ &port->bmi_regs->rx.fmbm_rfpne);
+}
+EXPORT_SYMBOL(fman_port_use_kg_hash);
+
+/**
* fman_port_init
* port: A pointer to a FM Port module.
* Initializes the FM PORT module by defining the software structure and
@@ -1407,9 +1433,10 @@ EXPORT_SYMBOL(fman_port_config);
*/
int fman_port_init(struct fman_port *port)
{
+ struct fman_port_init_params params;
+ struct fman_keygen *keygen;
struct fman_port_cfg *cfg;
int err;
- struct fman_port_init_params params;
if (is_init_done(port->cfg))
return -EINVAL;
@@ -1472,6 +1499,17 @@ int fman_port_init(struct fman_port *port)
if (err)
return err;
+ if (port->cfg->pcd_fqs_count) {
+ keygen = port->dts_params.fman->keygen;
+ err = keygen_port_hashing_init(keygen, port->port_id,
+ port->cfg->pcd_base_fqid,
+ port->cfg->pcd_fqs_count);
+ if (err)
+ return err;
+
+ fman_port_use_kg_hash(port, true);
+ }
+
kfree(port->cfg);
port->cfg = NULL;
@@ -1682,6 +1720,17 @@ u32 fman_port_get_qman_channel_id(struct fman_port *port)
}
EXPORT_SYMBOL(fman_port_get_qman_channel_id);
+int fman_port_get_hash_result_offset(struct fman_port *port, u32 *offset)
+{
+ if (port->buffer_offsets.hash_result_offset == ILLEGAL_BASE)
+ return -EINVAL;
+
+ *offset = port->buffer_offsets.hash_result_offset;
+
+ return 0;
+}
+EXPORT_SYMBOL(fman_port_get_hash_result_offset);
+
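A hypothetical consumer of this accessor (the buffer layout and the 64-bit big-endian width of the hash result are assumptions here, not taken from this patch; rx_port and frame_start are illustrative names for the bound Rx port and a received frame's buffer start):

    u32 hash_off;
    u64 hash;

    if (!fman_port_get_hash_result_offset(rx_port, &hash_off))
    	hash = be64_to_cpu(*(const __be64 *)(frame_start + hash_off));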
static int fman_port_probe(struct platform_device *of_dev)
{
struct fman_port *port;
@@ -1720,8 +1769,8 @@ static int fman_port_probe(struct platform_device *of_dev)
err = of_property_read_u32(port_node, "cell-index", &val);
if (err) {
- dev_err(port->dev, "%s: reading cell-index for %s failed\n",
- __func__, port_node->full_name);
+ dev_err(port->dev, "%s: reading cell-index for %pOF failed\n",
+ __func__, port_node);
err = -EINVAL;
goto return_err;
}
diff --git a/drivers/net/ethernet/freescale/fman/fman_port.h b/drivers/net/ethernet/freescale/fman/fman_port.h
index 8ba9017..e86ca6a 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.h
+++ b/drivers/net/ethernet/freescale/fman/fman_port.h
@@ -100,6 +100,9 @@ struct fman_port;
struct fman_port_rx_params {
u32 err_fqid; /* Error Queue Id. */
u32 dflt_fqid; /* Default Queue Id. */
+ u32 pcd_base_fqid; /* PCD base Queue Id. */
+ u32 pcd_fqs_count; /* Number of PCD FQs. */
+
/* Which external buffer pools are used
* (up to FMAN_PORT_MAX_EXT_POOLS_NUM), and their sizes.
*/
@@ -134,6 +137,8 @@ struct fman_port_params {
int fman_port_config(struct fman_port *port, struct fman_port_params *params);
+void fman_port_use_kg_hash(struct fman_port *port, bool enable);
+
int fman_port_init(struct fman_port *port);
int fman_port_cfg_buf_prefix_content(struct fman_port *port,
@@ -146,6 +151,8 @@ int fman_port_enable(struct fman_port *port);
u32 fman_port_get_qman_channel_id(struct fman_port *port);
+int fman_port_get_hash_result_offset(struct fman_port *port, u32 *offset);
+
struct fman_port *fman_port_bind(struct device *dev);
#endif /* __FMAN_PORT_H */
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 1c7da16..387eb4a 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -700,8 +700,8 @@ static int mac_probe(struct platform_device *_of_dev)
priv->internal_phy_node = of_parse_phandle(mac_node,
"pcsphy-handle", 0);
} else {
- dev_err(dev, "MAC node (%s) contains unsupported MAC\n",
- mac_node->full_name);
+ dev_err(dev, "MAC node (%pOF) contains unsupported MAC\n",
+ mac_node);
err = -EINVAL;
goto _return;
}
@@ -714,16 +714,15 @@ static int mac_probe(struct platform_device *_of_dev)
/* Get the FM node */
dev_node = of_get_parent(mac_node);
if (!dev_node) {
- dev_err(dev, "of_get_parent(%s) failed\n",
- mac_node->full_name);
+ dev_err(dev, "of_get_parent(%pOF) failed\n",
+ mac_node);
err = -EINVAL;
goto _return_dev_set_drvdata;
}
of_dev = of_find_device_by_node(dev_node);
if (!of_dev) {
- dev_err(dev, "of_find_device_by_node(%s) failed\n",
- dev_node->full_name);
+ dev_err(dev, "of_find_device_by_node(%pOF) failed\n", dev_node);
err = -EINVAL;
goto _return_of_node_put;
}
@@ -731,8 +730,7 @@ static int mac_probe(struct platform_device *_of_dev)
/* Get the FMan cell-index */
err = of_property_read_u32(dev_node, "cell-index", &val);
if (err) {
- dev_err(dev, "failed to read cell-index for %s\n",
- dev_node->full_name);
+ dev_err(dev, "failed to read cell-index for %pOF\n", dev_node);
err = -EINVAL;
goto _return_of_node_put;
}
@@ -741,7 +739,7 @@ static int mac_probe(struct platform_device *_of_dev)
priv->fman = fman_bind(&of_dev->dev);
if (!priv->fman) {
- dev_err(dev, "fman_bind(%s) failed\n", dev_node->full_name);
+ dev_err(dev, "fman_bind(%pOF) failed\n", dev_node);
err = -ENODEV;
goto _return_of_node_put;
}
@@ -751,8 +749,8 @@ static int mac_probe(struct platform_device *_of_dev)
/* Get the address of the memory mapped registers */
err = of_address_to_resource(mac_node, 0, &res);
if (err < 0) {
- dev_err(dev, "of_address_to_resource(%s) = %d\n",
- mac_node->full_name, err);
+ dev_err(dev, "of_address_to_resource(%pOF) = %d\n",
+ mac_node, err);
goto _return_dev_set_drvdata;
}
@@ -786,8 +784,7 @@ static int mac_probe(struct platform_device *_of_dev)
/* Get the cell-index */
err = of_property_read_u32(mac_node, "cell-index", &val);
if (err) {
- dev_err(dev, "failed to read cell-index for %s\n",
- mac_node->full_name);
+ dev_err(dev, "failed to read cell-index for %pOF\n", mac_node);
err = -EINVAL;
goto _return_dev_set_drvdata;
}
@@ -796,8 +793,7 @@ static int mac_probe(struct platform_device *_of_dev)
/* Get the MAC address */
mac_addr = of_get_mac_address(mac_node);
if (!mac_addr) {
- dev_err(dev, "of_get_mac_address(%s) failed\n",
- mac_node->full_name);
+ dev_err(dev, "of_get_mac_address(%pOF) failed\n", mac_node);
err = -EINVAL;
goto _return_dev_set_drvdata;
}
@@ -806,15 +802,15 @@ static int mac_probe(struct platform_device *_of_dev)
/* Get the port handles */
nph = of_count_phandle_with_args(mac_node, "fsl,fman-ports", NULL);
if (unlikely(nph < 0)) {
- dev_err(dev, "of_count_phandle_with_args(%s, fsl,fman-ports) failed\n",
- mac_node->full_name);
+ dev_err(dev, "of_count_phandle_with_args(%pOF, fsl,fman-ports) failed\n",
+ mac_node);
err = nph;
goto _return_dev_set_drvdata;
}
if (nph != ARRAY_SIZE(mac_dev->port)) {
- dev_err(dev, "Not supported number of fman-ports handles of mac node %s from device tree\n",
- mac_node->full_name);
+ dev_err(dev, "Not supported number of fman-ports handles of mac node %pOF from device tree\n",
+ mac_node);
err = -EINVAL;
goto _return_dev_set_drvdata;
}
@@ -823,24 +819,24 @@ static int mac_probe(struct platform_device *_of_dev)
/* Find the port node */
dev_node = of_parse_phandle(mac_node, "fsl,fman-ports", i);
if (!dev_node) {
- dev_err(dev, "of_parse_phandle(%s, fsl,fman-ports) failed\n",
- mac_node->full_name);
+ dev_err(dev, "of_parse_phandle(%pOF, fsl,fman-ports) failed\n",
+ mac_node);
err = -EINVAL;
goto _return_of_node_put;
}
of_dev = of_find_device_by_node(dev_node);
if (!of_dev) {
- dev_err(dev, "of_find_device_by_node(%s) failed\n",
- dev_node->full_name);
+ dev_err(dev, "of_find_device_by_node(%pOF) failed\n",
+ dev_node);
err = -EINVAL;
goto _return_of_node_put;
}
mac_dev->port[i] = fman_port_bind(&of_dev->dev);
if (!mac_dev->port[i]) {
- dev_err(dev, "dev_get_drvdata(%s) failed\n",
- dev_node->full_name);
+ dev_err(dev, "dev_get_drvdata(%pOF) failed\n",
+ dev_node);
err = -EINVAL;
goto _return_of_node_put;
}
@@ -851,8 +847,8 @@ static int mac_probe(struct platform_device *_of_dev)
phy_if = of_get_phy_mode(mac_node);
if (phy_if < 0) {
dev_warn(dev,
- "of_get_phy_mode() for %s failed. Defaulting to SGMII\n",
- mac_node->full_name);
+ "of_get_phy_mode() for %pOF failed. Defaulting to SGMII\n",
+ mac_node);
phy_if = PHY_INTERFACE_MODE_SGMII;
}
priv->phy_if = phy_if;
diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
index 1f015ed..c8e5d88 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c
@@ -100,7 +100,7 @@ static inline void mdc(struct mdiobb_ctrl *ctrl, int what)
in_be32(bitbang->dat);
}
-static struct mdiobb_ops bb_ops = {
+static const struct mdiobb_ops bb_ops = {
.owner = THIS_MODULE,
.set_mdc = mdc,
.set_mdio_dir = mdio_dir,
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index a10de1e..80ad16a 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -267,8 +267,8 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end)
ret = of_address_to_resource(np, 0, &res);
if (ret < 0) {
- pr_debug("fsl-pq-mdio: no address range in node %s\n",
- np->full_name);
+ pr_debug("fsl-pq-mdio: no address range in node %pOF\n",
+ np);
continue;
}
@@ -280,8 +280,8 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end)
if (!iprop) {
iprop = of_get_property(np, "device-id", NULL);
if (!iprop) {
- pr_debug("fsl-pq-mdio: no UCC ID in node %s\n",
- np->full_name);
+ pr_debug("fsl-pq-mdio: no UCC ID in node %pOF\n",
+ np);
continue;
}
}
@@ -293,8 +293,8 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end)
* numbered from 1, not 0.
*/
if (ucc_set_qe_mux_mii_mng(id - 1) < 0) {
- pr_debug("fsl-pq-mdio: invalid UCC ID in node %s\n",
- np->full_name);
+ pr_debug("fsl-pq-mdio: invalid UCC ID in node %pOF\n",
+ np);
continue;
}
@@ -442,8 +442,8 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
if (data->get_tbipa) {
for_each_child_of_node(np, tbi) {
if (strcmp(tbi->type, "tbi-phy") == 0) {
- dev_dbg(&pdev->dev, "found TBI PHY node %s\n",
- strrchr(tbi->full_name, '/') + 1);
+ dev_dbg(&pdev->dev, "found TBI PHY node %pOFP\n",
+ tbi);
break;
}
}
@@ -454,8 +454,8 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
if (!prop) {
dev_err(&pdev->dev,
- "missing 'reg' property in node %s\n",
- tbi->full_name);
+ "missing 'reg' property in node %pOF\n",
+ tbi);
err = -EBUSY;
goto error;
}
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index c4b4b0a..5be52d8 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -3687,7 +3687,7 @@ static noinline void gfar_update_link_state(struct gfar_private *priv)
u32 tempval1 = gfar_read(&regs->maccfg1);
u32 tempval = gfar_read(&regs->maccfg2);
u32 ecntrl = gfar_read(&regs->ecntrl);
- u32 tx_flow_oldval = (tempval & MACCFG1_TX_FLOW);
+ u32 tx_flow_oldval = (tempval1 & MACCFG1_TX_FLOW);
if (phydev->duplex != priv->oldduplex) {
if (!(phydev->duplex))
diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c
index 721be13..5441142 100644
--- a/drivers/net/ethernet/freescale/gianfar_ptp.c
+++ b/drivers/net/ethernet/freescale/gianfar_ptp.c
@@ -411,7 +411,7 @@ static int ptp_gianfar_enable(struct ptp_clock_info *ptp,
return -EOPNOTSUPP;
}
-static struct ptp_clock_info ptp_gianfar_caps = {
+static const struct ptp_clock_info ptp_gianfar_caps = {
.owner = THIS_MODULE,
.name = "gianfar clock",
.max_adj = 512000,
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index d11287e..91c7bdb 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -76,4 +76,31 @@ config HNS_ENET
This selects the general ethernet driver for HNS. This module make
use of any HNS AE driver, such as HNS_DSAF
+config HNS3
+ tristate "Hisilicon Network Subsystem Support HNS3 (Framework)"
+ depends on PCI
+ ---help---
+ This selects the framework support for Hisilicon Network Subsystem 3.
+ This layer facilitates clients like ENET, RoCE and user-space ethernet
+ drivers (like ODP) to register with HNAE devices and their associated
+ operations.
+
+config HNS3_HCLGE
+ tristate "Hisilicon HNS3 HCLGE Acceleration Engine & Compatibility Layer Support"
+ depends on PCI_MSI
+ depends on HNS3
+ ---help---
+ This selects the HNS3_HCLGE network acceleration engine & its hardware
+ compatibility layer. The engine is used in the Hisilicon hip08 family of
+ SoCs and in further upcoming SoCs.
+
+config HNS3_ENET
+ tristate "Hisilicon HNS3 Ethernet Device Support"
+ depends on 64BIT && PCI
+ depends on HNS3 && HNS3_HCLGE
+ ---help---
+ This selects the Ethernet Driver for Hisilicon Network Subsystem 3 for the
+ hip08 family of SoCs. This module depends on the HNAE3 driver to access
+ HNAE3 devices and their associated operations.
+
endif # NET_VENDOR_HISILICON
diff --git a/drivers/net/ethernet/hisilicon/Makefile b/drivers/net/ethernet/hisilicon/Makefile
index 8661695..3828c43 100644
--- a/drivers/net/ethernet/hisilicon/Makefile
+++ b/drivers/net/ethernet/hisilicon/Makefile
@@ -6,4 +6,5 @@ obj-$(CONFIG_HIX5HD2_GMAC) += hix5hd2_gmac.o
obj-$(CONFIG_HIP04_ETH) += hip04_eth.o
obj-$(CONFIG_HNS_MDIO) += hns_mdio.o
obj-$(CONFIG_HNS) += hns/
+obj-$(CONFIG_HNS3) += hns3/
obj-$(CONFIG_HISI_FEMAC) += hisi_femac.o
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c
index 9d9b6e6..a051e58 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.c
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.c
@@ -202,6 +202,7 @@ hnae_init_ring(struct hnae_queue *q, struct hnae_ring *ring, int flags)
ring->q = q;
ring->flags = flags;
spin_lock_init(&ring->lock);
+ ring->coal_param = q->handle->coal_param;
assert(!ring->desc && !ring->desc_cb && !ring->desc_dma_addr);
/* not matter for tx or rx ring, the ntc and ntc start from 0 */
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 7ba653a..3e62692 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -89,6 +89,10 @@ do { \
#define RCB_RING_NAME_LEN 16
+#define HNAE_LOWEST_LATENCY_COAL_PARAM 30
+#define HNAE_LOW_LATENCY_COAL_PARAM 80
+#define HNAE_BULK_LATENCY_COAL_PARAM 150
+
enum hnae_led_state {
HNAE_LED_INACTIVE,
HNAE_LED_ACTIVE,
@@ -292,6 +296,12 @@ struct hnae_ring {
int flags; /* ring attribute */
int irq_init_flag;
+
+ /* total rx bytes after the last rx rate calculation */
+ u64 coal_last_rx_bytes;
+ unsigned long coal_last_jiffies;
+ u32 coal_param;
+ u32 coal_rx_rate; /* rx rate in MB */
};
#define ring_ptr_move_fw(ring, p) \
@@ -548,8 +558,13 @@ struct hnae_handle {
u32 if_support;
int q_num;
int vf_id;
+ unsigned long coal_last_jiffies;
+ u32 coal_param; /* self-adaptive coalesce param */
+ /* the ring index of last ring that set coal param */
+ u32 coal_ring_idx;
u32 eport_id;
u32 dport_id; /* v2 tx bd should fill the dport_id */
+ bool coal_adapt_en;
enum hnae_port_type port_type;
enum hnae_media_type media_type;
struct list_head node; /* list to hnae_ae_dev->handle_list */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index a37166e..bd68379 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -99,6 +99,7 @@ struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev,
ae_handle->owner_dev = dsaf_dev->dev;
ae_handle->dev = dev;
ae_handle->q_num = qnum_per_vf;
+ ae_handle->coal_param = HNAE_LOWEST_LATENCY_COAL_PARAM;
/* find ring pair, and set vf id*/
for (ae_handle->vf_id = 0;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 3987699..3652063 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -812,6 +812,113 @@ static int hns_desc_unused(struct hnae_ring *ring)
return ((ntc >= ntu) ? 0 : ring->desc_num) + ntc - ntu;
}
+#define HNS_LOWEST_LATENCY_RATE 27 /* 27 MB/s */
+#define HNS_LOW_LATENCY_RATE 80 /* 80 MB/s */
+
+#define HNS_COAL_BDNUM 3
+
+static u32 hns_coal_rx_bdnum(struct hnae_ring *ring)
+{
+ bool coal_enable = ring->q->handle->coal_adapt_en;
+
+ if (coal_enable &&
+ ring->coal_last_rx_bytes > HNS_LOWEST_LATENCY_RATE)
+ return HNS_COAL_BDNUM;
+ else
+ return 0;
+}
+
+static void hns_update_rx_rate(struct hnae_ring *ring)
+{
+ bool coal_enable = ring->q->handle->coal_adapt_en;
+ u32 time_passed_ms;
+ u64 total_bytes;
+
+ if (!coal_enable ||
+ time_before(jiffies, ring->coal_last_jiffies + (HZ >> 4)))
+ return;
+
+ /* ring->stats.rx_bytes overflowed */
+ if (ring->coal_last_rx_bytes > ring->stats.rx_bytes) {
+ ring->coal_last_rx_bytes = ring->stats.rx_bytes;
+ ring->coal_last_jiffies = jiffies;
+ return;
+ }
+
+ total_bytes = ring->stats.rx_bytes - ring->coal_last_rx_bytes;
+ time_passed_ms = jiffies_to_msecs(jiffies - ring->coal_last_jiffies);
+ do_div(total_bytes, time_passed_ms);
+ ring->coal_rx_rate = total_bytes >> 10;
+
+ ring->coal_last_rx_bytes = ring->stats.rx_bytes;
+ ring->coal_last_jiffies = jiffies;
+}
+
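To make the arithmetic in hns_update_rx_rate() concrete: after do_div(), total_bytes holds bytes per millisecond, and since one byte per millisecond is roughly one kilobyte per second, the >> 10 lands in megabytes per second. For example, 6,553,600 bytes over 50 ms gives 131,072 bytes/ms, and 131072 >> 10 = 128, i.e. a coal_rx_rate of about 128 MB/s.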
+/**
+ * smooth_alg - smoothing algorithm for adjusting the coalesce parameter
+ **/
+static u32 smooth_alg(u32 new_param, u32 old_param)
+{
+ u32 gap = (new_param > old_param) ? new_param - old_param
+ : old_param - new_param;
+
+ if (gap > 8)
+ gap >>= 3;
+
+ if (new_param > old_param)
+ return old_param + gap;
+ else
+ return old_param - gap;
+}
+
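A worked run of smooth_alg(): with old_param = 30 and new_param = 150, gap = 120, which is > 8 and so becomes 15, giving a result of 45. Feeding that back in as (150, 45) yields 58, then 69, and so on; the parameter approaches the target in shrinking steps rather than jumping, which is the point of the smoothing.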
+/**
+ * hns_nic_adpt_coalesce - self-adaptive coalesce tuning according to rx rate
+ * @ring_data: pointer to hns_nic_ring_data
+ **/
+static void hns_nic_adpt_coalesce(struct hns_nic_ring_data *ring_data)
+{
+ struct hnae_ring *ring = ring_data->ring;
+ struct hnae_handle *handle = ring->q->handle;
+ u32 new_coal_param, old_coal_param = ring->coal_param;
+
+ if (ring->coal_rx_rate < HNS_LOWEST_LATENCY_RATE)
+ new_coal_param = HNAE_LOWEST_LATENCY_COAL_PARAM;
+ else if (ring->coal_rx_rate < HNS_LOW_LATENCY_RATE)
+ new_coal_param = HNAE_LOW_LATENCY_COAL_PARAM;
+ else
+ new_coal_param = HNAE_BULK_LATENCY_COAL_PARAM;
+
+ if (new_coal_param == old_coal_param &&
+ new_coal_param == handle->coal_param)
+ return;
+
+ new_coal_param = smooth_alg(new_coal_param, old_coal_param);
+ ring->coal_param = new_coal_param;
+
+ /* Because all rings in one port share one coalesce param, when one
+ * ring calculates its own coalesce param, it cannot write it to the
+ * hardware at once. It is written under three conditions:
+ * 1. the current ring's coalesce param is larger than the hardware's.
+ * 2. the ring which adapted last time is allowed to change again.
+ * 3. timeout.
+ */
+ if (new_coal_param == handle->coal_param) {
+ handle->coal_last_jiffies = jiffies;
+ handle->coal_ring_idx = ring_data->queue_index;
+ } else if (new_coal_param > handle->coal_param ||
+ handle->coal_ring_idx == ring_data->queue_index ||
+ time_after(jiffies, handle->coal_last_jiffies + (HZ >> 4))) {
+ handle->dev->ops->set_coalesce_usecs(handle,
+ new_coal_param);
+ handle->dev->ops->set_coalesce_frames(handle,
+ 1, new_coal_param);
+ handle->coal_param = new_coal_param;
+ handle->coal_ring_idx = ring_data->queue_index;
+ handle->coal_last_jiffies = jiffies;
+ }
+}
+
static int hns_nic_rx_poll_one(struct hns_nic_ring_data *ring_data,
int budget, void *v)
{
@@ -868,20 +975,27 @@ static bool hns_nic_rx_fini_pro(struct hns_nic_ring_data *ring_data)
{
struct hnae_ring *ring = ring_data->ring;
int num = 0;
+ bool rx_stopped;
- ring_data->ring->q->handle->dev->ops->toggle_ring_irq(ring, 0);
+ hns_update_rx_rate(ring);
/* for hardware bug fixed */
+ ring_data->ring->q->handle->dev->ops->toggle_ring_irq(ring, 0);
num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
- if (num > 0) {
+ if (num <= hns_coal_rx_bdnum(ring)) {
+ if (ring->q->handle->coal_adapt_en)
+ hns_nic_adpt_coalesce(ring_data);
+
+ rx_stopped = true;
+ } else {
ring_data->ring->q->handle->dev->ops->toggle_ring_irq(
ring_data->ring, 1);
- return false;
- } else {
- return true;
+ rx_stopped = false;
}
+
+ return rx_stopped;
}
static bool hns_nic_rx_fini_pro_v2(struct hns_nic_ring_data *ring_data)
@@ -889,12 +1003,17 @@ static bool hns_nic_rx_fini_pro_v2(struct hns_nic_ring_data *ring_data)
struct hnae_ring *ring = ring_data->ring;
int num;
+ hns_update_rx_rate(ring);
num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
- if (!num)
+ if (num <= hns_coal_rx_bdnum(ring)) {
+ if (ring->q->handle->coal_adapt_en)
+ hns_nic_adpt_coalesce(ring_data);
+
return true;
- else
- return false;
+ }
+
+ return false;
}
static inline void hns_nic_reclaim_one_desc(struct hnae_ring *ring,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
index 9cb4c78..26e9afc 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
@@ -38,7 +38,7 @@ struct hns_nic_ring_data {
struct hnae_ring *ring;
struct napi_struct napi;
cpumask_t mask; /* affinity mask */
- int queue_index;
+ u32 queue_index;
int (*poll_one)(struct hns_nic_ring_data *, int, void *);
void (*ex_process)(struct hns_nic_ring_data *, struct sk_buff *);
bool (*fini_process)(struct hns_nic_ring_data *);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index a8db27e..7ea7f8a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -595,7 +595,7 @@ static void hns_nic_self_test(struct net_device *ndev,
set_bit(NIC_STATE_TESTING, &priv->state);
if (if_running)
- (void)dev_close(ndev);
+ dev_close(ndev);
for (i = 0; i < SELF_TEST_TPYE_NUM; i++) {
if (!st_param[i][1])
@@ -735,8 +735,8 @@ static int hns_get_coalesce(struct net_device *net_dev,
ops = priv->ae_handle->dev->ops;
- ec->use_adaptive_rx_coalesce = 1;
- ec->use_adaptive_tx_coalesce = 1;
+ ec->use_adaptive_rx_coalesce = priv->ae_handle->coal_adapt_en;
+ ec->use_adaptive_tx_coalesce = priv->ae_handle->coal_adapt_en;
if ((!ops->get_coalesce_usecs) ||
(!ops->get_max_coalesced_frames))
@@ -787,6 +787,9 @@ static int hns_set_coalesce(struct net_device *net_dev,
(!ops->set_coalesce_frames))
return -ESRCH;
+ if (ec->use_adaptive_rx_coalesce != priv->ae_handle->coal_adapt_en)
+ priv->ae_handle->coal_adapt_en = ec->use_adaptive_rx_coalesce;
+
rc1 = ops->set_coalesce_usecs(priv->ae_handle,
ec->rx_coalesce_usecs);
diff --git a/drivers/net/ethernet/hisilicon/hns3/Makefile b/drivers/net/ethernet/hisilicon/hns3/Makefile
new file mode 100644
index 0000000..a9349e1
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the HISILICON network device drivers.
+#
+
+obj-$(CONFIG_HNS3) += hns3pf/
+
+obj-$(CONFIG_HNS3) += hnae3.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
new file mode 100644
index 0000000..59efbd6
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "hnae3.h"
+
+static LIST_HEAD(hnae3_ae_algo_list);
+static LIST_HEAD(hnae3_client_list);
+static LIST_HEAD(hnae3_ae_dev_list);
+
+/* we are keeping things simple and using a single lock for all the
+ * lists. This is non-critical code, so other updates, if they happen
+ * in parallel, can wait.
+ */
+static DEFINE_MUTEX(hnae3_common_lock);
+
+static bool hnae3_client_match(enum hnae3_client_type client_type,
+ enum hnae3_dev_type dev_type)
+{
+ if ((dev_type == HNAE3_DEV_KNIC) && (client_type == HNAE3_CLIENT_KNIC ||
+ client_type == HNAE3_CLIENT_ROCE))
+ return true;
+
+ if (dev_type == HNAE3_DEV_UNIC && client_type == HNAE3_CLIENT_UNIC)
+ return true;
+
+ return false;
+}
+
+static int hnae3_match_n_instantiate(struct hnae3_client *client,
+ struct hnae3_ae_dev *ae_dev,
+ bool is_reg, bool *matched)
+{
+ int ret;
+
+ *matched = false;
+
+ /* check if this client matches the type of ae_dev */
+ if (!(hnae3_client_match(client->type, ae_dev->dev_type) &&
+ hnae_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) {
+ return 0;
+ }
+ /* there is a match of client and dev */
+ *matched = true;
+
+ /* now, (un-)instantiate client by calling lower layer */
+ if (is_reg) {
+ ret = ae_dev->ops->init_client_instance(client, ae_dev);
+ if (ret)
+ dev_err(&ae_dev->pdev->dev,
+ "fail to instantiate client\n");
+ return ret;
+ }
+
+ ae_dev->ops->uninit_client_instance(client, ae_dev);
+ return 0;
+}
+
+int hnae3_register_client(struct hnae3_client *client)
+{
+ struct hnae3_client *client_tmp;
+ struct hnae3_ae_dev *ae_dev;
+ bool matched;
+ int ret = 0;
+
+ mutex_lock(&hnae3_common_lock);
+ /* one system should only have one client for every type */
+ list_for_each_entry(client_tmp, &hnae3_client_list, node) {
+ if (client_tmp->type == client->type)
+ goto exit;
+ }
+
+ list_add_tail(&client->node, &hnae3_client_list);
+
+ /* initialize the client on every matched port */
+ list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
+ /* if the client could not be initialized on the current port,
+ * for whatever reason, move on to the next available port
+ */
+ ret = hnae3_match_n_instantiate(client, ae_dev, true, &matched);
+ if (ret)
+ dev_err(&ae_dev->pdev->dev,
+ "match and instantiation failed for port\n");
+ }
+
+exit:
+ mutex_unlock(&hnae3_common_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(hnae3_register_client);
+
+void hnae3_unregister_client(struct hnae3_client *client)
+{
+ struct hnae3_ae_dev *ae_dev;
+ bool matched;
+
+ mutex_lock(&hnae3_common_lock);
+ /* un-initialize the client on every matched port */
+ list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
+ hnae3_match_n_instantiate(client, ae_dev, false, &matched);
+ }
+
+ list_del(&client->node);
+ mutex_unlock(&hnae3_common_lock);
+}
+EXPORT_SYMBOL(hnae3_unregister_client);
+
+/* hnae3_register_ae_algo - register an AE algorithm to the hnae3 framework
+ * @ae_algo: AE algorithm
+ * NOTE: duplicated names are not checked
+ */
+int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo)
+{
+ const struct pci_device_id *id;
+ struct hnae3_ae_dev *ae_dev;
+ struct hnae3_client *client;
+ bool matched;
+ int ret = 0;
+
+ mutex_lock(&hnae3_common_lock);
+
+ list_add_tail(&ae_algo->node, &hnae3_ae_algo_list);
+
+ /* Check if this algo/ops matches the list of ae_devs */
+ list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
+ id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
+ if (!id)
+ continue;
+
+ /* ae_dev init should set flag */
+ ae_dev->ops = ae_algo->ops;
+ ret = ae_algo->ops->init_ae_dev(ae_dev);
+ if (ret) {
+ dev_err(&ae_dev->pdev->dev, "init ae_dev error.\n");
+ continue;
+ }
+
+ hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
+
+ /* check the client list for a match with this ae_dev type and
+ * initialize the matched client instance
+ */
+ list_for_each_entry(client, &hnae3_client_list, node) {
+ ret = hnae3_match_n_instantiate(client, ae_dev, true,
+ &matched);
+ if (ret)
+ dev_err(&ae_dev->pdev->dev,
+ "match and instantiation failed\n");
+ if (matched)
+ break;
+ }
+ }
+
+ mutex_unlock(&hnae3_common_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(hnae3_register_ae_algo);
+
+/* hnae3_unregister_ae_algo - unregisters an AE algorithm
+ * @ae_algo: the AE algorithm to unregister
+ */
+void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo)
+{
+ const struct pci_device_id *id;
+ struct hnae3_ae_dev *ae_dev;
+ struct hnae3_client *client;
+ bool matched;
+
+ mutex_lock(&hnae3_common_lock);
+ /* Check if there are matched ae_dev */
+ list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) {
+ id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
+ if (!id)
+ continue;
+
+ /* check the client list for a match with this ae_dev type and
+ * un-initialize the matched client instance
+ */
+ list_for_each_entry(client, &hnae3_client_list, node) {
+ hnae3_match_n_instantiate(client, ae_dev, false,
+ &matched);
+ if (matched)
+ break;
+ }
+
+ ae_algo->ops->uninit_ae_dev(ae_dev);
+ hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
+ }
+
+ list_del(&ae_algo->node);
+ mutex_unlock(&hnae3_common_lock);
+}
+EXPORT_SYMBOL(hnae3_unregister_ae_algo);
+
+/* hnae3_register_ae_dev - registers an AE device to the hnae3 framework
+ * @ae_dev: the AE device
+ * NOTE: duplicated names are not checked
+ */
+int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+ const struct pci_device_id *id;
+ struct hnae3_ae_algo *ae_algo;
+ struct hnae3_client *client;
+ bool matched;
+ int ret = 0;
+
+ mutex_lock(&hnae3_common_lock);
+ list_add_tail(&ae_dev->node, &hnae3_ae_dev_list);
+
+ /* Check if there are matched ae_algo */
+ list_for_each_entry(ae_algo, &hnae3_ae_algo_list, node) {
+ id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
+ if (!id)
+ continue;
+
+ ae_dev->ops = ae_algo->ops;
+
+ if (!ae_dev->ops) {
+ dev_err(&ae_dev->pdev->dev, "ae_dev ops are null\n");
+ goto out_err;
+ }
+
+ /* ae_dev init should set flag */
+ ret = ae_dev->ops->init_ae_dev(ae_dev);
+ if (ret) {
+ dev_err(&ae_dev->pdev->dev, "init ae_dev error\n");
+ goto out_err;
+ }
+
+ hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);
+ break;
+ }
+
+ /* check the client list for a match with this ae_dev type and
+ * initialize the matched client instance
+ */
+ list_for_each_entry(client, &hnae3_client_list, node) {
+ ret = hnae3_match_n_instantiate(client, ae_dev, true,
+ &matched);
+ if (ret)
+ dev_err(&ae_dev->pdev->dev,
+ "match and instantiation failed\n");
+ if (matched)
+ break;
+ }
+
+out_err:
+ mutex_unlock(&hnae3_common_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(hnae3_register_ae_dev);
+
+/* hnae3_unregister_ae_dev - unregisters an AE device
+ * @ae_dev: the AE device to unregister
+ */
+void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+ const struct pci_device_id *id;
+ struct hnae3_ae_algo *ae_algo;
+ struct hnae3_client *client;
+ bool matched;
+
+ mutex_lock(&hnae3_common_lock);
+ /* Check if there are matched ae_algo */
+ list_for_each_entry(ae_algo, &hnae3_ae_algo_list, node) {
+ id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev);
+ if (!id)
+ continue;
+
+ list_for_each_entry(client, &hnae3_client_list, node) {
+ hnae3_match_n_instantiate(client, ae_dev, false,
+ &matched);
+ if (matched)
+ break;
+ }
+
+ ae_algo->ops->uninit_ae_dev(ae_dev);
+ hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0);
+ }
+
+ list_del(&ae_dev->node);
+ mutex_unlock(&hnae3_common_lock);
+}
+EXPORT_SYMBOL(hnae3_unregister_ae_dev);
+
+MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("HNAE3(Hisilicon Network Acceleration Engine) Framework");
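For orientation, a minimal client skeleton against the API above (hypothetical; every name below is illustrative and not part of this patch):

    static int my_init_instance(struct hnae3_handle *handle)
    {
    	/* set up netdev, rings, ... */
    	return 0;
    }

    static void my_uninit_instance(struct hnae3_handle *handle, bool reset)
    {
    }

    static const struct hnae3_client_ops my_client_ops = {
    	.init_instance = my_init_instance,
    	.uninit_instance = my_uninit_instance,
    };

    static struct hnae3_client my_client = {
    	.name = "my_knic",
    	.type = HNAE3_CLIENT_KNIC,
    	.ops = &my_client_ops,
    };

Calling hnae3_register_client(&my_client) at module init and hnae3_unregister_client(&my_client) at exit is then enough; the framework instantiates the client on every matching, initialized ae_dev.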
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
new file mode 100644
index 0000000..b2f28ae
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HNAE3_H
+#define __HNAE3_H
+
+/* Names used in this framework:
+ * ae handle (handle):
+ * a set of queues provided by AE
+ * ring buffer queue (rbq):
+ * the channel between upper layer and the AE, can do tx and rx
+ * ring:
+ * a tx or rx channel within a rbq
+ * ring description (desc):
+ * an element in the ring with packet information
+ * buffer:
+ * a memory region referred by desc with the full packet payload
+ *
+ * "num" means a static number set as a parameter, "count" mean a dynamic
+ * number set while running
+ * "cb" means control block
+ */
+
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+
+/* Device IDs */
+#define HNAE3_DEV_ID_GE 0xA220
+#define HNAE3_DEV_ID_25GE 0xA221
+#define HNAE3_DEV_ID_25GE_RDMA 0xA222
+#define HNAE3_DEV_ID_25GE_RDMA_MACSEC 0xA223
+#define HNAE3_DEV_ID_50GE_RDMA 0xA224
+#define HNAE3_DEV_ID_50GE_RDMA_MACSEC 0xA225
+#define HNAE3_DEV_ID_100G_RDMA_MACSEC 0xA226
+#define HNAE3_DEV_ID_100G_VF 0xA22E
+#define HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF 0xA22F
+
+#define HNAE3_CLASS_NAME_SIZE 16
+
+#define HNAE3_DEV_INITED_B 0x0
+#define HNAE_DEV_SUPPORT_ROCE_B 0x1
+
+#define ring_ptr_move_fw(ring, p) \
+ ((ring)->p = ((ring)->p + 1) % (ring)->desc_num)
+#define ring_ptr_move_bw(ring, p) \
+ ((ring)->p = ((ring)->p - 1 + (ring)->desc_num) % (ring)->desc_num)
+
+enum hns_desc_type {
+ DESC_TYPE_SKB,
+ DESC_TYPE_PAGE,
+};
+
+struct hnae3_handle;
+
+struct hnae3_queue {
+ void __iomem *io_base;
+ struct hnae3_ae_algo *ae_algo;
+ struct hnae3_handle *handle;
+ int tqp_index; /* index in a handle */
+ u32 buf_size; /* size for hnae_desc->addr, preset by AE */
+ u16 desc_num; /* total number of desc */
+};
+
+/* hnae3 loop mode */
+enum hnae3_loop {
+ HNAE3_MAC_INTER_LOOP_MAC,
+ HNAE3_MAC_INTER_LOOP_SERDES,
+ HNAE3_MAC_INTER_LOOP_PHY,
+ HNAE3_MAC_LOOP_NONE,
+};
+
+enum hnae3_client_type {
+ HNAE3_CLIENT_KNIC,
+ HNAE3_CLIENT_UNIC,
+ HNAE3_CLIENT_ROCE,
+};
+
+enum hnae3_dev_type {
+ HNAE3_DEV_KNIC,
+ HNAE3_DEV_UNIC,
+};
+
+/* mac media type */
+enum hnae3_media_type {
+ HNAE3_MEDIA_TYPE_UNKNOWN,
+ HNAE3_MEDIA_TYPE_FIBER,
+ HNAE3_MEDIA_TYPE_COPPER,
+ HNAE3_MEDIA_TYPE_BACKPLANE,
+};
+
+struct hnae3_vector_info {
+ u8 __iomem *io_addr;
+ int vector;
+};
+
+#define HNAE3_RING_TYPE_B 0
+#define HNAE3_RING_TYPE_TX 0
+#define HNAE3_RING_TYPE_RX 1
+
+struct hnae3_ring_chain_node {
+ struct hnae3_ring_chain_node *next;
+ u32 tqp_index;
+ u32 flag;
+};
+
+#define HNAE3_IS_TX_RING(node) \
+ (((node)->flag & (1 << HNAE3_RING_TYPE_B)) == HNAE3_RING_TYPE_TX)
+
+struct hnae3_client_ops {
+ int (*init_instance)(struct hnae3_handle *handle);
+ void (*uninit_instance)(struct hnae3_handle *handle, bool reset);
+ void (*link_status_change)(struct hnae3_handle *handle, bool state);
+};
+
+#define HNAE3_CLIENT_NAME_LENGTH 16
+struct hnae3_client {
+ char name[HNAE3_CLIENT_NAME_LENGTH];
+ u16 version;
+ unsigned long state;
+ enum hnae3_client_type type;
+ const struct hnae3_client_ops *ops;
+ struct list_head node;
+};
+
+struct hnae3_ae_dev {
+ struct pci_dev *pdev;
+ const struct hnae3_ae_ops *ops;
+ struct list_head node;
+ u32 flag;
+ enum hnae3_dev_type dev_type;
+ void *priv;
+};
+
+/* This struct defines the operation on the handle.
+ *
+ * init_ae_dev(): (mandatory)
+ * Get PF configure from pci_dev and initialize PF hardware
+ * uninit_ae_dev()
+ * Disable PF device and release PF resource
+ * register_client
+ * Register client to ae_dev
+ * unregister_client()
+ * Unregister client from ae_dev
+ * start()
+ * Enable the hardware
+ * stop()
+ * Disable the hardware
+ * get_status()
+ * Get the carrier state of the back channel of the handle, 1 for ok, 0 for
+ * non-ok
+ * get_ksettings_an_result()
+ * Get negotiation status,speed and duplex
+ * update_speed_duplex_h()
+ * Update hardware speed and duplex
+ * get_media_type()
+ * Get media type of MAC
+ * adjust_link()
+ * Adjust link status
+ * set_loopback()
+ * Set loopback
+ * set_promisc_mode
+ * Set promisc mode
+ * set_mtu()
+ * set mtu
+ * get_pauseparam()
+ * get tx and rx of pause frame use
+ * set_pauseparam()
+ * set tx and rx of pause frame use
+ * set_autoneg()
+ *   set autonegotiation of pause frame use
+ * get_autoneg()
+ *   get autonegotiation of pause frame use
+ * get_coalesce_usecs()
+ * get usecs to delay a TX interrupt after a packet is sent
+ * get_rx_max_coalesced_frames()
+ * get Maximum number of packets to be sent before a TX interrupt.
+ * set_coalesce_usecs()
+ * set usecs to delay a TX interrupt after a packet is sent
+ * set_coalesce_frames()
+ * set Maximum number of packets to be sent before a TX interrupt.
+ * get_mac_addr()
+ * get mac address
+ * set_mac_addr()
+ * set mac address
+ * add_uc_addr
+ * Add unicast addr to mac table
+ * rm_uc_addr
+ * Remove unicast addr from mac table
+ * set_mc_addr()
+ * Set multicast address
+ * add_mc_addr
+ * Add multicast address to mac table
+ * rm_mc_addr
+ * Remove multicast address from mac table
+ * update_stats()
+ * Update Old network device statistics
+ * get_ethtool_stats()
+ * Get ethtool network device statistics
+ * get_strings()
+ * Get a set of strings that describe the requested objects
+ * get_sset_count()
+ * Get number of strings that @get_strings will write
+ * update_led_status()
+ * Update the led status
+ * set_led_id()
+ * Set led id
+ * get_regs()
+ * Get regs dump
+ * get_regs_len()
+ * Get the len of the regs dump
+ * get_rss_key_size()
+ * Get rss key size
+ * get_rss_indir_size()
+ * Get rss indirection table size
+ * get_rss()
+ * Get rss table
+ * set_rss()
+ * Set rss table
+ * get_tc_size()
+ * Get tc size of handle
+ * get_vector()
+ * Get vector number and vector information
+ * map_ring_to_vector()
+ * Map rings to vector
+ * unmap_ring_from_vector()
+ * Unmap rings from vector
+ * add_tunnel_udp()
+ * Add tunnel information to hardware
+ * del_tunnel_udp()
+ * Delete tunnel information from hardware
+ * reset_queue()
+ * Reset queue
+ * get_fw_version()
+ * Get firmware version
+ * get_mdix_mode()
+ * Get media type of phy
+ * set_vlan_filter()
+ * Set vlan filter config of Ports
+ * set_vf_vlan_filter()
+ * Set vlan filter config of vf
+ */
+struct hnae3_ae_ops {
+ int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev);
+ void (*uninit_ae_dev)(struct hnae3_ae_dev *ae_dev);
+
+ int (*init_client_instance)(struct hnae3_client *client,
+ struct hnae3_ae_dev *ae_dev);
+ void (*uninit_client_instance)(struct hnae3_client *client,
+ struct hnae3_ae_dev *ae_dev);
+ int (*start)(struct hnae3_handle *handle);
+ void (*stop)(struct hnae3_handle *handle);
+ int (*get_status)(struct hnae3_handle *handle);
+ void (*get_ksettings_an_result)(struct hnae3_handle *handle,
+ u8 *auto_neg, u32 *speed, u8 *duplex);
+
+ int (*update_speed_duplex_h)(struct hnae3_handle *handle);
+ int (*cfg_mac_speed_dup_h)(struct hnae3_handle *handle, int speed,
+ u8 duplex);
+
+ void (*get_media_type)(struct hnae3_handle *handle, u8 *media_type);
+ void (*adjust_link)(struct hnae3_handle *handle, int speed, int duplex);
+ int (*set_loopback)(struct hnae3_handle *handle,
+ enum hnae3_loop loop_mode, bool en);
+
+ void (*set_promisc_mode)(struct hnae3_handle *handle, u32 en);
+ int (*set_mtu)(struct hnae3_handle *handle, int new_mtu);
+
+ void (*get_pauseparam)(struct hnae3_handle *handle,
+ u32 *auto_neg, u32 *rx_en, u32 *tx_en);
+ int (*set_pauseparam)(struct hnae3_handle *handle,
+ u32 auto_neg, u32 rx_en, u32 tx_en);
+
+ int (*set_autoneg)(struct hnae3_handle *handle, bool enable);
+ int (*get_autoneg)(struct hnae3_handle *handle);
+
+ void (*get_coalesce_usecs)(struct hnae3_handle *handle,
+ u32 *tx_usecs, u32 *rx_usecs);
+ void (*get_rx_max_coalesced_frames)(struct hnae3_handle *handle,
+ u32 *tx_frames, u32 *rx_frames);
+ int (*set_coalesce_usecs)(struct hnae3_handle *handle, u32 timeout);
+ int (*set_coalesce_frames)(struct hnae3_handle *handle,
+ u32 coalesce_frames);
+ void (*get_coalesce_range)(struct hnae3_handle *handle,
+ u32 *tx_frames_low, u32 *rx_frames_low,
+ u32 *tx_frames_high, u32 *rx_frames_high,
+ u32 *tx_usecs_low, u32 *rx_usecs_low,
+ u32 *tx_usecs_high, u32 *rx_usecs_high);
+
+ void (*get_mac_addr)(struct hnae3_handle *handle, u8 *p);
+ int (*set_mac_addr)(struct hnae3_handle *handle, void *p);
+ int (*add_uc_addr)(struct hnae3_handle *handle,
+ const unsigned char *addr);
+ int (*rm_uc_addr)(struct hnae3_handle *handle,
+ const unsigned char *addr);
+ int (*set_mc_addr)(struct hnae3_handle *handle, void *addr);
+ int (*add_mc_addr)(struct hnae3_handle *handle,
+ const unsigned char *addr);
+ int (*rm_mc_addr)(struct hnae3_handle *handle,
+ const unsigned char *addr);
+
+ void (*set_tso_stats)(struct hnae3_handle *handle, int enable);
+ void (*update_stats)(struct hnae3_handle *handle,
+ struct net_device_stats *net_stats);
+ void (*get_stats)(struct hnae3_handle *handle, u64 *data);
+
+ void (*get_strings)(struct hnae3_handle *handle,
+ u32 stringset, u8 *data);
+ int (*get_sset_count)(struct hnae3_handle *handle, int stringset);
+
+ void (*get_regs)(struct hnae3_handle *handle, void *data);
+ int (*get_regs_len)(struct hnae3_handle *handle);
+
+ u32 (*get_rss_key_size)(struct hnae3_handle *handle);
+ u32 (*get_rss_indir_size)(struct hnae3_handle *handle);
+ int (*get_rss)(struct hnae3_handle *handle, u32 *indir, u8 *key,
+ u8 *hfunc);
+ int (*set_rss)(struct hnae3_handle *handle, const u32 *indir,
+ const u8 *key, const u8 hfunc);
+
+ int (*get_tc_size)(struct hnae3_handle *handle);
+
+ int (*get_vector)(struct hnae3_handle *handle, u16 vector_num,
+ struct hnae3_vector_info *vector_info);
+ int (*map_ring_to_vector)(struct hnae3_handle *handle,
+ int vector_num,
+ struct hnae3_ring_chain_node *vr_chain);
+ int (*unmap_ring_from_vector)(struct hnae3_handle *handle,
+ int vector_num,
+ struct hnae3_ring_chain_node *vr_chain);
+
+ int (*add_tunnel_udp)(struct hnae3_handle *handle, u16 port_num);
+ int (*del_tunnel_udp)(struct hnae3_handle *handle, u16 port_num);
+
+ void (*reset_queue)(struct hnae3_handle *handle, u16 queue_id);
+ u32 (*get_fw_version)(struct hnae3_handle *handle);
+ void (*get_mdix_mode)(struct hnae3_handle *handle,
+ u8 *tp_mdix_ctrl, u8 *tp_mdix);
+
+ int (*set_vlan_filter)(struct hnae3_handle *handle, __be16 proto,
+ u16 vlan_id, bool is_kill);
+ int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
+ u16 vlan, u8 qos, __be16 proto);
+};
+
+struct hnae3_ae_algo {
+ const struct hnae3_ae_ops *ops;
+ struct list_head node;
+ char name[HNAE3_CLASS_NAME_SIZE];
+ const struct pci_device_id *pdev_id_table;
+};
+
+#define HNAE3_INT_NAME_LEN (IFNAMSIZ + 16)
+#define HNAE3_ITR_COUNTDOWN_START 100
+
+struct hnae3_tc_info {
+ u16 tqp_offset; /* TQP offset from base TQP */
+ u16 tqp_count; /* Total TQPs */
+ u8 up; /* user priority */
+ u8 tc; /* TC index */
+ bool enable; /* Whether this TC is enabled or not */
+};
+
+#define HNAE3_MAX_TC 8
+struct hnae3_knic_private_info {
+ struct net_device *netdev; /* Set by KNIC client when init instance */
+ u16 rss_size; /* Allocated RSS queues */
+ u16 rx_buf_len;
+ u16 num_desc;
+
+ u8 num_tc; /* Total number of enabled TCs */
+ struct hnae3_tc_info tc_info[HNAE3_MAX_TC]; /* Idx of array is HW TC */
+
+ u16 num_tqps; /* total number of TQPs in this handle */
+ struct hnae3_queue **tqp; /* array base of all TQPs in this instance */
+};
+
+struct hnae3_roce_private_info {
+ struct net_device *netdev;
+ void __iomem *roce_io_base;
+ int base_vector;
+ int num_vectors;
+};
+
+struct hnae3_unic_private_info {
+ struct net_device *netdev;
+ u16 rx_buf_len;
+ u16 num_desc;
+ u16 num_tqps; /* total number of tqps in this handle */
+ struct hnae3_queue **tqp; /* array base of all TQPs of this instance */
+};
+
+#define HNAE3_SUPPORT_MAC_LOOPBACK 1
+#define HNAE3_SUPPORT_PHY_LOOPBACK 2
+#define HNAE3_SUPPORT_SERDES_LOOPBACK 4
+
+struct hnae3_handle {
+ struct hnae3_client *client;
+ struct pci_dev *pdev;
+ void *priv;
+ struct hnae3_ae_algo *ae_algo; /* the class who provides this handle */
+ u64 flags; /* Indicate the capabilities for this handle*/
+
+ union {
+ struct net_device *netdev; /* first member */
+ struct hnae3_knic_private_info kinfo;
+ struct hnae3_unic_private_info uinfo;
+ struct hnae3_roce_private_info rinfo;
+ };
+
+ u32 numa_node_mask; /* for multi-chip support */
+};
+
+#define hnae_set_field(origin, mask, shift, val) \
+ do { \
+ (origin) &= (~(mask)); \
+ (origin) |= ((val) << (shift)) & (mask); \
+ } while (0)
+#define hnae_get_field(origin, mask, shift) (((origin) & (mask)) >> (shift))
+
+#define hnae_set_bit(origin, shift, val) \
+ hnae_set_field((origin), (0x1 << (shift)), (shift), (val))
+#define hnae_get_bit(origin, shift) \
+ hnae_get_field((origin), (0x1 << (shift)), (shift))
+
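The accessors above are used throughout hnae3.c; for instance, mirroring the register path:

    hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 1);	/* mark inited */
    if (hnae_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))
    	/* ae_dev is ready for client instantiation */;

Since hnae_set_field() clears the masked bits before OR-ing in the value, repeated calls with the same arguments are idempotent.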
+int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev);
+void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev);
+
+void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo);
+int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo);
+
+void hnae3_unregister_client(struct hnae3_client *client);
+int hnae3_register_client(struct hnae3_client *client);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
new file mode 100644
index 0000000..162e8a42
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the HISILICON network device drivers.
+#
+
+ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
+
+obj-$(CONFIG_HNS3_HCLGE) += hclge.o
+hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o
+
+obj-$(CONFIG_HNS3_ENET) += hns3.o
+hns3-objs = hns3_enet.o hns3_ethtool.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
new file mode 100644
index 0000000..8b511e6
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/dma-direction.h>
+#include "hclge_cmd.h"
+#include "hnae3.h"
+#include "hclge_main.h"
+
+#define hclge_is_csq(ring) ((ring)->flag & HCLGE_TYPE_CSQ)
+#define hclge_ring_to_dma_dir(ring) (hclge_is_csq(ring) ? \
+ DMA_TO_DEVICE : DMA_FROM_DEVICE)
+#define cmq_ring_to_dev(ring) (&(ring)->dev->pdev->dev)
+
+static int hclge_ring_space(struct hclge_cmq_ring *ring)
+{
+ int ntu = ring->next_to_use;
+ int ntc = ring->next_to_clean;
+ int used = (ntu - ntc + ring->desc_num) % ring->desc_num;
+
+ return ring->desc_num - used - 1;
+}
+
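Worked numbers for hclge_ring_space(): with desc_num = 1024, next_to_use = 10 and next_to_clean = 1020, used = (10 - 1020 + 1024) % 1024 = 14 and the free space is 1024 - 14 - 1 = 1009. The final - 1 keeps one slot permanently unused so that a full ring and an empty ring remain distinguishable.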
+static int hclge_alloc_cmd_desc(struct hclge_cmq_ring *ring)
+{
+ int size = ring->desc_num * sizeof(struct hclge_desc);
+
+ ring->desc = kzalloc(size, GFP_KERNEL);
+ if (!ring->desc)
+ return -ENOMEM;
+
+ ring->desc_dma_addr = dma_map_single(cmq_ring_to_dev(ring), ring->desc,
+ size, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(cmq_ring_to_dev(ring), ring->desc_dma_addr)) {
+ ring->desc_dma_addr = 0;
+ kfree(ring->desc);
+ ring->desc = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void hclge_free_cmd_desc(struct hclge_cmq_ring *ring)
+{
+ dma_unmap_single(cmq_ring_to_dev(ring), ring->desc_dma_addr,
+ ring->desc_num * sizeof(ring->desc[0]),
+ DMA_BIDIRECTIONAL);
+
+ ring->desc_dma_addr = 0;
+ kfree(ring->desc);
+ ring->desc = NULL;
+}
+
+static int hclge_init_cmd_queue(struct hclge_dev *hdev, int ring_type)
+{
+ struct hclge_hw *hw = &hdev->hw;
+ struct hclge_cmq_ring *ring =
+ (ring_type == HCLGE_TYPE_CSQ) ? &hw->cmq.csq : &hw->cmq.crq;
+ int ret;
+
+ ring->flag = ring_type;
+ ring->dev = hdev;
+
+ ret = hclge_alloc_cmd_desc(ring);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "descriptor %s alloc error %d\n",
+ (ring_type == HCLGE_TYPE_CSQ) ? "CSQ" : "CRQ", ret);
+ return ret;
+ }
+
+ ring->next_to_clean = 0;
+ ring->next_to_use = 0;
+
+ return 0;
+}
+
+void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
+ enum hclge_opcode_type opcode, bool is_read)
+{
+ memset((void *)desc, 0, sizeof(struct hclge_desc));
+ desc->opcode = cpu_to_le16(opcode);
+ desc->flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN);
+
+ if (is_read)
+ desc->flag |= cpu_to_le16(HCLGE_CMD_FLAG_WR);
+ else
+ desc->flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+}
+
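A sketch of a single-descriptor synchronous read built on hclge_cmd_setup_basic_desc() above and hclge_cmd_send() below; it mirrors hclge_cmd_query_firmware_version() later in this file (opcode and functions are real, error handling is elided):

    struct hclge_desc desc;
    int ret;

    hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FW_VER, true);
    ret = hclge_cmd_send(hw, &desc, 1);
    if (!ret)
    	; /* desc.data[] now holds the firmware's reply */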
+static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring)
+{
+ dma_addr_t dma = ring->desc_dma_addr;
+ struct hclge_dev *hdev = ring->dev;
+ struct hclge_hw *hw = &hdev->hw;
+
+ if (ring->flag == HCLGE_TYPE_CSQ) {
+ hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_L_REG,
+ (u32)dma);
+ hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_H_REG,
+ (u32)((dma >> 31) >> 1));
+ hclge_write_dev(hw, HCLGE_NIC_CSQ_DEPTH_REG,
+ (ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) |
+ HCLGE_NIC_CMQ_ENABLE);
+ hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, 0);
+ hclge_write_dev(hw, HCLGE_NIC_CSQ_HEAD_REG, 0);
+ } else {
+ hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_L_REG,
+ (u32)dma);
+ hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_H_REG,
+ (u32)((dma >> 31) >> 1));
+ hclge_write_dev(hw, HCLGE_NIC_CRQ_DEPTH_REG,
+ (ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) |
+ HCLGE_NIC_CMQ_ENABLE);
+ hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0);
+ hclge_write_dev(hw, HCLGE_NIC_CRQ_HEAD_REG, 0);
+ }
+}
+
+static void hclge_cmd_init_regs(struct hclge_hw *hw)
+{
+ hclge_cmd_config_regs(&hw->cmq.csq);
+ hclge_cmd_config_regs(&hw->cmq.crq);
+}
+
+static int hclge_cmd_csq_clean(struct hclge_hw *hw)
+{
+ struct hclge_cmq_ring *csq = &hw->cmq.csq;
+ u16 ntc = csq->next_to_clean;
+ struct hclge_desc *desc;
+ int clean = 0;
+ u32 head;
+
+ desc = &csq->desc[ntc];
+ head = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG);
+
+ while (head != ntc) {
+ memset(desc, 0, sizeof(*desc));
+ ntc++;
+ if (ntc == csq->desc_num)
+ ntc = 0;
+ desc = &csq->desc[ntc];
+ clean++;
+ }
+ csq->next_to_clean = ntc;
+
+ return clean;
+}
+
+static int hclge_cmd_csq_done(struct hclge_hw *hw)
+{
+ u32 head = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG);
+
+ return head == hw->cmq.csq.next_to_use;
+}
+
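+/*
+ * The special opcodes are the stats commands (HCLGE_OPC_STATS_64_BIT,
+ * HCLGE_OPC_STATS_32_BIT and HCLGE_OPC_STATS_MAC): for these the return
+ * status is taken from the first descriptor of the response (see
+ * hclge_cmd_send()).
+ */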
+static bool hclge_is_special_opcode(u16 opcode)
+{
+ u16 spec_opcode[3] = {0x0030, 0x0031, 0x0032};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
+ if (spec_opcode[i] == opcode)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * hclge_cmd_send - send command to command queue
+ * @hw: pointer to the hw struct
+ * @desc: prefilled descriptor for describing the command
+ * @num: the number of descriptors to be sent
+ *
+ * This is the main send command for the command queue. It copies the
+ * descriptors into the queue, rings the tail register, waits for
+ * completion when the command is synchronous, and cleans the queue
+ * afterwards.
+ **/
+int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
+{
+ struct hclge_dev *hdev = (struct hclge_dev *)hw->back;
+ struct hclge_desc *desc_to_use;
+ bool complete = false;
+ u32 timeout = 0;
+ int handle = 0;
+ int retval = 0;
+ u16 opcode, desc_ret;
+ int ntc;
+
+ spin_lock_bh(&hw->cmq.csq.lock);
+
+ if (num > hclge_ring_space(&hw->cmq.csq)) {
+ spin_unlock_bh(&hw->cmq.csq.lock);
+ return -EBUSY;
+ }
+
+ /*
+ * Record the location of desc in the ring for this time,
+ * which will be used by the hardware for write back.
+ */
+ ntc = hw->cmq.csq.next_to_use;
+ opcode = le16_to_cpu(desc[0].opcode);
+ while (handle < num) {
+ desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use];
+ *desc_to_use = desc[handle];
+ (hw->cmq.csq.next_to_use)++;
+ if (hw->cmq.csq.next_to_use == hw->cmq.csq.desc_num)
+ hw->cmq.csq.next_to_use = 0;
+ handle++;
+ }
+
+ /* Write to hardware */
+ hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, hw->cmq.csq.next_to_use);
+
+ /*
+ * If the command is sync, wait for the firmware to write back;
+ * if multiple descriptors are to be sent, use the first one to check.
+ */
+ if (HCLGE_SEND_SYNC(le16_to_cpu(desc->flag))) {
+ do {
+ if (hclge_cmd_csq_done(hw))
+ break;
+ udelay(1);
+ timeout++;
+ } while (timeout < hw->cmq.tx_timeout);
+ }
+
+ if (hclge_cmd_csq_done(hw)) {
+ complete = true;
+ handle = 0;
+ while (handle < num) {
+ /* Get the result of hardware write back */
+ desc_to_use = &hw->cmq.csq.desc[ntc];
+ desc[handle] = *desc_to_use;
+ pr_debug("Get cmd desc:\n");
+
+ if (likely(!hclge_is_special_opcode(opcode)))
+ desc_ret = le16_to_cpu(desc[handle].retval);
+ else
+ desc_ret = le16_to_cpu(desc[0].retval);
+
+ if ((enum hclge_cmd_return_status)desc_ret ==
+ HCLGE_CMD_EXEC_SUCCESS)
+ retval = 0;
+ else
+ retval = -EIO;
+ hw->cmq.last_status = (enum hclge_cmd_status)desc_ret;
+ ntc++;
+ handle++;
+ if (ntc == hw->cmq.csq.desc_num)
+ ntc = 0;
+ }
+ }
+
+ if (!complete)
+ retval = -EAGAIN;
+
+ /* Clean the command send queue */
+ handle = hclge_cmd_csq_clean(hw);
+ if (handle != num) {
+ dev_warn(&hdev->pdev->dev,
+ "cleaned %d, need to clean %d\n", handle, num);
+ }
+
+ spin_unlock_bh(&hw->cmq.csq.lock);
+
+ return retval;
+}
+
+enum hclge_cmd_status hclge_cmd_query_firmware_version(struct hclge_hw *hw,
+ u32 *version)
+{
+ struct hclge_query_version *resp;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FW_VER, true);
+ resp = (struct hclge_query_version *)desc.data;
+
+ ret = hclge_cmd_send(hw, &desc, 1);
+ if (!ret)
+ *version = le32_to_cpu(resp->firmware);
+
+ return ret;
+}
+
+int hclge_cmd_init(struct hclge_dev *hdev)
+{
+ u32 version;
+ int ret;
+
+ /* Set up the queue entries used by the command queue */
+ hdev->hw.cmq.csq.desc_num = HCLGE_NIC_CMQ_DESC_NUM;
+ hdev->hw.cmq.crq.desc_num = HCLGE_NIC_CMQ_DESC_NUM;
+
+ /* Setup the lock for command queue */
+ spin_lock_init(&hdev->hw.cmq.csq.lock);
+ spin_lock_init(&hdev->hw.cmq.crq.lock);
+
+ /* Setup Tx write back timeout */
+ hdev->hw.cmq.tx_timeout = HCLGE_CMDQ_TX_TIMEOUT;
+
+ /* Setup queue rings */
+ ret = hclge_init_cmd_queue(hdev, HCLGE_TYPE_CSQ);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "CSQ ring setup error %d\n", ret);
+ return ret;
+ }
+
+ ret = hclge_init_cmd_queue(hdev, HCLGE_TYPE_CRQ);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "CRQ ring setup error %d\n", ret);
+ goto err_csq;
+ }
+
+ hclge_cmd_init_regs(&hdev->hw);
+
+ ret = hclge_cmd_query_firmware_version(&hdev->hw, &version);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "firmware version query failed %d\n", ret);
+ goto err_crq;
+ }
+ hdev->fw_version = version;
+
+ dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version);
+
+ return 0;
+err_crq:
+ hclge_free_cmd_desc(&hdev->hw.cmq.crq);
+err_csq:
+ hclge_free_cmd_desc(&hdev->hw.cmq.csq);
+ return ret;
+}
+
+static void hclge_destroy_queue(struct hclge_cmq_ring *ring)
+{
+ spin_lock_bh(&ring->lock);
+ hclge_free_cmd_desc(ring);
+ spin_unlock_bh(&ring->lock);
+}
+
+void hclge_destroy_cmd_queue(struct hclge_hw *hw)
+{
+ hclge_destroy_queue(&hw->cmq.csq);
+ hclge_destroy_queue(&hw->cmq.crq);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
new file mode 100644
index 0000000..91ae013
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -0,0 +1,740 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HCLGE_CMD_H
+#define __HCLGE_CMD_H
+#include <linux/types.h>
+#include <linux/io.h>
+
+#define HCLGE_CMDQ_TX_TIMEOUT 1000
+
+struct hclge_dev;
+struct hclge_desc {
+ __le16 opcode;
+
+#define HCLGE_CMDQ_RX_INVLD_B 0
+#define HCLGE_CMDQ_RX_OUTVLD_B 1
+
+ __le16 flag;
+ __le16 retval;
+ __le16 rsv;
+ __le32 data[6];
+};
+
+struct hclge_desc_cb {
+ dma_addr_t dma;
+ void *va;
+ u32 length;
+};
+
+struct hclge_cmq_ring {
+ dma_addr_t desc_dma_addr;
+ struct hclge_desc *desc;
+ struct hclge_desc_cb *desc_cb;
+ struct hclge_dev *dev;
+ u32 head;
+ u32 tail;
+
+ u16 buf_size;
+ u16 desc_num;
+ int next_to_use;
+ int next_to_clean;
+ u8 flag;
+ spinlock_t lock; /* Command queue lock */
+};
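+
+/*
+ * Ring index convention: software advances next_to_use as it posts
+ * descriptors and next_to_clean as the hardware head pointer moves past
+ * them; one slot is always left unused (see hclge_ring_space()) so that
+ * a full ring can be distinguished from an empty one.
+ */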
+
+enum hclge_cmd_return_status {
+ HCLGE_CMD_EXEC_SUCCESS = 0,
+ HCLGE_CMD_NO_AUTH = 1,
+ HCLGE_CMD_NOT_EXEC = 2,
+ HCLGE_CMD_QUEUE_FULL = 3,
+};
+
+enum hclge_cmd_status {
+ HCLGE_STATUS_SUCCESS = 0,
+ HCLGE_ERR_CSQ_FULL = -1,
+ HCLGE_ERR_CSQ_TIMEOUT = -2,
+ HCLGE_ERR_CSQ_ERROR = -3,
+};
+
+struct hclge_cmq {
+ struct hclge_cmq_ring csq;
+ struct hclge_cmq_ring crq;
+ u16 tx_timeout; /* Tx timeout */
+ enum hclge_cmd_status last_status;
+};
+
+#define HCLGE_CMD_FLAG_IN_VALID_SHIFT 0
+#define HCLGE_CMD_FLAG_OUT_VALID_SHIFT 1
+#define HCLGE_CMD_FLAG_NEXT_SHIFT 2
+#define HCLGE_CMD_FLAG_WR_OR_RD_SHIFT 3
+#define HCLGE_CMD_FLAG_NO_INTR_SHIFT 4
+#define HCLGE_CMD_FLAG_ERR_INTR_SHIFT 5
+
+#define HCLGE_CMD_FLAG_IN BIT(HCLGE_CMD_FLAG_IN_VALID_SHIFT)
+#define HCLGE_CMD_FLAG_OUT BIT(HCLGE_CMD_FLAG_OUT_VALID_SHIFT)
+#define HCLGE_CMD_FLAG_NEXT BIT(HCLGE_CMD_FLAG_NEXT_SHIFT)
+#define HCLGE_CMD_FLAG_WR BIT(HCLGE_CMD_FLAG_WR_OR_RD_SHIFT)
+#define HCLGE_CMD_FLAG_NO_INTR BIT(HCLGE_CMD_FLAG_NO_INTR_SHIFT)
+#define HCLGE_CMD_FLAG_ERR_INTR BIT(HCLGE_CMD_FLAG_ERR_INTR_SHIFT)
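+
+/*
+ * Worked example: hclge_cmd_setup_basic_desc() initializes flag to
+ * HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN = 0x10 | 0x01 = 0x11;
+ * a read command additionally sets HCLGE_CMD_FLAG_WR, giving 0x19.
+ */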
+
+enum hclge_opcode_type {
+ /* Generic command */
+ HCLGE_OPC_QUERY_FW_VER = 0x0001,
+ HCLGE_OPC_CFG_RST_TRIGGER = 0x0020,
+ HCLGE_OPC_GBL_RST_STATUS = 0x0021,
+ HCLGE_OPC_QUERY_FUNC_STATUS = 0x0022,
+ HCLGE_OPC_QUERY_PF_RSRC = 0x0023,
+ HCLGE_OPC_QUERY_VF_RSRC = 0x0024,
+ HCLGE_OPC_GET_CFG_PARAM = 0x0025,
+
+ HCLGE_OPC_STATS_64_BIT = 0x0030,
+ HCLGE_OPC_STATS_32_BIT = 0x0031,
+ HCLGE_OPC_STATS_MAC = 0x0032,
+ /* Device management command */
+
+ /* MAC command */
+ HCLGE_OPC_CONFIG_MAC_MODE = 0x0301,
+ HCLGE_OPC_CONFIG_AN_MODE = 0x0304,
+ HCLGE_OPC_QUERY_AN_RESULT = 0x0306,
+ HCLGE_OPC_QUERY_LINK_STATUS = 0x0307,
+ HCLGE_OPC_CONFIG_MAX_FRM_SIZE = 0x0308,
+ HCLGE_OPC_CONFIG_SPEED_DUP = 0x0309,
+ /* MACSEC command */
+
+ /* PFC/Pause CMD */
+ HCLGE_OPC_CFG_MAC_PAUSE_EN = 0x0701,
+ HCLGE_OPC_CFG_PFC_PAUSE_EN = 0x0702,
+ HCLGE_OPC_CFG_MAC_PARA = 0x0703,
+ HCLGE_OPC_CFG_PFC_PARA = 0x0704,
+ HCLGE_OPC_QUERY_MAC_TX_PKT_CNT = 0x0705,
+ HCLGE_OPC_QUERY_MAC_RX_PKT_CNT = 0x0706,
+ HCLGE_OPC_QUERY_PFC_TX_PKT_CNT = 0x0707,
+ HCLGE_OPC_QUERY_PFC_RX_PKT_CNT = 0x0708,
+ HCLGE_OPC_PRI_TO_TC_MAPPING = 0x0709,
+ HCLGE_OPC_QOS_MAP = 0x070A,
+
+ /* ETS/scheduler commands */
+ HCLGE_OPC_TM_PG_TO_PRI_LINK = 0x0804,
+ HCLGE_OPC_TM_QS_TO_PRI_LINK = 0x0805,
+ HCLGE_OPC_TM_NQ_TO_QS_LINK = 0x0806,
+ HCLGE_OPC_TM_RQ_TO_QS_LINK = 0x0807,
+ HCLGE_OPC_TM_PORT_WEIGHT = 0x0808,
+ HCLGE_OPC_TM_PG_WEIGHT = 0x0809,
+ HCLGE_OPC_TM_QS_WEIGHT = 0x080A,
+ HCLGE_OPC_TM_PRI_WEIGHT = 0x080B,
+ HCLGE_OPC_TM_PRI_C_SHAPPING = 0x080C,
+ HCLGE_OPC_TM_PRI_P_SHAPPING = 0x080D,
+ HCLGE_OPC_TM_PG_C_SHAPPING = 0x080E,
+ HCLGE_OPC_TM_PG_P_SHAPPING = 0x080F,
+ HCLGE_OPC_TM_PORT_SHAPPING = 0x0810,
+ HCLGE_OPC_TM_PG_SCH_MODE_CFG = 0x0812,
+ HCLGE_OPC_TM_PRI_SCH_MODE_CFG = 0x0813,
+ HCLGE_OPC_TM_QS_SCH_MODE_CFG = 0x0814,
+ HCLGE_OPC_TM_BP_TO_QSET_MAPPING = 0x0815,
+
+ /* Packet buffer allocate command */
+ HCLGE_OPC_TX_BUFF_ALLOC = 0x0901,
+ HCLGE_OPC_RX_PRIV_BUFF_ALLOC = 0x0902,
+ HCLGE_OPC_RX_PRIV_WL_ALLOC = 0x0903,
+ HCLGE_OPC_RX_COM_THRD_ALLOC = 0x0904,
+ HCLGE_OPC_RX_COM_WL_ALLOC = 0x0905,
+ HCLGE_OPC_RX_GBL_PKT_CNT = 0x0906,
+
+ /* PTP command */
+ /* TQP management command */
+ HCLGE_OPC_SET_TQP_MAP = 0x0A01,
+
+ /* TQP command */
+ HCLGE_OPC_CFG_TX_QUEUE = 0x0B01,
+ HCLGE_OPC_QUERY_TX_POINTER = 0x0B02,
+ HCLGE_OPC_QUERY_TX_STATUS = 0x0B03,
+ HCLGE_OPC_CFG_RX_QUEUE = 0x0B11,
+ HCLGE_OPC_QUERY_RX_POINTER = 0x0B12,
+ HCLGE_OPC_QUERY_RX_STATUS = 0x0B13,
+ HCLGE_OPC_STASH_RX_QUEUE_LRO = 0x0B16,
+ HCLGE_OPC_CFG_RX_QUEUE_LRO = 0x0B17,
+ HCLGE_OPC_CFG_COM_TQP_QUEUE = 0x0B20,
+ HCLGE_OPC_RESET_TQP_QUEUE = 0x0B22,
+
+ /* TSO cmd */
+ HCLGE_OPC_TSO_GENERIC_CONFIG = 0x0C01,
+
+ /* RSS cmd */
+ HCLGE_OPC_RSS_GENERIC_CONFIG = 0x0D01,
+ HCLGE_OPC_RSS_INDIR_TABLE = 0x0D07,
+ HCLGE_OPC_RSS_TC_MODE = 0x0D08,
+ HCLGE_OPC_RSS_INPUT_TUPLE = 0x0D02,
+
+ /* Promiscuous mode command */
+ HCLGE_OPC_CFG_PROMISC_MODE = 0x0E01,
+
+ /* Interrupts cmd */
+ HCLGE_OPC_ADD_RING_TO_VECTOR = 0x1503,
+ HCLGE_OPC_DEL_RING_TO_VECTOR = 0x1504,
+
+ /* MAC command */
+ HCLGE_OPC_MAC_VLAN_ADD = 0x1000,
+ HCLGE_OPC_MAC_VLAN_REMOVE = 0x1001,
+ HCLGE_OPC_MAC_VLAN_TYPE_ID = 0x1002,
+ HCLGE_OPC_MAC_VLAN_INSERT = 0x1003,
+ HCLGE_OPC_MAC_ETHTYPE_ADD = 0x1010,
+ HCLGE_OPC_MAC_ETHTYPE_REMOVE = 0x1011,
+
+ /* Multicast linear table cmd */
+ HCLGE_OPC_MTA_MAC_MODE_CFG = 0x1020,
+ HCLGE_OPC_MTA_MAC_FUNC_CFG = 0x1021,
+ HCLGE_OPC_MTA_TBL_ITEM_CFG = 0x1022,
+ HCLGE_OPC_MTA_TBL_ITEM_QUERY = 0x1023,
+
+ /* VLAN command */
+ HCLGE_OPC_VLAN_FILTER_CTRL = 0x1100,
+ HCLGE_OPC_VLAN_FILTER_PF_CFG = 0x1101,
+ HCLGE_OPC_VLAN_FILTER_VF_CFG = 0x1102,
+
+ /* MDIO command */
+ HCLGE_OPC_MDIO_CONFIG = 0x1900,
+
+ /* QCN command */
+ HCLGE_OPC_QCN_MOD_CFG = 0x1A01,
+ HCLGE_OPC_QCN_GRP_TMPLT_CFG = 0x1A02,
+ HCLGE_OPC_QCN_SHAPPING_IR_CFG = 0x1A03,
+ HCLGE_OPC_QCN_SHAPPING_BS_CFG = 0x1A04,
+ HCLGE_OPC_QCN_QSET_LINK_CFG = 0x1A05,
+ HCLGE_OPC_QCN_RP_STATUS_GET = 0x1A06,
+ HCLGE_OPC_QCN_AJUST_INIT = 0x1A07,
+ HCLGE_OPC_QCN_DFX_CNT_STATUS = 0x1A08,
+
+ /* Mailbox cmd */
+ HCLGEVF_OPC_MBX_PF_TO_VF = 0x2000,
+};
+
+#define HCLGE_TQP_REG_OFFSET 0x80000
+#define HCLGE_TQP_REG_SIZE 0x200
+
+#define HCLGE_RCB_INIT_QUERY_TIMEOUT 10
+#define HCLGE_RCB_INIT_FLAG_EN_B 0
+#define HCLGE_RCB_INIT_FLAG_FINI_B 8
+struct hclge_config_rcb_init {
+ __le16 rcb_init_flag;
+ u8 rsv[22];
+};
+
+struct hclge_tqp_map {
+ __le16 tqp_id; /* Absolute tqp id in this pf */
+ u8 tqp_vf; /* VF id */
+#define HCLGE_TQP_MAP_TYPE_PF 0
+#define HCLGE_TQP_MAP_TYPE_VF 1
+#define HCLGE_TQP_MAP_TYPE_B 0
+#define HCLGE_TQP_MAP_EN_B 1
+ u8 tqp_flag; /* Indicate it's pf or vf tqp */
+ __le16 tqp_vid; /* Virtual id in this pf/vf */
+ u8 rsv[18];
+};
+
+#define HCLGE_VECTOR_ELEMENTS_PER_CMD 11
+
+enum hclge_int_type {
+ HCLGE_INT_TX,
+ HCLGE_INT_RX,
+ HCLGE_INT_EVENT,
+};
+
+struct hclge_ctrl_vector_chain {
+ u8 int_vector_id;
+ u8 int_cause_num;
+#define HCLGE_INT_TYPE_S 0
+#define HCLGE_INT_TYPE_M 0x3
+#define HCLGE_TQP_ID_S 2
+#define HCLGE_TQP_ID_M (0x3fff << HCLGE_TQP_ID_S)
+ __le16 tqp_type_and_id[HCLGE_VECTOR_ELEMENTS_PER_CMD];
+};
+
+#define HCLGE_TC_NUM 8
+#define HCLGE_TC0_PRI_BUF_EN_B 15 /* Bit 15 indicates enable or not */
+#define HCLGE_BUF_UNIT_S 7 /* Buf size is in units of 128 bytes */
+struct hclge_tx_buff_alloc {
+ __le16 tx_pkt_buff[HCLGE_TC_NUM];
+ u8 tx_buff_rsv[8];
+};
+
+struct hclge_rx_priv_buff {
+ __le16 buf_num[HCLGE_TC_NUM];
+ u8 rsv[8];
+};
+
+struct hclge_query_version {
+ __le32 firmware;
+ __le32 firmware_rsv[5];
+};
+
+#define HCLGE_RX_PRIV_EN_B 15
+#define HCLGE_TC_NUM_ONE_DESC 4
+struct hclge_priv_wl {
+ __le16 high;
+ __le16 low;
+};
+
+struct hclge_rx_priv_wl_buf {
+ struct hclge_priv_wl tc_wl[HCLGE_TC_NUM_ONE_DESC];
+};
+
+struct hclge_rx_com_thrd {
+ struct hclge_priv_wl com_thrd[HCLGE_TC_NUM_ONE_DESC];
+};
+
+struct hclge_rx_com_wl {
+ struct hclge_priv_wl com_wl;
+};
+
+struct hclge_waterline {
+ u32 low;
+ u32 high;
+};
+
+struct hclge_tc_thrd {
+ u32 low;
+ u32 high;
+};
+
+struct hclge_priv_buf {
+ struct hclge_waterline wl; /* Waterline for low and high */
+ u32 buf_size; /* TC private buffer size */
+ u32 enable; /* Enable TC private buffer or not */
+};
+
+#define HCLGE_MAX_TC_NUM 8
+struct hclge_shared_buf {
+ struct hclge_waterline self;
+ struct hclge_tc_thrd tc_thrd[HCLGE_MAX_TC_NUM];
+ u32 buf_size;
+};
+
+#define HCLGE_RX_COM_WL_EN_B 15
+struct hclge_rx_com_wl_buf {
+ __le16 high_wl;
+ __le16 low_wl;
+ u8 rsv[20];
+};
+
+#define HCLGE_RX_PKT_EN_B 15
+struct hclge_rx_pkt_buf {
+ __le16 high_pkt;
+ __le16 low_pkt;
+ u8 rsv[20];
+};
+
+#define HCLGE_PF_STATE_DONE_B 0
+#define HCLGE_PF_STATE_MAIN_B 1
+#define HCLGE_PF_STATE_BOND_B 2
+#define HCLGE_PF_STATE_MAC_N_B 6
+#define HCLGE_PF_MAC_NUM_MASK 0x3
+#define HCLGE_PF_STATE_MAIN BIT(HCLGE_PF_STATE_MAIN_B)
+#define HCLGE_PF_STATE_DONE BIT(HCLGE_PF_STATE_DONE_B)
+struct hclge_func_status {
+ __le32 vf_rst_state[4];
+ u8 pf_state;
+ u8 mac_id;
+ u8 rsv1;
+ u8 pf_cnt_in_mac;
+ u8 pf_num;
+ u8 vf_num;
+ u8 rsv[2];
+};
+
+struct hclge_pf_res {
+ __le16 tqp_num;
+ __le16 buf_size;
+ __le16 msixcap_localid_ba_nic;
+ __le16 msixcap_localid_ba_rocee;
+#define HCLGE_PF_VEC_NUM_S 0
+#define HCLGE_PF_VEC_NUM_M (0xff << HCLGE_PF_VEC_NUM_S)
+ __le16 pf_intr_vector_number;
+ __le16 pf_own_fun_number;
+ __le32 rsv[3];
+};
+
+#define HCLGE_CFG_OFFSET_S 0
+#define HCLGE_CFG_OFFSET_M 0xfffff /* Byte (8-10.3) */
+#define HCLGE_CFG_RD_LEN_S 24
+#define HCLGE_CFG_RD_LEN_M (0xf << HCLGE_CFG_RD_LEN_S)
+#define HCLGE_CFG_RD_LEN_BYTES 16
+#define HCLGE_CFG_RD_LEN_UNIT 4
+
+#define HCLGE_CFG_VMDQ_S 0
+#define HCLGE_CFG_VMDQ_M (0xff << HCLGE_CFG_VMDQ_S)
+#define HCLGE_CFG_TC_NUM_S 8
+#define HCLGE_CFG_TC_NUM_M (0xff << HCLGE_CFG_TC_NUM_S)
+#define HCLGE_CFG_TQP_DESC_N_S 16
+#define HCLGE_CFG_TQP_DESC_N_M (0xffff << HCLGE_CFG_TQP_DESC_N_S)
+#define HCLGE_CFG_PHY_ADDR_S 0
+#define HCLGE_CFG_PHY_ADDR_M (0x1f << HCLGE_CFG_PHY_ADDR_S)
+#define HCLGE_CFG_MEDIA_TP_S 8
+#define HCLGE_CFG_MEDIA_TP_M (0xff << HCLGE_CFG_MEDIA_TP_S)
+#define HCLGE_CFG_RX_BUF_LEN_S 16
+#define HCLGE_CFG_RX_BUF_LEN_M (0xffff << HCLGE_CFG_RX_BUF_LEN_S)
+#define HCLGE_CFG_MAC_ADDR_H_S 0
+#define HCLGE_CFG_MAC_ADDR_H_M (0xffff << HCLGE_CFG_MAC_ADDR_H_S)
+#define HCLGE_CFG_DEFAULT_SPEED_S 16
+#define HCLGE_CFG_DEFAULT_SPEED_M (0xff << HCLGE_CFG_DEFAULT_SPEED_S)
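+
+/*
+ * These mask/shift pairs are consumed with the hnae_get_field() helper;
+ * e.g. the TC number is
+ * (le32_to_cpu(param[0]) & HCLGE_CFG_TC_NUM_M) >> HCLGE_CFG_TC_NUM_S.
+ */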
+
+struct hclge_cfg_param {
+ __le32 offset;
+ __le32 rsv;
+ __le32 param[4];
+};
+
+#define HCLGE_MAC_MODE 0x0
+#define HCLGE_DESC_NUM 0x40
+
+#define HCLGE_ALLOC_VALID_B 0
+struct hclge_vf_num {
+ u8 alloc_valid;
+ u8 rsv[23];
+};
+
+#define HCLGE_RSS_DEFAULT_OUTPORT_B 4
+#define HCLGE_RSS_HASH_KEY_OFFSET_B 4
+#define HCLGE_RSS_HASH_KEY_NUM 16
+struct hclge_rss_config {
+ u8 hash_config;
+ u8 rsv[7];
+ u8 hash_key[HCLGE_RSS_HASH_KEY_NUM];
+};
+
+struct hclge_rss_input_tuple {
+ u8 ipv4_tcp_en;
+ u8 ipv4_udp_en;
+ u8 ipv4_sctp_en;
+ u8 ipv4_fragment_en;
+ u8 ipv6_tcp_en;
+ u8 ipv6_udp_en;
+ u8 ipv6_sctp_en;
+ u8 ipv6_fragment_en;
+ u8 rsv[16];
+};
+
+#define HCLGE_RSS_CFG_TBL_SIZE 16
+
+struct hclge_rss_indirection_table {
+ u16 start_table_index;
+ u16 rss_set_bitmap;
+ u8 rsv[4];
+ u8 rss_result[HCLGE_RSS_CFG_TBL_SIZE];
+};
+
+#define HCLGE_RSS_TC_OFFSET_S 0
+#define HCLGE_RSS_TC_OFFSET_M (0x3ff << HCLGE_RSS_TC_OFFSET_S)
+#define HCLGE_RSS_TC_SIZE_S 12
+#define HCLGE_RSS_TC_SIZE_M (0x7 << HCLGE_RSS_TC_SIZE_S)
+#define HCLGE_RSS_TC_VALID_B 15
+struct hclge_rss_tc_mode {
+ u16 rss_tc_mode[HCLGE_MAX_TC_NUM];
+ u8 rsv[8];
+};
+
+#define HCLGE_LINK_STS_B 0
+#define HCLGE_LINK_STATUS BIT(HCLGE_LINK_STS_B)
+struct hclge_link_status {
+ u8 status;
+ u8 rsv[23];
+};
+
+struct hclge_promisc_param {
+ u8 vf_id;
+ u8 enable;
+};
+
+#define HCLGE_PROMISC_EN_B 1
+#define HCLGE_PROMISC_EN_ALL 0x7
+#define HCLGE_PROMISC_EN_UC 0x1
+#define HCLGE_PROMISC_EN_MC 0x2
+#define HCLGE_PROMISC_EN_BC 0x4
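+/*
+ * Note: HCLGE_PROMISC_EN_UC | HCLGE_PROMISC_EN_MC | HCLGE_PROMISC_EN_BC
+ * equals HCLGE_PROMISC_EN_ALL (0x7), i.e. unicast, multicast and
+ * broadcast promiscuity enabled at once.
+ */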
+struct hclge_promisc_cfg {
+ u8 flag;
+ u8 vf_id;
+ __le16 rsv0;
+ u8 rsv1[20];
+};
+
+enum hclge_promisc_type {
+ HCLGE_UNICAST = 1,
+ HCLGE_MULTICAST = 2,
+ HCLGE_BROADCAST = 3,
+};
+
+#define HCLGE_MAC_TX_EN_B 6
+#define HCLGE_MAC_RX_EN_B 7
+#define HCLGE_MAC_PAD_TX_B 11
+#define HCLGE_MAC_PAD_RX_B 12
+#define HCLGE_MAC_1588_TX_B 13
+#define HCLGE_MAC_1588_RX_B 14
+#define HCLGE_MAC_APP_LP_B 15
+#define HCLGE_MAC_LINE_LP_B 16
+#define HCLGE_MAC_FCS_TX_B 17
+#define HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B 18
+#define HCLGE_MAC_RX_FCS_STRIP_B 19
+#define HCLGE_MAC_RX_FCS_B 20
+#define HCLGE_MAC_TX_UNDER_MIN_ERR_B 21
+#define HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B 22
+
+struct hclge_config_mac_mode {
+ __le32 txrx_pad_fcs_loop_en;
+ u8 rsv[20];
+};
+
+#define HCLGE_CFG_SPEED_S 0
+#define HCLGE_CFG_SPEED_M (0x3f << HCLGE_CFG_SPEED_S)
+
+#define HCLGE_CFG_DUPLEX_B 7
+#define HCLGE_CFG_DUPLEX_M BIT(HCLGE_CFG_DUPLEX_B)
+
+struct hclge_config_mac_speed_dup {
+ u8 speed_dup;
+
+#define HCLGE_CFG_MAC_SPEED_CHANGE_EN_B 0
+ u8 mac_change_fec_en;
+ u8 rsv[22];
+};
+
+#define HCLGE_QUERY_SPEED_S 3
+#define HCLGE_QUERY_AN_B 0
+#define HCLGE_QUERY_DUPLEX_B 2
+
+#define HCLGE_QUERY_SPEED_M (0x1f << HCLGE_QUERY_SPEED_S)
+#define HCLGE_QUERY_AN_M BIT(HCLGE_QUERY_AN_B)
+#define HCLGE_QUERY_DUPLEX_M BIT(HCLGE_QUERY_DUPLEX_B)
+
+struct hclge_query_an_speed_dup {
+ u8 an_syn_dup_speed;
+ u8 pause;
+ u8 rsv[23];
+};
+
+#define HCLGE_RING_ID_MASK 0x3ff
+#define HCLGE_TQP_ENABLE_B 0
+
+#define HCLGE_MAC_CFG_AN_EN_B 0
+#define HCLGE_MAC_CFG_AN_INT_EN_B 1
+#define HCLGE_MAC_CFG_AN_INT_MSK_B 2
+#define HCLGE_MAC_CFG_AN_INT_CLR_B 3
+#define HCLGE_MAC_CFG_AN_RST_B 4
+
+#define HCLGE_MAC_CFG_AN_EN BIT(HCLGE_MAC_CFG_AN_EN_B)
+
+struct hclge_config_auto_neg {
+ __le32 cfg_an_cmd_flag;
+ u8 rsv[20];
+};
+
+#define HCLGE_MAC_MIN_MTU 64
+#define HCLGE_MAC_MAX_MTU 9728
+#define HCLGE_MAC_UPLINK_PORT 0x100
+
+struct hclge_config_max_frm_size {
+ __le16 max_frm_size;
+ u8 rsv[22];
+};
+
+enum hclge_mac_vlan_tbl_opcode {
+ HCLGE_MAC_VLAN_ADD, /* Add new or modify mac_vlan */
+ HCLGE_MAC_VLAN_UPDATE, /* Modify other fields of this table */
+ HCLGE_MAC_VLAN_REMOVE, /* Remove an entry through mac_vlan key */
+ HCLGE_MAC_VLAN_LKUP, /* Look up an entry through mac_vlan key */
+};
+
+#define HCLGE_MAC_VLAN_BIT0_EN_B 0x0
+#define HCLGE_MAC_VLAN_BIT1_EN_B 0x1
+#define HCLGE_MAC_EPORT_SW_EN_B 0xc
+#define HCLGE_MAC_EPORT_TYPE_B 0xb
+#define HCLGE_MAC_EPORT_VFID_S 0x3
+#define HCLGE_MAC_EPORT_VFID_M (0xff << HCLGE_MAC_EPORT_VFID_S)
+#define HCLGE_MAC_EPORT_PFID_S 0x0
+#define HCLGE_MAC_EPORT_PFID_M (0x7 << HCLGE_MAC_EPORT_PFID_S)
+struct hclge_mac_vlan_tbl_entry {
+ u8 flags;
+ u8 resp_code;
+ __le16 vlan_tag;
+ __le32 mac_addr_hi32;
+ __le16 mac_addr_lo16;
+ __le16 rsv1;
+ u8 entry_type;
+ u8 mc_mac_en;
+ __le16 egress_port;
+ __le16 egress_queue;
+ u8 rsv2[6];
+};
+
+#define HCLGE_CFG_MTA_MAC_SEL_S 0x0
+#define HCLGE_CFG_MTA_MAC_SEL_M (0x3 << HCLGE_CFG_MTA_MAC_SEL_S)
+#define HCLGE_CFG_MTA_MAC_EN_B 0x7
+struct hclge_mta_filter_mode {
+ u8 dmac_sel_en; /* Use lowest 2 bits as sel_mode, bit 7 as enable */
+ u8 rsv[23];
+};
+
+#define HCLGE_CFG_FUNC_MTA_ACCEPT_B 0x0
+struct hclge_cfg_func_mta_filter {
+ u8 accept; /* Only the lowest bit is used */
+ u8 function_id;
+ u8 rsv[22];
+};
+
+#define HCLGE_CFG_MTA_ITEM_ACCEPT_B 0x0
+#define HCLGE_CFG_MTA_ITEM_IDX_S 0x0
+#define HCLGE_CFG_MTA_ITEM_IDX_M (0xfff << HCLGE_CFG_MTA_ITEM_IDX_S)
+struct hclge_cfg_func_mta_item {
+ u16 item_idx; /* Only the lowest 12 bits are used */
+ u8 accept; /* Only the lowest bit is used */
+ u8 rsv[21];
+};
+
+struct hclge_mac_vlan_add {
+ __le16 flags;
+ __le16 mac_addr_hi16;
+ __le32 mac_addr_lo32;
+ __le32 mac_addr_msk_hi32;
+ __le16 mac_addr_msk_lo16;
+ __le16 vlan_tag;
+ __le16 ingress_port;
+ __le16 egress_port;
+ u8 rsv[4];
+};
+
+#define HNS3_MAC_VLAN_CFG_FLAG_BIT 0
+struct hclge_mac_vlan_remove {
+ __le16 flags;
+ __le16 mac_addr_hi16;
+ __le32 mac_addr_lo32;
+ __le32 mac_addr_msk_hi32;
+ __le16 mac_addr_msk_lo16;
+ __le16 vlan_tag;
+ __le16 ingress_port;
+ __le16 egress_port;
+ u8 rsv[4];
+};
+
+struct hclge_vlan_filter_ctrl {
+ u8 vlan_type;
+ u8 vlan_fe;
+ u8 rsv[22];
+};
+
+struct hclge_vlan_filter_pf_cfg {
+ u8 vlan_offset;
+ u8 vlan_cfg;
+ u8 rsv[2];
+ u8 vlan_offset_bitmap[20];
+};
+
+struct hclge_vlan_filter_vf_cfg {
+ u16 vlan_id;
+ u8 resp_code;
+ u8 rsv;
+ u8 vlan_cfg;
+ u8 rsv1[3];
+ u8 vf_bitmap[16];
+};
+
+struct hclge_cfg_com_tqp_queue {
+ __le16 tqp_id;
+ __le16 stream_id;
+ u8 enable;
+ u8 rsv[19];
+};
+
+struct hclge_cfg_tx_queue_pointer {
+ __le16 tqp_id;
+ __le16 tx_tail;
+ __le16 tx_head;
+ __le16 fbd_num;
+ __le16 ring_offset;
+ u8 rsv[14];
+};
+
+#define HCLGE_TSO_MSS_MIN_S 0
+#define HCLGE_TSO_MSS_MIN_M (0x3FFF << HCLGE_TSO_MSS_MIN_S)
+
+#define HCLGE_TSO_MSS_MAX_S 16
+#define HCLGE_TSO_MSS_MAX_M (0x3FFF << HCLGE_TSO_MSS_MAX_S)
+
+struct hclge_cfg_tso_status {
+ __le16 tso_mss_min;
+ __le16 tso_mss_max;
+ u8 rsv[20];
+};
+
+#define HCLGE_TSO_MSS_MIN 256
+#define HCLGE_TSO_MSS_MAX 9668
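+
+/*
+ * Sanity check (illustrative): both defaults fit their 14-bit fields,
+ * HCLGE_TSO_MSS_MIN = 256 = 0x100 and HCLGE_TSO_MSS_MAX = 9668 = 0x25C4,
+ * each within the 0x3FFF mask.
+ */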
+
+#define HCLGE_TQP_RESET_B 0
+struct hclge_reset_tqp_queue {
+ __le16 tqp_id;
+ u8 reset_req;
+ u8 ready_to_reset;
+ u8 rsv[20];
+};
+
+#define HCLGE_DEFAULT_TX_BUF 0x4000 /* 16k bytes */
+#define HCLGE_TOTAL_PKT_BUF 0x108000 /* 1.03125M bytes */
+#define HCLGE_DEFAULT_DV 0xA000 /* 40k byte */
+
+#define HCLGE_TYPE_CRQ 0
+#define HCLGE_TYPE_CSQ 1
+#define HCLGE_NIC_CSQ_BASEADDR_L_REG 0x27000
+#define HCLGE_NIC_CSQ_BASEADDR_H_REG 0x27004
+#define HCLGE_NIC_CSQ_DEPTH_REG 0x27008
+#define HCLGE_NIC_CSQ_TAIL_REG 0x27010
+#define HCLGE_NIC_CSQ_HEAD_REG 0x27014
+#define HCLGE_NIC_CRQ_BASEADDR_L_REG 0x27018
+#define HCLGE_NIC_CRQ_BASEADDR_H_REG 0x2701c
+#define HCLGE_NIC_CRQ_DEPTH_REG 0x27020
+#define HCLGE_NIC_CRQ_TAIL_REG 0x27024
+#define HCLGE_NIC_CRQ_HEAD_REG 0x27028
+#define HCLGE_NIC_CMQ_EN_B 16
+#define HCLGE_NIC_CMQ_ENABLE BIT(HCLGE_NIC_CMQ_EN_B)
+#define HCLGE_NIC_CMQ_DESC_NUM 1024
+#define HCLGE_NIC_CMQ_DESC_NUM_S 3
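+
+/*
+ * Worked example: with HCLGE_NIC_CMQ_DESC_NUM = 1024, the depth
+ * registers are programmed with (1024 >> 3) | BIT(16) = 0x10080
+ * (see hclge_cmd_config_regs()).
+ */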
+
+int hclge_cmd_init(struct hclge_dev *hdev);
+static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
+{
+ writel(value, base + reg);
+}
+
+#define hclge_write_dev(a, reg, value) \
+ hclge_write_reg((a)->io_base, (reg), (value))
+#define hclge_read_dev(a, reg) \
+ hclge_read_reg((a)->io_base, (reg))
+
+static inline u32 hclge_read_reg(u8 __iomem *base, u32 reg)
+{
+ u8 __iomem *reg_addr = READ_ONCE(base);
+
+ return readl(reg_addr + reg);
+}
+
+#define HCLGE_SEND_SYNC(flag) \
+ ((flag) & HCLGE_CMD_FLAG_NO_INTR)
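+
+/*
+ * A command is treated as synchronous when it does not request a
+ * completion interrupt; hclge_cmd_send() then polls the CSQ head
+ * register (hclge_cmd_csq_done()) until the firmware has consumed the
+ * descriptors or tx_timeout expires.
+ */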
+
+struct hclge_hw;
+int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num);
+void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
+ enum hclge_opcode_type opcode, bool is_read);
+
+int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
+ struct hclge_promisc_param *param);
+
+enum hclge_cmd_status hclge_cmd_mdio_write(struct hclge_hw *hw,
+ struct hclge_desc *desc);
+enum hclge_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw,
+ struct hclge_desc *desc);
+
+void hclge_destroy_cmd_queue(struct hclge_hw *hw);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
new file mode 100644
index 0000000..bb45365
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -0,0 +1,4265 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+#include "hclge_cmd.h"
+#include "hclge_main.h"
+#include "hclge_mdio.h"
+#include "hclge_tm.h"
+#include "hnae3.h"
+
+#define HCLGE_NAME "hclge"
+#define HCLGE_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset))))
+#define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f))
+#define HCLGE_64BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_64_bit_stats, f))
+#define HCLGE_32BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_32_bit_stats, f))
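+
+/*
+ * Usage sketch: a statistic is read generically through its offset,
+ * e.g. (assuming a populated struct hclge_hw_stats *hw_stats):
+ *
+ * u64 pause = HCLGE_STATS_READ(&hw_stats->mac_stats,
+ * HCLGE_MAC_STATS_FIELD_OFF(mac_tx_mac_pause_num));
+ */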
+
+static int hclge_rss_init_hw(struct hclge_dev *hdev);
+static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
+ enum hclge_mta_dmac_sel_type mta_mac_sel,
+ bool enable);
+static int hclge_init_vlan_config(struct hclge_dev *hdev);
+
+static struct hnae3_ae_algo ae_algo;
+
+static const struct pci_device_id ae_algo_pci_tbl[] = {
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+ /* Required last entry */
+ {0, }
+};
+
+static const struct pci_device_id roce_pci_tbl[] = {
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+ /* Required last entry */
+ {0, }
+};
+
+static const char hns3_nic_test_strs[][ETH_GSTRING_LEN] = {
+ "Mac Loopback test",
+ "Serdes Loopback test",
+ "Phy Loopback test"
+};
+
+static const struct hclge_comm_stats_str g_all_64bit_stats_string[] = {
+ {"igu_rx_oversize_pkt",
+ HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_oversize_pkt)},
+ {"igu_rx_undersize_pkt",
+ HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_undersize_pkt)},
+ {"igu_rx_out_all_pkt",
+ HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_out_all_pkt)},
+ {"igu_rx_uni_pkt",
+ HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_uni_pkt)},
+ {"igu_rx_multi_pkt",
+ HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_multi_pkt)},
+ {"igu_rx_broad_pkt",
+ HCLGE_64BIT_STATS_FIELD_OFF(igu_rx_broad_pkt)},
+ {"egu_tx_out_all_pkt",
+ HCLGE_64BIT_STATS_FIELD_OFF(egu_tx_out_all_pkt)},
+ {"egu_tx_uni_pkt",
+ HCLGE_64BIT_STATS_FIELD_OFF(egu_tx_uni_pkt)},
+ {"egu_tx_multi_pkt",
+ HCLGE_64BIT_STATS_FIELD_OFF(egu_tx_multi_pkt)},
+ {"egu_tx_broad_pkt",
+ HCLGE_64BIT_STATS_FIELD_OFF(egu_tx_broad_pkt)},
+ {"ssu_ppp_mac_key_num",
+ HCLGE_64BIT_STATS_FIELD_OFF(ssu_ppp_mac_key_num)},
+ {"ssu_ppp_host_key_num",
+ HCLGE_64BIT_STATS_FIELD_OFF(ssu_ppp_host_key_num)},
+ {"ppp_ssu_mac_rlt_num",
+ HCLGE_64BIT_STATS_FIELD_OFF(ppp_ssu_mac_rlt_num)},
+ {"ppp_ssu_host_rlt_num",
+ HCLGE_64BIT_STATS_FIELD_OFF(ppp_ssu_host_rlt_num)},
+ {"ssu_tx_in_num",
+ HCLGE_64BIT_STATS_FIELD_OFF(ssu_tx_in_num)},
+ {"ssu_tx_out_num",
+ HCLGE_64BIT_STATS_FIELD_OFF(ssu_tx_out_num)},
+ {"ssu_rx_in_num",
+ HCLGE_64BIT_STATS_FIELD_OFF(ssu_rx_in_num)},
+ {"ssu_rx_out_num",
+ HCLGE_64BIT_STATS_FIELD_OFF(ssu_rx_out_num)}
+};
+
+static const struct hclge_comm_stats_str g_all_32bit_stats_string[] = {
+ {"igu_rx_err_pkt",
+ HCLGE_32BIT_STATS_FIELD_OFF(igu_rx_err_pkt)},
+ {"igu_rx_no_eof_pkt",
+ HCLGE_32BIT_STATS_FIELD_OFF(igu_rx_no_eof_pkt)},
+ {"igu_rx_no_sof_pkt",
+ HCLGE_32BIT_STATS_FIELD_OFF(igu_rx_no_sof_pkt)},
+ {"egu_tx_1588_pkt",
+ HCLGE_32BIT_STATS_FIELD_OFF(egu_tx_1588_pkt)},
+ {"ssu_full_drop_num",
+ HCLGE_32BIT_STATS_FIELD_OFF(ssu_full_drop_num)},
+ {"ssu_part_drop_num",
+ HCLGE_32BIT_STATS_FIELD_OFF(ssu_part_drop_num)},
+ {"ppp_key_drop_num",
+ HCLGE_32BIT_STATS_FIELD_OFF(ppp_key_drop_num)},
+ {"ppp_rlt_drop_num",
+ HCLGE_32BIT_STATS_FIELD_OFF(ppp_rlt_drop_num)},
+ {"ssu_key_drop_num",
+ HCLGE_32BIT_STATS_FIELD_OFF(ssu_key_drop_num)},
+ {"pkt_curr_buf_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_cnt)},
+ {"qcn_fb_rcv_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(qcn_fb_rcv_cnt)},
+ {"qcn_fb_drop_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(qcn_fb_drop_cnt)},
+ {"qcn_fb_invaild_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(qcn_fb_invaild_cnt)},
+ {"rx_packet_tc0_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc0_in_cnt)},
+ {"rx_packet_tc1_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc1_in_cnt)},
+ {"rx_packet_tc2_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc2_in_cnt)},
+ {"rx_packet_tc3_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc3_in_cnt)},
+ {"rx_packet_tc4_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc4_in_cnt)},
+ {"rx_packet_tc5_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc5_in_cnt)},
+ {"rx_packet_tc6_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc6_in_cnt)},
+ {"rx_packet_tc7_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc7_in_cnt)},
+ {"rx_packet_tc0_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc0_out_cnt)},
+ {"rx_packet_tc1_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc1_out_cnt)},
+ {"rx_packet_tc2_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc2_out_cnt)},
+ {"rx_packet_tc3_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc3_out_cnt)},
+ {"rx_packet_tc4_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc4_out_cnt)},
+ {"rx_packet_tc5_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc5_out_cnt)},
+ {"rx_packet_tc6_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc6_out_cnt)},
+ {"rx_packet_tc7_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_packet_tc7_out_cnt)},
+ {"tx_packet_tc0_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc0_in_cnt)},
+ {"tx_packet_tc1_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc1_in_cnt)},
+ {"tx_packet_tc2_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc2_in_cnt)},
+ {"tx_packet_tc3_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc3_in_cnt)},
+ {"tx_packet_tc4_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc4_in_cnt)},
+ {"tx_packet_tc5_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc5_in_cnt)},
+ {"tx_packet_tc6_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc6_in_cnt)},
+ {"tx_packet_tc7_in_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc7_in_cnt)},
+ {"tx_packet_tc0_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc0_out_cnt)},
+ {"tx_packet_tc1_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc1_out_cnt)},
+ {"tx_packet_tc2_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc2_out_cnt)},
+ {"tx_packet_tc3_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc3_out_cnt)},
+ {"tx_packet_tc4_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc4_out_cnt)},
+ {"tx_packet_tc5_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc5_out_cnt)},
+ {"tx_packet_tc6_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc6_out_cnt)},
+ {"tx_packet_tc7_out_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_packet_tc7_out_cnt)},
+ {"pkt_curr_buf_tc0_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc0_cnt)},
+ {"pkt_curr_buf_tc1_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc1_cnt)},
+ {"pkt_curr_buf_tc2_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc2_cnt)},
+ {"pkt_curr_buf_tc3_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc3_cnt)},
+ {"pkt_curr_buf_tc4_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc4_cnt)},
+ {"pkt_curr_buf_tc5_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc5_cnt)},
+ {"pkt_curr_buf_tc6_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc6_cnt)},
+ {"pkt_curr_buf_tc7_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(pkt_curr_buf_tc7_cnt)},
+ {"mb_uncopy_num",
+ HCLGE_32BIT_STATS_FIELD_OFF(mb_uncopy_num)},
+ {"lo_pri_unicast_rlt_drop_num",
+ HCLGE_32BIT_STATS_FIELD_OFF(lo_pri_unicast_rlt_drop_num)},
+ {"hi_pri_multicast_rlt_drop_num",
+ HCLGE_32BIT_STATS_FIELD_OFF(hi_pri_multicast_rlt_drop_num)},
+ {"lo_pri_multicast_rlt_drop_num",
+ HCLGE_32BIT_STATS_FIELD_OFF(lo_pri_multicast_rlt_drop_num)},
+ {"rx_oq_drop_pkt_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(rx_oq_drop_pkt_cnt)},
+ {"tx_oq_drop_pkt_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(tx_oq_drop_pkt_cnt)},
+ {"nic_l2_err_drop_pkt_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(nic_l2_err_drop_pkt_cnt)},
+ {"roc_l2_err_drop_pkt_cnt",
+ HCLGE_32BIT_STATS_FIELD_OFF(roc_l2_err_drop_pkt_cnt)}
+};
+
+static const struct hclge_comm_stats_str g_mac_stats_string[] = {
+ {"mac_tx_mac_pause_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_mac_pause_num)},
+ {"mac_rx_mac_pause_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_mac_pause_num)},
+ {"mac_tx_pfc_pri0_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri0_pkt_num)},
+ {"mac_tx_pfc_pri1_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri1_pkt_num)},
+ {"mac_tx_pfc_pri2_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri2_pkt_num)},
+ {"mac_tx_pfc_pri3_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri3_pkt_num)},
+ {"mac_tx_pfc_pri4_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri4_pkt_num)},
+ {"mac_tx_pfc_pri5_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri5_pkt_num)},
+ {"mac_tx_pfc_pri6_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri6_pkt_num)},
+ {"mac_tx_pfc_pri7_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri7_pkt_num)},
+ {"mac_rx_pfc_pri0_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri0_pkt_num)},
+ {"mac_rx_pfc_pri1_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri1_pkt_num)},
+ {"mac_rx_pfc_pri2_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri2_pkt_num)},
+ {"mac_rx_pfc_pri3_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri3_pkt_num)},
+ {"mac_rx_pfc_pri4_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri4_pkt_num)},
+ {"mac_rx_pfc_pri5_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri5_pkt_num)},
+ {"mac_rx_pfc_pri6_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri6_pkt_num)},
+ {"mac_rx_pfc_pri7_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri7_pkt_num)},
+ {"mac_tx_total_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_total_pkt_num)},
+ {"mac_tx_total_oct_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_total_oct_num)},
+ {"mac_tx_good_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_good_pkt_num)},
+ {"mac_tx_bad_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_bad_pkt_num)},
+ {"mac_tx_good_oct_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_good_oct_num)},
+ {"mac_tx_bad_oct_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_bad_oct_num)},
+ {"mac_tx_uni_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_uni_pkt_num)},
+ {"mac_tx_multi_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_multi_pkt_num)},
+ {"mac_tx_broad_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_broad_pkt_num)},
+ {"mac_tx_undersize_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_undersize_pkt_num)},
+ {"mac_tx_overrsize_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_overrsize_pkt_num)},
+ {"mac_tx_64_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_64_oct_pkt_num)},
+ {"mac_tx_65_127_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_65_127_oct_pkt_num)},
+ {"mac_tx_128_255_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_128_255_oct_pkt_num)},
+ {"mac_tx_256_511_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_256_511_oct_pkt_num)},
+ {"mac_tx_512_1023_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_512_1023_oct_pkt_num)},
+ {"mac_tx_1024_1518_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_1024_1518_oct_pkt_num)},
+ {"mac_tx_1519_max_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_tx_1519_max_oct_pkt_num)},
+ {"mac_rx_total_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_total_pkt_num)},
+ {"mac_rx_total_oct_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_total_oct_num)},
+ {"mac_rx_good_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_good_pkt_num)},
+ {"mac_rx_bad_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_bad_pkt_num)},
+ {"mac_rx_good_oct_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_good_oct_num)},
+ {"mac_rx_bad_oct_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_bad_oct_num)},
+ {"mac_rx_uni_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_uni_pkt_num)},
+ {"mac_rx_multi_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_multi_pkt_num)},
+ {"mac_rx_broad_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_broad_pkt_num)},
+ {"mac_rx_undersize_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_undersize_pkt_num)},
+ {"mac_rx_overrsize_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_overrsize_pkt_num)},
+ {"mac_rx_64_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_64_oct_pkt_num)},
+ {"mac_rx_65_127_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_65_127_oct_pkt_num)},
+ {"mac_rx_128_255_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_128_255_oct_pkt_num)},
+ {"mac_rx_256_511_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_256_511_oct_pkt_num)},
+ {"mac_rx_512_1023_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_512_1023_oct_pkt_num)},
+ {"mac_rx_1024_1518_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_1024_1518_oct_pkt_num)},
+ {"mac_rx_1519_max_oct_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rx_1519_max_oct_pkt_num)},
+
+ {"mac_trans_fragment_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_trans_fragment_pkt_num)},
+ {"mac_trans_undermin_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_trans_undermin_pkt_num)},
+ {"mac_trans_jabber_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_trans_jabber_pkt_num)},
+ {"mac_trans_err_all_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_trans_err_all_pkt_num)},
+ {"mac_trans_from_app_good_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_trans_from_app_good_pkt_num)},
+ {"mac_trans_from_app_bad_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_trans_from_app_bad_pkt_num)},
+ {"mac_rcv_fragment_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rcv_fragment_pkt_num)},
+ {"mac_rcv_undermin_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rcv_undermin_pkt_num)},
+ {"mac_rcv_jabber_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rcv_jabber_pkt_num)},
+ {"mac_rcv_fcs_err_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rcv_fcs_err_pkt_num)},
+ {"mac_rcv_send_app_good_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rcv_send_app_good_pkt_num)},
+ {"mac_rcv_send_app_bad_pkt_num",
+ HCLGE_MAC_STATS_FIELD_OFF(mac_rcv_send_app_bad_pkt_num)}
+};
+
+static int hclge_64_bit_update_stats(struct hclge_dev *hdev)
+{
+#define HCLGE_64_BIT_CMD_NUM 5
+#define HCLGE_64_BIT_RTN_DATANUM 4
+ u64 *data = (u64 *)(&hdev->hw_stats.all_64_bit_stats);
+ struct hclge_desc desc[HCLGE_64_BIT_CMD_NUM];
+ u64 *desc_data;
+ int i, k, n;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_STATS_64_BIT, true);
+ ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_64_BIT_CMD_NUM);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Get 64 bit pkt stats fail, status = %d.\n", ret);
+ return ret;
+ }
+
+ for (i = 0; i < HCLGE_64_BIT_CMD_NUM; i++) {
+ if (unlikely(i == 0)) {
+ desc_data = (u64 *)(&desc[i].data[0]);
+ n = HCLGE_64_BIT_RTN_DATANUM - 1;
+ } else {
+ desc_data = (u64 *)(&desc[i]);
+ n = HCLGE_64_BIT_RTN_DATANUM;
+ }
+ for (k = 0; k < n; k++) {
+ *data++ += le64_to_cpu(*desc_data);
+ desc_data++;
+ }
+ }
+
+ return 0;
+}
+
+static void hclge_reset_partial_32bit_counter(struct hclge_32_bit_stats *stats)
+{
+ stats->pkt_curr_buf_cnt = 0;
+ stats->pkt_curr_buf_tc0_cnt = 0;
+ stats->pkt_curr_buf_tc1_cnt = 0;
+ stats->pkt_curr_buf_tc2_cnt = 0;
+ stats->pkt_curr_buf_tc3_cnt = 0;
+ stats->pkt_curr_buf_tc4_cnt = 0;
+ stats->pkt_curr_buf_tc5_cnt = 0;
+ stats->pkt_curr_buf_tc6_cnt = 0;
+ stats->pkt_curr_buf_tc7_cnt = 0;
+}
+
+static int hclge_32_bit_update_stats(struct hclge_dev *hdev)
+{
+#define HCLGE_32_BIT_CMD_NUM 8
+#define HCLGE_32_BIT_RTN_DATANUM 8
+
+ struct hclge_desc desc[HCLGE_32_BIT_CMD_NUM];
+ struct hclge_32_bit_stats *all_32_bit_stats;
+ u32 *desc_data;
+ int i, k, n;
+ u64 *data;
+ int ret;
+
+ all_32_bit_stats = &hdev->hw_stats.all_32_bit_stats;
+ data = (u64 *)(&all_32_bit_stats->egu_tx_1588_pkt);
+
+ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_STATS_32_BIT, true);
+ ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_32_BIT_CMD_NUM);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Get 32 bit pkt stats fail, status = %d.\n", ret);
+
+ return ret;
+ }
+
+ hclge_reset_partial_32bit_counter(all_32_bit_stats);
+ for (i = 0; i < HCLGE_32_BIT_CMD_NUM; i++) {
+ if (unlikely(i == 0)) {
+ all_32_bit_stats->igu_rx_err_pkt +=
+ le32_to_cpu(desc[i].data[0]);
+ all_32_bit_stats->igu_rx_no_eof_pkt +=
+ le32_to_cpu(desc[i].data[1]) & 0xffff;
+ all_32_bit_stats->igu_rx_no_sof_pkt +=
+ (le32_to_cpu(desc[i].data[1]) >> 16) & 0xffff;
+
+ desc_data = (u32 *)(&desc[i].data[2]);
+ n = HCLGE_32_BIT_RTN_DATANUM - 4;
+ } else {
+ desc_data = (u32 *)(&desc[i]);
+ n = HCLGE_32_BIT_RTN_DATANUM;
+ }
+ for (k = 0; k < n; k++) {
+ *data++ += le32_to_cpu(*desc_data);
+ desc_data++;
+ }
+ }
+
+ return 0;
+}
+
+static int hclge_mac_update_stats(struct hclge_dev *hdev)
+{
+#define HCLGE_MAC_CMD_NUM 17
+#define HCLGE_RTN_DATA_NUM 4
+
+ u64 *data = (u64 *)(&hdev->hw_stats.mac_stats);
+ struct hclge_desc desc[HCLGE_MAC_CMD_NUM];
+ u64 *desc_data;
+ int i, k, n;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_STATS_MAC, true);
+ ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_MAC_CMD_NUM);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Get MAC pkt stats fail, status = %d.\n", ret);
+
+ return ret;
+ }
+
+ for (i = 0; i < HCLGE_MAC_CMD_NUM; i++) {
+ if (unlikely(i == 0)) {
+ desc_data = (u64 *)(&desc[i].data[0]);
+ n = HCLGE_RTN_DATA_NUM - 2;
+ } else {
+ desc_data = (u64 *)(&desc[i]);
+ n = HCLGE_RTN_DATA_NUM;
+ }
+ for (k = 0; k < n; k++) {
+ *data++ += le64_to_cpu(*desc_data);
+ desc_data++;
+ }
+ }
+
+ return 0;
+}
+
+static int hclge_tqps_update_stats(struct hnae3_handle *handle)
+{
+ struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ struct hnae3_queue *queue;
+ struct hclge_desc desc[1];
+ struct hclge_tqp *tqp;
+ int ret, i;
+
+ for (i = 0; i < kinfo->num_tqps; i++) {
+ queue = handle->kinfo.tqp[i];
+ tqp = container_of(queue, struct hclge_tqp, q);
+ /* command : HCLGE_OPC_QUERY_RX_STATUS */
+ hclge_cmd_setup_basic_desc(&desc[0],
+ HCLGE_OPC_QUERY_RX_STATUS,
+ true);
+
+ desc[0].data[0] = cpu_to_le32(tqp->index & 0x1ff);
+ ret = hclge_cmd_send(&hdev->hw, desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Query tqp stat fail, status = %d, queue = %d\n",
+ ret, i);
+ return ret;
+ }
+ tqp->tqp_stats.rcb_rx_ring_pktnum_rcd +=
+ le32_to_cpu(desc[0].data[4]);
+ }
+
+ for (i = 0; i < kinfo->num_tqps; i++) {
+ queue = handle->kinfo.tqp[i];
+ tqp = container_of(queue, struct hclge_tqp, q);
+ /* command : HCLGE_OPC_QUERY_TX_STATUS */
+ hclge_cmd_setup_basic_desc(&desc[0],
+ HCLGE_OPC_QUERY_TX_STATUS,
+ true);
+
+ desc[0].data[0] = cpu_to_le32(tqp->index & 0x1ff);
+ ret = hclge_cmd_send(&hdev->hw, desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Query tqp stat fail, status = %d, queue = %d\n",
+ ret, i);
+ return ret;
+ }
+ tqp->tqp_stats.rcb_tx_ring_pktnum_rcd +=
+ le32_to_cpu(desc[0].data[4]);
+ }
+
+ return 0;
+}
+
+static u64 *hclge_tqps_get_stats(struct hnae3_handle *handle, u64 *data)
+{
+ struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+ struct hclge_tqp *tqp;
+ u64 *buff = data;
+ int i;
+
+ for (i = 0; i < kinfo->num_tqps; i++) {
+ tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q);
+ *buff++ = tqp->tqp_stats.rcb_tx_ring_pktnum_rcd;
+ }
+
+ for (i = 0; i < kinfo->num_tqps; i++) {
+ tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q);
+ *buff++ = tqp->tqp_stats.rcb_rx_ring_pktnum_rcd;
+ }
+
+ return buff;
+}
+
+static int hclge_tqps_get_sset_count(struct hnae3_handle *handle, int stringset)
+{
+ struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+
+ return kinfo->num_tqps * 2;
+}
+
+static u8 *hclge_tqps_get_strings(struct hnae3_handle *handle, u8 *data)
+{
+ struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+ u8 *buff = data;
+ int i = 0;
+
+ for (i = 0; i < kinfo->num_tqps; i++) {
+ struct hclge_tqp *tqp = container_of(handle->kinfo.tqp[i],
+ struct hclge_tqp, q);
+ snprintf(buff, ETH_GSTRING_LEN, "rcb_q%d_tx_pktnum_rcd",
+ tqp->index);
+ buff = buff + ETH_GSTRING_LEN;
+ }
+
+ for (i = 0; i < kinfo->num_tqps; i++) {
+ struct hclge_tqp *tqp = container_of(kinfo->tqp[i],
+ struct hclge_tqp, q);
+ snprintf(buff, ETH_GSTRING_LEN, "rcb_q%d_rx_pktnum_rcd",
+ tqp->index);
+ buff = buff + ETH_GSTRING_LEN;
+ }
+
+ return buff;
+}
+
+static u64 *hclge_comm_get_stats(void *comm_stats,
+ const struct hclge_comm_stats_str strs[],
+ int size, u64 *data)
+{
+ u64 *buf = data;
+ u32 i;
+
+ for (i = 0; i < size; i++)
+ buf[i] = HCLGE_STATS_READ(comm_stats, strs[i].offset);
+
+ return buf + size;
+}
+
+static u8 *hclge_comm_get_strings(u32 stringset,
+ const struct hclge_comm_stats_str strs[],
+ int size, u8 *data)
+{
+ char *buff = (char *)data;
+ u32 i;
+
+ if (stringset != ETH_SS_STATS)
+ return buff;
+
+ for (i = 0; i < size; i++) {
+ snprintf(buff, ETH_GSTRING_LEN, "%s", strs[i].desc);
+ buff = buff + ETH_GSTRING_LEN;
+ }
+
+ return (u8 *)buff;
+}
+
+static void hclge_update_netstat(struct hclge_hw_stats *hw_stats,
+ struct net_device_stats *net_stats)
+{
+ net_stats->tx_dropped = 0;
+ net_stats->rx_dropped = hw_stats->all_32_bit_stats.ssu_full_drop_num;
+ net_stats->rx_dropped += hw_stats->all_32_bit_stats.ppp_key_drop_num;
+ net_stats->rx_dropped += hw_stats->all_32_bit_stats.ssu_key_drop_num;
+
+ net_stats->rx_errors = hw_stats->mac_stats.mac_rx_overrsize_pkt_num;
+ net_stats->rx_errors += hw_stats->mac_stats.mac_rx_undersize_pkt_num;
+ net_stats->rx_errors += hw_stats->all_32_bit_stats.igu_rx_err_pkt;
+ net_stats->rx_errors += hw_stats->all_32_bit_stats.igu_rx_no_eof_pkt;
+ net_stats->rx_errors += hw_stats->all_32_bit_stats.igu_rx_no_sof_pkt;
+ net_stats->rx_errors += hw_stats->mac_stats.mac_rcv_fcs_err_pkt_num;
+
+ net_stats->multicast = hw_stats->mac_stats.mac_tx_multi_pkt_num;
+ net_stats->multicast += hw_stats->mac_stats.mac_rx_multi_pkt_num;
+
+ net_stats->rx_crc_errors = hw_stats->mac_stats.mac_rcv_fcs_err_pkt_num;
+ net_stats->rx_length_errors =
+ hw_stats->mac_stats.mac_rx_undersize_pkt_num;
+ net_stats->rx_length_errors +=
+ hw_stats->mac_stats.mac_rx_overrsize_pkt_num;
+ net_stats->rx_over_errors =
+ hw_stats->mac_stats.mac_rx_overrsize_pkt_num;
+}
+
+static void hclge_update_stats_for_all(struct hclge_dev *hdev)
+{
+ struct hnae3_handle *handle;
+ int status;
+
+ handle = &hdev->vport[0].nic;
+ if (handle->client) {
+ status = hclge_tqps_update_stats(handle);
+ if (status) {
+ dev_err(&hdev->pdev->dev,
+ "Update TQPS stats fail, status = %d.\n",
+ status);
+ }
+ }
+
+ status = hclge_mac_update_stats(hdev);
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "Update MAC stats fail, status = %d.\n", status);
+
+ status = hclge_32_bit_update_stats(hdev);
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "Update 32 bit stats fail, status = %d.\n",
+ status);
+
+ hclge_update_netstat(&hdev->hw_stats, &handle->kinfo.netdev->stats);
+}
+
+static void hclge_update_stats(struct hnae3_handle *handle,
+ struct net_device_stats *net_stats)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_hw_stats *hw_stats = &hdev->hw_stats;
+ int status;
+
+ status = hclge_mac_update_stats(hdev);
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "Update MAC stats fail, status = %d.\n",
+ status);
+
+ status = hclge_32_bit_update_stats(hdev);
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "Update 32 bit stats fail, status = %d.\n",
+ status);
+
+ status = hclge_64_bit_update_stats(hdev);
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "Update 64 bit stats fail, status = %d.\n",
+ status);
+
+ status = hclge_tqps_update_stats(handle);
+ if (status)
+ dev_err(&hdev->pdev->dev,
+ "Update TQPS stats fail, status = %d.\n",
+ status);
+
+ hclge_update_netstat(hw_stats, net_stats);
+}
+
+static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset)
+{
+#define HCLGE_LOOPBACK_TEST_FLAGS 0x7
+
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ int count = 0;
+
+ /* Loopback test support rules:
+ * mac: only GE mode is supported
+ * serdes: supported by all mac modes, including GE/XGE/LGE/CGE
+ * phy: only supported when a phy device exists on the board
+ */
+ if (stringset == ETH_SS_TEST) {
+ /* clear loopback bit flags at first */
+ handle->flags = (handle->flags & (~HCLGE_LOOPBACK_TEST_FLAGS));
+ if (hdev->hw.mac.speed == HCLGE_MAC_SPEED_10M ||
+ hdev->hw.mac.speed == HCLGE_MAC_SPEED_100M ||
+ hdev->hw.mac.speed == HCLGE_MAC_SPEED_1G) {
+ count += 1;
+ handle->flags |= HNAE3_SUPPORT_MAC_LOOPBACK;
+ } else {
+ count = -EOPNOTSUPP;
+ }
+ } else if (stringset == ETH_SS_STATS) {
+ count = ARRAY_SIZE(g_mac_stats_string) +
+ ARRAY_SIZE(g_all_32bit_stats_string) +
+ ARRAY_SIZE(g_all_64bit_stats_string) +
+ hclge_tqps_get_sset_count(handle, stringset);
+ }
+
+ return count;
+}
+
+static void hclge_get_strings(struct hnae3_handle *handle,
+ u32 stringset,
+ u8 *data)
+{
+ u8 *p = data;
+ int size;
+
+ if (stringset == ETH_SS_STATS) {
+ size = ARRAY_SIZE(g_mac_stats_string);
+ p = hclge_comm_get_strings(stringset,
+ g_mac_stats_string,
+ size,
+ p);
+ size = ARRAY_SIZE(g_all_32bit_stats_string);
+ p = hclge_comm_get_strings(stringset,
+ g_all_32bit_stats_string,
+ size,
+ p);
+ size = ARRAY_SIZE(g_all_64bit_stats_string);
+ p = hclge_comm_get_strings(stringset,
+ g_all_64bit_stats_string,
+ size,
+ p);
+ p = hclge_tqps_get_strings(handle, p);
+ } else if (stringset == ETH_SS_TEST) {
+ if (handle->flags & HNAE3_SUPPORT_MAC_LOOPBACK) {
+ memcpy(p,
+ hns3_nic_test_strs[HNAE3_MAC_INTER_LOOP_MAC],
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ if (handle->flags & HNAE3_SUPPORT_SERDES_LOOPBACK) {
+ memcpy(p,
+ hns3_nic_test_strs[HNAE3_MAC_INTER_LOOP_SERDES],
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ if (handle->flags & HNAE3_SUPPORT_PHY_LOOPBACK) {
+ memcpy(p,
+ hns3_nic_test_strs[HNAE3_MAC_INTER_LOOP_PHY],
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ }
+}
+
+static void hclge_get_stats(struct hnae3_handle *handle, u64 *data)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ u64 *p;
+
+ p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats,
+ g_mac_stats_string,
+ ARRAY_SIZE(g_mac_stats_string),
+ data);
+ p = hclge_comm_get_stats(&hdev->hw_stats.all_32_bit_stats,
+ g_all_32bit_stats_string,
+ ARRAY_SIZE(g_all_32bit_stats_string),
+ p);
+ p = hclge_comm_get_stats(&hdev->hw_stats.all_64_bit_stats,
+ g_all_64bit_stats_string,
+ ARRAY_SIZE(g_all_64bit_stats_string),
+ p);
+ p = hclge_tqps_get_stats(handle, p);
+}
+
+static int hclge_parse_func_status(struct hclge_dev *hdev,
+ struct hclge_func_status *status)
+{
+ if (!(status->pf_state & HCLGE_PF_STATE_DONE))
+ return -EINVAL;
+
+ /* Set the pf to main pf */
+ if (status->pf_state & HCLGE_PF_STATE_MAIN)
+ hdev->flag |= HCLGE_FLAG_MAIN;
+ else
+ hdev->flag &= ~HCLGE_FLAG_MAIN;
+
+ hdev->num_req_vfs = status->vf_num / status->pf_num;
+ return 0;
+}
+
+static int hclge_query_function_status(struct hclge_dev *hdev)
+{
+ struct hclge_func_status *req;
+ struct hclge_desc desc;
+ int timeout = 0;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FUNC_STATUS, true);
+ req = (struct hclge_func_status *)desc.data;
+
+ do {
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "query function status failed %d.\n",
+ ret);
+
+ return ret;
+ }
+
+ /* Check if pf reset is done */
+ if (req->pf_state)
+ break;
+ usleep_range(1000, 2000);
+ } while (timeout++ < 5);
+
+ return hclge_parse_func_status(hdev, req);
+}
+
+static int hclge_query_pf_resource(struct hclge_dev *hdev)
+{
+ struct hclge_pf_res *req;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_PF_RSRC, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "query pf resource failed %d.\n", ret);
+ return ret;
+ }
+
+ req = (struct hclge_pf_res *)desc.data;
+ hdev->num_tqps = __le16_to_cpu(req->tqp_num);
+ hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S;
+
+ if (hnae_get_bit(hdev->ae_dev->flag, HNAE_DEV_SUPPORT_ROCE_B)) {
+ hdev->num_roce_msix =
+ hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
+ HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+
+ /* PF should have NIC vectors and Roce vectors;
+ * NIC vectors are queued before Roce vectors.
+ */
+ hdev->num_msi = hdev->num_roce_msix + HCLGE_ROCE_VECTOR_OFFSET;
+ } else {
+ hdev->num_msi =
+ hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number),
+ HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+ }
+
+ return 0;
+}
+
+static int hclge_parse_speed(int speed_cmd, int *speed)
+{
+ switch (speed_cmd) {
+ case 6:
+ *speed = HCLGE_MAC_SPEED_10M;
+ break;
+ case 7:
+ *speed = HCLGE_MAC_SPEED_100M;
+ break;
+ case 0:
+ *speed = HCLGE_MAC_SPEED_1G;
+ break;
+ case 1:
+ *speed = HCLGE_MAC_SPEED_10G;
+ break;
+ case 2:
+ *speed = HCLGE_MAC_SPEED_25G;
+ break;
+ case 3:
+ *speed = HCLGE_MAC_SPEED_40G;
+ break;
+ case 4:
+ *speed = HCLGE_MAC_SPEED_50G;
+ break;
+ case 5:
+ *speed = HCLGE_MAC_SPEED_100G;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
+{
+ struct hclge_cfg_param *req;
+ u64 mac_addr_tmp_high;
+ u64 mac_addr_tmp;
+ int i;
+
+ req = (struct hclge_cfg_param *)desc[0].data;
+
+ /* get the configuration */
+ cfg->vmdq_vport_num = hnae_get_field(__le32_to_cpu(req->param[0]),
+ HCLGE_CFG_VMDQ_M,
+ HCLGE_CFG_VMDQ_S);
+ cfg->tc_num = hnae_get_field(__le32_to_cpu(req->param[0]),
+ HCLGE_CFG_TC_NUM_M, HCLGE_CFG_TC_NUM_S);
+ cfg->tqp_desc_num = hnae_get_field(__le32_to_cpu(req->param[0]),
+ HCLGE_CFG_TQP_DESC_N_M,
+ HCLGE_CFG_TQP_DESC_N_S);
+
+ cfg->phy_addr = hnae_get_field(__le32_to_cpu(req->param[1]),
+ HCLGE_CFG_PHY_ADDR_M,
+ HCLGE_CFG_PHY_ADDR_S);
+ cfg->media_type = hnae_get_field(__le32_to_cpu(req->param[1]),
+ HCLGE_CFG_MEDIA_TP_M,
+ HCLGE_CFG_MEDIA_TP_S);
+ cfg->rx_buf_len = hnae_get_field(__le32_to_cpu(req->param[1]),
+ HCLGE_CFG_RX_BUF_LEN_M,
+ HCLGE_CFG_RX_BUF_LEN_S);
+ /* get mac_address */
+ mac_addr_tmp = __le32_to_cpu(req->param[2]);
+ mac_addr_tmp_high = hnae_get_field(__le32_to_cpu(req->param[3]),
+ HCLGE_CFG_MAC_ADDR_H_M,
+ HCLGE_CFG_MAC_ADDR_H_S);
+
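+ /*
+ * Bits 0-31 of the MAC address come from param[2], bits 32-47 from
+ * the low 16 bits of param[3]; the (<< 31) << 1 form mirrors the
+ * split-shift idiom used for the 64-bit queue base addresses.
+ */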
+ mac_addr_tmp |= (mac_addr_tmp_high << 31) << 1;
+
+ cfg->default_speed = hnae_get_field(__le32_to_cpu(req->param[3]),
+ HCLGE_CFG_DEFAULT_SPEED_M,
+ HCLGE_CFG_DEFAULT_SPEED_S);
+ for (i = 0; i < ETH_ALEN; i++)
+ cfg->mac_addr[i] = (mac_addr_tmp >> (8 * i)) & 0xff;
+
+ req = (struct hclge_cfg_param *)desc[1].data;
+ cfg->numa_node_map = __le32_to_cpu(req->param[0]);
+}
+
+/* hclge_get_cfg: query the static parameters from flash
+ * @hdev: pointer to struct hclge_dev
+ * @hcfg: the config structure to be filled
+ */
+static int hclge_get_cfg(struct hclge_dev *hdev, struct hclge_cfg *hcfg)
+{
+ struct hclge_desc desc[HCLGE_PF_CFG_DESC_NUM];
+ struct hclge_cfg_param *req;
+ int i, ret;
+
+ for (i = 0; i < HCLGE_PF_CFG_DESC_NUM; i++) {
+ req = (struct hclge_cfg_param *)desc[i].data;
+ hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_GET_CFG_PARAM,
+ true);
+ hnae_set_field(req->offset, HCLGE_CFG_OFFSET_M,
+ HCLGE_CFG_OFFSET_S, i * HCLGE_CFG_RD_LEN_BYTES);
+ /* Len should be in units of 4 bytes when sent to hardware */
+ hnae_set_field(req->offset, HCLGE_CFG_RD_LEN_M,
+ HCLGE_CFG_RD_LEN_S,
+ HCLGE_CFG_RD_LEN_BYTES / HCLGE_CFG_RD_LEN_UNIT);
+ req->offset = cpu_to_le32(req->offset);
+ }
+
+ ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PF_CFG_DESC_NUM);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "get config failed %d.\n", ret);
+ return ret;
+ }
+
+ hclge_parse_cfg(hcfg, desc);
+ return 0;
+}
+
+static int hclge_get_cap(struct hclge_dev *hdev)
+{
+ int ret;
+
+ ret = hclge_query_function_status(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "query function status error %d.\n", ret);
+ return ret;
+ }
+
+ /* get pf resource */
+ ret = hclge_query_pf_resource(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "query pf resource error %d.\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_configure(struct hclge_dev *hdev)
+{
+ struct hclge_cfg cfg;
+ int ret, i;
+
+ ret = hclge_get_cfg(hdev, &cfg);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "get mac mode error %d.\n", ret);
+ return ret;
+ }
+
+ hdev->num_vmdq_vport = cfg.vmdq_vport_num;
+ hdev->base_tqp_pid = 0;
+ hdev->rss_size_max = 1;
+ hdev->rx_buf_len = cfg.rx_buf_len;
+ for (i = 0; i < ETH_ALEN; i++)
+ hdev->hw.mac.mac_addr[i] = cfg.mac_addr[i];
+ hdev->hw.mac.media_type = cfg.media_type;
+ hdev->num_desc = cfg.tqp_desc_num;
+ hdev->tm_info.num_pg = 1;
+ hdev->tm_info.num_tc = cfg.tc_num;
+ hdev->tm_info.hw_pfc_map = 0;
+
+ ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "failed to parse the default speed, ret = %d.\n", ret);
+ return ret;
+ }
+
+ if ((hdev->tm_info.num_tc > HNAE3_MAX_TC) ||
+ (hdev->tm_info.num_tc < 1)) {
+ dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
+ hdev->tm_info.num_tc);
+ hdev->tm_info.num_tc = 1;
+ }
+
+ /* Non-contiguous TC configurations are currently not supported */
+ for (i = 0; i < cfg.tc_num; i++)
+ hnae_set_bit(hdev->hw_tc_map, i, 1);
+
+ if (!hdev->num_vmdq_vport && !hdev->num_req_vfs)
+ hdev->tx_sch_mode = HCLGE_FLAG_TC_BASE_SCH_MODE;
+ else
+ hdev->tx_sch_mode = HCLGE_FLAG_VNET_BASE_SCH_MODE;
+
+ return ret;
+}
+
+static int hclge_config_tso(struct hclge_dev *hdev, int tso_mss_min,
+ int tso_mss_max)
+{
+ struct hclge_cfg_tso_status *req;
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TSO_GENERIC_CONFIG, false);
+
+ req = (struct hclge_cfg_tso_status *)desc.data;
+ hnae_set_field(req->tso_mss_min, HCLGE_TSO_MSS_MIN_M,
+ HCLGE_TSO_MSS_MIN_S, tso_mss_min);
+ /* Assumption: the _MAX_ mask/shift macros are defined alongside the
+ * _MIN_ ones; re-using the _MIN_ pair for tso_mss_max looks like a
+ * copy-paste slip.
+ */
+ hnae_set_field(req->tso_mss_max, HCLGE_TSO_MSS_MAX_M,
+ HCLGE_TSO_MSS_MAX_S, tso_mss_max);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_alloc_tqps(struct hclge_dev *hdev)
+{
+ struct hclge_tqp *tqp;
+ int i;
+
+ hdev->htqp = devm_kcalloc(&hdev->pdev->dev, hdev->num_tqps,
+ sizeof(struct hclge_tqp), GFP_KERNEL);
+ if (!hdev->htqp)
+ return -ENOMEM;
+
+ tqp = hdev->htqp;
+
+ for (i = 0; i < hdev->num_tqps; i++) {
+ tqp->dev = &hdev->pdev->dev;
+ tqp->index = i;
+
+ tqp->q.ae_algo = &ae_algo;
+ tqp->q.buf_size = hdev->rx_buf_len;
+ tqp->q.desc_num = hdev->num_desc;
+ tqp->q.io_base = hdev->hw.io_base + HCLGE_TQP_REG_OFFSET +
+ i * HCLGE_TQP_REG_SIZE;
+
+ tqp++;
+ }
+
+ return 0;
+}
+
+static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id,
+ u16 tqp_pid, u16 tqp_vid, bool is_pf)
+{
+ struct hclge_tqp_map *req;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SET_TQP_MAP, false);
+
+ req = (struct hclge_tqp_map *)desc.data;
+ req->tqp_id = cpu_to_le16(tqp_pid);
+ req->tqp_vf = cpu_to_le16(func_id);
+ req->tqp_flag = !is_pf << HCLGE_TQP_MAP_TYPE_B |
+ 1 << HCLGE_TQP_MAP_EN_B;
+ req->tqp_vid = cpu_to_le16(tqp_vid);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "TQP map failed %d.\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_assign_tqp(struct hclge_vport *vport,
+ struct hnae3_queue **tqp, u16 num_tqps)
+{
+ struct hclge_dev *hdev = vport->back;
+ int i, alloced, func_id, ret;
+ bool is_pf;
+
+ func_id = vport->vport_id;
+ is_pf = (vport->vport_id == 0);
+
+ for (i = 0, alloced = 0; i < hdev->num_tqps &&
+ alloced < num_tqps; i++) {
+ if (!hdev->htqp[i].alloced) {
+ hdev->htqp[i].q.handle = &vport->nic;
+ hdev->htqp[i].q.tqp_index = alloced;
+ tqp[alloced] = &hdev->htqp[i].q;
+ hdev->htqp[i].alloced = true;
+ ret = hclge_map_tqps_to_func(hdev, func_id,
+ hdev->htqp[i].index,
+ alloced, is_pf);
+ if (ret)
+ return ret;
+
+ alloced++;
+ }
+ }
+ vport->alloc_tqps = num_tqps;
+
+ return 0;
+}
+
+static int hclge_knic_setup(struct hclge_vport *vport, u16 num_tqps)
+{
+ struct hnae3_handle *nic = &vport->nic;
+ struct hnae3_knic_private_info *kinfo = &nic->kinfo;
+ struct hclge_dev *hdev = vport->back;
+ int i, ret;
+
+ kinfo->num_desc = hdev->num_desc;
+ kinfo->rx_buf_len = hdev->rx_buf_len;
+ kinfo->num_tc = min_t(u16, num_tqps, hdev->tm_info.num_tc);
+ kinfo->rss_size
+ = min_t(u16, hdev->rss_size_max, num_tqps / kinfo->num_tc);
+ kinfo->num_tqps = kinfo->rss_size * kinfo->num_tc;
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ if (hdev->hw_tc_map & BIT(i)) {
+ kinfo->tc_info[i].enable = true;
+ kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size;
+ kinfo->tc_info[i].tqp_count = kinfo->rss_size;
+ kinfo->tc_info[i].tc = i;
+ } else {
+ /* Set to default queue if TC is disabled */
+ kinfo->tc_info[i].enable = false;
+ kinfo->tc_info[i].tqp_offset = 0;
+ kinfo->tc_info[i].tqp_count = 1;
+ kinfo->tc_info[i].tc = 0;
+ }
+ }
+
+ kinfo->tqp = devm_kcalloc(&hdev->pdev->dev, kinfo->num_tqps,
+ sizeof(struct hnae3_queue *), GFP_KERNEL);
+ if (!kinfo->tqp)
+ return -ENOMEM;
+
+ ret = hclge_assign_tqp(vport, kinfo->tqp, kinfo->num_tqps);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "fail to assign TQPs %d.\n", ret);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void hclge_unic_setup(struct hclge_vport *vport, u16 num_tqps)
+{
+ /* this will be initialized later */
+}
+
+static int hclge_vport_setup(struct hclge_vport *vport, u16 num_tqps)
+{
+ struct hnae3_handle *nic = &vport->nic;
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ nic->pdev = hdev->pdev;
+ nic->ae_algo = &ae_algo;
+ nic->numa_node_mask = hdev->numa_node_mask;
+
+ if (hdev->ae_dev->dev_type == HNAE3_DEV_KNIC) {
+ ret = hclge_knic_setup(vport, num_tqps);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "knic setup failed %d\n",
+ ret);
+ return ret;
+ }
+ } else {
+ hclge_unic_setup(vport, num_tqps);
+ }
+
+ return 0;
+}
+
+static int hclge_alloc_vport(struct hclge_dev *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ struct hclge_vport *vport;
+ u32 tqp_main_vport;
+ u32 tqp_per_vport;
+ int num_vport, i;
+ int ret;
+
+ /* We need to alloc a vport for the PF's main NIC */
+ num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
+
+ if (hdev->num_tqps < num_vport)
+ num_vport = hdev->num_tqps;
+
+ /* Alloc the same number of TQPs for every vport */
+ tqp_per_vport = hdev->num_tqps / num_vport;
+ tqp_main_vport = tqp_per_vport + hdev->num_tqps % num_vport;
+
+ vport = devm_kcalloc(&pdev->dev, num_vport, sizeof(struct hclge_vport),
+ GFP_KERNEL);
+ if (!vport)
+ return -ENOMEM;
+
+ hdev->vport = vport;
+ hdev->num_alloc_vport = num_vport;
+
+#ifdef CONFIG_PCI_IOV
+ /* Enable SRIOV */
+ if (hdev->num_req_vfs) {
+ dev_info(&pdev->dev, "active VFs(%d) found, enabling SRIOV\n",
+ hdev->num_req_vfs);
+ ret = pci_enable_sriov(hdev->pdev, hdev->num_req_vfs);
+ if (ret) {
+ hdev->num_alloc_vfs = 0;
+ dev_err(&pdev->dev, "SRIOV enable failed %d\n",
+ ret);
+ return ret;
+ }
+ }
+ hdev->num_alloc_vfs = hdev->num_req_vfs;
+#endif
+
+ for (i = 0; i < num_vport; i++) {
+ vport->back = hdev;
+ vport->vport_id = i;
+
+ if (i == 0)
+ ret = hclge_vport_setup(vport, tqp_main_vport);
+ else
+ ret = hclge_vport_setup(vport, tqp_per_vport);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "vport setup failed for vport %d, %d\n",
+ i, ret);
+ return ret;
+ }
+
+ vport++;
+ }
+
+ return 0;
+}
+
+static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size)
+{
+/* TX buffer size is expressed in units of 128 bytes */
+#define HCLGE_BUF_SIZE_UNIT_SHIFT 7
+#define HCLGE_BUF_SIZE_UPDATE_EN_MSK BIT(15)
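+/* e.g. an 8192-byte buffer is sent as 8192 >> 7 == 64 units, with
+ * bit 15 set to request the update.
+ */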
+ struct hclge_tx_buff_alloc *req;
+ struct hclge_desc desc;
+ int ret;
+ u8 i;
+
+ req = (struct hclge_tx_buff_alloc *)desc.data;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0);
+ for (i = 0; i < HCLGE_TC_NUM; i++)
+ req->tx_pkt_buff[i] =
+ cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) |
+ HCLGE_BUF_SIZE_UPDATE_EN_MSK);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "tx buffer alloc cmd failed %d.\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_tx_buffer_alloc(struct hclge_dev *hdev, u32 buf_size)
+{
+ int ret = hclge_cmd_alloc_tx_buff(hdev, buf_size);
+
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "tx buffer alloc failed %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_get_tc_num(struct hclge_dev *hdev)
+{
+ int i, cnt = 0;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
+ if (hdev->hw_tc_map & BIT(i))
+ cnt++;
+ return cnt;
+}
+
+static int hclge_get_pfc_enable_num(struct hclge_dev *hdev)
+{
+ int i, cnt = 0;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
+ if (hdev->hw_tc_map & BIT(i) &&
+ hdev->tm_info.hw_pfc_map & BIT(i))
+ cnt++;
+ return cnt;
+}
+
+/* Get the number of pfc enabled TCs, which have private buffer */
+static int hclge_get_pfc_priv_num(struct hclge_dev *hdev)
+{
+ struct hclge_priv_buf *priv;
+ int i, cnt = 0;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ priv = &hdev->priv_buf[i];
+ if ((hdev->tm_info.hw_pfc_map & BIT(i)) &&
+ priv->enable)
+ cnt++;
+ }
+
+ return cnt;
+}
+
+/* Get the number of pfc disabled TCs, which have private buffer */
+static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev)
+{
+ struct hclge_priv_buf *priv;
+ int i, cnt = 0;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ priv = &hdev->priv_buf[i];
+ if (hdev->hw_tc_map & BIT(i) &&
+ !(hdev->tm_info.hw_pfc_map & BIT(i)) &&
+ priv->enable)
+ cnt++;
+ }
+
+ return cnt;
+}
+
+static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev)
+{
+ struct hclge_priv_buf *priv;
+ u32 rx_priv = 0;
+ int i;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ priv = &hdev->priv_buf[i];
+ if (priv->enable)
+ rx_priv += priv->buf_size;
+ }
+ return rx_priv;
+}
+
+static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
+{
+ u32 shared_buf_min, shared_buf_tc, shared_std;
+ int tc_num, pfc_enable_num;
+ u32 shared_buf;
+ u32 rx_priv;
+ int i;
+
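+ /* The shared pool must cover at least two maximum-size frames plus
+ * the default delay value, or one MPS per PFC-enabled TC plus half
+ * an MPS per other TC plus one spare MPS, whichever bound is larger.
+ */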
+ tc_num = hclge_get_tc_num(hdev);
+ pfc_enable_num = hclge_get_pfc_enable_num(hdev);
+
+ shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_DV;
+ shared_buf_tc = pfc_enable_num * hdev->mps +
+ (tc_num - pfc_enable_num) * hdev->mps / 2 +
+ hdev->mps;
+ shared_std = max_t(u32, shared_buf_min, shared_buf_tc);
+
+ rx_priv = hclge_get_rx_priv_buff_alloced(hdev);
+ if (rx_all <= rx_priv + shared_std)
+ return false;
+
+ shared_buf = rx_all - rx_priv;
+ hdev->s_buf.buf_size = shared_buf;
+ hdev->s_buf.self.high = shared_buf;
+ hdev->s_buf.self.low = 2 * hdev->mps;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ if ((hdev->hw_tc_map & BIT(i)) &&
+ (hdev->tm_info.hw_pfc_map & BIT(i))) {
+ hdev->s_buf.tc_thrd[i].low = hdev->mps;
+ hdev->s_buf.tc_thrd[i].high = 2 * hdev->mps;
+ } else {
+ hdev->s_buf.tc_thrd[i].low = 0;
+ hdev->s_buf.tc_thrd[i].high = hdev->mps;
+ }
+ }
+
+ return true;
+}
+
+/* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
+ * @hdev: pointer to struct hclge_dev
+ * @tx_size: the allocated tx buffer for all TCs
+ * @return: 0 when the calculation succeeds, negative on failure
+ */
+int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
+{
+ u32 rx_all = hdev->pkt_buf_size - tx_size;
+ int no_pfc_priv_num, pfc_priv_num;
+ struct hclge_priv_buf *priv;
+ int i;
+
+ /* step 1, try to alloc private buffer for all enabled tc */
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ priv = &hdev->priv_buf[i];
+ if (hdev->hw_tc_map & BIT(i)) {
+ priv->enable = 1;
+ if (hdev->tm_info.hw_pfc_map & BIT(i)) {
+ priv->wl.low = hdev->mps;
+ priv->wl.high = priv->wl.low + hdev->mps;
+ priv->buf_size = priv->wl.high +
+ HCLGE_DEFAULT_DV;
+ } else {
+ priv->wl.low = 0;
+ priv->wl.high = 2 * hdev->mps;
+ priv->buf_size = priv->wl.high;
+ }
+ }
+ }
+
+ if (hclge_is_rx_buf_ok(hdev, rx_all))
+ return 0;
+
+ /* step 2, try to decrease the buffer sizes of the
+ * PFC-disabled TCs' private buffers
+ */
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ priv = &hdev->priv_buf[i];
+
+ if (hdev->hw_tc_map & BIT(i))
+ priv->enable = 1;
+
+ if (hdev->tm_info.hw_pfc_map & BIT(i)) {
+ priv->wl.low = 128;
+ priv->wl.high = priv->wl.low + hdev->mps;
+ priv->buf_size = priv->wl.high + HCLGE_DEFAULT_DV;
+ } else {
+ priv->wl.low = 0;
+ priv->wl.high = hdev->mps;
+ priv->buf_size = priv->wl.high;
+ }
+ }
+
+ if (hclge_is_rx_buf_ok(hdev, rx_all))
+ return 0;
+
+ /* step 3, try to reduce the number of pfc disabled TCs,
+ * which have private buffer
+ */
+ /* get the number of PFC-disabled TCs that have a private buffer */
+ no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev);
+
+ /* clear the private buffers starting from the last TC */
+ for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
+ priv = &hdev->priv_buf[i];
+
+ if (hdev->hw_tc_map & BIT(i) &&
+ !(hdev->tm_info.hw_pfc_map & BIT(i))) {
+ /* Clear the no pfc TC private buffer */
+ priv->wl.low = 0;
+ priv->wl.high = 0;
+ priv->buf_size = 0;
+ priv->enable = 0;
+ no_pfc_priv_num--;
+ }
+
+ if (hclge_is_rx_buf_ok(hdev, rx_all) ||
+ no_pfc_priv_num == 0)
+ break;
+ }
+
+ if (hclge_is_rx_buf_ok(hdev, rx_all))
+ return 0;
+
+ /* step 4, try to reduce the number of pfc enabled TCs
+ * which have private buffer.
+ */
+ pfc_priv_num = hclge_get_pfc_priv_num(hdev);
+
+ /* clear the private buffers starting from the last TC */
+ for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
+ priv = &hdev->priv_buf[i];
+
+ if (hdev->hw_tc_map & BIT(i) &&
+ hdev->tm_info.hw_pfc_map & BIT(i)) {
+ /* Reduce the number of pfc TC with private buffer */
+ priv->wl.low = 0;
+ priv->enable = 0;
+ priv->wl.high = 0;
+ priv->buf_size = 0;
+ pfc_priv_num--;
+ }
+
+ if (hclge_is_rx_buf_ok(hdev, rx_all) ||
+ pfc_priv_num == 0)
+ break;
+ }
+ if (hclge_is_rx_buf_ok(hdev, rx_all))
+ return 0;
+
+ return -ENOMEM;
+}
+
+static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
+{
+ struct hclge_rx_priv_buff *req;
+ struct hclge_desc desc;
+ int ret;
+ int i;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_PRIV_BUFF_ALLOC, false);
+ req = (struct hclge_rx_priv_buff *)desc.data;
+
+ /* Alloc private buffer TCs */
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ struct hclge_priv_buf *priv = &hdev->priv_buf[i];
+
+ req->buf_num[i] =
+ cpu_to_le16(priv->buf_size >> HCLGE_BUF_UNIT_S);
+ req->buf_num[i] |=
+ cpu_to_le16(1 << HCLGE_TC0_PRI_BUF_EN_B);
+ }
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "rx private buffer alloc cmd failed %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+#define HCLGE_PRIV_ENABLE(a) ((a) > 0 ? 1 : 0)
+
+static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
+{
+ struct hclge_rx_priv_wl_buf *req;
+ struct hclge_priv_buf *priv;
+ struct hclge_desc desc[2];
+ int i, j;
+ int ret;
+
+ for (i = 0; i < 2; i++) {
+ hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_RX_PRIV_WL_ALLOC,
+ false);
+ req = (struct hclge_rx_priv_wl_buf *)desc[i].data;
+
+ /* The first descriptor sets the NEXT bit to 1 */
+ if (i == 0)
+ desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ else
+ desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+ for (j = 0; j < HCLGE_TC_NUM_ONE_DESC; j++) {
+ priv = &hdev->priv_buf[i * HCLGE_TC_NUM_ONE_DESC + j];
+ req->tc_wl[j].high =
+ cpu_to_le16(priv->wl.high >> HCLGE_BUF_UNIT_S);
+ req->tc_wl[j].high |=
+ cpu_to_le16(HCLGE_PRIV_ENABLE(priv->wl.high) <<
+ HCLGE_RX_PRIV_EN_B);
+ req->tc_wl[j].low =
+ cpu_to_le16(priv->wl.low >> HCLGE_BUF_UNIT_S);
+ req->tc_wl[j].low |=
+ cpu_to_le16(HCLGE_PRIV_ENABLE(priv->wl.low) <<
+ HCLGE_RX_PRIV_EN_B);
+ }
+ }
+
+ /* Send 2 descriptors at a time */
+ ret = hclge_cmd_send(&hdev->hw, desc, 2);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "rx private waterline config cmd failed %d\n",
+ ret);
+ return ret;
+ }
+ return 0;
+}
+
+static int hclge_common_thrd_config(struct hclge_dev *hdev)
+{
+ struct hclge_shared_buf *s_buf = &hdev->s_buf;
+ struct hclge_rx_com_thrd *req;
+ struct hclge_desc desc[2];
+ struct hclge_tc_thrd *tc;
+ int i, j;
+ int ret;
+
+ for (i = 0; i < 2; i++) {
+ hclge_cmd_setup_basic_desc(&desc[i],
+ HCLGE_OPC_RX_COM_THRD_ALLOC, false);
+ req = (struct hclge_rx_com_thrd *)&desc[i].data;
+
+ /* The first descriptor sets the NEXT bit to 1 */
+ if (i == 0)
+ desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ else
+ desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+ for (j = 0; j < HCLGE_TC_NUM_ONE_DESC; j++) {
+ tc = &s_buf->tc_thrd[i * HCLGE_TC_NUM_ONE_DESC + j];
+
+ req->com_thrd[j].high =
+ cpu_to_le16(tc->high >> HCLGE_BUF_UNIT_S);
+ req->com_thrd[j].high |=
+ cpu_to_le16(HCLGE_PRIV_ENABLE(tc->high) <<
+ HCLGE_RX_PRIV_EN_B);
+ req->com_thrd[j].low =
+ cpu_to_le16(tc->low >> HCLGE_BUF_UNIT_S);
+ req->com_thrd[j].low |=
+ cpu_to_le16(HCLGE_PRIV_ENABLE(tc->low) <<
+ HCLGE_RX_PRIV_EN_B);
+ }
+ }
+
+ /* Send 2 descriptors at one time */
+ ret = hclge_cmd_send(&hdev->hw, desc, 2);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "common threshold config cmd failed %d\n", ret);
+ return ret;
+ }
+ return 0;
+}
+
+static int hclge_common_wl_config(struct hclge_dev *hdev)
+{
+ struct hclge_shared_buf *buf = &hdev->s_buf;
+ struct hclge_rx_com_wl *req;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_COM_WL_ALLOC, false);
+
+ req = (struct hclge_rx_com_wl *)desc.data;
+ req->com_wl.high = cpu_to_le16(buf->self.high >> HCLGE_BUF_UNIT_S);
+ req->com_wl.high |=
+ cpu_to_le16(HCLGE_PRIV_ENABLE(buf->self.high) <<
+ HCLGE_RX_PRIV_EN_B);
+
+ req->com_wl.low = cpu_to_le16(buf->self.low >> HCLGE_BUF_UNIT_S);
+ req->com_wl.low |=
+ cpu_to_le16(HCLGE_PRIV_ENABLE(buf->self.low) <<
+ HCLGE_RX_PRIV_EN_B);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "common waterline config cmd failed %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+int hclge_buffer_alloc(struct hclge_dev *hdev)
+{
+ u32 tx_buf_size = HCLGE_DEFAULT_TX_BUF;
+ int ret;
+
+ hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM,
+ sizeof(struct hclge_priv_buf),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!hdev->priv_buf)
+ return -ENOMEM;
+
+ ret = hclge_tx_buffer_alloc(hdev, tx_buf_size);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "could not alloc tx buffers %d\n", ret);
+ return ret;
+ }
+
+ ret = hclge_rx_buffer_calc(hdev, tx_buf_size);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "could not calc rx priv buffer size for all TCs %d\n",
+ ret);
+ return ret;
+ }
+
+ ret = hclge_rx_priv_buf_alloc(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "could not alloc rx priv buffer %d\n",
+ ret);
+ return ret;
+ }
+
+ ret = hclge_rx_priv_wl_config(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "could not configure rx private waterline %d\n", ret);
+ return ret;
+ }
+
+ ret = hclge_common_thrd_config(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "could not configure common threshold %d\n", ret);
+ return ret;
+ }
+
+ ret = hclge_common_wl_config(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "could not configure common waterline %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_init_roce_base_info(struct hclge_vport *vport)
+{
+ struct hnae3_handle *roce = &vport->roce;
+ struct hnae3_handle *nic = &vport->nic;
+
+ roce->rinfo.num_vectors = vport->back->num_roce_msix;
+
+ if (vport->back->num_msi_left < vport->roce.rinfo.num_vectors ||
+ vport->back->num_msi_left == 0)
+ return -EINVAL;
+
+ roce->rinfo.base_vector = vport->back->roce_base_vector;
+
+ roce->rinfo.netdev = nic->kinfo.netdev;
+ roce->rinfo.roce_io_base = vport->back->hw.io_base;
+
+ roce->pdev = nic->pdev;
+ roce->ae_algo = nic->ae_algo;
+ roce->numa_node_mask = nic->numa_node_mask;
+
+ return 0;
+}
+
+static int hclge_init_msix(struct hclge_dev *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ int ret, i;
+
+ hdev->msix_entries = devm_kcalloc(&pdev->dev, hdev->num_msi,
+ sizeof(struct msix_entry),
+ GFP_KERNEL);
+ if (!hdev->msix_entries)
+ return -ENOMEM;
+
+ hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
+ sizeof(u16), GFP_KERNEL);
+ if (!hdev->vector_status)
+ return -ENOMEM;
+
+ for (i = 0; i < hdev->num_msi; i++) {
+ hdev->msix_entries[i].entry = i;
+ hdev->vector_status[i] = HCLGE_INVALID_VPORT;
+ }
+
+ hdev->num_msi_left = hdev->num_msi;
+ hdev->base_msi_vector = hdev->pdev->irq;
+ hdev->roce_base_vector = hdev->base_msi_vector +
+ HCLGE_ROCE_VECTOR_OFFSET;
+
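+ /* min and max are both num_msi, so a partial MSI-X allocation is
+ * treated as a failure rather than degraded operation.
+ */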
+ ret = pci_enable_msix_range(hdev->pdev, hdev->msix_entries,
+ hdev->num_msi, hdev->num_msi);
+ if (ret < 0) {
+ dev_err(&hdev->pdev->dev,
+ "MSI-X vector alloc failed: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_init_msi(struct hclge_dev *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ int vectors;
+ int i;
+
+ hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi,
+ sizeof(u16), GFP_KERNEL);
+ if (!hdev->vector_status)
+ return -ENOMEM;
+
+ for (i = 0; i < hdev->num_msi; i++)
+ hdev->vector_status[i] = HCLGE_INVALID_VPORT;
+
+ vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, PCI_IRQ_MSI);
+ if (vectors < 0) {
+ dev_err(&pdev->dev, "MSI vectors enable failed %d\n", vectors);
+ return -EINVAL;
+ }
+ hdev->num_msi = vectors;
+ hdev->num_msi_left = vectors;
+ hdev->base_msi_vector = pdev->irq;
+ hdev->roce_base_vector = hdev->base_msi_vector +
+ HCLGE_ROCE_VECTOR_OFFSET;
+
+ return 0;
+}
+
+static void hclge_check_speed_dup(struct hclge_dev *hdev, int duplex, int speed)
+{
+ struct hclge_mac *mac = &hdev->hw.mac;
+
+ if ((speed == HCLGE_MAC_SPEED_10M) || (speed == HCLGE_MAC_SPEED_100M))
+ mac->duplex = (u8)duplex;
+ else
+ mac->duplex = HCLGE_MAC_FULL;
+
+ mac->speed = speed;
+}
+
+int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex)
+{
+ struct hclge_config_mac_speed_dup *req;
+ struct hclge_desc desc;
+ int ret;
+
+ req = (struct hclge_config_mac_speed_dup *)desc.data;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_SPEED_DUP, false);
+
+ hnae_set_bit(req->speed_dup, HCLGE_CFG_DUPLEX_B, !!duplex);
+
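+ /* Firmware speed encoding: 0 = 1G, 1 = 10G, 2 = 25G, 3 = 40G,
+ * 4 = 50G, 5 = 100G, 6 = 10M, 7 = 100M; the inverse of the mapping
+ * in hclge_parse_speed().
+ */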
+ switch (speed) {
+ case HCLGE_MAC_SPEED_10M:
+ hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+ HCLGE_CFG_SPEED_S, 6);
+ break;
+ case HCLGE_MAC_SPEED_100M:
+ hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+ HCLGE_CFG_SPEED_S, 7);
+ break;
+ case HCLGE_MAC_SPEED_1G:
+ hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+ HCLGE_CFG_SPEED_S, 0);
+ break;
+ case HCLGE_MAC_SPEED_10G:
+ hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+ HCLGE_CFG_SPEED_S, 1);
+ break;
+ case HCLGE_MAC_SPEED_25G:
+ hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+ HCLGE_CFG_SPEED_S, 2);
+ break;
+ case HCLGE_MAC_SPEED_40G:
+ hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+ HCLGE_CFG_SPEED_S, 3);
+ break;
+ case HCLGE_MAC_SPEED_50G:
+ hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+ HCLGE_CFG_SPEED_S, 4);
+ break;
+ case HCLGE_MAC_SPEED_100G:
+ hnae_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
+ HCLGE_CFG_SPEED_S, 5);
+ break;
+ default:
+ dev_err(&hdev->pdev->dev, "invalid speed (%d)\n", speed);
+ return -EINVAL;
+ }
+
+ hnae_set_bit(req->mac_change_fec_en, HCLGE_CFG_MAC_SPEED_CHANGE_EN_B,
+ 1);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "mac speed/duplex config cmd failed %d.\n", ret);
+ return ret;
+ }
+
+ hclge_check_speed_dup(hdev, duplex, speed);
+
+ return 0;
+}
+
+static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed,
+ u8 duplex)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ return hclge_cfg_mac_speed_dup(hdev, speed, duplex);
+}
+
+static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed,
+ u8 *duplex)
+{
+ struct hclge_query_an_speed_dup *req;
+ struct hclge_desc desc;
+ int speed_tmp;
+ int ret;
+
+ req = (struct hclge_query_an_speed_dup *)desc.data;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "mac speed/autoneg/duplex query cmd failed %d\n",
+ ret);
+ return ret;
+ }
+
+ *duplex = hnae_get_bit(req->an_syn_dup_speed, HCLGE_QUERY_DUPLEX_B);
+ speed_tmp = hnae_get_field(req->an_syn_dup_speed, HCLGE_QUERY_SPEED_M,
+ HCLGE_QUERY_SPEED_S);
+
+ ret = hclge_parse_speed(speed_tmp, speed);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "could not parse speed(=%d), %d\n", speed_tmp, ret);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int hclge_query_autoneg_result(struct hclge_dev *hdev)
+{
+ struct hclge_mac *mac = &hdev->hw.mac;
+ struct hclge_query_an_speed_dup *req;
+ struct hclge_desc desc;
+ int ret;
+
+ req = (struct hclge_query_an_speed_dup *)desc.data;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "autoneg result query cmd failed %d.\n", ret);
+ return ret;
+ }
+
+ mac->autoneg = hnae_get_bit(req->an_syn_dup_speed, HCLGE_QUERY_AN_B);
+
+ return 0;
+}
+
+static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable)
+{
+ struct hclge_config_auto_neg *req;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_AN_MODE, false);
+
+ req = (struct hclge_config_auto_neg *)desc.data;
+ hnae_set_bit(req->cfg_an_cmd_flag, HCLGE_MAC_CFG_AN_EN_B, !!enable);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "auto neg set cmd failed %d.\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_set_autoneg(struct hnae3_handle *handle, bool enable)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ return hclge_set_autoneg_en(hdev, enable);
+}
+
+static int hclge_get_autoneg(struct hnae3_handle *handle)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ hclge_query_autoneg_result(hdev);
+
+ return hdev->hw.mac.autoneg;
+}
+
+static int hclge_mac_init(struct hclge_dev *hdev)
+{
+ struct hclge_mac *mac = &hdev->hw.mac;
+ int ret;
+
+ ret = hclge_cfg_mac_speed_dup(hdev, hdev->hw.mac.speed, HCLGE_MAC_FULL);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Config mac speed dup fail ret=%d\n", ret);
+ return ret;
+ }
+
+ mac->link = 0;
+
+ ret = hclge_mac_mdio_config(hdev);
+ if (ret) {
+ dev_warn(&hdev->pdev->dev,
+ "mdio config fail ret=%d\n", ret);
+ return ret;
+ }
+
+ /* Initialize the MTA table work mode */
+ hdev->accept_mta_mc = true;
+ hdev->enable_mta = true;
+ hdev->mta_mac_sel_type = HCLGE_MAC_ADDR_47_36;
+
+ ret = hclge_set_mta_filter_mode(hdev,
+ hdev->mta_mac_sel_type,
+ hdev->enable_mta);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "set mta filter mode failed %d\n",
+ ret);
+ return ret;
+ }
+
+ return hclge_cfg_func_mta_filter(hdev, 0, hdev->accept_mta_mc);
+}
+
+static void hclge_task_schedule(struct hclge_dev *hdev)
+{
+ if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) &&
+ !test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
+ !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state))
+ (void)schedule_work(&hdev->service_task);
+}
+
+static int hclge_get_mac_link_status(struct hclge_dev *hdev)
+{
+ struct hclge_link_status *req;
+ struct hclge_desc desc;
+ int link_status;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_LINK_STATUS, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "get link status cmd failed %d\n",
+ ret);
+ return ret;
+ }
+
+ req = (struct hclge_link_status *)desc.data;
+ link_status = req->status & HCLGE_LINK_STATUS;
+
+ return !!link_status;
+}
+
+static int hclge_get_mac_phy_link(struct hclge_dev *hdev)
+{
+ int mac_state;
+ int link_stat;
+
+ mac_state = hclge_get_mac_link_status(hdev);
+
+ if (hdev->hw.mac.phydev) {
+ if (!genphy_read_status(hdev->hw.mac.phydev))
+ link_stat = mac_state &
+ hdev->hw.mac.phydev->link;
+ else
+ link_stat = 0;
+
+ } else {
+ link_stat = mac_state;
+ }
+
+ return !!link_stat;
+}
+
+static void hclge_update_link_status(struct hclge_dev *hdev)
+{
+ struct hnae3_client *client = hdev->nic_client;
+ struct hnae3_handle *handle;
+ int state;
+ int i;
+
+ if (!client)
+ return;
+ state = hclge_get_mac_phy_link(hdev);
+ if (state != hdev->hw.mac.link) {
+ for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+ handle = &hdev->vport[i].nic;
+ client->ops->link_status_change(handle, state);
+ }
+ hdev->hw.mac.link = state;
+ }
+}
+
+static int hclge_update_speed_duplex(struct hclge_dev *hdev)
+{
+ struct hclge_mac mac = hdev->hw.mac;
+ u8 duplex;
+ int speed;
+ int ret;
+
+ /* Get the speed and duplex as the autoneg result from the MAC
+ * command when no PHY is attached.
+ */
+ if (mac.phydev)
+ return 0;
+
+ /* update mac->autoneg */
+ ret = hclge_query_autoneg_result(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "autoneg result query failed %d\n", ret);
+ return ret;
+ }
+
+ if (!mac.autoneg)
+ return 0;
+
+ ret = hclge_query_mac_an_speed_dup(hdev, &speed, &duplex);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "mac autoneg/speed/duplex query failed %d\n", ret);
+ return ret;
+ }
+
+ if ((mac.speed != speed) || (mac.duplex != duplex)) {
+ ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "mac speed/duplex config failed %d\n", ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int hclge_update_speed_duplex_h(struct hnae3_handle *handle)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ return hclge_update_speed_duplex(hdev);
+}
+
+static int hclge_get_status(struct hnae3_handle *handle)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ hclge_update_link_status(hdev);
+
+ return hdev->hw.mac.link;
+}
+
+static void hclge_service_timer(unsigned long data)
+{
+ struct hclge_dev *hdev = (struct hclge_dev *)data;
+
+ (void)mod_timer(&hdev->service_timer, jiffies + HZ);
+
+ hclge_task_schedule(hdev);
+}
+
+static void hclge_service_complete(struct hclge_dev *hdev)
+{
+ WARN_ON(!test_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state));
+
+ /* Flush memory before next watchdog */
+ smp_mb__before_atomic();
+ clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
+}
+
+static void hclge_service_task(struct work_struct *work)
+{
+ struct hclge_dev *hdev =
+ container_of(work, struct hclge_dev, service_task);
+
+ hclge_update_speed_duplex(hdev);
+ hclge_update_link_status(hdev);
+ hclge_update_stats_for_all(hdev);
+ hclge_service_complete(hdev);
+}
+
+static void hclge_disable_sriov(struct hclge_dev *hdev)
+{
+ /* If our VFs are assigned we cannot shut down SR-IOV
+ * without causing issues, so just leave the hardware
+ * available but disabled
+ */
+ if (pci_vfs_assigned(hdev->pdev)) {
+ dev_warn(&hdev->pdev->dev,
+ "disabling driver while VFs are assigned\n");
+ return;
+ }
+
+ pci_disable_sriov(hdev->pdev);
+}
+
+struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle)
+{
+ /* VF handle has no client */
+ if (!handle->client)
+ return container_of(handle, struct hclge_vport, nic);
+ else if (handle->client->type == HNAE3_CLIENT_ROCE)
+ return container_of(handle, struct hclge_vport, roce);
+ else
+ return container_of(handle, struct hclge_vport, nic);
+}
+
+static int hclge_get_vector(struct hnae3_handle *handle, u16 vector_num,
+ struct hnae3_vector_info *vector_info)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hnae3_vector_info *vector = vector_info;
+ struct hclge_dev *hdev = vport->back;
+ int alloc = 0;
+ int i, j;
+
+ vector_num = min(hdev->num_msi_left, vector_num);
+
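+ /* Vector 0 is skipped here, presumably because it is reserved for
+ * the device's own (non-ring) interrupt.
+ */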
+ for (j = 0; j < vector_num; j++) {
+ for (i = 1; i < hdev->num_msi; i++) {
+ if (hdev->vector_status[i] == HCLGE_INVALID_VPORT) {
+ vector->vector = pci_irq_vector(hdev->pdev, i);
+ vector->io_addr = hdev->hw.io_base +
+ HCLGE_VECTOR_REG_BASE +
+ (i - 1) * HCLGE_VECTOR_REG_OFFSET +
+ vport->vport_id *
+ HCLGE_VECTOR_VF_OFFSET;
+ hdev->vector_status[i] = vport->vport_id;
+
+ vector++;
+ alloc++;
+
+ break;
+ }
+ }
+ }
+ hdev->num_msi_left -= alloc;
+ hdev->num_msi_used += alloc;
+
+ return alloc;
+}
+
+static int hclge_get_vector_index(struct hclge_dev *hdev, int vector)
+{
+ int i;
+
+ for (i = 0; i < hdev->num_msi; i++) {
+ if (hdev->msix_entries) {
+ if (vector == hdev->msix_entries[i].vector)
+ return i;
+ } else {
+ if (vector == (hdev->base_msi_vector + i))
+ return i;
+ }
+ }
+ return -EINVAL;
+}
+
+static u32 hclge_get_rss_key_size(struct hnae3_handle *handle)
+{
+ return HCLGE_RSS_KEY_SIZE;
+}
+
+static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle)
+{
+ return HCLGE_RSS_IND_TBL_SIZE;
+}
+
+static int hclge_get_rss_algo(struct hclge_dev *hdev)
+{
+ struct hclge_rss_config *req;
+ struct hclge_desc desc;
+ int rss_hash_algo;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG, true);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Get RSS hash algorithm failed, status = %d\n", ret);
+ return ret;
+ }
+
+ req = (struct hclge_rss_config *)desc.data;
+ rss_hash_algo = (req->hash_config & HCLGE_RSS_HASH_ALGO_MASK);
+
+ if (rss_hash_algo == HCLGE_RSS_HASH_ALGO_TOEPLITZ)
+ return ETH_RSS_HASH_TOP;
+
+ return -EINVAL;
+}
+
+static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
+ const u8 hfunc, const u8 *key)
+{
+ struct hclge_rss_config *req;
+ struct hclge_desc desc;
+ int key_offset;
+ int key_size;
+ int ret;
+
+ req = (struct hclge_rss_config *)desc.data;
+
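+ /* The key is longer than one descriptor's payload, so it is written
+ * in HCLGE_RSS_HASH_KEY_NUM-byte chunks over three commands, with the
+ * chunk index carried alongside the hash algorithm bits.
+ */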
+ for (key_offset = 0; key_offset < 3; key_offset++) {
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG,
+ false);
+
+ req->hash_config |= (hfunc & HCLGE_RSS_HASH_ALGO_MASK);
+ req->hash_config |= (key_offset << HCLGE_RSS_HASH_KEY_OFFSET_B);
+
+ if (key_offset == 2)
+ key_size =
+ HCLGE_RSS_KEY_SIZE - HCLGE_RSS_HASH_KEY_NUM * 2;
+ else
+ key_size = HCLGE_RSS_HASH_KEY_NUM;
+
+ memcpy(req->hash_key,
+ key + key_offset * HCLGE_RSS_HASH_KEY_NUM, key_size);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Configure RSS config fail, status = %d\n",
+ ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir)
+{
+ struct hclge_rss_indirection_table *req;
+ struct hclge_desc desc;
+ int i, j;
+ int ret;
+
+ req = (struct hclge_rss_indirection_table *)desc.data;
+
+ for (i = 0; i < HCLGE_RSS_CFG_TBL_NUM; i++) {
+ hclge_cmd_setup_basic_desc
+ (&desc, HCLGE_OPC_RSS_INDIR_TABLE, false);
+
+ req->start_table_index = i * HCLGE_RSS_CFG_TBL_SIZE;
+ req->rss_set_bitmap = HCLGE_RSS_SET_BITMAP_MSK;
+
+ for (j = 0; j < HCLGE_RSS_CFG_TBL_SIZE; j++)
+ req->rss_result[j] =
+ indir[i * HCLGE_RSS_CFG_TBL_SIZE + j];
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Configure rss indir table fail,status = %d\n",
+ ret);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid,
+ u16 *tc_size, u16 *tc_offset)
+{
+ struct hclge_rss_tc_mode *req;
+ struct hclge_desc desc;
+ int ret;
+ int i;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_TC_MODE, false);
+ req = (struct hclge_rss_tc_mode *)desc.data;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ hnae_set_bit(req->rss_tc_mode[i], HCLGE_RSS_TC_VALID_B,
+ (tc_valid[i] & 0x1));
+ hnae_set_field(req->rss_tc_mode[i], HCLGE_RSS_TC_SIZE_M,
+ HCLGE_RSS_TC_SIZE_S, tc_size[i]);
+ hnae_set_field(req->rss_tc_mode[i], HCLGE_RSS_TC_OFFSET_M,
+ HCLGE_RSS_TC_OFFSET_S, tc_offset[i]);
+ }
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Configure rss tc mode fail, status = %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_set_rss_input_tuple(struct hclge_dev *hdev)
+{
+#define HCLGE_RSS_INPUT_TUPLE_OTHER 0xf
+#define HCLGE_RSS_INPUT_TUPLE_SCTP 0x1f
+ struct hclge_rss_input_tuple *req;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false);
+
+ req = (struct hclge_rss_input_tuple *)desc.data;
+ req->ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+ req->ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+ req->ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
+ req->ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+ req->ipv6_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+ req->ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+ req->ipv6_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
+ req->ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Configure rss input fail, status = %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir,
+ u8 *key, u8 *hfunc)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ int i;
+
+ /* Get hash algorithm */
+ if (hfunc)
+ *hfunc = hclge_get_rss_algo(hdev);
+
+ /* Get the RSS Key required by the user */
+ if (key)
+ memcpy(key, vport->rss_hash_key, HCLGE_RSS_KEY_SIZE);
+
+ /* Get indirect table */
+ if (indir)
+ for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+ indir[i] = vport->rss_indirection_tbl[i];
+
+ return 0;
+}
+
+static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ u8 hash_algo;
+ int ret, i;
+
+ /* Set the RSS Hash Key if specified by the user */
+ if (key) {
+ /* Update the shadow RSS key with the user-specified key */
+ memcpy(vport->rss_hash_key, key, HCLGE_RSS_KEY_SIZE);
+
+ if (hfunc == ETH_RSS_HASH_TOP ||
+ hfunc == ETH_RSS_HASH_NO_CHANGE)
+ hash_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
+ else
+ return -EINVAL;
+ ret = hclge_set_rss_algo_key(hdev, hash_algo, key);
+ if (ret)
+ return ret;
+ }
+
+ /* Update the shadow RSS table with user specified qids */
+ for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++)
+ vport->rss_indirection_tbl[i] = indir[i];
+
+ /* Update the hardware */
+ ret = hclge_set_rss_indir_table(hdev, indir);
+ return ret;
+}
+
+static int hclge_get_tc_size(struct hnae3_handle *handle)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ return hdev->rss_size_max;
+}
+
+static int hclge_rss_init_hw(struct hclge_dev *hdev)
+{
+ const u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
+ struct hclge_vport *vport = hdev->vport;
+ u16 tc_offset[HCLGE_MAX_TC_NUM];
+ u8 rss_key[HCLGE_RSS_KEY_SIZE];
+ u16 tc_valid[HCLGE_MAX_TC_NUM];
+ u16 tc_size[HCLGE_MAX_TC_NUM];
+ u32 *rss_indir = NULL;
+ const u8 *key;
+ int i, ret, j;
+
+ rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL);
+ if (!rss_indir)
+ return -ENOMEM;
+
+ /* Get default RSS key */
+ netdev_rss_key_fill(rss_key, HCLGE_RSS_KEY_SIZE);
+
+ /* Initialize RSS indirect table for each vport */
+ for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
+ for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) {
+ vport[j].rss_indirection_tbl[i] =
+ i % hdev->rss_size_max;
+ rss_indir[i] = vport[j].rss_indirection_tbl[i];
+ }
+ }
+ ret = hclge_set_rss_indir_table(hdev, rss_indir);
+ if (ret)
+ goto err;
+
+ key = rss_key;
+ ret = hclge_set_rss_algo_key(hdev, hfunc, key);
+ if (ret)
+ goto err;
+
+ ret = hclge_set_rss_input_tuple(hdev);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ if (hdev->hw_tc_map & BIT(i))
+ tc_valid[i] = 1;
+ else
+ tc_valid[i] = 0;
+
+ switch (hdev->rss_size_max) {
+ case HCLGE_RSS_TC_SIZE_0:
+ tc_size[i] = 0;
+ break;
+ case HCLGE_RSS_TC_SIZE_1:
+ tc_size[i] = 1;
+ break;
+ case HCLGE_RSS_TC_SIZE_2:
+ tc_size[i] = 2;
+ break;
+ case HCLGE_RSS_TC_SIZE_3:
+ tc_size[i] = 3;
+ break;
+ case HCLGE_RSS_TC_SIZE_4:
+ tc_size[i] = 4;
+ break;
+ case HCLGE_RSS_TC_SIZE_5:
+ tc_size[i] = 5;
+ break;
+ case HCLGE_RSS_TC_SIZE_6:
+ tc_size[i] = 6;
+ break;
+ case HCLGE_RSS_TC_SIZE_7:
+ tc_size[i] = 7;
+ break;
+ default:
+ break;
+ }
+ tc_offset[i] = hdev->rss_size_max * i;
+ }
+ ret = hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset);
+
+err:
+ kfree(rss_indir);
+
+ return ret;
+}
+
+int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id,
+ struct hnae3_ring_chain_node *ring_chain)
+{
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_ctrl_vector_chain *req;
+ struct hnae3_ring_chain_node *node;
+ struct hclge_desc desc;
+ int ret;
+ int i;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ADD_RING_TO_VECTOR, false);
+
+ req = (struct hclge_ctrl_vector_chain *)desc.data;
+ req->int_vector_id = vector_id;
+
+ i = 0;
+ for (node = ring_chain; node; node = node->next) {
+ hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_TYPE_M,
+ HCLGE_INT_TYPE_S,
+ hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
+ hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
+ HCLGE_TQP_ID_S, node->tqp_index);
+ req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+
+ if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
+ req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Map TQP fail, status is %d.\n",
+ ret);
+ return ret;
+ }
+ i = 0;
+
+ hclge_cmd_setup_basic_desc(&desc,
+ HCLGE_OPC_ADD_RING_TO_VECTOR,
+ false);
+ req->int_vector_id = vector_id;
+ }
+ }
+
+ if (i > 0) {
+ req->int_cause_num = i;
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Map TQP fail, status is %d.\n", ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int hclge_map_handle_ring_to_vector(struct hnae3_handle *handle,
+ int vector,
+ struct hnae3_ring_chain_node *ring_chain)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ int vector_id;
+
+ vector_id = hclge_get_vector_index(hdev, vector);
+ if (vector_id < 0) {
+ dev_err(&hdev->pdev->dev,
+ "Get vector index fail. ret =%d\n", vector_id);
+ return vector_id;
+ }
+
+ return hclge_map_vport_ring_to_vector(vport, vector_id, ring_chain);
+}
+
+static int hclge_unmap_ring_from_vector(struct hnae3_handle *handle,
+ int vector,
+ struct hnae3_ring_chain_node *ring_chain)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_ctrl_vector_chain *req;
+ struct hnae3_ring_chain_node *node;
+ struct hclge_desc desc;
+ int i, vector_id;
+ int ret;
+
+ vector_id = hclge_get_vector_index(hdev, vector);
+ if (vector_id < 0) {
+ dev_err(&handle->pdev->dev,
+ "Get vector index fail. ret =%d\n", vector_id);
+ return vector_id;
+ }
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_DEL_RING_TO_VECTOR, false);
+
+ req = (struct hclge_ctrl_vector_chain *)desc.data;
+ req->int_vector_id = vector_id;
+
+ i = 0;
+ for (node = ring_chain; node; node = node->next) {
+ hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_TYPE_M,
+ HCLGE_INT_TYPE_S,
+ hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
+ hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
+ HCLGE_TQP_ID_S, node->tqp_index);
+
+ req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+
+ if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
+ req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD;
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Unmap TQP fail, status is %d.\n",
+ ret);
+ return ret;
+ }
+ i = 0;
+ hclge_cmd_setup_basic_desc(&desc,
+ HCLGE_OPC_DEL_RING_TO_VECTOR,
+ false);
+ req->int_vector_id = vector_id;
+ }
+ }
+
+ if (i > 0) {
+ req->int_cause_num = i;
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Unmap TQP fail, status is %d.\n", ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
+ struct hclge_promisc_param *param)
+{
+ struct hclge_promisc_cfg *req;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PROMISC_MODE, false);
+
+ req = (struct hclge_promisc_cfg *)desc.data;
+ req->vf_id = param->vf_id;
+ req->flag = (param->enable << HCLGE_PROMISC_EN_B);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Set promisc mode fail, status is %d.\n", ret);
+ return ret;
+ }
+ return 0;
+}
+
+void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
+ bool en_mc, bool en_bc, int vport_id)
+{
+ if (!param)
+ return;
+
+ memset(param, 0, sizeof(struct hclge_promisc_param));
+ if (en_uc)
+ param->enable = HCLGE_PROMISC_EN_UC;
+ if (en_mc)
+ param->enable |= HCLGE_PROMISC_EN_MC;
+ if (en_bc)
+ param->enable |= HCLGE_PROMISC_EN_BC;
+ param->vf_id = vport_id;
+}
+
+static void hclge_set_promisc_mode(struct hnae3_handle *handle, u32 en)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_promisc_param param;
+
+ hclge_promisc_param_init(&param, en, en, true, vport->vport_id);
+ hclge_cmd_set_promisc_mode(hdev, &param);
+}
+
+static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
+{
+ struct hclge_desc desc;
+ struct hclge_config_mac_mode *req =
+ (struct hclge_config_mac_mode *)desc.data;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, false);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_TX_EN_B, enable);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_RX_EN_B, enable);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_PAD_TX_B, enable);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_PAD_RX_B, enable);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_1588_TX_B, 0);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_1588_RX_B, 0);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_APP_LP_B, 0);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_LINE_LP_B, 0);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_FCS_TX_B, enable);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_RX_FCS_B, enable);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en,
+ HCLGE_MAC_RX_FCS_STRIP_B, enable);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en,
+ HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en,
+ HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable);
+ hnae_set_bit(req->txrx_pad_fcs_loop_en,
+ HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "mac enable fail, ret =%d.\n", ret);
+}
+
+static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id,
+ int stream_id, bool enable)
+{
+ struct hclge_desc desc;
+ struct hclge_cfg_com_tqp_queue *req =
+ (struct hclge_cfg_com_tqp_queue *)desc.data;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false);
+ req->tqp_id = cpu_to_le16(tqp_id & HCLGE_RING_ID_MASK);
+ req->stream_id = cpu_to_le16(stream_id);
+ req->enable |= enable << HCLGE_TQP_ENABLE_B;
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "Tqp enable fail, status =%d.\n", ret);
+ return ret;
+}
+
+static void hclge_reset_tqp_stats(struct hnae3_handle *handle)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hnae3_queue *queue;
+ struct hclge_tqp *tqp;
+ int i;
+
+ for (i = 0; i < vport->alloc_tqps; i++) {
+ queue = handle->kinfo.tqp[i];
+ tqp = container_of(queue, struct hclge_tqp, q);
+ memset(&tqp->tqp_stats, 0, sizeof(tqp->tqp_stats));
+ }
+}
+
+static int hclge_ae_start(struct hnae3_handle *handle)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ int i, queue_id, ret;
+
+ for (i = 0; i < vport->alloc_tqps; i++) {
+ /* todo clear interrupt */
+ /* ring enable */
+ queue_id = hclge_get_queue_id(handle->kinfo.tqp[i]);
+ if (queue_id < 0) {
+ dev_warn(&hdev->pdev->dev,
+ "Get invalid queue id, ignore it\n");
+ continue;
+ }
+
+ hclge_tqp_enable(hdev, queue_id, 0, true);
+ }
+ /* mac enable */
+ hclge_cfg_mac_mode(hdev, true);
+ clear_bit(HCLGE_STATE_DOWN, &hdev->state);
+ (void)mod_timer(&hdev->service_timer, jiffies + HZ);
+
+ ret = hclge_mac_start_phy(hdev);
+ if (ret)
+ return ret;
+
+ /* reset tqp stats */
+ hclge_reset_tqp_stats(handle);
+
+ return 0;
+}
+
+static void hclge_ae_stop(struct hnae3_handle *handle)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ int i, queue_id;
+
+ for (i = 0; i < vport->alloc_tqps; i++) {
+ /* Ring disable */
+ queue_id = hclge_get_queue_id(handle->kinfo.tqp[i]);
+ if (queue_id < 0) {
+ dev_warn(&hdev->pdev->dev,
+ "Get invalid queue id, ignore it\n");
+ continue;
+ }
+
+ hclge_tqp_enable(hdev, queue_id, 0, false);
+ }
+ /* Mac disable */
+ hclge_cfg_mac_mode(hdev, false);
+
+ hclge_mac_stop_phy(hdev);
+
+ /* reset tqp stats */
+ hclge_reset_tqp_stats(handle);
+}
+
+static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
+ u16 cmdq_resp, u8 resp_code,
+ enum hclge_mac_vlan_tbl_opcode op)
+{
+ struct hclge_dev *hdev = vport->back;
+ int return_status = -EIO;
+
+ if (cmdq_resp) {
+ dev_err(&hdev->pdev->dev,
+ "cmdq execute failed for get_mac_vlan_cmd_status, status = %d.\n",
+ cmdq_resp);
+ return -EIO;
+ }
+
+ if (op == HCLGE_MAC_VLAN_ADD) {
+ if ((!resp_code) || (resp_code == 1)) {
+ return_status = 0;
+ } else if (resp_code == 2) {
+ return_status = -EIO;
+ dev_err(&hdev->pdev->dev,
+ "add mac addr failed for uc_overflow.\n");
+ } else if (resp_code == 3) {
+ return_status = -EIO;
+ dev_err(&hdev->pdev->dev,
+ "add mac addr failed for mc_overflow.\n");
+ } else {
+ dev_err(&hdev->pdev->dev,
+ "add mac addr failed for undefined, code=%d.\n",
+ resp_code);
+ }
+ } else if (op == HCLGE_MAC_VLAN_REMOVE) {
+ if (!resp_code) {
+ return_status = 0;
+ } else if (resp_code == 1) {
+ return_status = -EIO;
+ dev_dbg(&hdev->pdev->dev,
+ "remove mac addr failed for miss.\n");
+ } else {
+ dev_err(&hdev->pdev->dev,
+ "remove mac addr failed for undefined, code=%d.\n",
+ resp_code);
+ }
+ } else if (op == HCLGE_MAC_VLAN_LKUP) {
+ if (!resp_code) {
+ return_status = 0;
+ } else if (resp_code == 1) {
+ return_status = -EIO;
+ dev_dbg(&hdev->pdev->dev,
+ "lookup mac addr failed for miss.\n");
+ } else {
+ dev_err(&hdev->pdev->dev,
+ "lookup mac addr failed for undefined, code=%d.\n",
+ resp_code);
+ }
+ } else {
+ return_status = -EIO;
+ dev_err(&hdev->pdev->dev,
+ "unknown opcode for get_mac_vlan_cmd_status, opcode = %d.\n",
+ op);
+ }
+
+ return return_status;
+}
+
+static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr)
+{
+ int word_num;
+ int bit_num;
+
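+ /* VF membership is a 256-bit bitmap split across descriptors:
+ * IDs 0-191 occupy the six data words of desc[1], the remaining
+ * IDs those of desc[2].
+ */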
+ if (vfid > 255 || vfid < 0)
+ return -EIO;
+
+ if (vfid >= 0 && vfid <= 191) {
+ word_num = vfid / 32;
+ bit_num = vfid % 32;
+ if (clr)
+ desc[1].data[word_num] &= ~(1 << bit_num);
+ else
+ desc[1].data[word_num] |= (1 << bit_num);
+ } else {
+ word_num = (vfid - 192) / 32;
+ bit_num = vfid % 32;
+ if (clr)
+ desc[2].data[word_num] &= ~(1 << bit_num);
+ else
+ desc[2].data[word_num] |= (1 << bit_num);
+ }
+
+ return 0;
+}
+
+static bool hclge_is_all_function_id_zero(struct hclge_desc *desc)
+{
+#define HCLGE_DESC_NUMBER 3
+#define HCLGE_FUNC_NUMBER_PER_DESC 6
+ int i, j;
+
+ for (i = 0; i < HCLGE_DESC_NUMBER; i++)
+ for (j = 0; j < HCLGE_FUNC_NUMBER_PER_DESC; j++)
+ if (desc[i].data[j])
+ return false;
+
+ return true;
+}
+
+static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry *new_req,
+ const u8 *addr)
+{
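+ /* Pack bytes 0-3 of the MAC into a 32-bit word and bytes 4-5 into
+ * a 16-bit word, in the layout the MAC_VLAN table expects.
+ */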
+ const unsigned char *mac_addr = addr;
+ u32 high_val = mac_addr[2] << 16 | (mac_addr[3] << 24) |
+ (mac_addr[0]) | (mac_addr[1] << 8);
+ u32 low_val = mac_addr[4] | (mac_addr[5] << 8);
+
+ new_req->mac_addr_hi32 = cpu_to_le32(high_val);
+ new_req->mac_addr_lo16 = cpu_to_le16(low_val & 0xffff);
+}
+
+u16 hclge_get_mac_addr_to_mta_index(struct hclge_vport *vport,
+ const u8 *addr)
+{
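+ /* The MTA index is a 12-bit slice of the MAC's high bits; the
+ * configured mta_mac_sel_type picks which slice (e.g.
+ * HCLGE_MAC_ADDR_47_36 selects bits 47:36).
+ */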
+ u16 high_val = addr[1] | (addr[0] << 8);
+ struct hclge_dev *hdev = vport->back;
+ u32 rsh = 4 - hdev->mta_mac_sel_type;
+ u16 ret_val = (high_val >> rsh) & 0xfff;
+
+ return ret_val;
+}
+
+static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
+ enum hclge_mta_dmac_sel_type mta_mac_sel,
+ bool enable)
+{
+ struct hclge_mta_filter_mode *req;
+ struct hclge_desc desc;
+ int ret;
+
+ req = (struct hclge_mta_filter_mode *)desc.data;
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_MODE_CFG, false);
+
+ hnae_set_bit(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_EN_B,
+ enable);
+ hnae_set_field(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_SEL_M,
+ HCLGE_CFG_MTA_MAC_SEL_S, mta_mac_sel);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Config mat filter mode failed for cmd_send, ret =%d.\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+int hclge_cfg_func_mta_filter(struct hclge_dev *hdev,
+ u8 func_id,
+ bool enable)
+{
+ struct hclge_cfg_func_mta_filter *req;
+ struct hclge_desc desc;
+ int ret;
+
+ req = (struct hclge_cfg_func_mta_filter *)desc.data;
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_FUNC_CFG, false);
+
+ hnae_set_bit(req->accept, HCLGE_CFG_FUNC_MTA_ACCEPT_B,
+ enable);
+ req->function_id = func_id;
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Config func_id enable failed for cmd_send, ret =%d.\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_set_mta_table_item(struct hclge_vport *vport,
+ u16 idx,
+ bool enable)
+{
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_cfg_func_mta_item *req;
+ struct hclge_desc desc;
+ int ret;
+
+ req = (struct hclge_cfg_func_mta_item *)desc.data;
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_TBL_ITEM_CFG, false);
+ hnae_set_bit(req->accept, HCLGE_CFG_MTA_ITEM_ACCEPT_B, enable);
+
+ hnae_set_field(req->item_idx, HCLGE_CFG_MTA_ITEM_IDX_M,
+ HCLGE_CFG_MTA_ITEM_IDX_S, idx);
+ req->item_idx = cpu_to_le16(req->item_idx);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Config mta table item failed for cmd_send, ret =%d.\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport,
+ struct hclge_mac_vlan_tbl_entry *req)
+{
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_desc desc;
+ u8 resp_code;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_REMOVE, false);
+
+ memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry));
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "del mac addr failed for cmd_send, ret =%d.\n",
+ ret);
+ return ret;
+ }
+ resp_code = (desc.data[0] >> 8) & 0xff;
+
+ return hclge_get_mac_vlan_cmd_status(vport, desc.retval, resp_code,
+ HCLGE_MAC_VLAN_REMOVE);
+}
+
+static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport,
+ struct hclge_mac_vlan_tbl_entry *req,
+ struct hclge_desc *desc,
+ bool is_mc)
+{
+ struct hclge_dev *hdev = vport->back;
+ u8 resp_code;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_MAC_VLAN_ADD, true);
+ if (is_mc) {
+ desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ memcpy(desc[0].data,
+ req,
+ sizeof(struct hclge_mac_vlan_tbl_entry));
+ hclge_cmd_setup_basic_desc(&desc[1],
+ HCLGE_OPC_MAC_VLAN_ADD,
+ true);
+ desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ hclge_cmd_setup_basic_desc(&desc[2],
+ HCLGE_OPC_MAC_VLAN_ADD,
+ true);
+ ret = hclge_cmd_send(&hdev->hw, desc, 3);
+ } else {
+ memcpy(desc[0].data,
+ req,
+ sizeof(struct hclge_mac_vlan_tbl_entry));
+ ret = hclge_cmd_send(&hdev->hw, desc, 1);
+ }
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "lookup mac addr failed for cmd_send, ret =%d.\n",
+ ret);
+ return ret;
+ }
+ resp_code = (desc[0].data[0] >> 8) & 0xff;
+
+ return hclge_get_mac_vlan_cmd_status(vport, desc[0].retval, resp_code,
+ HCLGE_MAC_VLAN_LKUP);
+}
+
+static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport,
+ struct hclge_mac_vlan_tbl_entry *req,
+ struct hclge_desc *mc_desc)
+{
+ struct hclge_dev *hdev = vport->back;
+ int cfg_status;
+ u8 resp_code;
+ int ret;
+
+ if (!mc_desc) {
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc,
+ HCLGE_OPC_MAC_VLAN_ADD,
+ false);
+ memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry));
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ resp_code = (desc.data[0] >> 8) & 0xff;
+ cfg_status = hclge_get_mac_vlan_cmd_status(vport, desc.retval,
+ resp_code,
+ HCLGE_MAC_VLAN_ADD);
+ } else {
+ mc_desc[0].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+ mc_desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ mc_desc[1].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+ mc_desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+ mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+ mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_NEXT);
+ memcpy(mc_desc[0].data, req,
+ sizeof(struct hclge_mac_vlan_tbl_entry));
+ ret = hclge_cmd_send(&hdev->hw, mc_desc, 3);
+ resp_code = (mc_desc[0].data[0] >> 8) & 0xff;
+ cfg_status = hclge_get_mac_vlan_cmd_status(vport,
+ mc_desc[0].retval,
+ resp_code,
+ HCLGE_MAC_VLAN_ADD);
+ }
+
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "add mac addr failed for cmd_send, ret =%d.\n",
+ ret);
+ return ret;
+ }
+
+ return cfg_status;
+}
+
+static int hclge_add_uc_addr(struct hnae3_handle *handle,
+ const unsigned char *addr)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+
+ return hclge_add_uc_addr_common(vport, addr);
+}
+
+int hclge_add_uc_addr_common(struct hclge_vport *vport,
+ const unsigned char *addr)
+{
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_mac_vlan_tbl_entry req;
+ enum hclge_cmd_status status;
+
+ /* mac addr check */
+ if (is_zero_ether_addr(addr) ||
+ is_broadcast_ether_addr(addr) ||
+ is_multicast_ether_addr(addr)) {
+ dev_err(&hdev->pdev->dev,
+ "Set_uc mac err! invalid mac:%pM. is_zero:%d,is_br=%d,is_mul=%d\n",
+ addr,
+ is_zero_ether_addr(addr),
+ is_broadcast_ether_addr(addr),
+ is_multicast_ether_addr(addr));
+ return -EINVAL;
+ }
+
+ memset(&req, 0, sizeof(req));
+ hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+ hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+ hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 0);
+ hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+ hnae_set_bit(req.egress_port, HCLGE_MAC_EPORT_SW_EN_B, 0);
+ hnae_set_bit(req.egress_port, HCLGE_MAC_EPORT_TYPE_B, 0);
+ hnae_set_field(req.egress_port, HCLGE_MAC_EPORT_VFID_M,
+ HCLGE_MAC_EPORT_VFID_S, vport->vport_id);
+ hnae_set_field(req.egress_port, HCLGE_MAC_EPORT_PFID_M,
+ HCLGE_MAC_EPORT_PFID_S, 0);
+ req.egress_port = cpu_to_le16(req.egress_port);
+
+ hclge_prepare_mac_addr(&req, addr);
+
+ status = hclge_add_mac_vlan_tbl(vport, &req, NULL);
+
+ return status;
+}
+
+static int hclge_rm_uc_addr(struct hnae3_handle *handle,
+ const unsigned char *addr)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+
+ return hclge_rm_uc_addr_common(vport, addr);
+}
+
+int hclge_rm_uc_addr_common(struct hclge_vport *vport,
+ const unsigned char *addr)
+{
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_mac_vlan_tbl_entry req;
+ int status;
+
+ /* mac addr check */
+ if (is_zero_ether_addr(addr) ||
+ is_broadcast_ether_addr(addr) ||
+ is_multicast_ether_addr(addr)) {
+ dev_dbg(&hdev->pdev->dev,
+ "Remove mac err! invalid mac:%pM.\n",
+ addr);
+ return -EINVAL;
+ }
+
+ memset(&req, 0, sizeof(req));
+ hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+ hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+ hclge_prepare_mac_addr(&req, addr);
+ status = hclge_remove_mac_vlan_tbl(vport, &req);
+
+ return status;
+}
+
+static int hclge_add_mc_addr(struct hnae3_handle *handle,
+ const unsigned char *addr)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+
+ return hclge_add_mc_addr_common(vport, addr);
+}
+
+int hclge_add_mc_addr_common(struct hclge_vport *vport,
+ const unsigned char *addr)
+{
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_mac_vlan_tbl_entry req;
+ struct hclge_desc desc[3];
+ u16 tbl_idx;
+ int status;
+
+ /* mac addr check */
+ if (!is_multicast_ether_addr(addr)) {
+ dev_err(&hdev->pdev->dev,
+ "Add mc mac err! invalid mac:%pM.\n",
+ addr);
+ return -EINVAL;
+ }
+ memset(&req, 0, sizeof(req));
+ hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+ hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+ hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 1);
+ hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+ hclge_prepare_mac_addr(&req, addr);
+ status = hclge_lookup_mac_vlan_tbl(vport, &req, desc, true);
+ if (!status) {
+ /* This mac addr exists, update VFID for it */
+ hclge_update_desc_vfid(desc, vport->vport_id, false);
+ status = hclge_add_mac_vlan_tbl(vport, &req, desc);
+ } else {
+ /* This mac addr does not exist, add a new entry for it */
+ memset(desc[0].data, 0, sizeof(desc[0].data));
+ memset(desc[1].data, 0, sizeof(desc[1].data));
+ memset(desc[2].data, 0, sizeof(desc[2].data));
+ hclge_update_desc_vfid(desc, vport->vport_id, false);
+ status = hclge_add_mac_vlan_tbl(vport, &req, desc);
+ }
+
+ /* Set MTA table for this MAC address */
+ tbl_idx = hclge_get_mac_addr_to_mta_index(vport, addr);
+ status = hclge_set_mta_table_item(vport, tbl_idx, true);
+
+ return status;
+}
+
+static int hclge_rm_mc_addr(struct hnae3_handle *handle,
+ const unsigned char *addr)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+
+ return hclge_rm_mc_addr_common(vport, addr);
+}
+
+int hclge_rm_mc_addr_common(struct hclge_vport *vport,
+ const unsigned char *addr)
+{
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_mac_vlan_tbl_entry req;
+ int status;
+ struct hclge_desc desc[3];
+ u16 tbl_idx;
+
+ /* mac addr check */
+ if (!is_multicast_ether_addr(addr)) {
+ dev_dbg(&hdev->pdev->dev,
+ "Remove mc mac err! invalid mac:%pM.\n",
+ addr);
+ return -EINVAL;
+ }
+
+ memset(&req, 0, sizeof(req));
+ hnae_set_bit(req.flags, HCLGE_MAC_VLAN_BIT0_EN_B, 1);
+ hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+ hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 1);
+ hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
+ hclge_prepare_mac_addr(&req, addr);
+ status = hclge_lookup_mac_vlan_tbl(vport, &req, desc, true);
+ if (!status) {
+ /* This mac addr exists, remove this handle's VFID for it */
+ hclge_update_desc_vfid(desc, vport->vport_id, true);
+
+ if (hclge_is_all_function_id_zero(desc))
+ /* All the VFIDs are zero, so delete this entry */
+ status = hclge_remove_mac_vlan_tbl(vport, &req);
+ else
+ /* Not all the VFIDs are zero, so just update them */
+ status = hclge_add_mac_vlan_tbl(vport, &req, desc);
+
+ } else {
+ /* This mac addr does not exist, so it can't be deleted */
+ dev_err(&hdev->pdev->dev,
+ "Rm multicast mac addr failed, ret = %d.\n",
+ status);
+ return -EIO;
+ }
+
+ /* Set MTA table for this MAC address */
+ tbl_idx = hclge_get_mac_addr_to_mta_index(vport, addr);
+ status = hclge_set_mta_table_item(vport, tbl_idx, false);
+
+ return status;
+}
+
+static void hclge_get_mac_addr(struct hnae3_handle *handle, u8 *p)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ ether_addr_copy(p, hdev->hw.mac.mac_addr);
+}
+
+static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
+{
+ const unsigned char *new_addr = (const unsigned char *)p;
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ /* mac addr check */
+ if (is_zero_ether_addr(new_addr) ||
+ is_broadcast_ether_addr(new_addr) ||
+ is_multicast_ether_addr(new_addr)) {
+ dev_err(&hdev->pdev->dev,
+ "Change uc mac err! invalid mac:%p.\n",
+ new_addr);
+ return -EINVAL;
+ }
+
+ hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr);
+
+ if (!hclge_add_uc_addr(handle, new_addr)) {
+ ether_addr_copy(hdev->hw.mac.mac_addr, new_addr);
+ return 0;
+ }
+
+ return -EIO;
+}
+
+static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type,
+ bool filter_en)
+{
+ struct hclge_vlan_filter_ctrl *req;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, false);
+
+ req = (struct hclge_vlan_filter_ctrl *)desc.data;
+ req->vlan_type = vlan_type;
+ req->vlan_fe = filter_en;
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "set vlan filter fail, ret =%d.\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
+ bool is_kill, u16 vlan, u8 qos, __be16 proto)
+{
+#define HCLGE_MAX_VF_BYTES 16
+ struct hclge_vlan_filter_vf_cfg *req0;
+ struct hclge_vlan_filter_vf_cfg *req1;
+ struct hclge_desc desc[2];
+ u8 vf_byte_val;
+ u8 vf_byte_off;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc[0],
+ HCLGE_OPC_VLAN_FILTER_VF_CFG, false);
+ hclge_cmd_setup_basic_desc(&desc[1],
+ HCLGE_OPC_VLAN_FILTER_VF_CFG, false);
+
+ desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+ vf_byte_off = vfid / 8;
+ vf_byte_val = 1 << (vfid % 8);
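+ /* the 16-byte VF bitmap spans two descriptors: VFs 0-127 land in
+ * desc[0], any beyond that in desc[1]
+ */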
+
+ req0 = (struct hclge_vlan_filter_vf_cfg *)desc[0].data;
+ req1 = (struct hclge_vlan_filter_vf_cfg *)desc[1].data;
+
+ req0->vlan_id = vlan;
+ req0->vlan_cfg = is_kill;
+
+ if (vf_byte_off < HCLGE_MAX_VF_BYTES)
+ req0->vf_bitmap[vf_byte_off] = vf_byte_val;
+ else
+ req1->vf_bitmap[vf_byte_off - HCLGE_MAX_VF_BYTES] = vf_byte_val;
+
+ ret = hclge_cmd_send(&hdev->hw, desc, 2);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Send vf vlan command fail, ret =%d.\n",
+ ret);
+ return ret;
+ }
+
+ if (!is_kill) {
+ if (!req0->resp_code || req0->resp_code == 1)
+ return 0;
+
+ dev_err(&hdev->pdev->dev,
+ "Add vf vlan filter fail, ret =%d.\n",
+ req0->resp_code);
+ } else {
+ if (!req0->resp_code)
+ return 0;
+
+ dev_err(&hdev->pdev->dev,
+ "Kill vf vlan filter fail, ret =%d.\n",
+ req0->resp_code);
+ }
+
+ return -EIO;
+}
+
+static int hclge_set_port_vlan_filter(struct hnae3_handle *handle,
+ __be16 proto, u16 vlan_id,
+ bool is_kill)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_vlan_filter_pf_cfg *req;
+ struct hclge_desc desc;
+ u8 vlan_offset_byte_val;
+ u8 vlan_offset_byte;
+ u8 vlan_offset_160;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_PF_CFG, false);
+
+ vlan_offset_160 = vlan_id / 160;
+ vlan_offset_byte = (vlan_id % 160) / 8;
+ vlan_offset_byte_val = 1 << (vlan_id % 8);
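+ /* each PF VLAN filter descriptor covers a block of 160 VLAN IDs;
+ * locate the block, then the byte and bit for this vlan_id
+ */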
+
+ req = (struct hclge_vlan_filter_pf_cfg *)desc.data;
+ req->vlan_offset = vlan_offset_160;
+ req->vlan_cfg = is_kill;
+ req->vlan_offset_bitmap[vlan_offset_byte] = vlan_offset_byte_val;
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "port vlan command, send fail, ret =%d.\n",
+ ret);
+ return ret;
+ }
+
+ ret = hclge_set_vf_vlan_common(hdev, 0, is_kill, vlan_id, 0, proto);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Set pf vlan filter config fail, ret =%d.\n",
+ ret);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid,
+ u16 vlan, u8 qos, __be16 proto)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ if ((vfid >= hdev->num_alloc_vfs) || (vlan > 4095) || (qos > 7))
+ return -EINVAL;
+ if (proto != htons(ETH_P_8021Q))
+ return -EPROTONOSUPPORT;
+
+ return hclge_set_vf_vlan_common(hdev, vfid, false, vlan, qos, proto);
+}
+
+static int hclge_init_vlan_config(struct hclge_dev *hdev)
+{
+#define HCLGE_VLAN_TYPE_VF_TABLE 0
+#define HCLGE_VLAN_TYPE_PORT_TABLE 1
+ int ret;
+
+ ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_VF_TABLE,
+ true);
+ if (ret)
+ return ret;
+
+ ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_PORT_TABLE,
+ true);
+
+ return ret;
+}
+
+static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_config_max_frm_size *req;
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_desc desc;
+ int ret;
+
+ if ((new_mtu < HCLGE_MAC_MIN_MTU) || (new_mtu > HCLGE_MAC_MAX_MTU))
+ return -EINVAL;
+
+ hdev->mps = new_mtu;
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAX_FRM_SIZE, false);
+
+ req = (struct hclge_config_max_frm_size *)desc.data;
+ req->max_frm_size = cpu_to_le16(new_mtu);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "set mtu fail, ret =%d.\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
+ bool enable)
+{
+ struct hclge_reset_tqp_queue *req;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, false);
+
+ req = (struct hclge_reset_tqp_queue *)desc.data;
+ req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK);
+ hnae_set_bit(req->reset_req, HCLGE_TQP_RESET_B, enable);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Send tqp reset cmd error, status =%d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
+{
+ struct hclge_reset_tqp_queue *req;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, true);
+
+ req = (struct hclge_reset_tqp_queue *)desc.data;
+ req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Get reset status error, status =%d\n", ret);
+ return ret;
+ }
+
+ return hnae_get_bit(req->ready_to_reset, HCLGE_TQP_RESET_B);
+}
+
+static void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ int reset_try_times = 0;
+ int reset_status;
+ int ret;
+
+ ret = hclge_tqp_enable(hdev, queue_id, 0, false);
+ if (ret) {
+ dev_warn(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
+ return;
+ }
+
+ ret = hclge_send_reset_tqp_cmd(hdev, queue_id, true);
+ if (ret) {
+ dev_warn(&hdev->pdev->dev,
+ "Send reset tqp cmd fail, ret = %d\n", ret);
+ return;
+ }
+
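+ /* poll the hw up to HCLGE_TQP_RESET_TRY_TIMES times, 20 ms apart */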
+ while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
+ /* Wait for tqp hw reset */
+ msleep(20);
+ reset_status = hclge_get_reset_status(hdev, queue_id);
+ if (reset_status)
+ break;
+ }
+
+ if (reset_try_times > HCLGE_TQP_RESET_TRY_TIMES) {
+ dev_warn(&hdev->pdev->dev, "Reset TQP fail\n");
+ return;
+ }
+
+ ret = hclge_send_reset_tqp_cmd(hdev, queue_id, false);
+ if (ret) {
+ dev_warn(&hdev->pdev->dev,
+ "Deassert the soft reset fail, ret = %d\n", ret);
+ return;
+ }
+}
+
+static u32 hclge_get_fw_version(struct hnae3_handle *handle)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ return hdev->fw_version;
+}
+
+static void hclge_get_pauseparam(struct hnae3_handle *handle, u32 *auto_neg,
+ u32 *rx_en, u32 *tx_en)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ *auto_neg = hclge_get_autoneg(handle);
+
+ if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) {
+ *rx_en = 0;
+ *tx_en = 0;
+ return;
+ }
+
+ if (hdev->tm_info.fc_mode == HCLGE_FC_RX_PAUSE) {
+ *rx_en = 1;
+ *tx_en = 0;
+ } else if (hdev->tm_info.fc_mode == HCLGE_FC_TX_PAUSE) {
+ *tx_en = 1;
+ *rx_en = 0;
+ } else if (hdev->tm_info.fc_mode == HCLGE_FC_FULL) {
+ *rx_en = 1;
+ *tx_en = 1;
+ } else {
+ *rx_en = 0;
+ *tx_en = 0;
+ }
+}
+
+static void hclge_get_ksettings_an_result(struct hnae3_handle *handle,
+ u8 *auto_neg, u32 *speed, u8 *duplex)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ if (speed)
+ *speed = hdev->hw.mac.speed;
+ if (duplex)
+ *duplex = hdev->hw.mac.duplex;
+ if (auto_neg)
+ *auto_neg = hdev->hw.mac.autoneg;
+}
+
+static void hclge_get_media_type(struct hnae3_handle *handle, u8 *media_type)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ if (media_type)
+ *media_type = hdev->hw.mac.media_type;
+}
+
+static void hclge_get_mdix_mode(struct hnae3_handle *handle,
+ u8 *tp_mdix_ctrl, u8 *tp_mdix)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ struct phy_device *phydev = hdev->hw.mac.phydev;
+ int mdix_ctrl, mdix, retval, is_resolved;
+
+ if (!phydev) {
+ *tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+ *tp_mdix = ETH_TP_MDI_INVALID;
+ return;
+ }
+
+ phy_write(phydev, HCLGE_PHY_PAGE_REG, HCLGE_PHY_PAGE_MDIX);
+
+ retval = phy_read(phydev, HCLGE_PHY_CSC_REG);
+ mdix_ctrl = hnae_get_field(retval, HCLGE_PHY_MDIX_CTRL_M,
+ HCLGE_PHY_MDIX_CTRL_S);
+
+ retval = phy_read(phydev, HCLGE_PHY_CSS_REG);
+ mdix = hnae_get_bit(retval, HCLGE_PHY_MDIX_STATUS_B);
+ is_resolved = hnae_get_bit(retval, HCLGE_PHY_SPEED_DUP_RESOLVE_B);
+
+ phy_write(phydev, HCLGE_PHY_PAGE_REG, HCLGE_PHY_PAGE_COPPER);
+
+ switch (mdix_ctrl) {
+ case 0x0:
+ *tp_mdix_ctrl = ETH_TP_MDI;
+ break;
+ case 0x1:
+ *tp_mdix_ctrl = ETH_TP_MDI_X;
+ break;
+ case 0x3:
+ *tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+ break;
+ default:
+ *tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+ break;
+ }
+
+ if (!is_resolved)
+ *tp_mdix = ETH_TP_MDI_INVALID;
+ else if (mdix)
+ *tp_mdix = ETH_TP_MDI_X;
+ else
+ *tp_mdix = ETH_TP_MDI;
+}
+
+static int hclge_init_client_instance(struct hnae3_client *client,
+ struct hnae3_ae_dev *ae_dev)
+{
+ struct hclge_dev *hdev = ae_dev->priv;
+ struct hclge_vport *vport;
+ int i, ret;
+
+ for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+ vport = &hdev->vport[i];
+
+ switch (client->type) {
+ case HNAE3_CLIENT_KNIC:
+
+ hdev->nic_client = client;
+ vport->nic.client = client;
+ ret = client->ops->init_instance(&vport->nic);
+ if (ret)
+ goto err;
+
+ if (hdev->roce_client &&
+ hnae_get_bit(hdev->ae_dev->flag,
+ HNAE_DEV_SUPPORT_ROCE_B)) {
+ struct hnae3_client *rc = hdev->roce_client;
+
+ ret = hclge_init_roce_base_info(vport);
+ if (ret)
+ goto err;
+
+ ret = rc->ops->init_instance(&vport->roce);
+ if (ret)
+ goto err;
+ }
+
+ break;
+ case HNAE3_CLIENT_UNIC:
+ hdev->nic_client = client;
+ vport->nic.client = client;
+
+ ret = client->ops->init_instance(&vport->nic);
+ if (ret)
+ goto err;
+
+ break;
+ case HNAE3_CLIENT_ROCE:
+ if (hnae_get_bit(hdev->ae_dev->flag,
+ HNAE_DEV_SUPPORT_ROCE_B)) {
+ hdev->roce_client = client;
+ vport->roce.client = client;
+ }
+
+ if (hdev->roce_client) {
+ ret = hclge_init_roce_base_info(vport);
+ if (ret)
+ goto err;
+
+ ret = client->ops->init_instance(&vport->roce);
+ if (ret)
+ goto err;
+ }
+ }
+ }
+
+ return 0;
+err:
+ return ret;
+}
+
+static void hclge_uninit_client_instance(struct hnae3_client *client,
+ struct hnae3_ae_dev *ae_dev)
+{
+ struct hclge_dev *hdev = ae_dev->priv;
+ struct hclge_vport *vport;
+ int i;
+
+ for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+ vport = &hdev->vport[i];
+ if (hdev->roce_client)
+ hdev->roce_client->ops->uninit_instance(&vport->roce,
+ 0);
+ if (client->type == HNAE3_CLIENT_ROCE)
+ return;
+ if (client->ops->uninit_instance)
+ client->ops->uninit_instance(&vport->nic, 0);
+ }
+}
+
+static int hclge_pci_init(struct hclge_dev *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ struct hclge_hw *hw;
+ int ret;
+
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to enable PCI device\n");
+ goto err_no_drvdata;
+ }
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret) {
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (ret) {
+ dev_err(&pdev->dev,
+ "can't set consistent PCI DMA");
+ goto err_disable_device;
+ }
+ dev_warn(&pdev->dev, "set DMA mask to 32 bits\n");
+ }
+
+ ret = pci_request_regions(pdev, HCLGE_DRIVER_NAME);
+ if (ret) {
+ dev_err(&pdev->dev, "PCI request regions failed %d\n", ret);
+ goto err_disable_device;
+ }
+
+ pci_set_master(pdev);
+ hw = &hdev->hw;
+ hw->back = hdev;
+ hw->io_base = pcim_iomap(pdev, 2, 0);
+ if (!hw->io_base) {
+ dev_err(&pdev->dev, "Can't map configuration register space\n");
+ ret = -ENOMEM;
+ goto err_clr_master;
+ }
+
+ return 0;
+err_clr_master:
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+err_disable_device:
+ pci_disable_device(pdev);
+err_no_drvdata:
+ pci_set_drvdata(pdev, NULL);
+
+ return ret;
+}
+
+static void hclge_pci_uninit(struct hclge_dev *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+
+ if (hdev->flag & HCLGE_FLAG_USE_MSIX) {
+ pci_disable_msix(pdev);
+ devm_kfree(&pdev->dev, hdev->msix_entries);
+ hdev->msix_entries = NULL;
+ } else {
+ pci_disable_msi(pdev);
+ }
+
+ pci_clear_master(pdev);
+ pci_release_mem_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+ struct pci_dev *pdev = ae_dev->pdev;
+ const struct pci_device_id *id;
+ struct hclge_dev *hdev;
+ int ret;
+
+ hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
+ if (!hdev) {
+ ret = -ENOMEM;
+ goto err_hclge_dev;
+ }
+
+ hdev->flag |= HCLGE_FLAG_USE_MSIX;
+ hdev->pdev = pdev;
+ hdev->ae_dev = ae_dev;
+ ae_dev->priv = hdev;
+
+ id = pci_match_id(roce_pci_tbl, ae_dev->pdev);
+ if (id)
+ hnae_set_bit(ae_dev->flag, HNAE_DEV_SUPPORT_ROCE_B, 1);
+
+ ret = hclge_pci_init(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "PCI init failed\n");
+ goto err_pci_init;
+ }
+
+ /* Command queue initialize */
+ ret = hclge_cmd_init(hdev);
+ if (ret)
+ goto err_cmd_init;
+
+ ret = hclge_get_cap(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "get hw capability error, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ ret = hclge_configure(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Configure dev error, ret = %d.\n", ret);
+ return ret;
+ }
+
+ if (hdev->flag & HCLGE_FLAG_USE_MSIX)
+ ret = hclge_init_msix(hdev);
+ else
+ ret = hclge_init_msi(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Init msix/msi error, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hclge_alloc_tqps(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Allocate TQPs error, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hclge_alloc_vport(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Allocate vport error, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hclge_mac_init(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
+ return ret;
+ }
+ ret = hclge_buffer_alloc(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret);
+ return ret;
+ }
+
+ ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
+ if (ret) {
+ dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret);
+ return ret;
+ }
+
+ ret = hclge_rss_init_hw(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret);
+ return ret;
+ }
+
+ ret = hclge_init_vlan_config(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret);
+ return ret;
+ }
+
+ ret = hclge_tm_schd_init(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "tm schd init fail, ret =%d\n", ret);
+ return ret;
+ }
+
+ setup_timer(&hdev->service_timer, hclge_service_timer,
+ (unsigned long)hdev);
+ INIT_WORK(&hdev->service_task, hclge_service_task);
+
+ set_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state);
+ set_bit(HCLGE_STATE_DOWN, &hdev->state);
+
+ pr_info("%s driver initialization finished.\n", HCLGE_DRIVER_NAME);
+ return 0;
+
+err_cmd_init:
+ pci_release_regions(pdev);
+err_pci_init:
+ pci_set_drvdata(pdev, NULL);
+err_hclge_dev:
+ return ret;
+}
+
+static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
+{
+ struct hclge_dev *hdev = ae_dev->priv;
+ struct hclge_mac *mac = &hdev->hw.mac;
+
+ set_bit(HCLGE_STATE_DOWN, &hdev->state);
+
+ if (IS_ENABLED(CONFIG_PCI_IOV))
+ hclge_disable_sriov(hdev);
+
+ if (hdev->service_timer.data)
+ del_timer_sync(&hdev->service_timer);
+ if (hdev->service_task.func)
+ cancel_work_sync(&hdev->service_task);
+
+ if (mac->phydev)
+ mdiobus_unregister(mac->mdio_bus);
+
+ hclge_destroy_cmd_queue(&hdev->hw);
+ hclge_pci_uninit(hdev);
+ ae_dev->priv = NULL;
+}
+
+static const struct hnae3_ae_ops hclge_ops = {
+ .init_ae_dev = hclge_init_ae_dev,
+ .uninit_ae_dev = hclge_uninit_ae_dev,
+ .init_client_instance = hclge_init_client_instance,
+ .uninit_client_instance = hclge_uninit_client_instance,
+ .map_ring_to_vector = hclge_map_handle_ring_to_vector,
+ .unmap_ring_from_vector = hclge_unmap_ring_from_vector,
+ .get_vector = hclge_get_vector,
+ .set_promisc_mode = hclge_set_promisc_mode,
+ .start = hclge_ae_start,
+ .stop = hclge_ae_stop,
+ .get_status = hclge_get_status,
+ .get_ksettings_an_result = hclge_get_ksettings_an_result,
+ .update_speed_duplex_h = hclge_update_speed_duplex_h,
+ .cfg_mac_speed_dup_h = hclge_cfg_mac_speed_dup_h,
+ .get_media_type = hclge_get_media_type,
+ .get_rss_key_size = hclge_get_rss_key_size,
+ .get_rss_indir_size = hclge_get_rss_indir_size,
+ .get_rss = hclge_get_rss,
+ .set_rss = hclge_set_rss,
+ .get_tc_size = hclge_get_tc_size,
+ .get_mac_addr = hclge_get_mac_addr,
+ .set_mac_addr = hclge_set_mac_addr,
+ .add_uc_addr = hclge_add_uc_addr,
+ .rm_uc_addr = hclge_rm_uc_addr,
+ .add_mc_addr = hclge_add_mc_addr,
+ .rm_mc_addr = hclge_rm_mc_addr,
+ .set_autoneg = hclge_set_autoneg,
+ .get_autoneg = hclge_get_autoneg,
+ .get_pauseparam = hclge_get_pauseparam,
+ .set_mtu = hclge_set_mtu,
+ .reset_queue = hclge_reset_tqp,
+ .get_stats = hclge_get_stats,
+ .update_stats = hclge_update_stats,
+ .get_strings = hclge_get_strings,
+ .get_sset_count = hclge_get_sset_count,
+ .get_fw_version = hclge_get_fw_version,
+ .get_mdix_mode = hclge_get_mdix_mode,
+ .set_vlan_filter = hclge_set_port_vlan_filter,
+ .set_vf_vlan_filter = hclge_set_vf_vlan_filter,
+};
+
+static struct hnae3_ae_algo ae_algo = {
+ .ops = &hclge_ops,
+ .name = HCLGE_NAME,
+ .pdev_id_table = ae_algo_pci_tbl,
+};
+
+static int hclge_init(void)
+{
+ pr_info("%s is initializing\n", HCLGE_NAME);
+
+ return hnae3_register_ae_algo(&ae_algo);
+}
+
+static void hclge_exit(void)
+{
+ hnae3_unregister_ae_algo(&ae_algo);
+}
+module_init(hclge_init);
+module_exit(hclge_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
+MODULE_DESCRIPTION("HCLGE Driver");
+MODULE_VERSION(HCLGE_MOD_VERSION);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
new file mode 100644
index 0000000..edb10ad
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -0,0 +1,519 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HCLGE_MAIN_H
+#define __HCLGE_MAIN_H
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/phy.h>
+#include "hclge_cmd.h"
+#include "hnae3.h"
+
+#define HCLGE_MOD_VERSION "v1.0"
+#define HCLGE_DRIVER_NAME "hclge"
+
+#define HCLGE_INVALID_VPORT 0xffff
+
+#define HCLGE_ROCE_VECTOR_OFFSET 96
+
+#define HCLGE_PF_CFG_BLOCK_SIZE 32
+#define HCLGE_PF_CFG_DESC_NUM \
+ (HCLGE_PF_CFG_BLOCK_SIZE / HCLGE_CFG_RD_LEN_BYTES)
+
+#define HCLGE_VECTOR_REG_BASE 0x20000
+
+#define HCLGE_VECTOR_REG_OFFSET 0x4
+#define HCLGE_VECTOR_VF_OFFSET 0x100000
+
+#define HCLGE_RSS_IND_TBL_SIZE 512
+#define HCLGE_RSS_SET_BITMAP_MSK 0xffff
+#define HCLGE_RSS_KEY_SIZE 40
+#define HCLGE_RSS_HASH_ALGO_TOEPLITZ 0
+#define HCLGE_RSS_HASH_ALGO_SIMPLE 1
+#define HCLGE_RSS_HASH_ALGO_SYMMETRIC 2
+#define HCLGE_RSS_HASH_ALGO_MASK 0xf
+#define HCLGE_RSS_CFG_TBL_NUM \
+ (HCLGE_RSS_IND_TBL_SIZE / HCLGE_RSS_CFG_TBL_SIZE)
+
+#define HCLGE_RSS_TC_SIZE_0 1
+#define HCLGE_RSS_TC_SIZE_1 2
+#define HCLGE_RSS_TC_SIZE_2 4
+#define HCLGE_RSS_TC_SIZE_3 8
+#define HCLGE_RSS_TC_SIZE_4 16
+#define HCLGE_RSS_TC_SIZE_5 32
+#define HCLGE_RSS_TC_SIZE_6 64
+#define HCLGE_RSS_TC_SIZE_7 128
+
+#define HCLGE_TQP_RESET_TRY_TIMES 10
+
+#define HCLGE_PHY_PAGE_MDIX 0
+#define HCLGE_PHY_PAGE_COPPER 0
+
+/* Page Selection Reg. */
+#define HCLGE_PHY_PAGE_REG 22
+
+/* Copper Specific Control Register */
+#define HCLGE_PHY_CSC_REG 16
+
+/* Copper Specific Status Register */
+#define HCLGE_PHY_CSS_REG 17
+
+#define HCLGE_PHY_MDIX_CTRL_S (5)
+#define HCLGE_PHY_MDIX_CTRL_M (3 << HCLGE_PHY_MDIX_CTRL_S)
+
+#define HCLGE_PHY_MDIX_STATUS_B (6)
+#define HCLGE_PHY_SPEED_DUP_RESOLVE_B (11)
+
+enum HCLGE_DEV_STATE {
+ HCLGE_STATE_REINITING,
+ HCLGE_STATE_DOWN,
+ HCLGE_STATE_DISABLED,
+ HCLGE_STATE_REMOVING,
+ HCLGE_STATE_SERVICE_INITED,
+ HCLGE_STATE_SERVICE_SCHED,
+ HCLGE_STATE_MBX_HANDLING,
+ HCLGE_STATE_MBX_IRQ,
+ HCLGE_STATE_MAX
+};
+
+#define HCLGE_MPF_ENBALE 1
+struct hclge_caps {
+ u16 num_tqp;
+ u16 num_buffer_cell;
+ u32 flag;
+ u16 vmdq;
+};
+
+enum HCLGE_MAC_SPEED {
+ HCLGE_MAC_SPEED_10M = 10, /* 10 Mbps */
+ HCLGE_MAC_SPEED_100M = 100, /* 100 Mbps */
+ HCLGE_MAC_SPEED_1G = 1000, /* 1000 Mbps = 1 Gbps */
+ HCLGE_MAC_SPEED_10G = 10000, /* 10000 Mbps = 10 Gbps */
+ HCLGE_MAC_SPEED_25G = 25000, /* 25000 Mbps = 25 Gbps */
+ HCLGE_MAC_SPEED_40G = 40000, /* 40000 Mbps = 40 Gbps */
+ HCLGE_MAC_SPEED_50G = 50000, /* 50000 Mbps = 50 Gbps */
+ HCLGE_MAC_SPEED_100G = 100000 /* 100000 Mbps = 100 Gbps */
+};
+
+enum HCLGE_MAC_DUPLEX {
+ HCLGE_MAC_HALF,
+ HCLGE_MAC_FULL
+};
+
+enum hclge_mta_dmac_sel_type {
+ HCLGE_MAC_ADDR_47_36,
+ HCLGE_MAC_ADDR_46_35,
+ HCLGE_MAC_ADDR_45_34,
+ HCLGE_MAC_ADDR_44_33,
+};
+
+struct hclge_mac {
+ u8 phy_addr;
+ u8 flag;
+ u8 media_type;
+ u8 mac_addr[ETH_ALEN];
+ u8 autoneg;
+ u8 duplex;
+ u32 speed;
+ int link; /* store the link status of mac & phy (if phy exists) */
+ struct phy_device *phydev;
+ struct mii_bus *mdio_bus;
+ phy_interface_t phy_if;
+};
+
+struct hclge_hw {
+ void __iomem *io_base;
+ struct hclge_mac mac;
+ int num_vec;
+ struct hclge_cmq cmq;
+ struct hclge_caps caps;
+ void *back;
+};
+
+/* TQP stats */
+struct hlcge_tqp_stats {
+ /* query_tqp_tx_queue_statistics, opcode id: 0x0B03 */
+ u64 rcb_tx_ring_pktnum_rcd; /* 32bit */
+ /* query_tqp_rx_queue_statistics, opcode id: 0x0B13 */
+ u64 rcb_rx_ring_pktnum_rcd; /* 32bit */
+};
+
+struct hclge_tqp {
+ struct device *dev; /* Device for DMA mapping */
+ struct hnae3_queue q;
+ struct hlcge_tqp_stats tqp_stats;
+ u16 index; /* Global index in a NIC controller */
+
+ bool alloced;
+};
+
+enum hclge_fc_mode {
+ HCLGE_FC_NONE,
+ HCLGE_FC_RX_PAUSE,
+ HCLGE_FC_TX_PAUSE,
+ HCLGE_FC_FULL,
+ HCLGE_FC_PFC,
+ HCLGE_FC_DEFAULT
+};
+
+#define HCLGE_PG_NUM 4
+#define HCLGE_SCH_MODE_SP 0
+#define HCLGE_SCH_MODE_DWRR 1
+struct hclge_pg_info {
+ u8 pg_id;
+ u8 pg_sch_mode; /* 0: sp; 1: dwrr */
+ u8 tc_bit_map;
+ u32 bw_limit;
+ u8 tc_dwrr[HNAE3_MAX_TC];
+};
+
+struct hclge_tc_info {
+ u8 tc_id;
+ u8 tc_sch_mode; /* 0: sp; 1: dwrr */
+ u8 up;
+ u8 pgid;
+ u32 bw_limit;
+};
+
+struct hclge_cfg {
+ u8 vmdq_vport_num;
+ u8 tc_num;
+ u16 tqp_desc_num;
+ u16 rx_buf_len;
+ u8 phy_addr;
+ u8 media_type;
+ u8 mac_addr[ETH_ALEN];
+ u8 default_speed;
+ u32 numa_node_map;
+};
+
+struct hclge_tm_info {
+ u8 num_tc;
+ u8 num_pg; /* must be 1 for vNET-based scheduling */
+ u8 pg_dwrr[HCLGE_PG_NUM];
+ struct hclge_pg_info pg_info[HCLGE_PG_NUM];
+ struct hclge_tc_info tc_info[HNAE3_MAX_TC];
+ enum hclge_fc_mode fc_mode;
+ u8 hw_pfc_map; /* allow packet drop or not on this TC */
+};
+
+struct hclge_comm_stats_str {
+ char desc[ETH_GSTRING_LEN];
+ unsigned long offset;
+};
+
+/* all 64bit stats, opcode id: 0x0030 */
+struct hclge_64_bit_stats {
+ /* query_igu_stat */
+ u64 igu_rx_oversize_pkt;
+ u64 igu_rx_undersize_pkt;
+ u64 igu_rx_out_all_pkt;
+ u64 igu_rx_uni_pkt;
+ u64 igu_rx_multi_pkt;
+ u64 igu_rx_broad_pkt;
+ u64 rsv0;
+
+ /* query_egu_stat */
+ u64 egu_tx_out_all_pkt;
+ u64 egu_tx_uni_pkt;
+ u64 egu_tx_multi_pkt;
+ u64 egu_tx_broad_pkt;
+
+ /* ssu_ppp packet stats */
+ u64 ssu_ppp_mac_key_num;
+ u64 ssu_ppp_host_key_num;
+ u64 ppp_ssu_mac_rlt_num;
+ u64 ppp_ssu_host_rlt_num;
+
+ /* ssu_tx_in_out_dfx_stats */
+ u64 ssu_tx_in_num;
+ u64 ssu_tx_out_num;
+ /* ssu_rx_in_out_dfx_stats */
+ u64 ssu_rx_in_num;
+ u64 ssu_rx_out_num;
+};
+
+/* all 32bit stats, opcode id: 0x0031 */
+struct hclge_32_bit_stats {
+ u64 igu_rx_err_pkt;
+ u64 igu_rx_no_eof_pkt;
+ u64 igu_rx_no_sof_pkt;
+ u64 egu_tx_1588_pkt;
+ u64 egu_tx_err_pkt;
+ u64 ssu_full_drop_num;
+ u64 ssu_part_drop_num;
+ u64 ppp_key_drop_num;
+ u64 ppp_rlt_drop_num;
+ u64 ssu_key_drop_num;
+ u64 pkt_curr_buf_cnt;
+ u64 qcn_fb_rcv_cnt;
+ u64 qcn_fb_drop_cnt;
+ u64 qcn_fb_invaild_cnt;
+ u64 rsv0;
+ u64 rx_packet_tc0_in_cnt;
+ u64 rx_packet_tc1_in_cnt;
+ u64 rx_packet_tc2_in_cnt;
+ u64 rx_packet_tc3_in_cnt;
+ u64 rx_packet_tc4_in_cnt;
+ u64 rx_packet_tc5_in_cnt;
+ u64 rx_packet_tc6_in_cnt;
+ u64 rx_packet_tc7_in_cnt;
+ u64 rx_packet_tc0_out_cnt;
+ u64 rx_packet_tc1_out_cnt;
+ u64 rx_packet_tc2_out_cnt;
+ u64 rx_packet_tc3_out_cnt;
+ u64 rx_packet_tc4_out_cnt;
+ u64 rx_packet_tc5_out_cnt;
+ u64 rx_packet_tc6_out_cnt;
+ u64 rx_packet_tc7_out_cnt;
+
+ /* Tx packet level statistics */
+ u64 tx_packet_tc0_in_cnt;
+ u64 tx_packet_tc1_in_cnt;
+ u64 tx_packet_tc2_in_cnt;
+ u64 tx_packet_tc3_in_cnt;
+ u64 tx_packet_tc4_in_cnt;
+ u64 tx_packet_tc5_in_cnt;
+ u64 tx_packet_tc6_in_cnt;
+ u64 tx_packet_tc7_in_cnt;
+ u64 tx_packet_tc0_out_cnt;
+ u64 tx_packet_tc1_out_cnt;
+ u64 tx_packet_tc2_out_cnt;
+ u64 tx_packet_tc3_out_cnt;
+ u64 tx_packet_tc4_out_cnt;
+ u64 tx_packet_tc5_out_cnt;
+ u64 tx_packet_tc6_out_cnt;
+ u64 tx_packet_tc7_out_cnt;
+
+ /* packet buffer statistics */
+ u64 pkt_curr_buf_tc0_cnt;
+ u64 pkt_curr_buf_tc1_cnt;
+ u64 pkt_curr_buf_tc2_cnt;
+ u64 pkt_curr_buf_tc3_cnt;
+ u64 pkt_curr_buf_tc4_cnt;
+ u64 pkt_curr_buf_tc5_cnt;
+ u64 pkt_curr_buf_tc6_cnt;
+ u64 pkt_curr_buf_tc7_cnt;
+
+ u64 mb_uncopy_num;
+ u64 lo_pri_unicast_rlt_drop_num;
+ u64 hi_pri_multicast_rlt_drop_num;
+ u64 lo_pri_multicast_rlt_drop_num;
+ u64 rx_oq_drop_pkt_cnt;
+ u64 tx_oq_drop_pkt_cnt;
+ u64 nic_l2_err_drop_pkt_cnt;
+ u64 roc_l2_err_drop_pkt_cnt;
+};
+
+/* mac stats, opcode id: 0x0032 */
+struct hclge_mac_stats {
+ u64 mac_tx_mac_pause_num;
+ u64 mac_rx_mac_pause_num;
+ u64 mac_tx_pfc_pri0_pkt_num;
+ u64 mac_tx_pfc_pri1_pkt_num;
+ u64 mac_tx_pfc_pri2_pkt_num;
+ u64 mac_tx_pfc_pri3_pkt_num;
+ u64 mac_tx_pfc_pri4_pkt_num;
+ u64 mac_tx_pfc_pri5_pkt_num;
+ u64 mac_tx_pfc_pri6_pkt_num;
+ u64 mac_tx_pfc_pri7_pkt_num;
+ u64 mac_rx_pfc_pri0_pkt_num;
+ u64 mac_rx_pfc_pri1_pkt_num;
+ u64 mac_rx_pfc_pri2_pkt_num;
+ u64 mac_rx_pfc_pri3_pkt_num;
+ u64 mac_rx_pfc_pri4_pkt_num;
+ u64 mac_rx_pfc_pri5_pkt_num;
+ u64 mac_rx_pfc_pri6_pkt_num;
+ u64 mac_rx_pfc_pri7_pkt_num;
+ u64 mac_tx_total_pkt_num;
+ u64 mac_tx_total_oct_num;
+ u64 mac_tx_good_pkt_num;
+ u64 mac_tx_bad_pkt_num;
+ u64 mac_tx_good_oct_num;
+ u64 mac_tx_bad_oct_num;
+ u64 mac_tx_uni_pkt_num;
+ u64 mac_tx_multi_pkt_num;
+ u64 mac_tx_broad_pkt_num;
+ u64 mac_tx_undersize_pkt_num;
+ u64 mac_tx_overrsize_pkt_num;
+ u64 mac_tx_64_oct_pkt_num;
+ u64 mac_tx_65_127_oct_pkt_num;
+ u64 mac_tx_128_255_oct_pkt_num;
+ u64 mac_tx_256_511_oct_pkt_num;
+ u64 mac_tx_512_1023_oct_pkt_num;
+ u64 mac_tx_1024_1518_oct_pkt_num;
+ u64 mac_tx_1519_max_oct_pkt_num;
+ u64 mac_rx_total_pkt_num;
+ u64 mac_rx_total_oct_num;
+ u64 mac_rx_good_pkt_num;
+ u64 mac_rx_bad_pkt_num;
+ u64 mac_rx_good_oct_num;
+ u64 mac_rx_bad_oct_num;
+ u64 mac_rx_uni_pkt_num;
+ u64 mac_rx_multi_pkt_num;
+ u64 mac_rx_broad_pkt_num;
+ u64 mac_rx_undersize_pkt_num;
+ u64 mac_rx_overrsize_pkt_num;
+ u64 mac_rx_64_oct_pkt_num;
+ u64 mac_rx_65_127_oct_pkt_num;
+ u64 mac_rx_128_255_oct_pkt_num;
+ u64 mac_rx_256_511_oct_pkt_num;
+ u64 mac_rx_512_1023_oct_pkt_num;
+ u64 mac_rx_1024_1518_oct_pkt_num;
+ u64 mac_rx_1519_max_oct_pkt_num;
+
+ u64 mac_trans_fragment_pkt_num;
+ u64 mac_trans_undermin_pkt_num;
+ u64 mac_trans_jabber_pkt_num;
+ u64 mac_trans_err_all_pkt_num;
+ u64 mac_trans_from_app_good_pkt_num;
+ u64 mac_trans_from_app_bad_pkt_num;
+ u64 mac_rcv_fragment_pkt_num;
+ u64 mac_rcv_undermin_pkt_num;
+ u64 mac_rcv_jabber_pkt_num;
+ u64 mac_rcv_fcs_err_pkt_num;
+ u64 mac_rcv_send_app_good_pkt_num;
+ u64 mac_rcv_send_app_bad_pkt_num;
+};
+
+struct hclge_hw_stats {
+ struct hclge_mac_stats mac_stats;
+ struct hclge_64_bit_stats all_64_bit_stats;
+ struct hclge_32_bit_stats all_32_bit_stats;
+};
+
+struct hclge_dev {
+ struct pci_dev *pdev;
+ struct hnae3_ae_dev *ae_dev;
+ struct hclge_hw hw;
+ struct hclge_hw_stats hw_stats;
+ unsigned long state;
+
+ u32 fw_version;
+ u16 num_vmdq_vport; /* Num vmdq vport this PF has set up */
+ u16 num_tqps; /* Num task queue pairs of this PF */
+ u16 num_req_vfs; /* Num VFs requested for this PF */
+
+ u16 num_roce_msix; /* Num of roce vectors for this PF */
+ int roce_base_vector;
+
+ /* Base task tqp physical id of this PF */
+ u16 base_tqp_pid;
+ u16 alloc_rss_size; /* Allocated RSS task queue */
+ u16 rss_size_max; /* HW defined max RSS task queue */
+
+ /* Num of guaranteed filters for this PF */
+ u16 fdir_pf_filter_count;
+ u16 num_alloc_vport; /* Num vports this driver supports */
+ u32 numa_node_mask;
+ u16 rx_buf_len;
+ u16 num_desc;
+ u8 hw_tc_map;
+ u8 tc_num_last_time;
+ enum hclge_fc_mode fc_mode_last_time;
+
+#define HCLGE_FLAG_TC_BASE_SCH_MODE 1
+#define HCLGE_FLAG_VNET_BASE_SCH_MODE 2
+ u8 tx_sch_mode;
+
+ u8 default_up;
+ struct hclge_tm_info tm_info;
+
+ u16 num_msi;
+ u16 num_msi_left;
+ u16 num_msi_used;
+ u32 base_msi_vector;
+ struct msix_entry *msix_entries;
+ u16 *vector_status;
+
+ u16 pending_udp_bitmap;
+
+ u16 rx_itr_default;
+ u16 tx_itr_default;
+
+ u16 adminq_work_limit; /* Num of admin receive queue desc to process */
+ unsigned long service_timer_period;
+ unsigned long service_timer_previous;
+ struct timer_list service_timer;
+ struct work_struct service_task;
+
+ bool cur_promisc;
+ int num_alloc_vfs; /* Actual number of VFs allocated */
+
+ struct hclge_tqp *htqp;
+ struct hclge_vport *vport;
+
+ struct dentry *hclge_dbgfs;
+
+ struct hnae3_client *nic_client;
+ struct hnae3_client *roce_client;
+
+#define HCLGE_FLAG_USE_MSI 0x00000001
+#define HCLGE_FLAG_USE_MSIX 0x00000002
+#define HCLGE_FLAG_MAIN 0x00000004
+#define HCLGE_FLAG_DCB_CAPABLE 0x00000008
+#define HCLGE_FLAG_DCB_ENABLE 0x00000010
+ u32 flag;
+
+ u32 pkt_buf_size; /* Total pf buf size for tx/rx */
+ u32 mps; /* Max packet size */
+ struct hclge_priv_buf *priv_buf;
+ struct hclge_shared_buf s_buf;
+
+ enum hclge_mta_dmac_sel_type mta_mac_sel_type;
+ bool enable_mta; /* Multicast filter enable */
+ bool accept_mta_mc; /* Whether to accept mta filter multicast */
+};
+
+struct hclge_vport {
+ u16 alloc_tqps; /* Allocated Tx/Rx queues */
+
+ u8 rss_hash_key[HCLGE_RSS_KEY_SIZE]; /* User configured hash keys */
+ /* User configured lookup table entries */
+ u8 rss_indirection_tbl[HCLGE_RSS_IND_TBL_SIZE];
+
+ u16 qs_offset;
+ u16 bw_limit; /* VSI BW Limit (0 = disabled) */
+ u8 dwrr;
+
+ int vport_id;
+ struct hclge_dev *back; /* Back reference to associated dev */
+ struct hnae3_handle nic;
+ struct hnae3_handle roce;
+};
+
+void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
+ bool en_mc, bool en_bc, int vport_id);
+
+int hclge_add_uc_addr_common(struct hclge_vport *vport,
+ const unsigned char *addr);
+int hclge_rm_uc_addr_common(struct hclge_vport *vport,
+ const unsigned char *addr);
+int hclge_add_mc_addr_common(struct hclge_vport *vport,
+ const unsigned char *addr);
+int hclge_rm_mc_addr_common(struct hclge_vport *vport,
+ const unsigned char *addr);
+
+int hclge_cfg_func_mta_filter(struct hclge_dev *hdev, u8 func_id,
+ bool enable);
+struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle);
+int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector,
+ struct hnae3_ring_chain_node *ring_chain);
+static inline int hclge_get_queue_id(struct hnae3_queue *queue)
+{
+ struct hclge_tqp *tqp = container_of(queue, struct hclge_tqp, q);
+
+ return tqp->index;
+}
+
+int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex);
+int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
+ bool is_kill, u16 vlan, u8 qos, __be16 proto);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
new file mode 100644
index 0000000..f32d719
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+
+#include "hclge_cmd.h"
+#include "hclge_main.h"
+#include "hclge_mdio.h"
+
+enum hclge_mdio_c22_op_seq {
+ HCLGE_MDIO_C22_WRITE = 1,
+ HCLGE_MDIO_C22_READ = 2
+};
+
+#define HCLGE_MDIO_CTRL_START_B 0
+#define HCLGE_MDIO_CTRL_ST_S 1
+#define HCLGE_MDIO_CTRL_ST_M (0x3 << HCLGE_MDIO_CTRL_ST_S)
+#define HCLGE_MDIO_CTRL_OP_S 3
+#define HCLGE_MDIO_CTRL_OP_M (0x3 << HCLGE_MDIO_CTRL_OP_S)
+
+#define HCLGE_MDIO_PHYID_S 0
+#define HCLGE_MDIO_PHYID_M (0x1f << HCLGE_MDIO_PHYID_S)
+
+#define HCLGE_MDIO_PHYREG_S 0
+#define HCLGE_MDIO_PHYREG_M (0x1f << HCLGE_MDIO_PHYREG_S)
+
+#define HCLGE_MDIO_STA_B 0
+
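+/* command payload for HCLGE_OPC_MDIO_CONFIG, overlaid on desc.data */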
+struct hclge_mdio_cfg_cmd {
+ u8 ctrl_bit;
+ u8 phyid;
+ u8 phyad;
+ u8 rsvd;
+ __le16 reserve;
+ __le16 data_wr;
+ __le16 data_rd;
+ __le16 sta;
+};
+
+static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum,
+ u16 data)
+{
+ struct hclge_mdio_cfg_cmd *mdio_cmd;
+ struct hclge_dev *hdev = bus->priv;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, false);
+
+ mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data;
+
+ hnae_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M,
+ HCLGE_MDIO_PHYID_S, phyid);
+ hnae_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M,
+ HCLGE_MDIO_PHYREG_S, regnum);
+
+ hnae_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1);
+ hnae_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M,
+ HCLGE_MDIO_CTRL_ST_S, 1);
+ hnae_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_OP_M,
+ HCLGE_MDIO_CTRL_OP_S, HCLGE_MDIO_C22_WRITE);
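+ /* kick off the bus cycle: START = 1, ST = 1, OP = clause-22 write */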
+
+ mdio_cmd->data_wr = cpu_to_le16(data);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "mdio write fail when sending cmd, status is %d.\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum)
+{
+ struct hclge_mdio_cfg_cmd *mdio_cmd;
+ struct hclge_dev *hdev = bus->priv;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, true);
+
+ mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data;
+
+ hnae_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M,
+ HCLGE_MDIO_PHYID_S, phyid);
+ hnae_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M,
+ HCLGE_MDIO_PHYREG_S, regnum);
+
+ hnae_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1);
+ hnae_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M,
+ HCLGE_MDIO_CTRL_ST_S, 1);
+ hnae_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_OP_M,
+ HCLGE_MDIO_CTRL_OP_S, HCLGE_MDIO_C22_READ);
+
+ /* Read out phy data */
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "mdio read fail when get data, status is %d.\n",
+ ret);
+ return ret;
+ }
+
+ if (hnae_get_bit(le16_to_cpu(mdio_cmd->sta), HCLGE_MDIO_STA_B)) {
+ dev_err(&hdev->pdev->dev, "mdio read data error\n");
+ return -EIO;
+ }
+
+ return le16_to_cpu(mdio_cmd->data_rd);
+}
+
+int hclge_mac_mdio_config(struct hclge_dev *hdev)
+{
+ struct hclge_mac *mac = &hdev->hw.mac;
+ struct phy_device *phydev;
+ struct mii_bus *mdio_bus;
+ int ret;
+
+ if (hdev->hw.mac.phy_addr >= PHY_MAX_ADDR)
+ return 0;
+
+ mdio_bus = devm_mdiobus_alloc(&hdev->pdev->dev);
+ if (!mdio_bus)
+ return -ENOMEM;
+
+ mdio_bus->name = "hisilicon MII bus";
+ mdio_bus->read = hclge_mdio_read;
+ mdio_bus->write = hclge_mdio_write;
+ snprintf(mdio_bus->id, MII_BUS_ID_SIZE, "%s-%s", "mii",
+ dev_name(&hdev->pdev->dev));
+
+ mdio_bus->parent = &hdev->pdev->dev;
+ mdio_bus->priv = hdev;
+ mdio_bus->phy_mask = ~(1 << mac->phy_addr);
+ ret = mdiobus_register(mdio_bus);
+ if (ret) {
+ dev_err(mdio_bus->parent,
+ "Failed to register MDIO bus ret = %#x\n", ret);
+ return ret;
+ }
+
+ phydev = mdiobus_get_phy(mdio_bus, mac->phy_addr);
+ if (!phydev) {
+ dev_err(mdio_bus->parent, "Failed to get phy device\n");
+ mdiobus_unregister(mdio_bus);
+ return -EIO;
+ }
+
+ mac->phydev = phydev;
+ mac->mdio_bus = mdio_bus;
+
+ return 0;
+}
+
+static void hclge_mac_adjust_link(struct net_device *netdev)
+{
+ struct hnae3_handle *h = *((void **)netdev_priv(netdev));
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ int duplex, speed;
+ int ret;
+
+ speed = netdev->phydev->speed;
+ duplex = netdev->phydev->duplex;
+
+ ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex);
+ if (ret)
+ netdev_err(netdev, "failed to adjust link.\n");
+}
+
+int hclge_mac_start_phy(struct hclge_dev *hdev)
+{
+ struct net_device *netdev = hdev->vport[0].nic.netdev;
+ struct phy_device *phydev = hdev->hw.mac.phydev;
+ int ret;
+
+ if (!phydev)
+ return 0;
+
+ ret = phy_connect_direct(netdev, phydev,
+ hclge_mac_adjust_link,
+ PHY_INTERFACE_MODE_SGMII);
+ if (ret) {
+ netdev_err(netdev, "phy_connect_direct err.\n");
+ return ret;
+ }
+
+ phy_start(phydev);
+
+ return 0;
+}
+
+void hclge_mac_stop_phy(struct hclge_dev *hdev)
+{
+ struct net_device *netdev = hdev->vport[0].nic.netdev;
+ struct phy_device *phydev = netdev->phydev;
+
+ if (!phydev)
+ return;
+
+ phy_stop(phydev);
+ phy_disconnect(phydev);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
new file mode 100644
index 0000000..c5e91cf
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HCLGE_MDIO_H
+#define __HCLGE_MDIO_H
+
+int hclge_mac_mdio_config(struct hclge_dev *hdev);
+int hclge_mac_start_phy(struct hclge_dev *hdev);
+void hclge_mac_stop_phy(struct hclge_dev *hdev);
+
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
new file mode 100644
index 0000000..1c577d2
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -0,0 +1,1015 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+
+#include "hclge_cmd.h"
+#include "hclge_main.h"
+#include "hclge_tm.h"
+
+enum hclge_shaper_level {
+ HCLGE_SHAPER_LVL_PRI = 0,
+ HCLGE_SHAPER_LVL_PG = 1,
+ HCLGE_SHAPER_LVL_PORT = 2,
+ HCLGE_SHAPER_LVL_QSET = 3,
+ HCLGE_SHAPER_LVL_CNT = 4,
+ HCLGE_SHAPER_LVL_VF = 0,
+ HCLGE_SHAPER_LVL_PF = 1,
+};
+
+#define HCLGE_SHAPER_BS_U_DEF 1
+#define HCLGE_SHAPER_BS_S_DEF 4
+
+#define HCLGE_ETHER_MAX_RATE 100000
+
+/* hclge_shaper_para_calc: calculate the ir parameters for the shaper
+ * @ir: Rate to be configured, in Mbps
+ * @shaper_level: the shaper level, e.g. port, pg, priority, queueset
+ * @ir_b: IR_B parameter of IR shaper
+ * @ir_u: IR_U parameter of IR shaper
+ * @ir_s: IR_S parameter of IR shaper
+ *
+ * the formula:
+ *
+ * IR_b * (2 ^ IR_u) * 8
+ * IR(Mbps) = ------------------------- * CLOCK(1000Mbps)
+ * Tick * (2 ^ IR_s)
+ *
+ * @return: 0: calculation successful, negative: fail
+ */
+static int hclge_shaper_para_calc(u32 ir, u8 shaper_level,
+ u8 *ir_b, u8 *ir_u, u8 *ir_s)
+{
+ const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = {
+ 6 * 256, /* Priority level */
+ 6 * 32, /* Priority group level */
+ 6 * 8, /* Port level */
+ 6 * 256 /* Qset level */
+ };
+ u8 ir_u_calc = 0, ir_s_calc = 0;
+ u32 ir_calc;
+ u32 tick;
+
+ /* Calc tick */
+ if (shaper_level >= HCLGE_SHAPER_LVL_CNT)
+ return -EINVAL;
+
+ tick = tick_array[shaper_level];
+
+ /*
+ * Calc the speed if ir_b = 126, ir_u = 0 and ir_s = 0
+ * the formula is changed to:
+ * 126 * 1 * 8
+ * ir_calc = ---------------- * 1000
+ * tick * 1
+ */
+ ir_calc = (1008000 + (tick >> 1) - 1) / tick;
+
+ if (ir_calc == ir) {
+ *ir_b = 126;
+ *ir_u = 0;
+ *ir_s = 0;
+
+ return 0;
+ } else if (ir_calc > ir) {
+ /* Increasing the denominator to select ir_s value */
+ while (ir_calc > ir) {
+ ir_s_calc++;
+ ir_calc = 1008000 / (tick * (1 << ir_s_calc));
+ }
+
+ if (ir_calc == ir)
+ *ir_b = 126;
+ else
+ *ir_b = (ir * tick * (1 << ir_s_calc) + 4000) / 8000;
+ } else {
+ /* Increasing the numerator to select ir_u value */
+ u32 numerator;
+
+ while (ir_calc < ir) {
+ ir_u_calc++;
+ numerator = 1008000 * (1 << ir_u_calc);
+ ir_calc = (numerator + (tick >> 1)) / tick;
+ }
+
+ if (ir_calc == ir) {
+ *ir_b = 126;
+ } else {
+ u32 denominator = (8000 * (1 << --ir_u_calc));
+
+ *ir_b = (ir * tick + (denominator >> 1)) / denominator;
+ }
+ }
+
+ *ir_u = ir_u_calc;
+ *ir_s = ir_s_calc;
+
+ return 0;
+}
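+
+/* Worked example (illustrative, not from the hw spec): ir = 5000 Mbps at
+ * HCLGE_SHAPER_LVL_PRI (tick = 1536) settles on ir_b = 240, ir_u = 2,
+ * ir_s = 0, and indeed 240 * (2 ^ 2) * 8 / (1536 * (2 ^ 0)) * 1000 = 5000.
+ */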
+
+static int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
+{
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_MAC_PAUSE_EN, false);
+
+ desc.data[0] = cpu_to_le32((tx ? HCLGE_TX_MAC_PAUSE_EN_MSK : 0) |
+ (rx ? HCLGE_RX_MAC_PAUSE_EN_MSK : 0));
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id)
+{
+ u8 tc;
+
+ for (tc = 0; tc < hdev->tm_info.num_tc; tc++)
+ if (hdev->tm_info.tc_info[tc].up == pri_id)
+ break;
+
+ if (tc >= hdev->tm_info.num_tc)
+ return -EINVAL;
+
+ /* The priority mapping register is four bytes wide: the first byte
+ * holds priority0 and priority1, the upper 4 bits standing for
+ * priority1 and the lower 4 bits for priority0, as below:
+ * first byte: | pri_1 | pri_0 |
+ * second byte: | pri_3 | pri_2 |
+ * third byte: | pri_5 | pri_4 |
+ * fourth byte: | pri_7 | pri_6 |
+ */
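+ /* e.g. if priority 5 maps to tc 2, this sets pri[2] |= 2 << 4 */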
+ pri[pri_id >> 1] |= tc << ((pri_id & 1) * 4);
+
+ return 0;
+}
+
+static int hclge_up_to_tc_map(struct hclge_dev *hdev)
+{
+ struct hclge_desc desc;
+ u8 *pri = (u8 *)desc.data;
+ u8 pri_id;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PRI_TO_TC_MAPPING, false);
+
+ for (pri_id = 0; pri_id < hdev->tm_info.num_tc; pri_id++) {
+ ret = hclge_fill_pri_array(hdev, pri, pri_id);
+ if (ret)
+ return ret;
+ }
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pg_to_pri_map_cfg(struct hclge_dev *hdev,
+ u8 pg_id, u8 pri_bit_map)
+{
+ struct hclge_pg_to_pri_link_cmd *map;
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PG_TO_PRI_LINK, false);
+
+ map = (struct hclge_pg_to_pri_link_cmd *)desc.data;
+
+ map->pg_id = pg_id;
+ map->pri_bit_map = pri_bit_map;
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev,
+ u16 qs_id, u8 pri)
+{
+ struct hclge_qs_to_pri_link_cmd *map;
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_TO_PRI_LINK, false);
+
+ map = (struct hclge_qs_to_pri_link_cmd *)desc.data;
+
+ map->qs_id = cpu_to_le16(qs_id);
+ map->priority = pri;
+ map->link_vld = HCLGE_TM_QS_PRI_LINK_VLD_MSK;
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_q_to_qs_map_cfg(struct hclge_dev *hdev,
+ u8 q_id, u16 qs_id)
+{
+ struct hclge_nq_to_qs_link_cmd *map;
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_NQ_TO_QS_LINK, false);
+
+ map = (struct hclge_nq_to_qs_link_cmd *)desc.data;
+
+ map->nq_id = cpu_to_le16(q_id);
+ map->qset_id = cpu_to_le16(qs_id | HCLGE_TM_Q_QS_LINK_VLD_MSK);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pg_weight_cfg(struct hclge_dev *hdev, u8 pg_id,
+ u8 dwrr)
+{
+ struct hclge_pg_weight_cmd *weight;
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PG_WEIGHT, false);
+
+ weight = (struct hclge_pg_weight_cmd *)desc.data;
+
+ weight->pg_id = pg_id;
+ weight->dwrr = dwrr;
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pri_weight_cfg(struct hclge_dev *hdev, u8 pri_id,
+ u8 dwrr)
+{
+ struct hclge_priority_weight_cmd *weight;
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PRI_WEIGHT, false);
+
+ weight = (struct hclge_priority_weight_cmd *)desc.data;
+
+ weight->pri_id = pri_id;
+ weight->dwrr = dwrr;
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_qs_weight_cfg(struct hclge_dev *hdev, u16 qs_id,
+ u8 dwrr)
+{
+ struct hclge_qs_weight_cmd *weight;
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_WEIGHT, false);
+
+ weight = (struct hclge_qs_weight_cmd *)desc.data;
+
+ weight->qs_id = cpu_to_le16(qs_id);
+ weight->dwrr = dwrr;
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
+ enum hclge_shap_bucket bucket, u8 pg_id,
+ u8 ir_b, u8 ir_u, u8 ir_s, u8 bs_b, u8 bs_s)
+{
+ struct hclge_pg_shapping_cmd *shap_cfg_cmd;
+ enum hclge_opcode_type opcode;
+ struct hclge_desc desc;
+
+ opcode = bucket ? HCLGE_OPC_TM_PG_P_SHAPPING :
+ HCLGE_OPC_TM_PG_C_SHAPPING;
+ hclge_cmd_setup_basic_desc(&desc, opcode, false);
+
+ shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data;
+
+ shap_cfg_cmd->pg_id = pg_id;
+
+ hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b);
+ hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u);
+ hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s);
+ hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b);
+ hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev,
+ enum hclge_shap_bucket bucket, u8 pri_id,
+ u8 ir_b, u8 ir_u, u8 ir_s,
+ u8 bs_b, u8 bs_s)
+{
+ struct hclge_pri_shapping_cmd *shap_cfg_cmd;
+ enum hclge_opcode_type opcode;
+ struct hclge_desc desc;
+
+ opcode = bucket ? HCLGE_OPC_TM_PRI_P_SHAPPING :
+ HCLGE_OPC_TM_PRI_C_SHAPPING;
+
+ hclge_cmd_setup_basic_desc(&desc, opcode, false);
+
+ shap_cfg_cmd = (struct hclge_pri_shapping_cmd *)desc.data;
+
+ shap_cfg_cmd->pri_id = pri_id;
+
+ hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b);
+ hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u);
+ hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s);
+ hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b);
+ hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pg_schd_mode_cfg(struct hclge_dev *hdev, u8 pg_id)
+{
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PG_SCH_MODE_CFG, false);
+
+ if (hdev->tm_info.pg_info[pg_id].pg_sch_mode == HCLGE_SCH_MODE_DWRR)
+ desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK);
+ else
+ desc.data[1] = 0;
+
+ desc.data[0] = cpu_to_le32(pg_id);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_pri_schd_mode_cfg(struct hclge_dev *hdev, u8 pri_id)
+{
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PRI_SCH_MODE_CFG, false);
+
+ if (hdev->tm_info.tc_info[pri_id].tc_sch_mode == HCLGE_SCH_MODE_DWRR)
+ desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK);
+ else
+ desc.data[1] = 0;
+
+ desc.data[0] = cpu_to_le32(pri_id);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id)
+{
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_SCH_MODE_CFG, false);
+
+ if (hdev->tm_info.tc_info[qs_id].tc_sch_mode == HCLGE_SCH_MODE_DWRR)
+ desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK);
+ else
+ desc.data[1] = 0;
+
+ desc.data[0] = cpu_to_le32(qs_id);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_qs_bp_cfg(struct hclge_dev *hdev, u8 tc)
+{
+ struct hclge_bp_to_qs_map_cmd *bp_to_qs_map_cmd;
+ struct hclge_desc desc;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_BP_TO_QSET_MAPPING,
+ false);
+
+ bp_to_qs_map_cmd = (struct hclge_bp_to_qs_map_cmd *)desc.data;
+
+ bp_to_qs_map_cmd->tc_id = tc;
+
+ /* Qset and tc have a one-to-one mapping */
+ bp_to_qs_map_cmd->qs_bit_map = cpu_to_le32(1 << tc);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
+{
+ struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+ struct hclge_dev *hdev = vport->back;
+ u8 i;
+
+ vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit;
+ kinfo->num_tc = min_t(u16, kinfo->num_tqps, hdev->tm_info.num_tc);
+ kinfo->rss_size = min_t(u16, hdev->rss_size_max,
+ kinfo->num_tqps / kinfo->num_tc);
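+ /* each vport owns a contiguous block of num_tc qsets */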
+ vport->qs_offset = hdev->tm_info.num_tc * vport->vport_id;
+ vport->dwrr = 100; /* 100 percent as init */
+
+ for (i = 0; i < kinfo->num_tc; i++) {
+ if (hdev->hw_tc_map & BIT(i)) {
+ kinfo->tc_info[i].enable = true;
+ kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size;
+ kinfo->tc_info[i].tqp_count = kinfo->rss_size;
+ kinfo->tc_info[i].tc = i;
+ kinfo->tc_info[i].up = hdev->tm_info.tc_info[i].up;
+ } else {
+ /* Set to default queue if TC is disabled */
+ kinfo->tc_info[i].enable = false;
+ kinfo->tc_info[i].tqp_offset = 0;
+ kinfo->tc_info[i].tqp_count = 1;
+ kinfo->tc_info[i].tc = 0;
+ kinfo->tc_info[i].up = 0;
+ }
+ }
+}
+
+static void hclge_tm_vport_info_update(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ u32 i;
+
+ for (i = 0; i < hdev->num_alloc_vport; i++) {
+ hclge_tm_vport_tc_info_update(vport);
+
+ vport++;
+ }
+}
+
+static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
+{
+ u8 i;
+
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ hdev->tm_info.tc_info[i].tc_id = i;
+ hdev->tm_info.tc_info[i].tc_sch_mode = HCLGE_SCH_MODE_DWRR;
+ hdev->tm_info.tc_info[i].up = i;
+ hdev->tm_info.tc_info[i].pgid = 0;
+ hdev->tm_info.tc_info[i].bw_limit =
+ hdev->tm_info.pg_info[0].bw_limit;
+ }
+
+ hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+}
+
+static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
+{
+ u8 i;
+
+ for (i = 0; i < hdev->tm_info.num_pg; i++) {
+ int k;
+
+ hdev->tm_info.pg_dwrr[i] = i ? 0 : 100;
+
+ hdev->tm_info.pg_info[i].pg_id = i;
+ hdev->tm_info.pg_info[i].pg_sch_mode = HCLGE_SCH_MODE_DWRR;
+
+ hdev->tm_info.pg_info[i].bw_limit = HCLGE_ETHER_MAX_RATE;
+
+ if (i != 0)
+ continue;
+
+ hdev->tm_info.pg_info[i].tc_bit_map = hdev->hw_tc_map;
+ for (k = 0; k < hdev->tm_info.num_tc; k++)
+ hdev->tm_info.pg_info[i].tc_dwrr[k] = 100;
+ }
+}
+
+static int hclge_tm_schd_info_init(struct hclge_dev *hdev)
+{
+ if ((hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE) &&
+ (hdev->tm_info.num_pg != 1))
+ return -EINVAL;
+
+ hclge_tm_pg_info_init(hdev);
+
+ hclge_tm_tc_info_init(hdev);
+
+ hclge_tm_vport_info_update(hdev);
+
+ hdev->tm_info.fc_mode = HCLGE_FC_NONE;
+ hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+
+ return 0;
+}
+
+static int hclge_tm_pg_to_pri_map(struct hclge_dev *hdev)
+{
+ int ret;
+ u32 i;
+
+ if (hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE)
+ return 0;
+
+ for (i = 0; i < hdev->tm_info.num_pg; i++) {
+ /* Cfg mapping */
+ ret = hclge_tm_pg_to_pri_map_cfg(
+ hdev, i, hdev->tm_info.pg_info[i].tc_bit_map);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev)
+{
+ u8 ir_u, ir_b, ir_s;
+ int ret;
+ u32 i;
+
+ /* Cfg pg schd */
+ if (hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE)
+ return 0;
+
+ /* Pg to pri */
+ for (i = 0; i < hdev->tm_info.num_pg; i++) {
+ /* Calc shaper para */
+ ret = hclge_shaper_para_calc(
+ hdev->tm_info.pg_info[i].bw_limit,
+ HCLGE_SHAPER_LVL_PG,
+ &ir_b, &ir_u, &ir_s);
+ if (ret)
+ return ret;
+
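+		/* leave the C bucket rate at zero and program the
+		 * calculated rate on the P bucket
+		 */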
+ ret = hclge_tm_pg_shapping_cfg(hdev,
+ HCLGE_TM_SHAP_C_BUCKET, i,
+ 0, 0, 0, HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+ if (ret)
+ return ret;
+
+ ret = hclge_tm_pg_shapping_cfg(hdev,
+ HCLGE_TM_SHAP_P_BUCKET, i,
+ ir_b, ir_u, ir_s,
+ HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_pg_dwrr_cfg(struct hclge_dev *hdev)
+{
+ int ret;
+ u32 i;
+
+ /* cfg pg schd */
+ if (hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE)
+ return 0;
+
+ /* pg to prio */
+ for (i = 0; i < hdev->tm_info.num_pg; i++) {
+ /* Cfg dwrr */
+ ret = hclge_tm_pg_weight_cfg(hdev, i,
+ hdev->tm_info.pg_dwrr[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_vport_q_to_qs_map(struct hclge_dev *hdev,
+ struct hclge_vport *vport)
+{
+ struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+ struct hnae3_queue **tqp = kinfo->tqp;
+ struct hnae3_tc_info *v_tc_info;
+ u32 i, j;
+ int ret;
+
+ for (i = 0; i < kinfo->num_tc; i++) {
+ v_tc_info = &kinfo->tc_info[i];
+ for (j = 0; j < v_tc_info->tqp_count; j++) {
+ struct hnae3_queue *q = tqp[v_tc_info->tqp_offset + j];
+
+ ret = hclge_tm_q_to_qs_map_cfg(hdev,
+ hclge_get_queue_id(q),
+ vport->qs_offset + i);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ int ret;
+ u32 i;
+
+ if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
+ /* Cfg qs -> pri mapping, one by one mapping */
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ ret = hclge_tm_qs_to_pri_map_cfg(hdev, i, i);
+ if (ret)
+ return ret;
+ }
+ } else if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE) {
+ int k;
+ /* Cfg qs -> pri mapping, qs = tc, pri = vf, 8 qs -> 1 pri */
+ for (k = 0; k < hdev->num_alloc_vport; k++)
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ ret = hclge_tm_qs_to_pri_map_cfg(
+ hdev, vport[k].qs_offset + i, k);
+ if (ret)
+ return ret;
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ /* Cfg q -> qs mapping */
+ for (i = 0; i < hdev->num_alloc_vport; i++) {
+ ret = hclge_vport_q_to_qs_map(hdev, vport);
+ if (ret)
+ return ret;
+
+ vport++;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev)
+{
+ u8 ir_u, ir_b, ir_s;
+ int ret;
+ u32 i;
+
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ ret = hclge_shaper_para_calc(
+ hdev->tm_info.tc_info[i].bw_limit,
+ HCLGE_SHAPER_LVL_PRI,
+ &ir_b, &ir_u, &ir_s);
+ if (ret)
+ return ret;
+
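+		/* zero-rate C bucket first, then the real limit on the P bucket */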
+ ret = hclge_tm_pri_shapping_cfg(
+ hdev, HCLGE_TM_SHAP_C_BUCKET, i,
+ 0, 0, 0, HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+ if (ret)
+ return ret;
+
+ ret = hclge_tm_pri_shapping_cfg(
+ hdev, HCLGE_TM_SHAP_P_BUCKET, i,
+ ir_b, ir_u, ir_s, HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_pri_vnet_base_shaper_pri_cfg(struct hclge_vport *vport)
+{
+ struct hclge_dev *hdev = vport->back;
+ u8 ir_u, ir_b, ir_s;
+ int ret;
+
+ ret = hclge_shaper_para_calc(vport->bw_limit, HCLGE_SHAPER_LVL_VF,
+ &ir_b, &ir_u, &ir_s);
+ if (ret)
+ return ret;
+
+ ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET,
+ vport->vport_id,
+ 0, 0, 0, HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+ if (ret)
+ return ret;
+
+ ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET,
+ vport->vport_id,
+ ir_b, ir_u, ir_s,
+ HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int hclge_tm_pri_vnet_base_shaper_qs_cfg(struct hclge_vport *vport)
+{
+ struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+ struct hclge_dev *hdev = vport->back;
+ struct hnae3_tc_info *v_tc_info;
+ u8 ir_u, ir_b, ir_s;
+ u32 i;
+ int ret;
+
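+	/* only the qs shaper parameters are calculated (validated) here;
+	 * no qs-level shaping command is issued
+	 */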
+ for (i = 0; i < kinfo->num_tc; i++) {
+ v_tc_info = &kinfo->tc_info[i];
+ ret = hclge_shaper_para_calc(
+ hdev->tm_info.tc_info[i].bw_limit,
+ HCLGE_SHAPER_LVL_QSET,
+ &ir_b, &ir_u, &ir_s);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_pri_vnet_base_shaper_cfg(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ int ret;
+ u32 i;
+
+ /* Need config vport shaper */
+ for (i = 0; i < hdev->num_alloc_vport; i++) {
+ ret = hclge_tm_pri_vnet_base_shaper_pri_cfg(vport);
+ if (ret)
+ return ret;
+
+ ret = hclge_tm_pri_vnet_base_shaper_qs_cfg(vport);
+ if (ret)
+ return ret;
+
+ vport++;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_pri_shaper_cfg(struct hclge_dev *hdev)
+{
+ int ret;
+
+ if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
+ ret = hclge_tm_pri_tc_base_shaper_cfg(hdev);
+ if (ret)
+ return ret;
+ } else {
+ ret = hclge_tm_pri_vnet_base_shaper_cfg(hdev);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
+{
+ struct hclge_pg_info *pg_info;
+ u8 dwrr;
+ int ret;
+ u32 i;
+
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ pg_info =
+ &hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid];
+ dwrr = pg_info->tc_dwrr[i];
+
+ ret = hclge_tm_pri_weight_cfg(hdev, i, dwrr);
+ if (ret)
+ return ret;
+
+ ret = hclge_tm_qs_weight_cfg(hdev, i, dwrr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_pri_vnet_base_dwrr_pri_cfg(struct hclge_vport *vport)
+{
+ struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+ u8 i;
+
+ /* Vf dwrr */
+ ret = hclge_tm_pri_weight_cfg(hdev, vport->vport_id, vport->dwrr);
+ if (ret)
+ return ret;
+
+ /* Qset dwrr */
+ for (i = 0; i < kinfo->num_tc; i++) {
+ ret = hclge_tm_qs_weight_cfg(
+ hdev, vport->qs_offset + i,
+ hdev->tm_info.pg_info[0].tc_dwrr[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_pri_vnet_base_dwrr_cfg(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ int ret;
+ u32 i;
+
+ for (i = 0; i < hdev->num_alloc_vport; i++) {
+ ret = hclge_tm_pri_vnet_base_dwrr_pri_cfg(vport);
+ if (ret)
+ return ret;
+
+ vport++;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_pri_dwrr_cfg(struct hclge_dev *hdev)
+{
+ int ret;
+
+ if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
+ ret = hclge_tm_pri_tc_base_dwrr_cfg(hdev);
+ if (ret)
+ return ret;
+ } else {
+ ret = hclge_tm_pri_vnet_base_dwrr_cfg(hdev);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_map_cfg(struct hclge_dev *hdev)
+{
+ int ret;
+
+ ret = hclge_tm_pg_to_pri_map(hdev);
+ if (ret)
+ return ret;
+
+ return hclge_tm_pri_q_qs_cfg(hdev);
+}
+
+static int hclge_tm_shaper_cfg(struct hclge_dev *hdev)
+{
+ int ret;
+
+ ret = hclge_tm_pg_shaper_cfg(hdev);
+ if (ret)
+ return ret;
+
+ return hclge_tm_pri_shaper_cfg(hdev);
+}
+
+int hclge_tm_dwrr_cfg(struct hclge_dev *hdev)
+{
+ int ret;
+
+ ret = hclge_tm_pg_dwrr_cfg(hdev);
+ if (ret)
+ return ret;
+
+ return hclge_tm_pri_dwrr_cfg(hdev);
+}
+
+static int hclge_tm_lvl2_schd_mode_cfg(struct hclge_dev *hdev)
+{
+ int ret;
+ u8 i;
+
+	/* Only configured in TC-based scheduler mode */
+ if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE)
+ return 0;
+
+ for (i = 0; i < hdev->tm_info.num_pg; i++) {
+ ret = hclge_tm_pg_schd_mode_cfg(hdev, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_schd_mode_vnet_base_cfg(struct hclge_vport *vport)
+{
+ struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+ u8 i;
+
+ ret = hclge_tm_pri_schd_mode_cfg(hdev, vport->vport_id);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < kinfo->num_tc; i++) {
+ ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ int ret;
+ u8 i;
+
+ if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ ret = hclge_tm_pri_schd_mode_cfg(hdev, i);
+ if (ret)
+ return ret;
+
+ ret = hclge_tm_qs_schd_mode_cfg(hdev, i);
+ if (ret)
+ return ret;
+ }
+ } else {
+ for (i = 0; i < hdev->num_alloc_vport; i++) {
+ ret = hclge_tm_schd_mode_vnet_base_cfg(vport);
+ if (ret)
+ return ret;
+
+ vport++;
+ }
+ }
+
+ return 0;
+}
+
+static int hclge_tm_schd_mode_hw(struct hclge_dev *hdev)
+{
+ int ret;
+
+ ret = hclge_tm_lvl2_schd_mode_cfg(hdev);
+ if (ret)
+ return ret;
+
+ return hclge_tm_lvl34_schd_mode_cfg(hdev);
+}
+
+static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
+{
+ int ret;
+
+ /* Cfg tm mapping */
+ ret = hclge_tm_map_cfg(hdev);
+ if (ret)
+ return ret;
+
+ /* Cfg tm shaper */
+ ret = hclge_tm_shaper_cfg(hdev);
+ if (ret)
+ return ret;
+
+ /* Cfg dwrr */
+ ret = hclge_tm_dwrr_cfg(hdev);
+ if (ret)
+ return ret;
+
+ /* Cfg schd mode for each level schd */
+ return hclge_tm_schd_mode_hw(hdev);
+}
+
+int hclge_pause_setup_hw(struct hclge_dev *hdev)
+{
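+	/* MAC (global) pause is enabled in every flow control mode
+	 * except PFC
+	 */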
+ bool en = hdev->tm_info.fc_mode != HCLGE_FC_PFC;
+ int ret;
+ u8 i;
+
+ ret = hclge_mac_pause_en_cfg(hdev, en, en);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ ret = hclge_tm_qs_bp_cfg(hdev, i);
+ if (ret)
+ return ret;
+ }
+
+ return hclge_up_to_tc_map(hdev);
+}
+
+int hclge_tm_init_hw(struct hclge_dev *hdev)
+{
+ int ret;
+
+ if ((hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE) &&
+ (hdev->tx_sch_mode != HCLGE_FLAG_VNET_BASE_SCH_MODE))
+ return -ENOTSUPP;
+
+ ret = hclge_tm_schd_setup_hw(hdev);
+ if (ret)
+ return ret;
+
+ ret = hclge_pause_setup_hw(hdev);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+int hclge_tm_schd_init(struct hclge_dev *hdev)
+{
+ int ret = hclge_tm_schd_info_init(hdev);
+
+ if (ret)
+ return ret;
+
+ return hclge_tm_init_hw(hdev);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
new file mode 100644
index 0000000..7e67337
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HCLGE_TM_H
+#define __HCLGE_TM_H
+
+#include <linux/types.h>
+
+/* MAC Pause */
+#define HCLGE_TX_MAC_PAUSE_EN_MSK BIT(0)
+#define HCLGE_RX_MAC_PAUSE_EN_MSK BIT(1)
+
+#define HCLGE_TM_PORT_BASE_MODE_MSK BIT(0)
+
+/* SP or DWRR */
+#define HCLGE_TM_TX_SCHD_DWRR_MSK BIT(0)
+#define HCLGE_TM_TX_SCHD_SP_MSK (0xFE)
+
+struct hclge_pg_to_pri_link_cmd {
+ u8 pg_id;
+ u8 rsvd1[3];
+ u8 pri_bit_map;
+};
+
+struct hclge_qs_to_pri_link_cmd {
+ __le16 qs_id;
+ __le16 rsvd;
+ u8 priority;
+#define HCLGE_TM_QS_PRI_LINK_VLD_MSK BIT(0)
+ u8 link_vld;
+};
+
+struct hclge_nq_to_qs_link_cmd {
+ __le16 nq_id;
+ __le16 rsvd;
+#define HCLGE_TM_Q_QS_LINK_VLD_MSK BIT(10)
+ __le16 qset_id;
+};
+
+struct hclge_pg_weight_cmd {
+ u8 pg_id;
+ u8 dwrr;
+};
+
+struct hclge_priority_weight_cmd {
+ u8 pri_id;
+ u8 dwrr;
+};
+
+struct hclge_qs_weight_cmd {
+ __le16 qs_id;
+ u8 dwrr;
+};
+
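+/* layout of the 32-bit shapping parameter word:
+ * bits 0-7 IR_B, bits 8-11 IR_U, bits 12-15 IR_S,
+ * bits 16-20 BS_B, bits 21-25 BS_S
+ */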
+#define HCLGE_TM_SHAP_IR_B_MSK GENMASK(7, 0)
+#define HCLGE_TM_SHAP_IR_B_LSH 0
+#define HCLGE_TM_SHAP_IR_U_MSK GENMASK(11, 8)
+#define HCLGE_TM_SHAP_IR_U_LSH 8
+#define HCLGE_TM_SHAP_IR_S_MSK GENMASK(15, 12)
+#define HCLGE_TM_SHAP_IR_S_LSH 12
+#define HCLGE_TM_SHAP_BS_B_MSK GENMASK(20, 16)
+#define HCLGE_TM_SHAP_BS_B_LSH 16
+#define HCLGE_TM_SHAP_BS_S_MSK GENMASK(25, 21)
+#define HCLGE_TM_SHAP_BS_S_LSH 21
+
+enum hclge_shap_bucket {
+ HCLGE_TM_SHAP_C_BUCKET = 0,
+ HCLGE_TM_SHAP_P_BUCKET,
+};
+
+struct hclge_pri_shapping_cmd {
+ u8 pri_id;
+ u8 rsvd[3];
+ __le32 pri_shapping_para;
+};
+
+struct hclge_pg_shapping_cmd {
+ u8 pg_id;
+ u8 rsvd[3];
+ __le32 pg_shapping_para;
+};
+
+struct hclge_bp_to_qs_map_cmd {
+ u8 tc_id;
+ u8 rsvd[2];
+ u8 qs_group_id;
+ __le32 qs_bit_map;
+ u32 rsvd1;
+};
+
+#define hclge_tm_set_feild(dest, string, val) \
+ hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \
+ (HCLGE_TM_SHAP_##string##_LSH), val)
+#define hclge_tm_get_feild(src, string) \
+ hnae_get_field((src), (HCLGE_TM_SHAP_##string##_MSK), \
+ (HCLGE_TM_SHAP_##string##_LSH))
+
+int hclge_tm_schd_init(struct hclge_dev *hdev);
+int hclge_pause_setup_hw(struct hclge_dev *hdev);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
new file mode 100644
index 0000000..1c3e294
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -0,0 +1,2891 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/skbuff.h>
+#include <linux/sctp.h>
+#include <linux/vermagic.h>
+#include <net/gre.h>
+#include <net/vxlan.h>
+
+#include "hnae3.h"
+#include "hns3_enet.h"
+
+const char hns3_driver_name[] = "hns3";
+const char hns3_driver_version[] = VERMAGIC_STRING;
+static const char hns3_driver_string[] =
+ "Hisilicon Ethernet Network Driver for Hip08 Family";
+static const char hns3_copyright[] = "Copyright (c) 2017 Huawei Corporation.";
+static struct hnae3_client client;
+
+/* hns3_pci_tbl - PCI Device ID Table
+ *
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ * Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id hns3_pci_tbl[] = {
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+ /* required last entry */
+ {0, }
+};
+MODULE_DEVICE_TABLE(pci, hns3_pci_tbl);
+
+static irqreturn_t hns3_irq_handle(int irq, void *dev)
+{
+ struct hns3_enet_tqp_vector *tqp_vector = dev;
+
+ napi_schedule(&tqp_vector->napi);
+
+ return IRQ_HANDLED;
+}
+
+static void hns3_nic_uninit_irq(struct hns3_nic_priv *priv)
+{
+ struct hns3_enet_tqp_vector *tqp_vectors;
+ unsigned int i;
+
+ for (i = 0; i < priv->vector_num; i++) {
+ tqp_vectors = &priv->tqp_vector[i];
+
+ if (tqp_vectors->irq_init_flag != HNS3_VECTOR_INITED)
+ continue;
+
+ /* release the irq resource */
+ free_irq(tqp_vectors->vector_irq, tqp_vectors);
+ tqp_vectors->irq_init_flag = HNS3_VECTOR_NOT_INITED;
+ }
+}
+
+static int hns3_nic_init_irq(struct hns3_nic_priv *priv)
+{
+ struct hns3_enet_tqp_vector *tqp_vectors;
+ int txrx_int_idx = 0;
+ int rx_int_idx = 0;
+ int tx_int_idx = 0;
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < priv->vector_num; i++) {
+ tqp_vectors = &priv->tqp_vector[i];
+
+ if (tqp_vectors->irq_init_flag == HNS3_VECTOR_INITED)
+ continue;
+
+ if (tqp_vectors->tx_group.ring && tqp_vectors->rx_group.ring) {
+ snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1,
+ "%s-%s-%d", priv->netdev->name, "TxRx",
+ txrx_int_idx++);
+ } else if (tqp_vectors->rx_group.ring) {
+ snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1,
+ "%s-%s-%d", priv->netdev->name, "Rx",
+ rx_int_idx++);
+ } else if (tqp_vectors->tx_group.ring) {
+ snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1,
+ "%s-%s-%d", priv->netdev->name, "Tx",
+ tx_int_idx++);
+ } else {
+ /* Skip this unused q_vector */
+ continue;
+ }
+
+ tqp_vectors->name[HNAE3_INT_NAME_LEN - 1] = '\0';
+
+ ret = request_irq(tqp_vectors->vector_irq, hns3_irq_handle, 0,
+ tqp_vectors->name,
+ tqp_vectors);
+ if (ret) {
+ netdev_err(priv->netdev, "request irq(%d) fail\n",
+ tqp_vectors->vector_irq);
+ return ret;
+ }
+
+ tqp_vectors->irq_init_flag = HNS3_VECTOR_INITED;
+ }
+
+ return 0;
+}
+
+static void hns3_mask_vector_irq(struct hns3_enet_tqp_vector *tqp_vector,
+ u32 mask_en)
+{
+ writel(mask_en, tqp_vector->mask_addr);
+}
+
+static void hns3_vector_enable(struct hns3_enet_tqp_vector *tqp_vector)
+{
+ napi_enable(&tqp_vector->napi);
+
+ /* enable vector */
+ hns3_mask_vector_irq(tqp_vector, 1);
+}
+
+static void hns3_vector_disable(struct hns3_enet_tqp_vector *tqp_vector)
+{
+ /* disable vector */
+ hns3_mask_vector_irq(tqp_vector, 0);
+
+ disable_irq(tqp_vector->vector_irq);
+ napi_disable(&tqp_vector->napi);
+}
+
+static void hns3_set_vector_coalesc_gl(struct hns3_enet_tqp_vector *tqp_vector,
+ u32 gl_value)
+{
+	/* this defines the configuration for GL (Interrupt Gap Limiter)
+	 * GL defines the gap between interrupts
+	 * GL and RL (Rate Limiter) are 2 ways to achieve interrupt coalescing
+ */
+ writel(gl_value, tqp_vector->mask_addr + HNS3_VECTOR_GL0_OFFSET);
+ writel(gl_value, tqp_vector->mask_addr + HNS3_VECTOR_GL1_OFFSET);
+ writel(gl_value, tqp_vector->mask_addr + HNS3_VECTOR_GL2_OFFSET);
+}
+
+static void hns3_set_vector_coalesc_rl(struct hns3_enet_tqp_vector *tqp_vector,
+ u32 rl_value)
+{
+	/* this defines the configuration for RL (Interrupt Rate Limiter).
+	 * RL defines the rate of interrupts, i.e. the number of interrupts per second
+	 * GL and RL (Rate Limiter) are 2 ways to achieve interrupt coalescing
+ */
+ writel(rl_value, tqp_vector->mask_addr + HNS3_VECTOR_RL_OFFSET);
+}
+
+static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector)
+{
+ /* initialize the configuration for interrupt coalescing.
+ * 1. GL (Interrupt Gap Limiter)
+ * 2. RL (Interrupt Rate Limiter)
+ */
+
+	/* Default: enable interrupt coalescing */
+ tqp_vector->rx_group.int_gl = HNS3_INT_GL_50K;
+ tqp_vector->tx_group.int_gl = HNS3_INT_GL_50K;
+ hns3_set_vector_coalesc_gl(tqp_vector, HNS3_INT_GL_50K);
+ /* for now we are disabling Interrupt RL - we
+ * will re-enable later
+ */
+ hns3_set_vector_coalesc_rl(tqp_vector, 0);
+ tqp_vector->rx_group.flow_level = HNS3_FLOW_LOW;
+ tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW;
+}
+
+static int hns3_nic_net_up(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ int i, j;
+ int ret;
+
+ /* get irq resource for all vectors */
+ ret = hns3_nic_init_irq(priv);
+ if (ret) {
+ netdev_err(netdev, "hns init irq failed! ret=%d\n", ret);
+ return ret;
+ }
+
+ /* enable the vectors */
+ for (i = 0; i < priv->vector_num; i++)
+ hns3_vector_enable(&priv->tqp_vector[i]);
+
+ /* start the ae_dev */
+ ret = h->ae_algo->ops->start ? h->ae_algo->ops->start(h) : 0;
+ if (ret)
+ goto out_start_err;
+
+ return 0;
+
+out_start_err:
+ for (j = i - 1; j >= 0; j--)
+ hns3_vector_disable(&priv->tqp_vector[j]);
+
+ hns3_nic_uninit_irq(priv);
+
+ return ret;
+}
+
+static int hns3_nic_net_open(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ int ret;
+
+ netif_carrier_off(netdev);
+
+ ret = netif_set_real_num_tx_queues(netdev, h->kinfo.num_tqps);
+ if (ret) {
+ netdev_err(netdev,
+ "netif_set_real_num_tx_queues fail, ret=%d!\n",
+ ret);
+ return ret;
+ }
+
+ ret = netif_set_real_num_rx_queues(netdev, h->kinfo.num_tqps);
+ if (ret) {
+ netdev_err(netdev,
+ "netif_set_real_num_rx_queues fail, ret=%d!\n", ret);
+ return ret;
+ }
+
+ ret = hns3_nic_net_up(netdev);
+ if (ret) {
+ netdev_err(netdev,
+ "hns net up fail, ret=%d!\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void hns3_nic_net_down(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ const struct hnae3_ae_ops *ops;
+ int i;
+
+ /* stop ae_dev */
+ ops = priv->ae_handle->ae_algo->ops;
+ if (ops->stop)
+ ops->stop(priv->ae_handle);
+
+ /* disable vectors */
+ for (i = 0; i < priv->vector_num; i++)
+ hns3_vector_disable(&priv->tqp_vector[i]);
+
+ /* free irq resources */
+ hns3_nic_uninit_irq(priv);
+}
+
+static int hns3_nic_net_stop(struct net_device *netdev)
+{
+ netif_tx_stop_all_queues(netdev);
+ netif_carrier_off(netdev);
+
+ hns3_nic_net_down(netdev);
+
+ return 0;
+}
+
+void hns3_set_multicast_list(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ struct netdev_hw_addr *ha = NULL;
+
+ if (h->ae_algo->ops->set_mc_addr) {
+ netdev_for_each_mc_addr(ha, netdev)
+ if (h->ae_algo->ops->set_mc_addr(h, ha->addr))
+ netdev_err(netdev, "set multicast fail\n");
+ }
+}
+
+static int hns3_nic_uc_sync(struct net_device *netdev,
+ const unsigned char *addr)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->ae_algo->ops->add_uc_addr)
+ return h->ae_algo->ops->add_uc_addr(h, addr);
+
+ return 0;
+}
+
+static int hns3_nic_uc_unsync(struct net_device *netdev,
+ const unsigned char *addr)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->ae_algo->ops->rm_uc_addr)
+ return h->ae_algo->ops->rm_uc_addr(h, addr);
+
+ return 0;
+}
+
+static int hns3_nic_mc_sync(struct net_device *netdev,
+ const unsigned char *addr)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->ae_algo->ops->add_mc_addr)
+ return h->ae_algo->ops->add_mc_addr(h, addr);
+
+ return 0;
+}
+
+static int hns3_nic_mc_unsync(struct net_device *netdev,
+ const unsigned char *addr)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->ae_algo->ops->rm_mc_addr)
+ return h->ae_algo->ops->rm_mc_addr(h, addr);
+
+ return 0;
+}
+
+void hns3_nic_set_rx_mode(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->ae_algo->ops->set_promisc_mode) {
+ if (netdev->flags & IFF_PROMISC)
+ h->ae_algo->ops->set_promisc_mode(h, 1);
+ else
+ h->ae_algo->ops->set_promisc_mode(h, 0);
+ }
+ if (__dev_uc_sync(netdev, hns3_nic_uc_sync, hns3_nic_uc_unsync))
+ netdev_err(netdev, "sync uc address fail\n");
+ if (netdev->flags & IFF_MULTICAST)
+ if (__dev_mc_sync(netdev, hns3_nic_mc_sync, hns3_nic_mc_unsync))
+ netdev_err(netdev, "sync mc address fail\n");
+}
+
+static int hns3_set_tso(struct sk_buff *skb, u32 *paylen,
+ u16 *mss, u32 *type_cs_vlan_tso)
+{
+ u32 l4_offset, hdr_len;
+ union l3_hdr_info l3;
+ union l4_hdr_info l4;
+ u32 l4_paylen;
+ int ret;
+
+ if (!skb_is_gso(skb))
+ return 0;
+
+ ret = skb_cow_head(skb, 0);
+ if (ret)
+ return ret;
+
+ l3.hdr = skb_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+
+ /* Software should clear the IPv4's checksum field when tso is
+ * needed.
+ */
+ if (l3.v4->version == 4)
+ l3.v4->check = 0;
+
+	/* tunnel packet */
+ if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
+ SKB_GSO_GRE_CSUM |
+ SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM)) {
+ if ((!(skb_shinfo(skb)->gso_type &
+ SKB_GSO_PARTIAL)) &&
+ (skb_shinfo(skb)->gso_type &
+ SKB_GSO_UDP_TUNNEL_CSUM)) {
+ /* Software should clear the udp's checksum
+ * field when tso is needed.
+ */
+ l4.udp->check = 0;
+ }
+ /* reset l3&l4 pointers from outer to inner headers */
+ l3.hdr = skb_inner_network_header(skb);
+ l4.hdr = skb_inner_transport_header(skb);
+
+ /* Software should clear the IPv4's checksum field when
+ * tso is needed.
+ */
+ if (l3.v4->version == 4)
+ l3.v4->check = 0;
+ }
+
+	/* normal or tunnel packet */
+ l4_offset = l4.hdr - skb->data;
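+	/* tcp->doff is the TCP header length in 32-bit words, hence the "* 4" */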
+ hdr_len = (l4.tcp->doff * 4) + l4_offset;
+
+	/* remove the payload length from the inner pseudo checksum when TSO */
+ l4_paylen = skb->len - l4_offset;
+ csum_replace_by_diff(&l4.tcp->check,
+ (__force __wsum)htonl(l4_paylen));
+
+ /* find the txbd field values */
+ *paylen = skb->len - hdr_len;
+ hnae_set_bit(*type_cs_vlan_tso,
+ HNS3_TXD_TSO_B, 1);
+
+ /* get MSS for TSO */
+ *mss = skb_shinfo(skb)->gso_size;
+
+ return 0;
+}
+
+static int hns3_get_l4_protocol(struct sk_buff *skb, u8 *ol4_proto,
+ u8 *il4_proto)
+{
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } l3;
+ unsigned char *l4_hdr;
+ unsigned char *exthdr;
+ u8 l4_proto_tmp;
+ __be16 frag_off;
+
+	/* find the outer header pointer */
+ l3.hdr = skb_network_header(skb);
+ l4_hdr = skb_inner_transport_header(skb);
+
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ exthdr = l3.hdr + sizeof(*l3.v6);
+ l4_proto_tmp = l3.v6->nexthdr;
+ if (l4_hdr != exthdr)
+ ipv6_skip_exthdr(skb, exthdr - skb->data,
+ &l4_proto_tmp, &frag_off);
+ } else if (skb->protocol == htons(ETH_P_IP)) {
+ l4_proto_tmp = l3.v4->protocol;
+ } else {
+ return -EINVAL;
+ }
+
+ *ol4_proto = l4_proto_tmp;
+
+	/* no inner protocol to report for non-tunnel packets */
+ if (!skb->encapsulation) {
+ *il4_proto = 0;
+ return 0;
+ }
+
+	/* find the inner header pointer */
+ l3.hdr = skb_inner_network_header(skb);
+ l4_hdr = skb_inner_transport_header(skb);
+
+ if (l3.v6->version == 6) {
+ exthdr = l3.hdr + sizeof(*l3.v6);
+ l4_proto_tmp = l3.v6->nexthdr;
+ if (l4_hdr != exthdr)
+ ipv6_skip_exthdr(skb, exthdr - skb->data,
+ &l4_proto_tmp, &frag_off);
+ } else if (l3.v4->version == 4) {
+ l4_proto_tmp = l3.v4->protocol;
+ }
+
+ *il4_proto = l4_proto_tmp;
+
+ return 0;
+}
+
+static void hns3_set_l2l3l4_len(struct sk_buff *skb, u8 ol4_proto,
+ u8 il4_proto, u32 *type_cs_vlan_tso,
+ u32 *ol_type_vlan_len_msec)
+{
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } l3;
+ union {
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ struct gre_base_hdr *gre;
+ unsigned char *hdr;
+ } l4;
+ unsigned char *l2_hdr;
+ u8 l4_proto = ol4_proto;
+ u32 ol2_len;
+ u32 ol3_len;
+ u32 ol4_len;
+ u32 l2_len;
+ u32 l3_len;
+
+ l3.hdr = skb_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+
+	/* compute L2 header size for normal packet, in units of 2 bytes */
+ l2_len = l3.hdr - skb->data;
+ hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L2LEN_M,
+ HNS3_TXD_L2LEN_S, l2_len >> 1);
+
+	/* tunnel packet */
+	if (skb->encapsulation) {
+		/* compute OL2 header size, in units of 2 bytes */
+		ol2_len = l2_len;
+		hnae_set_field(*ol_type_vlan_len_msec,
+			       HNS3_TXD_L2LEN_M,
+			       HNS3_TXD_L2LEN_S, ol2_len >> 1);
+
+		/* compute OL3 header size, in units of 4 bytes */
+ ol3_len = l4.hdr - l3.hdr;
+ hnae_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L3LEN_M,
+ HNS3_TXD_L3LEN_S, ol3_len >> 2);
+
+		/* MAC in UDP, MAC in GRE (0x6558) */
+		if ((ol4_proto == IPPROTO_UDP) || (ol4_proto == IPPROTO_GRE)) {
+			/* switch MAC header pointer from outer to inner */
+			l2_hdr = skb_inner_mac_header(skb);
+
+			/* compute OL4 header size, in units of 4 bytes */
+			ol4_len = l2_hdr - l4.hdr;
+			hnae_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L4LEN_M,
+				       HNS3_TXD_L4LEN_S, ol4_len >> 2);
+
+			/* switch IP header pointer from outer to inner */
+			l3.hdr = skb_inner_network_header(skb);
+
+			/* compute inner l2 header size, in units of 2 bytes */
+			l2_len = l3.hdr - l2_hdr;
+			hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L2LEN_M,
+				       HNS3_TXD_L2LEN_S, l2_len >> 1);
+ } else {
+			/* skb packet types not supported by hardware,
+			 * so the txbd len field is left unfilled
+ */
+ return;
+ }
+
+ /* switch L4 header pointer from outer to inner */
+ l4.hdr = skb_inner_transport_header(skb);
+
+ l4_proto = il4_proto;
+ }
+
+	/* compute inner (or normal) L3 header size, in units of 4 bytes */
+ l3_len = l4.hdr - l3.hdr;
+ hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L3LEN_M,
+ HNS3_TXD_L3LEN_S, l3_len >> 2);
+
+	/* compute inner (or normal) L4 header size, in units of 4 bytes */
+ switch (l4_proto) {
+ case IPPROTO_TCP:
+ hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
+ HNS3_TXD_L4LEN_S, l4.tcp->doff);
+ break;
+ case IPPROTO_SCTP:
+ hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
+ HNS3_TXD_L4LEN_S, (sizeof(struct sctphdr) >> 2));
+ break;
+ case IPPROTO_UDP:
+ hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L4LEN_M,
+ HNS3_TXD_L4LEN_S, (sizeof(struct udphdr) >> 2));
+ break;
+ default:
+		/* skb packet types not supported by hardware,
+		 * so the txbd len field is left unfilled
+ */
+ return;
+ }
+}
+
+static int hns3_set_l3l4_type_csum(struct sk_buff *skb, u8 ol4_proto,
+ u8 il4_proto, u32 *type_cs_vlan_tso,
+ u32 *ol_type_vlan_len_msec)
+{
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } l3;
+ u32 l4_proto = ol4_proto;
+
+ l3.hdr = skb_network_header(skb);
+
+	/* define OL3 type and tunnel type (OL4) */
+	if (skb->encapsulation) {
+		/* define outer network header type */
+ if (skb->protocol == htons(ETH_P_IP)) {
+ if (skb_is_gso(skb))
+ hnae_set_field(*ol_type_vlan_len_msec,
+ HNS3_TXD_OL3T_M, HNS3_TXD_OL3T_S,
+ HNS3_OL3T_IPV4_CSUM);
+ else
+ hnae_set_field(*ol_type_vlan_len_msec,
+ HNS3_TXD_OL3T_M, HNS3_TXD_OL3T_S,
+ HNS3_OL3T_IPV4_NO_CSUM);
+
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ hnae_set_field(*ol_type_vlan_len_msec, HNS3_TXD_OL3T_M,
+ HNS3_TXD_OL3T_S, HNS3_OL3T_IPV6);
+ }
+
+		/* define tunnel type (OL4) */
+ switch (l4_proto) {
+ case IPPROTO_UDP:
+ hnae_set_field(*ol_type_vlan_len_msec,
+ HNS3_TXD_TUNTYPE_M,
+ HNS3_TXD_TUNTYPE_S,
+ HNS3_TUN_MAC_IN_UDP);
+ break;
+ case IPPROTO_GRE:
+ hnae_set_field(*ol_type_vlan_len_msec,
+ HNS3_TXD_TUNTYPE_M,
+ HNS3_TXD_TUNTYPE_S,
+ HNS3_TUN_NVGRE);
+ break;
+ default:
+			/* drop the tunnel packet if hardware doesn't support it,
+			 * because hardware can't calculate the csum for TSO
+ */
+ if (skb_is_gso(skb))
+ return -EDOM;
+
+			/* the stack computes the IP header already,
+			 * so fall back to software csum when not doing TSO
+ */
+ skb_checksum_help(skb);
+ return 0;
+ }
+
+ l3.hdr = skb_inner_network_header(skb);
+ l4_proto = il4_proto;
+ }
+
+ if (l3.v4->version == 4) {
+ hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_M,
+ HNS3_TXD_L3T_S, HNS3_L3T_IPV4);
+
+ /* the stack computes the IP header already, the only time we
+ * need the hardware to recompute it is in the case of TSO.
+ */
+ if (skb_is_gso(skb))
+ hnae_set_bit(*type_cs_vlan_tso, HNS3_TXD_L3CS_B, 1);
+
+ hnae_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+ } else if (l3.v6->version == 6) {
+ hnae_set_field(*type_cs_vlan_tso, HNS3_TXD_L3T_M,
+ HNS3_TXD_L3T_S, HNS3_L3T_IPV6);
+ hnae_set_bit(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+ }
+
+ switch (l4_proto) {
+ case IPPROTO_TCP:
+ hnae_set_field(*type_cs_vlan_tso,
+ HNS3_TXD_L4T_M,
+ HNS3_TXD_L4T_S,
+ HNS3_L4T_TCP);
+ break;
+ case IPPROTO_UDP:
+ hnae_set_field(*type_cs_vlan_tso,
+ HNS3_TXD_L4T_M,
+ HNS3_TXD_L4T_S,
+ HNS3_L4T_UDP);
+ break;
+ case IPPROTO_SCTP:
+ hnae_set_field(*type_cs_vlan_tso,
+ HNS3_TXD_L4T_M,
+ HNS3_TXD_L4T_S,
+ HNS3_L4T_SCTP);
+ break;
+ default:
+		/* drop the tunnel packet if hardware doesn't support it,
+		 * because hardware can't calculate the csum for TSO
+ */
+ if (skb_is_gso(skb))
+ return -EDOM;
+
+		/* the stack computes the IP header already,
+		 * so fall back to software csum when not doing TSO
+ */
+ skb_checksum_help(skb);
+ return 0;
+ }
+
+ return 0;
+}
+
+static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end)
+{
+ /* Config bd buffer end */
+	hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_BDTYPE_M,
+		       HNS3_TXD_BDTYPE_S, 0);
+ hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end);
+ hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1);
+ hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 1);
+}
+
+static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
+ int size, dma_addr_t dma, int frag_end,
+ enum hns_desc_type type)
+{
+ struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
+ struct hns3_desc *desc = &ring->desc[ring->next_to_use];
+ u32 ol_type_vlan_len_msec = 0;
+ u16 bdtp_fe_sc_vld_ra_ri = 0;
+ u32 type_cs_vlan_tso = 0;
+ struct sk_buff *skb;
+ u32 paylen = 0;
+ u16 mss = 0;
+ __be16 protocol;
+ u8 ol4_proto;
+ u8 il4_proto;
+ int ret;
+
+ /* The txbd's baseinfo of DESC_TYPE_PAGE & DESC_TYPE_SKB */
+ desc_cb->priv = priv;
+ desc_cb->length = size;
+ desc_cb->dma = dma;
+ desc_cb->type = type;
+
+ /* now, fill the descriptor */
+ desc->addr = cpu_to_le64(dma);
+ desc->tx.send_size = cpu_to_le16((u16)size);
+ hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end);
+ desc->tx.bdtp_fe_sc_vld_ra_ri = cpu_to_le16(bdtp_fe_sc_vld_ra_ri);
+
+ if (type == DESC_TYPE_SKB) {
+ skb = (struct sk_buff *)priv;
+		paylen = skb->len;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ skb_reset_mac_len(skb);
+ protocol = skb->protocol;
+
+			/* vlan packet */
+ if (protocol == htons(ETH_P_8021Q)) {
+ protocol = vlan_get_protocol(skb);
+ skb->protocol = protocol;
+ }
+ ret = hns3_get_l4_protocol(skb, &ol4_proto, &il4_proto);
+ if (ret)
+ return ret;
+ hns3_set_l2l3l4_len(skb, ol4_proto, il4_proto,
+ &type_cs_vlan_tso,
+ &ol_type_vlan_len_msec);
+ ret = hns3_set_l3l4_type_csum(skb, ol4_proto, il4_proto,
+ &type_cs_vlan_tso,
+ &ol_type_vlan_len_msec);
+ if (ret)
+ return ret;
+
+ ret = hns3_set_tso(skb, &paylen, &mss,
+ &type_cs_vlan_tso);
+ if (ret)
+ return ret;
+ }
+
+ /* Set txbd */
+ desc->tx.ol_type_vlan_len_msec =
+ cpu_to_le32(ol_type_vlan_len_msec);
+ desc->tx.type_cs_vlan_tso_len =
+ cpu_to_le32(type_cs_vlan_tso);
+ desc->tx.paylen = cpu_to_le16(paylen);
+ desc->tx.mss = cpu_to_le16(mss);
+ }
+
+	/* move ring pointer to next */
+ ring_ptr_move_fw(ring, next_to_use);
+
+ return 0;
+}
+
+static int hns3_fill_desc_tso(struct hns3_enet_ring *ring, void *priv,
+ int size, dma_addr_t dma, int frag_end,
+ enum hns_desc_type type)
+{
+ unsigned int frag_buf_num;
+ unsigned int k;
+ int sizeoflast;
+ int ret;
+
+ frag_buf_num = (size + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE;
+ sizeoflast = size % HNS3_MAX_BD_SIZE;
+ sizeoflast = sizeoflast ? sizeoflast : HNS3_MAX_BD_SIZE;
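+	/* sizeoflast is the length of the last, possibly partial, BD */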
+
+ /* When the frag size is bigger than hardware, split this frag */
+ for (k = 0; k < frag_buf_num; k++) {
+ ret = hns3_fill_desc(ring, priv,
+ (k == frag_buf_num - 1) ?
+ sizeoflast : HNS3_MAX_BD_SIZE,
+ dma + HNS3_MAX_BD_SIZE * k,
+ frag_end && (k == frag_buf_num - 1) ? 1 : 0,
+ (type == DESC_TYPE_SKB && !k) ?
+ DESC_TYPE_SKB : DESC_TYPE_PAGE);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hns3_nic_maybe_stop_tso(struct sk_buff **out_skb, int *bnum,
+ struct hns3_enet_ring *ring)
+{
+ struct sk_buff *skb = *out_skb;
+ struct skb_frag_struct *frag;
+ int bdnum_for_frag;
+ int frag_num;
+ int buf_num;
+ int size;
+ int i;
+
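+	/* BDs needed for the linear part of the skb */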
+ size = skb_headlen(skb);
+ buf_num = (size + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE;
+
+ frag_num = skb_shinfo(skb)->nr_frags;
+ for (i = 0; i < frag_num; i++) {
+ frag = &skb_shinfo(skb)->frags[i];
+ size = skb_frag_size(frag);
+ bdnum_for_frag =
+ (size + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE;
+ if (bdnum_for_frag > HNS3_MAX_BD_PER_FRAG)
+ return -ENOMEM;
+
+ buf_num += bdnum_for_frag;
+ }
+
+ if (buf_num > ring_space(ring))
+ return -EBUSY;
+
+ *bnum = buf_num;
+ return 0;
+}
+
+static int hns3_nic_maybe_stop_tx(struct sk_buff **out_skb, int *bnum,
+ struct hns3_enet_ring *ring)
+{
+ struct sk_buff *skb = *out_skb;
+ int buf_num;
+
+	/* one BD for the linear part plus one per fragment */
+ buf_num = skb_shinfo(skb)->nr_frags + 1;
+
+ if (buf_num > ring_space(ring))
+ return -EBUSY;
+
+ *bnum = buf_num;
+
+ return 0;
+}
+
+static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig)
+{
+ struct device *dev = ring_to_dev(ring);
+ unsigned int i;
+
+ for (i = 0; i < ring->desc_num; i++) {
+ /* check if this is where we started */
+ if (ring->next_to_use == next_to_use_orig)
+ break;
+
+ /* unmap the descriptor dma address */
+ if (ring->desc_cb[ring->next_to_use].type == DESC_TYPE_SKB)
+ dma_unmap_single(dev,
+ ring->desc_cb[ring->next_to_use].dma,
+ ring->desc_cb[ring->next_to_use].length,
+ DMA_TO_DEVICE);
+ else
+ dma_unmap_page(dev,
+ ring->desc_cb[ring->next_to_use].dma,
+ ring->desc_cb[ring->next_to_use].length,
+ DMA_TO_DEVICE);
+
+ /* rollback one */
+ ring_ptr_move_bw(ring, next_to_use);
+ }
+}
+
+static netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hns3_nic_ring_data *ring_data =
+ &tx_ring_data(priv, skb->queue_mapping);
+ struct hns3_enet_ring *ring = ring_data->ring;
+ struct device *dev = priv->dev;
+ struct netdev_queue *dev_queue;
+ struct skb_frag_struct *frag;
+ int next_to_use_head;
+ int next_to_use_frag;
+ dma_addr_t dma;
+ int buf_num;
+ int seg_num;
+ int size;
+ int ret;
+ int i;
+
+ /* Prefetch the data used later */
+ prefetch(skb->data);
+
+ switch (priv->ops.maybe_stop_tx(&skb, &buf_num, ring)) {
+ case -EBUSY:
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.tx_busy++;
+ u64_stats_update_end(&ring->syncp);
+
+ goto out_net_tx_busy;
+ case -ENOMEM:
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.sw_err_cnt++;
+ u64_stats_update_end(&ring->syncp);
+ netdev_err(netdev, "no memory to xmit!\n");
+
+ goto out_err_tx_ok;
+ default:
+ break;
+ }
+
+	/* one segment for the linear part plus one per fragment */
+ seg_num = skb_shinfo(skb)->nr_frags + 1;
+ /* Fill the first part */
+ size = skb_headlen(skb);
+
+ next_to_use_head = ring->next_to_use;
+
+ dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma)) {
+ netdev_err(netdev, "TX head DMA map failed\n");
+ ring->stats.sw_err_cnt++;
+ goto out_err_tx_ok;
+ }
+
+ ret = priv->ops.fill_desc(ring, skb, size, dma, seg_num == 1 ? 1 : 0,
+ DESC_TYPE_SKB);
+ if (ret)
+ goto head_dma_map_err;
+
+ next_to_use_frag = ring->next_to_use;
+ /* Fill the fragments */
+ for (i = 1; i < seg_num; i++) {
+ frag = &skb_shinfo(skb)->frags[i - 1];
+ size = skb_frag_size(frag);
+ dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma)) {
+ netdev_err(netdev, "TX frag(%d) DMA map failed\n", i);
+ ring->stats.sw_err_cnt++;
+ goto frag_dma_map_err;
+ }
+ ret = priv->ops.fill_desc(ring, skb_frag_page(frag), size, dma,
+ seg_num - 1 == i ? 1 : 0,
+ DESC_TYPE_PAGE);
+
+ if (ret)
+ goto frag_dma_map_err;
+ }
+
+	/* all fragments have been translated into descriptors */
+ dev_queue = netdev_get_tx_queue(netdev, ring_data->queue_index);
+ netdev_tx_sent_queue(dev_queue, skb->len);
+
+ wmb(); /* Commit all data before submit */
+
+ hnae_queue_xmit(ring->tqp, buf_num);
+
+ return NETDEV_TX_OK;
+
+frag_dma_map_err:
+ hns_nic_dma_unmap(ring, next_to_use_frag);
+
+head_dma_map_err:
+ hns_nic_dma_unmap(ring, next_to_use_head);
+
+out_err_tx_ok:
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+
+out_net_tx_busy:
+ netif_stop_subqueue(netdev, ring_data->queue_index);
+	smp_mb(); /* pairs with the smp_mb() in hns3_clean_tx_ring */
+
+ return NETDEV_TX_BUSY;
+}
+
+static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ struct sockaddr *mac_addr = p;
+ int ret;
+
+ if (!mac_addr || !is_valid_ether_addr((const u8 *)mac_addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ ret = h->ae_algo->ops->set_mac_addr(h, mac_addr->sa_data);
+ if (ret) {
+ netdev_err(netdev, "set_mac_address fail, ret=%d!\n", ret);
+ return ret;
+ }
+
+ ether_addr_copy(netdev->dev_addr, mac_addr->sa_data);
+
+ return 0;
+}
+
+static int hns3_nic_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+
+ if (features & (NETIF_F_TSO | NETIF_F_TSO6)) {
+ priv->ops.fill_desc = hns3_fill_desc_tso;
+ priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tso;
+ } else {
+ priv->ops.fill_desc = hns3_fill_desc;
+ priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx;
+ }
+
+ netdev->features = features;
+ return 0;
+}
+
+static void
+hns3_nic_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ int queue_num = priv->ae_handle->kinfo.num_tqps;
+ struct hns3_enet_ring *ring;
+ unsigned int start;
+ unsigned int idx;
+ u64 tx_bytes = 0;
+ u64 rx_bytes = 0;
+ u64 tx_pkts = 0;
+ u64 rx_pkts = 0;
+
+ for (idx = 0; idx < queue_num; idx++) {
+ /* fetch the tx stats */
+ ring = priv->ring_data[idx].ring;
+ do {
+ start = u64_stats_fetch_begin_irq(&ring->syncp);
+ tx_bytes += ring->stats.tx_bytes;
+ tx_pkts += ring->stats.tx_pkts;
+ } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+
+ /* fetch the rx stats */
+ ring = priv->ring_data[idx + queue_num].ring;
+ do {
+ start = u64_stats_fetch_begin_irq(&ring->syncp);
+ rx_bytes += ring->stats.rx_bytes;
+ rx_pkts += ring->stats.rx_pkts;
+ } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+ }
+
+ stats->tx_bytes = tx_bytes;
+ stats->tx_packets = tx_pkts;
+ stats->rx_bytes = rx_bytes;
+ stats->rx_packets = rx_pkts;
+
+ stats->rx_errors = netdev->stats.rx_errors;
+ stats->multicast = netdev->stats.multicast;
+ stats->rx_length_errors = netdev->stats.rx_length_errors;
+ stats->rx_crc_errors = netdev->stats.rx_crc_errors;
+ stats->rx_missed_errors = netdev->stats.rx_missed_errors;
+
+ stats->tx_errors = netdev->stats.tx_errors;
+ stats->rx_dropped = netdev->stats.rx_dropped;
+ stats->tx_dropped = netdev->stats.tx_dropped;
+ stats->collisions = netdev->stats.collisions;
+ stats->rx_over_errors = netdev->stats.rx_over_errors;
+ stats->rx_frame_errors = netdev->stats.rx_frame_errors;
+ stats->rx_fifo_errors = netdev->stats.rx_fifo_errors;
+ stats->tx_aborted_errors = netdev->stats.tx_aborted_errors;
+ stats->tx_carrier_errors = netdev->stats.tx_carrier_errors;
+ stats->tx_fifo_errors = netdev->stats.tx_fifo_errors;
+ stats->tx_heartbeat_errors = netdev->stats.tx_heartbeat_errors;
+ stats->tx_window_errors = netdev->stats.tx_window_errors;
+ stats->rx_compressed = netdev->stats.rx_compressed;
+ stats->tx_compressed = netdev->stats.tx_compressed;
+}
+
+static void hns3_add_tunnel_port(struct net_device *netdev, u16 port,
+ enum hns3_udp_tnl_type type)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hns3_udp_tunnel *udp_tnl = &priv->udp_tnl[type];
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (udp_tnl->used && udp_tnl->dst_port == port) {
+ udp_tnl->used++;
+ return;
+ }
+
+ if (udp_tnl->used) {
+		netdev_warn(netdev,
+			    "UDP tunnel [%d] already offloads a port, can't add port [%d]\n",
+			    type, port);
+ return;
+ }
+
+ udp_tnl->dst_port = port;
+ udp_tnl->used = 1;
+ /* TBD send command to hardware to add port */
+ if (h->ae_algo->ops->add_tunnel_udp)
+ h->ae_algo->ops->add_tunnel_udp(h, port);
+}
+
+static void hns3_del_tunnel_port(struct net_device *netdev, u16 port,
+ enum hns3_udp_tnl_type type)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hns3_udp_tunnel *udp_tnl = &priv->udp_tnl[type];
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (!udp_tnl->used || udp_tnl->dst_port != port) {
+ netdev_warn(netdev,
+ "Invalid UDP tunnel port %d\n", port);
+ return;
+ }
+
+ udp_tnl->used--;
+ if (udp_tnl->used)
+ return;
+
+ udp_tnl->dst_port = 0;
+ /* TBD send command to hardware to del port */
+ if (h->ae_algo->ops->del_tunnel_udp)
+ h->ae_algo->ops->del_tunnel_udp(h, port);
+}
+
+/* hns3_nic_udp_tunnel_add - Get notification about UDP tunnel ports
+ * @netdev: This physical port's netdev
+ * @ti: Tunnel information
+ */
+static void hns3_nic_udp_tunnel_add(struct net_device *netdev,
+ struct udp_tunnel_info *ti)
+{
+ u16 port_n = ntohs(ti->port);
+
+ switch (ti->type) {
+ case UDP_TUNNEL_TYPE_VXLAN:
+ hns3_add_tunnel_port(netdev, port_n, HNS3_UDP_TNL_VXLAN);
+ break;
+ case UDP_TUNNEL_TYPE_GENEVE:
+ hns3_add_tunnel_port(netdev, port_n, HNS3_UDP_TNL_GENEVE);
+ break;
+ default:
+ netdev_err(netdev, "unsupported tunnel type %d\n", ti->type);
+ break;
+ }
+}
+
+static void hns3_nic_udp_tunnel_del(struct net_device *netdev,
+ struct udp_tunnel_info *ti)
+{
+ u16 port_n = ntohs(ti->port);
+
+ switch (ti->type) {
+ case UDP_TUNNEL_TYPE_VXLAN:
+ hns3_del_tunnel_port(netdev, port_n, HNS3_UDP_TNL_VXLAN);
+ break;
+ case UDP_TUNNEL_TYPE_GENEVE:
+ hns3_del_tunnel_port(netdev, port_n, HNS3_UDP_TNL_GENEVE);
+ break;
+ default:
+ break;
+ }
+}
+
+static int hns3_setup_tc(struct net_device *netdev, u8 tc)
+{
+	struct hns3_nic_priv *priv;
+	struct hnae3_knic_private_info *kinfo;
+	struct hnae3_handle *h;
+	unsigned int i;
+	int ret;
+
+	if (!netdev || tc > HNAE3_MAX_TC)
+		return -EINVAL;
+
+	priv = netdev_priv(netdev);
+	h = priv->ae_handle;
+	kinfo = &h->kinfo;
+
+	if (kinfo->num_tc == tc)
+		return 0;
+
+ if (!tc) {
+ netdev_reset_tc(netdev);
+ return 0;
+ }
+
+ /* Set num_tc for netdev */
+ ret = netdev_set_num_tc(netdev, tc);
+ if (ret)
+ return ret;
+
+ /* Set per TC queues for the VSI */
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ if (kinfo->tc_info[i].enable)
+ netdev_set_tc_queue(netdev,
+ kinfo->tc_info[i].tc,
+ kinfo->tc_info[i].tqp_count,
+ kinfo->tc_info[i].tqp_offset);
+ }
+
+ return 0;
+}
+
+static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct tc_mqprio_qopt *mqprio = type_data;
+
+ if (type != TC_SETUP_MQPRIO)
+ return -EOPNOTSUPP;
+
+ return hns3_setup_tc(dev, mqprio->num_tc);
+}
+
+static int hns3_vlan_rx_add_vid(struct net_device *netdev,
+ __be16 proto, u16 vid)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ int ret = -EIO;
+
+ if (h->ae_algo->ops->set_vlan_filter)
+ ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, false);
+
+ return ret;
+}
+
+static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
+ __be16 proto, u16 vid)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ int ret = -EIO;
+
+ if (h->ae_algo->ops->set_vlan_filter)
+ ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, true);
+
+ return ret;
+}
+
+static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
+ u8 qos, __be16 vlan_proto)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ int ret = -EIO;
+
+ if (h->ae_algo->ops->set_vf_vlan_filter)
+ ret = h->ae_algo->ops->set_vf_vlan_filter(h, vf, vlan,
+ qos, vlan_proto);
+
+ return ret;
+}
+
+static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ bool if_running = netif_running(netdev);
+ int ret;
+
+ if (!h->ae_algo->ops->set_mtu)
+ return -EOPNOTSUPP;
+
+ /* if this was called with netdev up then bring netdevice down */
+ if (if_running) {
+ (void)hns3_nic_net_stop(netdev);
+ msleep(100);
+ }
+
+ ret = h->ae_algo->ops->set_mtu(h, new_mtu);
+ if (ret) {
+ netdev_err(netdev, "failed to change MTU in hardware %d\n",
+ ret);
+ return ret;
+ }
+
+ /* if the netdev was running earlier, bring it up again */
+ if (if_running && hns3_nic_net_open(netdev))
+ ret = -EINVAL;
+
+ return ret;
+}
+
+static const struct net_device_ops hns3_nic_netdev_ops = {
+ .ndo_open = hns3_nic_net_open,
+ .ndo_stop = hns3_nic_net_stop,
+ .ndo_start_xmit = hns3_nic_net_xmit,
+ .ndo_set_mac_address = hns3_nic_net_set_mac_address,
+ .ndo_change_mtu = hns3_nic_change_mtu,
+ .ndo_set_features = hns3_nic_set_features,
+ .ndo_get_stats64 = hns3_nic_get_stats64,
+ .ndo_setup_tc = hns3_nic_setup_tc,
+ .ndo_set_rx_mode = hns3_nic_set_rx_mode,
+ .ndo_udp_tunnel_add = hns3_nic_udp_tunnel_add,
+ .ndo_udp_tunnel_del = hns3_nic_udp_tunnel_del,
+ .ndo_vlan_rx_add_vid = hns3_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = hns3_vlan_rx_kill_vid,
+ .ndo_set_vf_vlan = hns3_ndo_set_vf_vlan,
+};
+
+/* hns3_probe - Device initialization routine
+ * @pdev: PCI device information struct
+ * @ent: entry in hns3_pci_tbl
+ *
+ * hns3_probe initializes a PF identified by a pci_dev structure.
+ * The OS initialization, configuring of the PF private structure,
+ * and a hardware reset occur.
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct hnae3_ae_dev *ae_dev;
+
+	ae_dev = devm_kzalloc(&pdev->dev, sizeof(*ae_dev), GFP_KERNEL);
+	if (!ae_dev)
+		return -ENOMEM;
+
+ ae_dev->pdev = pdev;
+ ae_dev->dev_type = HNAE3_DEV_KNIC;
+ pci_set_drvdata(pdev, ae_dev);
+
+ return hnae3_register_ae_dev(ae_dev);
+}
+
+/* hns3_remove - Device removal routine
+ * @pdev: PCI device information struct
+ */
+static void hns3_remove(struct pci_dev *pdev)
+{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+
+ hnae3_unregister_ae_dev(ae_dev);
+
+ devm_kfree(&pdev->dev, ae_dev);
+
+ pci_set_drvdata(pdev, NULL);
+}
+
+static struct pci_driver hns3_driver = {
+ .name = hns3_driver_name,
+ .id_table = hns3_pci_tbl,
+ .probe = hns3_probe,
+ .remove = hns3_remove,
+};
+
+/* set default feature to hns3 */
+static void hns3_set_default_feature(struct net_device *netdev)
+{
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+
+ netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+ NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
+ NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+ netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
+
+ netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
+
+ netdev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+ NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
+ NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+ netdev->vlan_features |=
+ NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
+ NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO |
+ NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+ netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+ NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
+ NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+}
+
+static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
+ struct hns3_desc_cb *cb)
+{
+ unsigned int order = hnae_page_order(ring);
+ struct page *p;
+
+ p = dev_alloc_pages(order);
+ if (!p)
+ return -ENOMEM;
+
+ cb->priv = p;
+ cb->page_offset = 0;
+ cb->reuse_flag = 0;
+ cb->buf = page_address(p);
+ cb->length = hnae_page_size(ring);
+ cb->type = DESC_TYPE_PAGE;
+
+ memset(cb->buf, 0, cb->length);
+
+ return 0;
+}
+
+static void hns3_free_buffer(struct hns3_enet_ring *ring,
+ struct hns3_desc_cb *cb)
+{
+ if (cb->type == DESC_TYPE_SKB)
+ dev_kfree_skb_any((struct sk_buff *)cb->priv);
+ else if (!HNAE3_IS_TX_RING(ring))
+ put_page((struct page *)cb->priv);
+ memset(cb, 0, sizeof(*cb));
+}
+
+static int hns3_map_buffer(struct hns3_enet_ring *ring, struct hns3_desc_cb *cb)
+{
+ cb->dma = dma_map_page(ring_to_dev(ring), cb->priv, 0,
+ cb->length, ring_to_dma_dir(ring));
+
+ if (dma_mapping_error(ring_to_dev(ring), cb->dma))
+ return -EIO;
+
+ return 0;
+}
+
+static void hns3_unmap_buffer(struct hns3_enet_ring *ring,
+ struct hns3_desc_cb *cb)
+{
+ if (cb->type == DESC_TYPE_SKB)
+ dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length,
+ ring_to_dma_dir(ring));
+ else
+ dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length,
+ ring_to_dma_dir(ring));
+}
+
+static void hns3_buffer_detach(struct hns3_enet_ring *ring, int i)
+{
+ hns3_unmap_buffer(ring, &ring->desc_cb[i]);
+ ring->desc[i].addr = 0;
+}
+
+static void hns3_free_buffer_detach(struct hns3_enet_ring *ring, int i)
+{
+ struct hns3_desc_cb *cb = &ring->desc_cb[i];
+
+ if (!ring->desc_cb[i].dma)
+ return;
+
+ hns3_buffer_detach(ring, i);
+ hns3_free_buffer(ring, cb);
+}
+
+static void hns3_free_buffers(struct hns3_enet_ring *ring)
+{
+ int i;
+
+ for (i = 0; i < ring->desc_num; i++)
+ hns3_free_buffer_detach(ring, i);
+}
+
+/* free desc along with its attached buffer */
+static void hns3_free_desc(struct hns3_enet_ring *ring)
+{
+ hns3_free_buffers(ring);
+
+ dma_unmap_single(ring_to_dev(ring), ring->desc_dma_addr,
+ ring->desc_num * sizeof(ring->desc[0]),
+ DMA_BIDIRECTIONAL);
+ ring->desc_dma_addr = 0;
+ kfree(ring->desc);
+ ring->desc = NULL;
+}
+
+static int hns3_alloc_desc(struct hns3_enet_ring *ring)
+{
+ int size = ring->desc_num * sizeof(ring->desc[0]);
+
+ ring->desc = kzalloc(size, GFP_KERNEL);
+ if (!ring->desc)
+ return -ENOMEM;
+
+ ring->desc_dma_addr = dma_map_single(ring_to_dev(ring), ring->desc,
+ size, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(ring_to_dev(ring), ring->desc_dma_addr)) {
+ ring->desc_dma_addr = 0;
+ kfree(ring->desc);
+ ring->desc = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int hns3_reserve_buffer_map(struct hns3_enet_ring *ring,
+ struct hns3_desc_cb *cb)
+{
+ int ret;
+
+ ret = hns3_alloc_buffer(ring, cb);
+ if (ret)
+ goto out;
+
+ ret = hns3_map_buffer(ring, cb);
+ if (ret)
+ goto out_with_buf;
+
+ return 0;
+
+out_with_buf:
+	hns3_free_buffer(ring, cb);
+out:
+ return ret;
+}
+
+static int hns3_alloc_buffer_attach(struct hns3_enet_ring *ring, int i)
+{
+ int ret = hns3_reserve_buffer_map(ring, &ring->desc_cb[i]);
+
+ if (ret)
+ return ret;
+
+ ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+
+ return 0;
+}
+
+/* Allocate memory for raw packets, and map them for DMA */
+static int hns3_alloc_ring_buffers(struct hns3_enet_ring *ring)
+{
+ int i, j, ret;
+
+ for (i = 0; i < ring->desc_num; i++) {
+ ret = hns3_alloc_buffer_attach(ring, i);
+ if (ret)
+ goto out_buffer_fail;
+ }
+
+ return 0;
+
+out_buffer_fail:
+ for (j = i - 1; j >= 0; j--)
+ hns3_free_buffer_detach(ring, j);
+ return ret;
+}
+
+/* detach an in-use buffer and replace it with a reserved one */
+static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i,
+ struct hns3_desc_cb *res_cb)
+{
+	hns3_unmap_buffer(ring, &ring->desc_cb[i]);
+ ring->desc_cb[i] = *res_cb;
+ ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+}
+
+static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i)
+{
+ ring->desc_cb[i].reuse_flag = 0;
+ ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma
+ + ring->desc_cb[i].page_offset);
+}
+
+static void hns3_nic_reclaim_one_desc(struct hns3_enet_ring *ring, int *bytes,
+ int *pkts)
+{
+ struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_clean];
+
+ (*pkts) += (desc_cb->type == DESC_TYPE_SKB);
+ (*bytes) += desc_cb->length;
+	/* desc_cb will be cleaned by hns3_free_buffer_detach */
+ hns3_free_buffer_detach(ring, ring->next_to_clean);
+
+ ring_ptr_move_fw(ring, next_to_clean);
+}
+
+static int is_valid_clean_head(struct hns3_enet_ring *ring, int h)
+{
+ int u = ring->next_to_use;
+ int c = ring->next_to_clean;
+
+ if (unlikely(h > ring->desc_num))
+ return 0;
+
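+	/* the new head is valid only if it lies in the half-open
+	 * interval (next_to_clean, next_to_use], modulo ring wrap
+	 */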
+ return u > c ? (h > c && h <= u) : (h > c || h <= u);
+}
+
+int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget)
+{
+ struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+ struct netdev_queue *dev_queue;
+ int bytes, pkts;
+ int head;
+
+ head = readl_relaxed(ring->tqp->io_base + HNS3_RING_TX_RING_HEAD_REG);
+ rmb(); /* Make sure head is ready before touch any data */
+
+ if (is_ring_empty(ring) || head == ring->next_to_clean)
+ return 0; /* no data to poll */
+
+ if (!is_valid_clean_head(ring, head)) {
+ netdev_err(netdev, "wrong head (%d, %d-%d)\n", head,
+ ring->next_to_use, ring->next_to_clean);
+
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.io_err_cnt++;
+ u64_stats_update_end(&ring->syncp);
+ return -EIO;
+ }
+
+ bytes = 0;
+ pkts = 0;
+ while (head != ring->next_to_clean && budget) {
+ hns3_nic_reclaim_one_desc(ring, &bytes, &pkts);
+ /* Issue prefetch for next Tx descriptor */
+ prefetch(&ring->desc_cb[ring->next_to_clean]);
+ budget--;
+ }
+
+ ring->tqp_vector->tx_group.total_bytes += bytes;
+ ring->tqp_vector->tx_group.total_packets += pkts;
+
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.tx_bytes += bytes;
+ ring->stats.tx_pkts += pkts;
+ u64_stats_update_end(&ring->syncp);
+
+ dev_queue = netdev_get_tx_queue(netdev, ring->tqp->tqp_index);
+ netdev_tx_completed_queue(dev_queue, pkts, bytes);
+
+ if (unlikely(pkts && netif_carrier_ok(netdev) &&
+ (ring_space(ring) > HNS3_MAX_BD_PER_PKT))) {
+ /* Make sure that anybody stopping the queue after this
+ * sees the new next_to_clean.
+ */
+ smp_mb();
+ if (netif_tx_queue_stopped(dev_queue)) {
+ netif_tx_wake_queue(dev_queue);
+ ring->stats.restart_queue++;
+ }
+ }
+
+ return !!budget;
+}
+
+static int hns3_desc_unused(struct hns3_enet_ring *ring)
+{
+ int ntc = ring->next_to_clean;
+ int ntu = ring->next_to_use;
+
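+ /* Illustrative example: with desc_num = 8, ntc = 3 and ntu = 5 this
+ * yields 8 + 3 - 5 = 6 BDs that have been cleaned but not yet
+ * refilled; with ntc = 5 and ntu = 3 it yields 5 - 3 = 2.
+ */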
+ return ((ntc >= ntu) ? 0 : ring->desc_num) + ntc - ntu;
+}
+
+static void
+hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, int cleaned_count)
+{
+ struct hns3_desc_cb *desc_cb;
+ struct hns3_desc_cb res_cbs;
+ int i, ret;
+
+ for (i = 0; i < cleaned_count; i++) {
+ desc_cb = &ring->desc_cb[ring->next_to_use];
+ if (desc_cb->reuse_flag) {
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.reuse_pg_cnt++;
+ u64_stats_update_end(&ring->syncp);
+
+ hns3_reuse_buffer(ring, ring->next_to_use);
+ } else {
+ ret = hns3_reserve_buffer_map(ring, &res_cbs);
+ if (ret) {
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.sw_err_cnt++;
+ u64_stats_update_end(&ring->syncp);
+
+ netdev_err(ring->tqp->handle->kinfo.netdev,
+ "hnae reserve buffer map failed.\n");
+ break;
+ }
+ hns3_replace_buffer(ring, ring->next_to_use, &res_cbs);
+ }
+
+ ring_ptr_move_fw(ring, next_to_use);
+ }
+
+ wmb(); /* Make sure all data has been written before submitting */
+ writel_relaxed(i, ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG);
+}
+
+/* hns3_nic_get_headlen - determine size of header for LRO/GRO
+ * @data: pointer to the start of the headers
+ * @flag: the l234info field of the Rx descriptor
+ * @max_size: total length of the section to find headers in
+ *
+ * This function is meant to determine the length of headers that will
+ * be recognized by hardware for LRO, GRO, and RSC offloads. The main
+ * motivation of doing this is to only perform one pull for IPv4 TCP
+ * packets so that we can do basic things like calculating the gso_size
+ * based on the average data per packet.
+ */
+static unsigned int hns3_nic_get_headlen(unsigned char *data, u32 flag,
+ unsigned int max_size)
+{
+ unsigned char *network;
+ u8 hlen;
+
+ /* This should never happen, but better safe than sorry */
+ if (max_size < ETH_HLEN)
+ return max_size;
+
+ /* Initialize network frame pointer */
+ network = data;
+
+ /* Set first protocol and move network header forward */
+ network += ETH_HLEN;
+
+ /* Handle any vlan tag if present */
+ if (hnae_get_field(flag, HNS3_RXD_VLAN_M, HNS3_RXD_VLAN_S)
+ == HNS3_RX_FLAG_VLAN_PRESENT) {
+ if ((typeof(max_size))(network - data) > (max_size - VLAN_HLEN))
+ return max_size;
+
+ network += VLAN_HLEN;
+ }
+
+ /* Handle L3 protocols */
+ if (hnae_get_field(flag, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S)
+ == HNS3_RX_FLAG_L3ID_IPV4) {
+ if ((typeof(max_size))(network - data) >
+ (max_size - sizeof(struct iphdr)))
+ return max_size;
+
+ /* Access ihl as a u8 to avoid unaligned access on ia64 */
+ hlen = (network[0] & 0x0F) << 2;
+
+ /* Verify hlen meets minimum size requirements */
+ if (hlen < sizeof(struct iphdr))
+ return network - data;
+
+ /* Record next protocol if header is present */
+ } else if (hnae_get_field(flag, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S)
+ == HNS3_RX_FLAG_L3ID_IPV6) {
+ if ((typeof(max_size))(network - data) >
+ (max_size - sizeof(struct ipv6hdr)))
+ return max_size;
+
+ /* Record next protocol */
+ hlen = sizeof(struct ipv6hdr);
+ } else {
+ return network - data;
+ }
+
+ /* Relocate pointer to start of L4 header */
+ network += hlen;
+
+ /* Finally sort out TCP/UDP */
+ if (hnae_get_field(flag, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S)
+ == HNS3_RX_FLAG_L4ID_TCP) {
+ if ((typeof(max_size))(network - data) >
+ (max_size - sizeof(struct tcphdr)))
+ return max_size;
+
+ /* Access doff as a u8 to avoid unaligned access on ia64 */
+ hlen = (network[12] & 0xF0) >> 2;
+
+ /* Verify hlen meets minimum size requirements */
+ if (hlen < sizeof(struct tcphdr))
+ return network - data;
+
+ network += hlen;
+ } else if (hnae_get_field(flag, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S)
+ == HNS3_RX_FLAG_L4ID_UDP) {
+ if ((typeof(max_size))(network - data) >
+ (max_size - sizeof(struct udphdr)))
+ return max_size;
+
+ network += sizeof(struct udphdr);
+ }
+
+ /* If everything has gone correctly network should be the
+ * data section of the packet and will be the end of the header.
+ * If not then it probably represents the end of the last recognized
+ * header.
+ */
+ if ((typeof(max_size))(network - data) < max_size)
+ return network - data;
+ else
+ return max_size;
+}
+
+static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
+ struct hns3_enet_ring *ring, int pull_len,
+ struct hns3_desc_cb *desc_cb)
+{
+ struct hns3_desc *desc;
+ int truesize, size;
+ int last_offset;
+ bool twobufs;
+
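+ /* With 4K pages and 2048-byte buffers ("twobufs"), one page holds
+ * exactly two buffers, so the reuse logic below can simply XOR
+ * page_offset between the two halves (0 <-> 2048).
+ */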
+ twobufs = ((PAGE_SIZE < 8192) &&
+ hnae_buf_size(ring) == HNS3_BUFFER_SIZE_2048);
+
+ desc = &ring->desc[ring->next_to_clean];
+ size = le16_to_cpu(desc->rx.size);
+
+ if (twobufs) {
+ truesize = hnae_buf_size(ring);
+ } else {
+ truesize = ALIGN(size, L1_CACHE_BYTES);
+ last_offset = hnae_page_size(ring) - hnae_buf_size(ring);
+ }
+
+ skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
+ size - pull_len, truesize - pull_len);
+
+ /* Avoid re-using remote pages; reuse_flag defaults to no reuse */
+ if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id()))
+ return;
+
+ if (twobufs) {
+ /* If we are only owner of page we can reuse it */
+ if (likely(page_count(desc_cb->priv) == 1)) {
+ /* Flip page offset to other buffer */
+ desc_cb->page_offset ^= truesize;
+
+ desc_cb->reuse_flag = 1;
+ /* Bump ref count on page before it is given */
+ get_page(desc_cb->priv);
+ }
+ return;
+ }
+
+ /* Move offset up to the next cache line */
+ desc_cb->page_offset += truesize;
+
+ if (desc_cb->page_offset <= last_offset) {
+ desc_cb->reuse_flag = 1;
+ /* Bump ref count on page before it is given */
+ get_page(desc_cb->priv);
+ }
+}
+
+static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
+ struct hns3_desc *desc)
+{
+ struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+ int l3_type, l4_type;
+ u32 bd_base_info;
+ int ol4_type;
+ u32 l234info;
+
+ bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+ l234info = le32_to_cpu(desc->rx.l234_info);
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ skb_checksum_none_assert(skb);
+
+ if (!(netdev->features & NETIF_F_RXCSUM))
+ return;
+
+ /* check if hardware has done checksum */
+ if (!hnae_get_bit(bd_base_info, HNS3_RXD_L3L4P_B))
+ return;
+
+ if (unlikely(hnae_get_bit(l234info, HNS3_RXD_L3E_B) ||
+ hnae_get_bit(l234info, HNS3_RXD_L4E_B) ||
+ hnae_get_bit(l234info, HNS3_RXD_OL3E_B) ||
+ hnae_get_bit(l234info, HNS3_RXD_OL4E_B))) {
+ netdev_err(netdev, "L3/L4 error pkt\n");
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.l3l4_csum_err++;
+ u64_stats_update_end(&ring->syncp);
+
+ return;
+ }
+
+ l3_type = hnae_get_field(l234info, HNS3_RXD_L3ID_M,
+ HNS3_RXD_L3ID_S);
+ l4_type = hnae_get_field(l234info, HNS3_RXD_L4ID_M,
+ HNS3_RXD_L4ID_S);
+
+ ol4_type = hnae_get_field(l234info, HNS3_RXD_OL4ID_M, HNS3_RXD_OL4ID_S);
+ switch (ol4_type) {
+ case HNS3_OL4_TYPE_MAC_IN_UDP:
+ case HNS3_OL4_TYPE_NVGRE:
+ skb->csum_level = 1;
+ /* fall through */
+ case HNS3_OL4_TYPE_NO_TUN:
+ /* Can checksum ipv4 or ipv6 + UDP/TCP/SCTP packets */
+ if (l3_type == HNS3_L3_TYPE_IPV4 ||
+ (l3_type == HNS3_L3_TYPE_IPV6 &&
+ (l4_type == HNS3_L4_TYPE_UDP ||
+ l4_type == HNS3_L4_TYPE_TCP ||
+ l4_type == HNS3_L4_TYPE_SCTP)))
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
+ }
+}
+
+static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
+ struct sk_buff **out_skb, int *out_bnum)
+{
+ struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+ struct hns3_desc_cb *desc_cb;
+ struct hns3_desc *desc;
+ struct sk_buff *skb;
+ unsigned char *va;
+ u32 bd_base_info;
+ int pull_len;
+ u32 l234info;
+ int length;
+ int bnum;
+
+ desc = &ring->desc[ring->next_to_clean];
+ desc_cb = &ring->desc_cb[ring->next_to_clean];
+
+ prefetch(desc);
+
+ length = le16_to_cpu(desc->rx.pkt_len);
+ bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+ l234info = le32_to_cpu(desc->rx.l234_info);
+
+ /* Check valid BD */
+ if (!hnae_get_bit(bd_base_info, HNS3_RXD_VLD_B))
+ return -EFAULT;
+
+ va = (unsigned char *)desc_cb->buf + desc_cb->page_offset;
+
+ /* Prefetch first cache line of first page.
+ * The idea is to cache a few bytes of the packet header. Our L1 cache
+ * line size is 64B, so we prefetch twice to cover 128B. On systems
+ * with 128B L1 cache lines, a single prefetch would suffice to cache
+ * the relevant part of the header.
+ */
+ prefetch(va);
+#if L1_CACHE_BYTES < 128
+ prefetch(va + L1_CACHE_BYTES);
+#endif
+
+ skb = *out_skb = napi_alloc_skb(&ring->tqp_vector->napi,
+ HNS3_RX_HEAD_SIZE);
+ if (unlikely(!skb)) {
+ netdev_err(netdev, "alloc rx skb fail\n");
+
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.sw_err_cnt++;
+ u64_stats_update_end(&ring->syncp);
+
+ return -ENOMEM;
+ }
+
+ prefetchw(skb->data);
+
+ bnum = 1;
+ if (length <= HNS3_RX_HEAD_SIZE) {
+ memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long)));
+
+ /* We can reuse buffer as-is, just make sure it is local */
+ if (likely(page_to_nid(desc_cb->priv) == numa_node_id()))
+ desc_cb->reuse_flag = 1;
+ else /* This page cannot be reused so discard it */
+ put_page(desc_cb->priv);
+
+ ring_ptr_move_fw(ring, next_to_clean);
+ } else {
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.seg_pkt_cnt++;
+ u64_stats_update_end(&ring->syncp);
+
+ pull_len = hns3_nic_get_headlen(va, l234info,
+ HNS3_RX_HEAD_SIZE);
+ memcpy(__skb_put(skb, pull_len), va,
+ ALIGN(pull_len, sizeof(long)));
+
+ hns3_nic_reuse_page(skb, 0, ring, pull_len, desc_cb);
+ ring_ptr_move_fw(ring, next_to_clean);
+
+ while (!hnae_get_bit(bd_base_info, HNS3_RXD_FE_B)) {
+ desc = &ring->desc[ring->next_to_clean];
+ desc_cb = &ring->desc_cb[ring->next_to_clean];
+ bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
+ hns3_nic_reuse_page(skb, bnum, ring, 0, desc_cb);
+ ring_ptr_move_fw(ring, next_to_clean);
+ bnum++;
+ }
+ }
+
+ *out_bnum = bnum;
+
+ if (unlikely(!hnae_get_bit(bd_base_info, HNS3_RXD_VLD_B))) {
+ netdev_err(netdev, "no valid bd,%016llx,%016llx\n",
+ ((u64 *)desc)[0], ((u64 *)desc)[1]);
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.non_vld_descs++;
+ u64_stats_update_end(&ring->syncp);
+
+ dev_kfree_skb_any(skb);
+ return -EINVAL;
+ }
+
+ if (unlikely((!desc->rx.pkt_len) ||
+ hnae_get_bit(l234info, HNS3_RXD_TRUNCAT_B))) {
+ netdev_err(netdev, "truncated pkt\n");
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.err_pkt_len++;
+ u64_stats_update_end(&ring->syncp);
+
+ dev_kfree_skb_any(skb);
+ return -EFAULT;
+ }
+
+ if (unlikely(hnae_get_bit(l234info, HNS3_RXD_L2E_B))) {
+ netdev_err(netdev, "L2 error pkt\n");
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.l2_err++;
+ u64_stats_update_end(&ring->syncp);
+
+ dev_kfree_skb_any(skb);
+ return -EFAULT;
+ }
+
+ u64_stats_update_begin(&ring->syncp);
+ ring->stats.rx_pkts++;
+ ring->stats.rx_bytes += skb->len;
+ u64_stats_update_end(&ring->syncp);
+
+ ring->tqp_vector->rx_group.total_bytes += skb->len;
+
+ hns3_rx_checksum(ring, skb, desc);
+ return 0;
+}
+
+static int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget)
+{
+#define RCB_NOF_ALLOC_RX_BUFF_ONCE 16
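+ /* Refilling is batched: buffers are only replenished once at least
+ * 16 BDs are pending, which amortizes the doorbell write in
+ * hns3_nic_alloc_rx_buffers() over many descriptors. The batch size
+ * of 16 looks like a tuning choice rather than a hardware
+ * requirement (assumption).
+ */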
+ struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
+ int recv_pkts, recv_bds, clean_count, err;
+ int unused_count = hns3_desc_unused(ring);
+ struct sk_buff *skb = NULL;
+ int num, bnum = 0;
+
+ num = readl_relaxed(ring->tqp->io_base + HNS3_RING_RX_RING_FBDNUM_REG);
+ rmb(); /* Make sure the read of num completes before other data is touched */
+
+ recv_pkts = 0, recv_bds = 0, clean_count = 0;
+ num -= unused_count;
+
+ while (recv_pkts < budget && recv_bds < num) {
+ /* Reuse or realloc buffers */
+ if (clean_count + unused_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) {
+ hns3_nic_alloc_rx_buffers(ring,
+ clean_count + unused_count);
+ clean_count = 0;
+ unused_count = hns3_desc_unused(ring);
+ }
+
+ /* Poll one pkt */
+ err = hns3_handle_rx_bd(ring, &skb, &bnum);
+ if (unlikely(!skb)) /* This fault cannot be repaired */
+ goto out;
+
+ recv_bds += bnum;
+ clean_count += bnum;
+ if (unlikely(err)) { /* Skip this erroneous packet */
+ recv_pkts++;
+ continue;
+ }
+
+ /* Hand the packet up to the IP stack */
+ skb->protocol = eth_type_trans(skb, netdev);
+ (void)napi_gro_receive(&ring->tqp_vector->napi, skb);
+
+ recv_pkts++;
+ }
+
+out:
+ /* Make sure all data has been written before submitting */
+ if (clean_count + unused_count > 0)
+ hns3_nic_alloc_rx_buffers(ring,
+ clean_count + unused_count);
+
+ return recv_pkts;
+}
+
+static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group)
+{
+#define HNS3_RX_ULTRA_PACKET_RATE 40000
+ enum hns3_flow_level_range new_flow_level;
+ struct hns3_enet_tqp_vector *tqp_vector;
+ int packets_per_secs;
+ int bytes_per_usecs;
+ u16 new_int_gl;
+ int usecs;
+
+ if (!ring_group->int_gl)
+ return false;
+
+ if (ring_group->total_packets == 0) {
+ ring_group->int_gl = HNS3_INT_GL_50K;
+ ring_group->flow_level = HNS3_FLOW_LOW;
+ return true;
+ }
+
+ /* Simple throttle rate management
+ * 0-10MB/s lower (50000 ints/s)
+ * 10-20MB/s middle (20000 ints/s)
+ * 20-1249MB/s high (18000 ints/s)
+ * > 40000pps ultra (8000 ints/s)
+ */
+ new_flow_level = ring_group->flow_level;
+ new_int_gl = ring_group->int_gl;
+ tqp_vector = ring_group->ring->tqp_vector;
+ usecs = (ring_group->int_gl << 1);
+ bytes_per_usecs = ring_group->total_bytes / usecs;
+ /* 1000000 microseconds per second */
+ packets_per_secs = ring_group->total_packets * 1000000 / usecs;
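+ /* Each GL unit appears to represent 2us (hence int_gl << 1), so,
+ * e.g., HNS3_INT_GL_50K = 0x000A gives 10 << 1 = 20us between
+ * interrupts, i.e. 1000000 / 20 = 50000 ints/s, matching the table
+ * above. (Worked example; the 2us unit is inferred from the shift
+ * and the register values, not from a datasheet.)
+ */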
+
+ switch (new_flow_level) {
+ case HNS3_FLOW_LOW:
+ if (bytes_per_usecs > 10)
+ new_flow_level = HNS3_FLOW_MID;
+ break;
+ case HNS3_FLOW_MID:
+ if (bytes_per_usecs > 20)
+ new_flow_level = HNS3_FLOW_HIGH;
+ else if (bytes_per_usecs <= 10)
+ new_flow_level = HNS3_FLOW_LOW;
+ break;
+ case HNS3_FLOW_HIGH:
+ case HNS3_FLOW_ULTRA:
+ default:
+ if (bytes_per_usecs <= 20)
+ new_flow_level = HNS3_FLOW_MID;
+ break;
+ }
+
+ if ((packets_per_secs > HNS3_RX_ULTRA_PACKET_RATE) &&
+ (&tqp_vector->rx_group == ring_group))
+ new_flow_level = HNS3_FLOW_ULTRA;
+
+ switch (new_flow_level) {
+ case HNS3_FLOW_LOW:
+ new_int_gl = HNS3_INT_GL_50K;
+ break;
+ case HNS3_FLOW_MID:
+ new_int_gl = HNS3_INT_GL_20K;
+ break;
+ case HNS3_FLOW_HIGH:
+ new_int_gl = HNS3_INT_GL_18K;
+ break;
+ case HNS3_FLOW_ULTRA:
+ new_int_gl = HNS3_INT_GL_8K;
+ break;
+ default:
+ break;
+ }
+
+ ring_group->total_bytes = 0;
+ ring_group->total_packets = 0;
+ ring_group->flow_level = new_flow_level;
+ if (new_int_gl != ring_group->int_gl) {
+ ring_group->int_gl = new_int_gl;
+ return true;
+ }
+ return false;
+}
+
+static void hns3_update_new_int_gl(struct hns3_enet_tqp_vector *tqp_vector)
+{
+ u16 rx_int_gl, tx_int_gl;
+ bool rx, tx;
+
+ rx = hns3_get_new_int_gl(&tqp_vector->rx_group);
+ tx = hns3_get_new_int_gl(&tqp_vector->tx_group);
+ rx_int_gl = tqp_vector->rx_group.int_gl;
+ tx_int_gl = tqp_vector->tx_group.int_gl;
+ if (rx && tx) {
+ if (rx_int_gl > tx_int_gl) {
+ tqp_vector->tx_group.int_gl = rx_int_gl;
+ tqp_vector->tx_group.flow_level =
+ tqp_vector->rx_group.flow_level;
+ hns3_set_vector_coalesc_gl(tqp_vector, rx_int_gl);
+ } else {
+ tqp_vector->rx_group.int_gl = tx_int_gl;
+ tqp_vector->rx_group.flow_level =
+ tqp_vector->tx_group.flow_level;
+ hns3_set_vector_coalesc_gl(tqp_vector, tx_int_gl);
+ }
+ }
+}
+
+static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
+{
+ struct hns3_enet_ring *ring;
+ int rx_pkt_total = 0;
+
+ struct hns3_enet_tqp_vector *tqp_vector =
+ container_of(napi, struct hns3_enet_tqp_vector, napi);
+ bool clean_complete = true;
+ int rx_budget;
+
+ /* Since the actual Tx work is minimal, we can give the Tx a larger
+ * budget and be more aggressive about cleaning up the Tx descriptors.
+ */
+ hns3_for_each_ring(ring, tqp_vector->tx_group) {
+ if (!hns3_clean_tx_ring(ring, budget))
+ clean_complete = false;
+ }
+
+ /* Make sure the Rx ring budget is not smaller than 1 */
+ rx_budget = max(budget / tqp_vector->num_tqps, 1);
+
+ hns3_for_each_ring(ring, tqp_vector->rx_group) {
+ int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget);
+
+ if (rx_cleaned >= rx_budget)
+ clean_complete = false;
+
+ rx_pkt_total += rx_cleaned;
+ }
+
+ tqp_vector->rx_group.total_packets += rx_pkt_total;
+
+ if (!clean_complete)
+ return budget;
+
+ napi_complete(napi);
+ hns3_update_new_int_gl(tqp_vector);
+ hns3_mask_vector_irq(tqp_vector, 1);
+
+ return rx_pkt_total;
+}
+
+static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
+ struct hnae3_ring_chain_node *head)
+{
+ struct pci_dev *pdev = tqp_vector->handle->pdev;
+ struct hnae3_ring_chain_node *cur_chain = head;
+ struct hnae3_ring_chain_node *chain;
+ struct hns3_enet_ring *tx_ring;
+ struct hns3_enet_ring *rx_ring;
+
+ tx_ring = tqp_vector->tx_group.ring;
+ if (tx_ring) {
+ cur_chain->tqp_index = tx_ring->tqp->tqp_index;
+ hnae_set_bit(cur_chain->flag, HNAE3_RING_TYPE_B,
+ HNAE3_RING_TYPE_TX);
+
+ cur_chain->next = NULL;
+
+ while (tx_ring->next) {
+ tx_ring = tx_ring->next;
+
+ chain = devm_kzalloc(&pdev->dev, sizeof(*chain),
+ GFP_KERNEL);
+ if (!chain)
+ return -ENOMEM;
+
+ cur_chain->next = chain;
+ chain->tqp_index = tx_ring->tqp->tqp_index;
+ hnae_set_bit(chain->flag, HNAE3_RING_TYPE_B,
+ HNAE3_RING_TYPE_TX);
+
+ cur_chain = chain;
+ }
+ }
+
+ rx_ring = tqp_vector->rx_group.ring;
+ if (!tx_ring && rx_ring) {
+ cur_chain->next = NULL;
+ cur_chain->tqp_index = rx_ring->tqp->tqp_index;
+ hnae_set_bit(cur_chain->flag, HNAE3_RING_TYPE_B,
+ HNAE3_RING_TYPE_RX);
+
+ rx_ring = rx_ring->next;
+ }
+
+ while (rx_ring) {
+ chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL);
+ if (!chain)
+ return -ENOMEM;
+
+ cur_chain->next = chain;
+ chain->tqp_index = rx_ring->tqp->tqp_index;
+ hnae_set_bit(chain->flag, HNAE3_RING_TYPE_B,
+ HNAE3_RING_TYPE_RX);
+ cur_chain = chain;
+
+ rx_ring = rx_ring->next;
+ }
+
+ return 0;
+}
+
+static void hns3_free_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
+ struct hnae3_ring_chain_node *head)
+{
+ struct pci_dev *pdev = tqp_vector->handle->pdev;
+ struct hnae3_ring_chain_node *chain_tmp, *chain;
+
+ chain = head->next;
+
+ while (chain) {
+ chain_tmp = chain->next;
+ devm_kfree(&pdev->dev, chain);
+ chain = chain_tmp;
+ }
+}
+
+static void hns3_add_ring_to_group(struct hns3_enet_ring_group *group,
+ struct hns3_enet_ring *ring)
+{
+ ring->next = group->ring;
+ group->ring = ring;
+
+ group->count++;
+}
+
+static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
+{
+ struct hnae3_ring_chain_node vector_ring_chain;
+ struct hnae3_handle *h = priv->ae_handle;
+ struct hns3_enet_tqp_vector *tqp_vector;
+ struct hnae3_vector_info *vector;
+ struct pci_dev *pdev = h->pdev;
+ u16 tqp_num = h->kinfo.num_tqps;
+ u16 vector_num;
+ int ret = 0;
+ u16 i;
+
+ /* The RSS size, the number of online CPUs and vector_num should match */
+ /* Should consider 2p/4p later */
+ vector_num = min_t(u16, num_online_cpus(), tqp_num);
+ vector = devm_kcalloc(&pdev->dev, vector_num, sizeof(*vector),
+ GFP_KERNEL);
+ if (!vector)
+ return -ENOMEM;
+
+ vector_num = h->ae_algo->ops->get_vector(h, vector_num, vector);
+
+ priv->vector_num = vector_num;
+ priv->tqp_vector = (struct hns3_enet_tqp_vector *)
+ devm_kcalloc(&pdev->dev, vector_num, sizeof(*priv->tqp_vector),
+ GFP_KERNEL);
+ if (!priv->tqp_vector)
+ return -ENOMEM;
+
+ for (i = 0; i < tqp_num; i++) {
+ u16 vector_i = i % vector_num;
+
+ tqp_vector = &priv->tqp_vector[vector_i];
+
+ hns3_add_ring_to_group(&tqp_vector->tx_group,
+ priv->ring_data[i].ring);
+
+ hns3_add_ring_to_group(&tqp_vector->rx_group,
+ priv->ring_data[i + tqp_num].ring);
+
+ tqp_vector->idx = vector_i;
+ tqp_vector->mask_addr = vector[vector_i].io_addr;
+ tqp_vector->vector_irq = vector[vector_i].vector;
+ tqp_vector->num_tqps++;
+
+ priv->ring_data[i].ring->tqp_vector = tqp_vector;
+ priv->ring_data[i + tqp_num].ring->tqp_vector = tqp_vector;
+ }
+
+ for (i = 0; i < vector_num; i++) {
+ tqp_vector = &priv->tqp_vector[i];
+
+ tqp_vector->rx_group.total_bytes = 0;
+ tqp_vector->rx_group.total_packets = 0;
+ tqp_vector->tx_group.total_bytes = 0;
+ tqp_vector->tx_group.total_packets = 0;
+ hns3_vector_gl_rl_init(tqp_vector);
+ tqp_vector->handle = h;
+
+ ret = hns3_get_vector_ring_chain(tqp_vector,
+ &vector_ring_chain);
+ if (ret)
+ goto out;
+
+ ret = h->ae_algo->ops->map_ring_to_vector(h,
+ tqp_vector->vector_irq, &vector_ring_chain);
+ if (ret)
+ goto out;
+
+ hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
+
+ netif_napi_add(priv->netdev, &tqp_vector->napi,
+ hns3_nic_common_poll, NAPI_POLL_WEIGHT);
+ }
+
+out:
+ devm_kfree(&pdev->dev, vector);
+ return ret;
+}
+
+static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
+{
+ struct hnae3_ring_chain_node vector_ring_chain;
+ struct hnae3_handle *h = priv->ae_handle;
+ struct hns3_enet_tqp_vector *tqp_vector;
+ struct pci_dev *pdev = h->pdev;
+ int i, ret;
+
+ for (i = 0; i < priv->vector_num; i++) {
+ tqp_vector = &priv->tqp_vector[i];
+
+ ret = hns3_get_vector_ring_chain(tqp_vector,
+ &vector_ring_chain);
+ if (ret)
+ return ret;
+
+ ret = h->ae_algo->ops->unmap_ring_from_vector(h,
+ tqp_vector->vector_irq, &vector_ring_chain);
+ if (ret)
+ return ret;
+
+ hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
+
+ if (priv->tqp_vector[i].irq_init_flag == HNS3_VECTOR_INITED) {
+ (void)irq_set_affinity_hint(
+ priv->tqp_vector[i].vector_irq,
+ NULL);
+ devm_free_irq(&pdev->dev,
+ priv->tqp_vector[i].vector_irq,
+ &priv->tqp_vector[i]);
+ }
+
+ priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED;
+
+ netif_napi_del(&priv->tqp_vector[i].napi);
+ }
+
+ devm_kfree(&pdev->dev, priv->tqp_vector);
+
+ return 0;
+}
+
+static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
+ int ring_type)
+{
+ struct hns3_nic_ring_data *ring_data = priv->ring_data;
+ int queue_num = priv->ae_handle->kinfo.num_tqps;
+ struct pci_dev *pdev = priv->ae_handle->pdev;
+ struct hns3_enet_ring *ring;
+
+ ring = devm_kzalloc(&pdev->dev, sizeof(*ring), GFP_KERNEL);
+ if (!ring)
+ return -ENOMEM;
+
+ if (ring_type == HNAE3_RING_TYPE_TX) {
+ ring_data[q->tqp_index].ring = ring;
+ ring->io_base = (u8 __iomem *)q->io_base + HNS3_TX_REG_OFFSET;
+ } else {
+ ring_data[q->tqp_index + queue_num].ring = ring;
+ ring->io_base = q->io_base;
+ }
+
+ hnae_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type);
+
+ ring_data[q->tqp_index].queue_index = q->tqp_index;
+
+ ring->tqp = q;
+ ring->desc = NULL;
+ ring->desc_cb = NULL;
+ ring->dev = priv->dev;
+ ring->desc_dma_addr = 0;
+ ring->buf_size = q->buf_size;
+ ring->desc_num = q->desc_num;
+ ring->next_to_use = 0;
+ ring->next_to_clean = 0;
+
+ return 0;
+}
+
+static int hns3_queue_to_ring(struct hnae3_queue *tqp,
+ struct hns3_nic_priv *priv)
+{
+ int ret;
+
+ ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_TX);
+ if (ret)
+ return ret;
+
+ ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int hns3_get_ring_config(struct hns3_nic_priv *priv)
+{
+ struct hnae3_handle *h = priv->ae_handle;
+ struct pci_dev *pdev = h->pdev;
+ int i, ret;
+
+ priv->ring_data = devm_kzalloc(&pdev->dev, h->kinfo.num_tqps *
+ sizeof(*priv->ring_data) * 2,
+ GFP_KERNEL);
+ if (!priv->ring_data)
+ return -ENOMEM;
+
+ for (i = 0; i < h->kinfo.num_tqps; i++) {
+ ret = hns3_queue_to_ring(h->kinfo.tqp[i], priv);
+ if (ret)
+ goto err;
+ }
+
+ return 0;
+err:
+ devm_kfree(&pdev->dev, priv->ring_data);
+ return ret;
+}
+
+static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
+{
+ int ret;
+
+ if (ring->desc_num <= 0 || ring->buf_size <= 0)
+ return -EINVAL;
+
+ ring->desc_cb = kcalloc(ring->desc_num, sizeof(ring->desc_cb[0]),
+ GFP_KERNEL);
+ if (!ring->desc_cb) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = hns3_alloc_desc(ring);
+ if (ret)
+ goto out_with_desc_cb;
+
+ if (!HNAE3_IS_TX_RING(ring)) {
+ ret = hns3_alloc_ring_buffers(ring);
+ if (ret)
+ goto out_with_desc;
+ }
+
+ return 0;
+
+out_with_desc:
+ hns3_free_desc(ring);
+out_with_desc_cb:
+ kfree(ring->desc_cb);
+ ring->desc_cb = NULL;
+out:
+ return ret;
+}
+
+static void hns3_fini_ring(struct hns3_enet_ring *ring)
+{
+ hns3_free_desc(ring);
+ kfree(ring->desc_cb);
+ ring->desc_cb = NULL;
+ ring->next_to_clean = 0;
+ ring->next_to_use = 0;
+}
+
+int hns3_buf_size2type(u32 buf_size)
+{
+ int bd_size_type;
+
+ switch (buf_size) {
+ case 512:
+ bd_size_type = HNS3_BD_SIZE_512_TYPE;
+ break;
+ case 1024:
+ bd_size_type = HNS3_BD_SIZE_1024_TYPE;
+ break;
+ case 2048:
+ bd_size_type = HNS3_BD_SIZE_2048_TYPE;
+ break;
+ case 4096:
+ bd_size_type = HNS3_BD_SIZE_4096_TYPE;
+ break;
+ default:
+ bd_size_type = HNS3_BD_SIZE_2048_TYPE;
+ }
+
+ return bd_size_type;
+}
+
+static void hns3_init_ring_hw(struct hns3_enet_ring *ring)
+{
+ dma_addr_t dma = ring->desc_dma_addr;
+ struct hnae3_queue *q = ring->tqp;
+
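+ /* The 64-bit DMA address is split across two 32-bit registers. The
+ * high half is written as (dma >> 31) >> 1 rather than dma >> 32 so
+ * that the shift stays well-defined when dma_addr_t is only 32 bits
+ * wide.
+ */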
+ if (!HNAE3_IS_TX_RING(ring)) {
+ hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_L_REG,
+ (u32)dma);
+ hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_H_REG,
+ (u32)((dma >> 31) >> 1));
+
+ hns3_write_dev(q, HNS3_RING_RX_RING_BD_LEN_REG,
+ hns3_buf_size2type(ring->buf_size));
+ hns3_write_dev(q, HNS3_RING_RX_RING_BD_NUM_REG,
+ ring->desc_num / 8 - 1);
+
+ } else {
+ hns3_write_dev(q, HNS3_RING_TX_RING_BASEADDR_L_REG,
+ (u32)dma);
+ hns3_write_dev(q, HNS3_RING_TX_RING_BASEADDR_H_REG,
+ (u32)((dma >> 31) >> 1));
+
+ hns3_write_dev(q, HNS3_RING_TX_RING_BD_LEN_REG,
+ hns3_buf_size2type(ring->buf_size));
+ hns3_write_dev(q, HNS3_RING_TX_RING_BD_NUM_REG,
+ ring->desc_num / 8 - 1);
+ }
+}
+
+static int hns3_init_all_ring(struct hns3_nic_priv *priv)
+{
+ struct hnae3_handle *h = priv->ae_handle;
+ int ring_num = h->kinfo.num_tqps * 2;
+ int i, j;
+ int ret;
+
+ for (i = 0; i < ring_num; i++) {
+ ret = hns3_alloc_ring_memory(priv->ring_data[i].ring);
+ if (ret) {
+ dev_err(priv->dev,
+ "Alloc ring memory fail! ret=%d\n", ret);
+ goto out_when_alloc_ring_memory;
+ }
+
+ hns3_init_ring_hw(priv->ring_data[i].ring);
+
+ u64_stats_init(&priv->ring_data[i].ring->syncp);
+ }
+
+ return 0;
+
+out_when_alloc_ring_memory:
+ for (j = i - 1; j >= 0; j--)
+ hns3_fini_ring(priv->ring_data[j].ring);
+
+ return -ENOMEM;
+}
+
+static int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
+{
+ struct hnae3_handle *h = priv->ae_handle;
+ int i;
+
+ for (i = 0; i < h->kinfo.num_tqps; i++) {
+ if (h->ae_algo->ops->reset_queue)
+ h->ae_algo->ops->reset_queue(h, i);
+
+ hns3_fini_ring(priv->ring_data[i].ring);
+ hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
+ }
+
+ return 0;
+}
+
+/* Set the MAC address if it is configured, or leave it to the AE driver */
+static void hns3_init_mac_addr(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ u8 mac_addr_temp[ETH_ALEN];
+
+ if (h->ae_algo->ops->get_mac_addr) {
+ h->ae_algo->ops->get_mac_addr(h, mac_addr_temp);
+ ether_addr_copy(netdev->dev_addr, mac_addr_temp);
+ }
+
+ /* Check if the MAC address is valid, if not get a random one */
+ if (!is_valid_ether_addr(netdev->dev_addr)) {
+ eth_hw_addr_random(netdev);
+ dev_warn(priv->dev, "using random MAC address %pM\n",
+ netdev->dev_addr);
+ /* Also copy this new MAC address into hdev */
+ if (h->ae_algo->ops->set_mac_addr)
+ h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr);
+ }
+}
+
+static void hns3_nic_set_priv_ops(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+
+ if ((netdev->features & NETIF_F_TSO) ||
+ (netdev->features & NETIF_F_TSO6)) {
+ priv->ops.fill_desc = hns3_fill_desc_tso;
+ priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tso;
+ } else {
+ priv->ops.fill_desc = hns3_fill_desc;
+ priv->ops.maybe_stop_tx = hns3_nic_maybe_stop_tx;
+ }
+}
+
+static int hns3_client_init(struct hnae3_handle *handle)
+{
+ struct pci_dev *pdev = handle->pdev;
+ struct hns3_nic_priv *priv;
+ struct net_device *netdev;
+ int ret;
+
+ netdev = alloc_etherdev_mq(sizeof(struct hns3_nic_priv),
+ handle->kinfo.num_tqps);
+ if (!netdev)
+ return -ENOMEM;
+
+ priv = netdev_priv(netdev);
+ priv->dev = &pdev->dev;
+ priv->netdev = netdev;
+ priv->ae_handle = handle;
+
+ handle->kinfo.netdev = netdev;
+ handle->priv = (void *)priv;
+
+ hns3_init_mac_addr(netdev);
+
+ hns3_set_default_feature(netdev);
+
+ netdev->watchdog_timeo = HNS3_TX_TIMEOUT;
+ netdev->priv_flags |= IFF_UNICAST_FLT;
+ netdev->netdev_ops = &hns3_nic_netdev_ops;
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+ hns3_ethtool_set_ops(netdev);
+ hns3_nic_set_priv_ops(netdev);
+
+ /* Carrier off reporting is important to ethtool even BEFORE open */
+ netif_carrier_off(netdev);
+
+ ret = hns3_get_ring_config(priv);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out_get_ring_cfg;
+ }
+
+ ret = hns3_nic_init_vector_data(priv);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out_init_vector_data;
+ }
+
+ ret = hns3_init_all_ring(priv);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out_init_ring_data;
+ }
+
+ ret = register_netdev(netdev);
+ if (ret) {
+ dev_err(priv->dev, "probe register netdev fail!\n");
+ goto out_reg_netdev_fail;
+ }
+
+ /* MTU range: (ETH_MIN_MTU(kernel default) - 9706) */
+ netdev->max_mtu = HNS3_MAX_MTU - (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
+
+ return ret;
+
+out_reg_netdev_fail:
+out_init_ring_data:
+ (void)hns3_nic_uninit_vector_data(priv);
+ priv->ring_data = NULL;
+out_init_vector_data:
+out_get_ring_cfg:
+ priv->ae_handle = NULL;
+ free_netdev(netdev);
+ return ret;
+}
+
+static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
+{
+ struct net_device *netdev = handle->kinfo.netdev;
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ int ret;
+
+ if (netdev->reg_state != NETREG_UNINITIALIZED)
+ unregister_netdev(netdev);
+
+ ret = hns3_nic_uninit_vector_data(priv);
+ if (ret)
+ netdev_err(netdev, "uninit vector error\n");
+
+ ret = hns3_uninit_all_ring(priv);
+ if (ret)
+ netdev_err(netdev, "uninit ring error\n");
+
+ priv->ring_data = NULL;
+
+ free_netdev(netdev);
+}
+
+static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup)
+{
+ struct net_device *netdev = handle->kinfo.netdev;
+
+ if (!netdev)
+ return;
+
+ if (linkup) {
+ netif_carrier_on(netdev);
+ netif_tx_wake_all_queues(netdev);
+ netdev_info(netdev, "link up\n");
+ } else {
+ netif_carrier_off(netdev);
+ netif_tx_stop_all_queues(netdev);
+ netdev_info(netdev, "link down\n");
+ }
+}
+
+const struct hnae3_client_ops client_ops = {
+ .init_instance = hns3_client_init,
+ .uninit_instance = hns3_client_uninit,
+ .link_status_change = hns3_link_status_change,
+};
+
+/* hns3_init_module - Driver registration routine
+ * hns3_init_module is the first routine called when the driver is
+ * loaded. All it does is register the hnae3 client and the PCI driver.
+ */
+static int __init hns3_init_module(void)
+{
+ int ret;
+
+ pr_info("%s: %s - version\n", hns3_driver_name, hns3_driver_string);
+ pr_info("%s: %s\n", hns3_driver_name, hns3_copyright);
+
+ client.type = HNAE3_CLIENT_KNIC;
+ snprintf(client.name, HNAE3_CLIENT_NAME_LENGTH - 1, "%s",
+ hns3_driver_name);
+
+ client.ops = &client_ops;
+
+ ret = hnae3_register_client(&client);
+ if (ret)
+ return ret;
+
+ ret = pci_register_driver(&hns3_driver);
+ if (ret)
+ hnae3_unregister_client(&client);
+
+ return ret;
+}
+module_init(hns3_init_module);
+
+/* hns3_exit_module - Driver exit cleanup routine
+ * hns3_exit_module is called just before the driver is removed
+ * from memory.
+ */
+static void __exit hns3_exit_module(void)
+{
+ pci_unregister_driver(&hns3_driver);
+ hnae3_unregister_client(&client);
+}
+module_exit(hns3_exit_module);
+
+MODULE_DESCRIPTION("HNS3: Hisilicon Ethernet Driver");
+MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("pci:hns-nic");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
new file mode 100644
index 0000000..7e87461
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
@@ -0,0 +1,593 @@
+/*
+ * Copyright (c) 2016 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HNS3_ENET_H
+#define __HNS3_ENET_H
+
+#include "hnae3.h"
+
+extern const char hns3_driver_version[];
+
+enum hns3_nic_state {
+ HNS3_NIC_STATE_TESTING,
+ HNS3_NIC_STATE_RESETTING,
+ HNS3_NIC_STATE_REINITING,
+ HNS3_NIC_STATE_DOWN,
+ HNS3_NIC_STATE_DISABLED,
+ HNS3_NIC_STATE_REMOVING,
+ HNS3_NIC_STATE_SERVICE_INITED,
+ HNS3_NIC_STATE_SERVICE_SCHED,
+ HNS3_NIC_STATE2_RESET_REQUESTED,
+ HNS3_NIC_STATE_MAX
+};
+
+#define HNS3_RING_RX_RING_BASEADDR_L_REG 0x00000
+#define HNS3_RING_RX_RING_BASEADDR_H_REG 0x00004
+#define HNS3_RING_RX_RING_BD_NUM_REG 0x00008
+#define HNS3_RING_RX_RING_BD_LEN_REG 0x0000C
+#define HNS3_RING_RX_RING_TAIL_REG 0x00018
+#define HNS3_RING_RX_RING_HEAD_REG 0x0001C
+#define HNS3_RING_RX_RING_FBDNUM_REG 0x00020
+#define HNS3_RING_RX_RING_PKTNUM_RECORD_REG 0x0002C
+
+#define HNS3_RING_TX_RING_BASEADDR_L_REG 0x00040
+#define HNS3_RING_TX_RING_BASEADDR_H_REG 0x00044
+#define HNS3_RING_TX_RING_BD_NUM_REG 0x00048
+#define HNS3_RING_TX_RING_BD_LEN_REG 0x0004C
+#define HNS3_RING_TX_RING_TAIL_REG 0x00058
+#define HNS3_RING_TX_RING_HEAD_REG 0x0005C
+#define HNS3_RING_TX_RING_FBDNUM_REG 0x00060
+#define HNS3_RING_TX_RING_OFFSET_REG 0x00064
+#define HNS3_RING_TX_RING_PKTNUM_RECORD_REG 0x0006C
+
+#define HNS3_RING_PREFETCH_EN_REG 0x0007C
+#define HNS3_RING_CFG_VF_NUM_REG 0x00080
+#define HNS3_RING_ASID_REG 0x0008C
+#define HNS3_RING_RX_VM_REG 0x00090
+#define HNS3_RING_T0_BE_RST 0x00094
+#define HNS3_RING_COULD_BE_RST 0x00098
+#define HNS3_RING_WRR_WEIGHT_REG 0x0009c
+
+#define HNS3_RING_INTMSK_RXWL_REG 0x000A0
+#define HNS3_RING_INTSTS_RX_RING_REG 0x000A4
+#define HNS3_RX_RING_INT_STS_REG 0x000A8
+#define HNS3_RING_INTMSK_TXWL_REG 0x000AC
+#define HNS3_RING_INTSTS_TX_RING_REG 0x000B0
+#define HNS3_TX_RING_INT_STS_REG 0x000B4
+#define HNS3_RING_INTMSK_RX_OVERTIME_REG 0x000B8
+#define HNS3_RING_INTSTS_RX_OVERTIME_REG 0x000BC
+#define HNS3_RING_INTMSK_TX_OVERTIME_REG 0x000C4
+#define HNS3_RING_INTSTS_TX_OVERTIME_REG 0x000C8
+
+#define HNS3_RING_MB_CTRL_REG 0x00100
+#define HNS3_RING_MB_DATA_BASE_REG 0x00200
+
+#define HNS3_TX_REG_OFFSET 0x40
+
+#define HNS3_RX_HEAD_SIZE 256
+
+#define HNS3_TX_TIMEOUT (5 * HZ)
+#define HNS3_RING_NAME_LEN 16
+#define HNS3_BUFFER_SIZE_2048 2048
+#define HNS3_RING_MAX_PENDING 32768
+#define HNS3_MAX_MTU 9728
+
+#define HNS3_BD_SIZE_512_TYPE 0
+#define HNS3_BD_SIZE_1024_TYPE 1
+#define HNS3_BD_SIZE_2048_TYPE 2
+#define HNS3_BD_SIZE_4096_TYPE 3
+
+#define HNS3_RX_FLAG_VLAN_PRESENT 0x1
+#define HNS3_RX_FLAG_L3ID_IPV4 0x0
+#define HNS3_RX_FLAG_L3ID_IPV6 0x1
+#define HNS3_RX_FLAG_L4ID_UDP 0x0
+#define HNS3_RX_FLAG_L4ID_TCP 0x1
+
+#define HNS3_RXD_DMAC_S 0
+#define HNS3_RXD_DMAC_M (0x3 << HNS3_RXD_DMAC_S)
+#define HNS3_RXD_VLAN_S 2
+#define HNS3_RXD_VLAN_M (0x3 << HNS3_RXD_VLAN_S)
+#define HNS3_RXD_L3ID_S 4
+#define HNS3_RXD_L3ID_M (0xf << HNS3_RXD_L3ID_S)
+#define HNS3_RXD_L4ID_S 8
+#define HNS3_RXD_L4ID_M (0xf << HNS3_RXD_L4ID_S)
+#define HNS3_RXD_FRAG_B 12
+#define HNS3_RXD_L2E_B 16
+#define HNS3_RXD_L3E_B 17
+#define HNS3_RXD_L4E_B 18
+#define HNS3_RXD_TRUNCAT_B 19
+#define HNS3_RXD_HOI_B 20
+#define HNS3_RXD_DOI_B 21
+#define HNS3_RXD_OL3E_B 22
+#define HNS3_RXD_OL4E_B 23
+
+#define HNS3_RXD_ODMAC_S 0
+#define HNS3_RXD_ODMAC_M (0x3 << HNS3_RXD_ODMAC_S)
+#define HNS3_RXD_OVLAN_S 2
+#define HNS3_RXD_OVLAN_M (0x3 << HNS3_RXD_OVLAN_S)
+#define HNS3_RXD_OL3ID_S 4
+#define HNS3_RXD_OL3ID_M (0xf << HNS3_RXD_OL3ID_S)
+#define HNS3_RXD_OL4ID_S 8
+#define HNS3_RXD_OL4ID_M (0xf << HNS3_RXD_OL4ID_S)
+#define HNS3_RXD_FBHI_S 12
+#define HNS3_RXD_FBHI_M (0x3 << HNS3_RXD_FBHI_S)
+#define HNS3_RXD_FBLI_S 14
+#define HNS3_RXD_FBLI_M (0x3 << HNS3_RXD_FBLI_S)
+
+#define HNS3_RXD_BDTYPE_S 0
+#define HNS3_RXD_BDTYPE_M (0xf << HNS3_RXD_BDTYPE_S)
+#define HNS3_RXD_VLD_B 4
+#define HNS3_RXD_UDP0_B 5
+#define HNS3_RXD_EXTEND_B 7
+#define HNS3_RXD_FE_B 8
+#define HNS3_RXD_LUM_B 9
+#define HNS3_RXD_CRCP_B 10
+#define HNS3_RXD_L3L4P_B 11
+#define HNS3_RXD_TSIND_S 12
+#define HNS3_RXD_TSIND_M (0x7 << HNS3_RXD_TSIND_S)
+#define HNS3_RXD_LKBK_B 15
+#define HNS3_RXD_HDL_S 16
+#define HNS3_RXD_HDL_M (0x7ff << HNS3_RXD_HDL_S)
+#define HNS3_RXD_HSIND_B 31
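+
+/* The _M/_S pairs above are consumed by hnae_get_field()/hnae_get_bit();
+ * e.g. extracting the inner L3 type from l234_info amounts to
+ * (l234info & HNS3_RXD_L3ID_M) >> HNS3_RXD_L3ID_S (illustrative).
+ */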
+
+#define HNS3_TXD_L3T_S 0
+#define HNS3_TXD_L3T_M (0x3 << HNS3_TXD_L3T_S)
+#define HNS3_TXD_L4T_S 2
+#define HNS3_TXD_L4T_M (0x3 << HNS3_TXD_L4T_S)
+#define HNS3_TXD_L3CS_B 4
+#define HNS3_TXD_L4CS_B 5
+#define HNS3_TXD_VLAN_B 6
+#define HNS3_TXD_TSO_B 7
+
+#define HNS3_TXD_L2LEN_S 8
+#define HNS3_TXD_L2LEN_M (0xff << HNS3_TXD_L2LEN_S)
+#define HNS3_TXD_L3LEN_S 16
+#define HNS3_TXD_L3LEN_M (0xff << HNS3_TXD_L3LEN_S)
+#define HNS3_TXD_L4LEN_S 24
+#define HNS3_TXD_L4LEN_M (0xff << HNS3_TXD_L4LEN_S)
+
+#define HNS3_TXD_OL3T_S 0
+#define HNS3_TXD_OL3T_M (0x3 << HNS3_TXD_OL3T_S)
+#define HNS3_TXD_OVLAN_B 2
+#define HNS3_TXD_MACSEC_B 3
+#define HNS3_TXD_TUNTYPE_S 4
+#define HNS3_TXD_TUNTYPE_M (0xf << HNS3_TXD_TUNTYPE_S)
+
+#define HNS3_TXD_BDTYPE_S 0
+#define HNS3_TXD_BDTYPE_M (0xf << HNS3_TXD_BDTYPE_S)
+#define HNS3_TXD_FE_B 4
+#define HNS3_TXD_SC_S 5
+#define HNS3_TXD_SC_M (0x3 << HNS3_TXD_SC_S)
+#define HNS3_TXD_EXTEND_B 7
+#define HNS3_TXD_VLD_B 8
+#define HNS3_TXD_RI_B 9
+#define HNS3_TXD_RA_B 10
+#define HNS3_TXD_TSYN_B 11
+#define HNS3_TXD_DECTTL_S 12
+#define HNS3_TXD_DECTTL_M (0xf << HNS3_TXD_DECTTL_S)
+
+#define HNS3_TXD_MSS_S 0
+#define HNS3_TXD_MSS_M (0x3fff << HNS3_TXD_MSS_S)
+
+#define HNS3_VECTOR_TX_IRQ BIT_ULL(0)
+#define HNS3_VECTOR_RX_IRQ BIT_ULL(1)
+
+#define HNS3_VECTOR_NOT_INITED 0
+#define HNS3_VECTOR_INITED 1
+
+#define HNS3_MAX_BD_SIZE 65535
+#define HNS3_MAX_BD_PER_FRAG 8
+#define HNS3_MAX_BD_PER_PKT MAX_SKB_FRAGS
+
+#define HNS3_VECTOR_GL0_OFFSET 0x100
+#define HNS3_VECTOR_GL1_OFFSET 0x200
+#define HNS3_VECTOR_GL2_OFFSET 0x300
+#define HNS3_VECTOR_RL_OFFSET 0x900
+#define HNS3_VECTOR_RL_EN_B 6
+
+enum hns3_pkt_l3t_type {
+ HNS3_L3T_NONE,
+ HNS3_L3T_IPV6,
+ HNS3_L3T_IPV4,
+ HNS3_L3T_RESERVED
+};
+
+enum hns3_pkt_l4t_type {
+ HNS3_L4T_UNKNOWN,
+ HNS3_L4T_TCP,
+ HNS3_L4T_UDP,
+ HNS3_L4T_SCTP
+};
+
+enum hns3_pkt_ol3t_type {
+ HNS3_OL3T_NONE,
+ HNS3_OL3T_IPV6,
+ HNS3_OL3T_IPV4_NO_CSUM,
+ HNS3_OL3T_IPV4_CSUM
+};
+
+enum hns3_pkt_tun_type {
+ HNS3_TUN_NONE,
+ HNS3_TUN_MAC_IN_UDP,
+ HNS3_TUN_NVGRE,
+ HNS3_TUN_OTHER
+};
+
+/* hardware spec ring buffer format */
+struct __packed hns3_desc {
+ __le64 addr;
+ union {
+ struct {
+ __le16 vlan_tag;
+ __le16 send_size;
+ union {
+ __le32 type_cs_vlan_tso_len;
+ struct {
+ __u8 type_cs_vlan_tso;
+ __u8 l2_len;
+ __u8 l3_len;
+ __u8 l4_len;
+ };
+ };
+ __le16 outer_vlan_tag;
+ __le16 tv;
+
+ union {
+ __le32 ol_type_vlan_len_msec;
+ struct {
+ __u8 ol_type_vlan_msec;
+ __u8 ol2_len;
+ __u8 ol3_len;
+ __u8 ol4_len;
+ };
+ };
+
+ __le32 paylen;
+ __le16 bdtp_fe_sc_vld_ra_ri;
+ __le16 mss;
+ } tx;
+
+ struct {
+ __le32 l234_info;
+ __le16 pkt_len;
+ __le16 size;
+
+ __le32 rss_hash;
+ __le16 fd_id;
+ __le16 vlan_tag;
+
+ union {
+ __le32 ol_info;
+ struct {
+ __le16 o_dm_vlan_id_fb;
+ __le16 ot_vlan_tag;
+ };
+ };
+
+ __le32 bd_base_info;
+ } rx;
+ };
+};
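+
+/* With the layout above each descriptor is 32 bytes: an 8-byte DMA
+ * address followed by a 24-byte tx/rx union (a size check, assuming
+ * the __packed layout holds).
+ */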
+
+struct hns3_desc_cb {
+ dma_addr_t dma; /* dma address of this desc */
+ void *buf; /* cpu addr for a desc */
+
+ /* priv data for the desc, e.g. skb when used with the IP stack */
+ void *priv;
+ u16 page_offset;
+ u16 reuse_flag;
+
+ u16 length; /* length of the buffer */
+
+ /* desc type, used by the ring user to mark the type of the priv data */
+ u16 type;
+};
+
+enum hns3_pkt_l3type {
+ HNS3_L3_TYPE_IPV4,
+ HNS3_L3_TYPE_IPV6,
+ HNS3_L3_TYPE_ARP,
+ HNS3_L3_TYPE_RARP,
+ HNS3_L3_TYPE_IPV4_OPT,
+ HNS3_L3_TYPE_IPV6_EXT,
+ HNS3_L3_TYPE_LLDP,
+ HNS3_L3_TYPE_BPDU,
+ HNS3_L3_TYPE_MAC_PAUSE,
+ HNS3_L3_TYPE_PFC_PAUSE,/* 0x9*/
+
+ /* reserved for 0xA~0xB*/
+
+ HNS3_L3_TYPE_CNM = 0xc,
+
+ /* reserved for 0xD~0xE*/
+
+ HNS3_L3_TYPE_PARSE_FAIL = 0xf /* must be last */
+};
+
+enum hns3_pkt_l4type {
+ HNS3_L4_TYPE_UDP,
+ HNS3_L4_TYPE_TCP,
+ HNS3_L4_TYPE_GRE,
+ HNS3_L4_TYPE_SCTP,
+ HNS3_L4_TYPE_IGMP,
+ HNS3_L4_TYPE_ICMP,
+
+ /* reserved for 0x6~0xE */
+
+ HNS3_L4_TYPE_PARSE_FAIL = 0xf /* must be last */
+};
+
+enum hns3_pkt_ol3type {
+ HNS3_OL3_TYPE_IPV4 = 0,
+ HNS3_OL3_TYPE_IPV6,
+ /* reserved for 0x2~0x3 */
+ HNS3_OL3_TYPE_IPV4_OPT = 4,
+ HNS3_OL3_TYPE_IPV6_EXT,
+
+ /* reserved for 0x6~0xE*/
+
+ HNS3_OL3_TYPE_PARSE_FAIL = 0xf /* must be last */
+};
+
+enum hns3_pkt_ol4type {
+ HNS3_OL4_TYPE_NO_TUN,
+ HNS3_OL4_TYPE_MAC_IN_UDP,
+ HNS3_OL4_TYPE_NVGRE,
+ HNS3_OL4_TYPE_UNKNOWN
+};
+
+struct ring_stats {
+ u64 io_err_cnt;
+ u64 sw_err_cnt;
+ u64 seg_pkt_cnt;
+ union {
+ struct {
+ u64 tx_pkts;
+ u64 tx_bytes;
+ u64 tx_err_cnt;
+ u64 restart_queue;
+ u64 tx_busy;
+ };
+ struct {
+ u64 rx_pkts;
+ u64 rx_bytes;
+ u64 rx_err_cnt;
+ u64 reuse_pg_cnt;
+ u64 err_pkt_len;
+ u64 non_vld_descs;
+ u64 err_bd_num;
+ u64 l2_err;
+ u64 l3l4_csum_err;
+ };
+ };
+};
+
+struct hns3_enet_ring {
+ u8 __iomem *io_base; /* base io address for the ring */
+ struct hns3_desc *desc; /* dma map address space */
+ struct hns3_desc_cb *desc_cb;
+ struct hns3_enet_ring *next;
+ struct hns3_enet_tqp_vector *tqp_vector;
+ struct hnae3_queue *tqp;
+ char ring_name[HNS3_RING_NAME_LEN];
+ struct device *dev; /* will be used for DMA mapping of descriptors */
+
+ /* statistic */
+ struct ring_stats stats;
+ struct u64_stats_sync syncp;
+
+ dma_addr_t desc_dma_addr;
+ u32 buf_size; /* size for hnae_desc->addr, preset by AE */
+ u16 desc_num; /* total number of desc */
+ u16 max_desc_num_per_pkt;
+ u16 max_raw_data_sz_per_desc;
+ u16 max_pkt_size;
+ int next_to_use; /* idx of next spare desc */
+
+ /* idx of latest sent desc, the ring is empty when equal to
+ * next_to_use
+ */
+ int next_to_clean;
+
+ u32 flag; /* ring attribute */
+ int irq_init_flag;
+
+ int numa_node;
+ cpumask_t affinity_mask;
+};
+
+struct hns_queue;
+
+struct hns3_nic_ring_data {
+ struct hns3_enet_ring *ring;
+ struct napi_struct napi;
+ int queue_index;
+ int (*poll_one)(struct hns3_nic_ring_data *, int, void *);
+ void (*ex_process)(struct hns3_nic_ring_data *, struct sk_buff *);
+ void (*fini_process)(struct hns3_nic_ring_data *);
+};
+
+struct hns3_nic_ops {
+ int (*fill_desc)(struct hns3_enet_ring *ring, void *priv,
+ int size, dma_addr_t dma, int frag_end,
+ enum hns_desc_type type);
+ int (*maybe_stop_tx)(struct sk_buff **out_skb,
+ int *bnum, struct hns3_enet_ring *ring);
+ void (*get_rxd_bnum)(u32 bnum_flag, int *out_bnum);
+};
+
+enum hns3_flow_level_range {
+ HNS3_FLOW_LOW = 0,
+ HNS3_FLOW_MID = 1,
+ HNS3_FLOW_HIGH = 2,
+ HNS3_FLOW_ULTRA = 3,
+};
+
+enum hns3_link_mode_bits {
+ HNS3_LM_FIBRE_BIT = BIT(0),
+ HNS3_LM_AUTONEG_BIT = BIT(1),
+ HNS3_LM_TP_BIT = BIT(2),
+ HNS3_LM_PAUSE_BIT = BIT(3),
+ HNS3_LM_BACKPLANE_BIT = BIT(4),
+ HNS3_LM_10BASET_HALF_BIT = BIT(5),
+ HNS3_LM_10BASET_FULL_BIT = BIT(6),
+ HNS3_LM_100BASET_HALF_BIT = BIT(7),
+ HNS3_LM_100BASET_FULL_BIT = BIT(8),
+ HNS3_LM_1000BASET_FULL_BIT = BIT(9),
+ HNS3_LM_10000BASEKR_FULL_BIT = BIT(10),
+ HNS3_LM_25000BASEKR_FULL_BIT = BIT(11),
+ HNS3_LM_40000BASELR4_FULL_BIT = BIT(12),
+ HNS3_LM_50000BASEKR2_FULL_BIT = BIT(13),
+ HNS3_LM_100000BASEKR4_FULL_BIT = BIT(14),
+ HNS3_LM_COUNT = 15
+};
+
+#define HNS3_INT_GL_50K 0x000A
+#define HNS3_INT_GL_20K 0x0019
+#define HNS3_INT_GL_18K 0x001B
+#define HNS3_INT_GL_8K 0x003E
+
+struct hns3_enet_ring_group {
+ /* array of pointers to rings */
+ struct hns3_enet_ring *ring;
+ u64 total_bytes; /* total bytes processed this group */
+ u64 total_packets; /* total packets processed this group */
+ u16 count;
+ enum hns3_flow_level_range flow_level;
+ u16 int_gl;
+};
+
+struct hns3_enet_tqp_vector {
+ struct hnae3_handle *handle;
+ u8 __iomem *mask_addr;
+ int vector_irq;
+ int irq_init_flag;
+
+ u16 idx; /* index in the TQP vector array per handle. */
+
+ struct napi_struct napi;
+
+ struct hns3_enet_ring_group rx_group;
+ struct hns3_enet_ring_group tx_group;
+
+ u16 num_tqps; /* total number of tqps in TQP vector */
+
+ cpumask_t affinity_mask;
+ char name[HNAE3_INT_NAME_LEN];
+
+ /* when 0 should adjust interrupt coalesce parameter */
+ u8 int_adapt_down;
+} ____cacheline_internodealigned_in_smp;
+
+enum hns3_udp_tnl_type {
+ HNS3_UDP_TNL_VXLAN,
+ HNS3_UDP_TNL_GENEVE,
+ HNS3_UDP_TNL_MAX,
+};
+
+struct hns3_udp_tunnel {
+ u16 dst_port;
+ int used;
+};
+
+struct hns3_nic_priv {
+ struct hnae3_handle *ae_handle;
+ u32 enet_ver;
+ u32 port_id;
+ struct net_device *netdev;
+ struct device *dev;
+ struct hns3_nic_ops ops;
+
+ /* the cb for nic to manage the ring buffer; the first half of the
+ * array is for Tx rings and the second half is for Rx rings
+ */
+ struct hns3_nic_ring_data *ring_data;
+ struct hns3_enet_tqp_vector *tqp_vector;
+ u16 vector_num;
+
+ /* The most recently read link state */
+ int link;
+ u64 tx_timeout_count;
+
+ unsigned long state;
+
+ struct timer_list service_timer;
+
+ struct work_struct service_task;
+
+ struct notifier_block notifier_block;
+ /* Vxlan/Geneve information */
+ struct hns3_udp_tunnel udp_tnl[HNS3_UDP_TNL_MAX];
+};
+
+union l3_hdr_info {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+};
+
+union l4_hdr_info {
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ unsigned char *hdr;
+};
+
+/* the distance between [begin, end) in a ring buffer
+ * note: there is an unused slot between begin and end
+ */
+static inline int ring_dist(struct hns3_enet_ring *ring, int begin, int end)
+{
+ return (end - begin + ring->desc_num) % ring->desc_num;
+}
+
+static inline int ring_space(struct hns3_enet_ring *ring)
+{
+ return ring->desc_num -
+ ring_dist(ring, ring->next_to_clean, ring->next_to_use) - 1;
+}
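+
+/* Worked example (illustrative): desc_num = 8, next_to_clean = 2 and
+ * next_to_use = 5 give ring_dist = 3 used BDs and ring_space =
+ * 8 - 3 - 1 = 4; the "- 1" keeps one slot unused so a full ring can
+ * be told apart from an empty one.
+ */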
+
+static inline int is_ring_empty(struct hns3_enet_ring *ring)
+{
+ return ring->next_to_use == ring->next_to_clean;
+}
+
+static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
+{
+ u8 __iomem *reg_addr = READ_ONCE(base);
+
+ writel(value, reg_addr + reg);
+}
+
+#define hns3_write_dev(a, reg, value) \
+ hns3_write_reg((a)->io_base, (reg), (value))
+
+#define hnae_queue_xmit(tqp, buf_num) writel_relaxed(buf_num, \
+ (tqp)->io_base + HNS3_RING_TX_RING_TAIL_REG)
+
+#define ring_to_dev(ring) (&(ring)->tqp->handle->pdev->dev)
+
+#define ring_to_dma_dir(ring) (HNAE3_IS_TX_RING(ring) ? \
+ DMA_TO_DEVICE : DMA_FROM_DEVICE)
+
+#define tx_ring_data(priv, idx) ((priv)->ring_data[idx])
+
+#define hnae_buf_size(_ring) ((_ring)->buf_size)
+#define hnae_page_order(_ring) (get_order(hnae_buf_size(_ring)))
+#define hnae_page_size(_ring) (PAGE_SIZE << hnae_page_order(_ring))
+
+/* iterator for handling rings in ring group */
+#define hns3_for_each_ring(pos, head) \
+ for (pos = (head).ring; pos; pos = pos->next)
+
+void hns3_ethtool_set_ops(struct net_device *netdev);
+
+int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget);
+#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
new file mode 100644
index 0000000..d636399
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
@@ -0,0 +1,493 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+
+#include "hns3_enet.h"
+
+struct hns3_stats {
+ char stats_string[ETH_GSTRING_LEN];
+ int stats_size;
+ int stats_offset;
+};
+
+/* tqp related stats */
+#define HNS3_TQP_STAT(_string, _member) { \
+ .stats_string = _string, \
+ .stats_size = FIELD_SIZEOF(struct ring_stats, _member), \
+ .stats_offset = offsetof(struct hns3_enet_ring, stats) + \
+ offsetof(struct ring_stats, _member), \
+}
+
+static const struct hns3_stats hns3_txq_stats[] = {
+ /* Tx per-queue statistics */
+ HNS3_TQP_STAT("tx_io_err_cnt", io_err_cnt),
+ HNS3_TQP_STAT("tx_sw_err_cnt", sw_err_cnt),
+ HNS3_TQP_STAT("tx_seg_pkt_cnt", seg_pkt_cnt),
+ HNS3_TQP_STAT("tx_pkts", tx_pkts),
+ HNS3_TQP_STAT("tx_bytes", tx_bytes),
+ HNS3_TQP_STAT("tx_err_cnt", tx_err_cnt),
+ HNS3_TQP_STAT("tx_restart_queue", restart_queue),
+ HNS3_TQP_STAT("tx_busy", tx_busy),
+};
+
+#define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats)
+
+static const struct hns3_stats hns3_rxq_stats[] = {
+ /* Rx per-queue statistics */
+ HNS3_TQP_STAT("rx_io_err_cnt", io_err_cnt),
+ HNS3_TQP_STAT("rx_sw_err_cnt", sw_err_cnt),
+ HNS3_TQP_STAT("rx_seg_pkt_cnt", seg_pkt_cnt),
+ HNS3_TQP_STAT("rx_pkts", rx_pkts),
+ HNS3_TQP_STAT("rx_bytes", rx_bytes),
+ HNS3_TQP_STAT("rx_err_cnt", rx_err_cnt),
+ HNS3_TQP_STAT("rx_reuse_pg_cnt", reuse_pg_cnt),
+ HNS3_TQP_STAT("rx_err_pkt_len", err_pkt_len),
+ HNS3_TQP_STAT("rx_non_vld_descs", non_vld_descs),
+ HNS3_TQP_STAT("rx_err_bd_num", err_bd_num),
+ HNS3_TQP_STAT("rx_l2_err", l2_err),
+ HNS3_TQP_STAT("rx_l3l4_csum_err", l3l4_csum_err),
+};
+
+#define HNS3_RXQ_STATS_COUNT ARRAY_SIZE(hns3_rxq_stats)
+
+#define HNS3_TQP_STATS_COUNT (HNS3_TXQ_STATS_COUNT + HNS3_RXQ_STATS_COUNT)
+
+struct hns3_link_mode_mapping {
+ u32 hns3_link_mode;
+ u32 ethtool_link_mode;
+};
+
+static const struct hns3_link_mode_mapping hns3_lm_map[] = {
+ {HNS3_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT},
+ {HNS3_LM_AUTONEG_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
+ {HNS3_LM_TP_BIT, ETHTOOL_LINK_MODE_TP_BIT},
+ {HNS3_LM_PAUSE_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
+ {HNS3_LM_BACKPLANE_BIT, ETHTOOL_LINK_MODE_Backplane_BIT},
+ {HNS3_LM_10BASET_HALF_BIT, ETHTOOL_LINK_MODE_10baseT_Half_BIT},
+ {HNS3_LM_10BASET_FULL_BIT, ETHTOOL_LINK_MODE_10baseT_Full_BIT},
+ {HNS3_LM_100BASET_HALF_BIT, ETHTOOL_LINK_MODE_100baseT_Half_BIT},
+ {HNS3_LM_100BASET_FULL_BIT, ETHTOOL_LINK_MODE_100baseT_Full_BIT},
+ {HNS3_LM_1000BASET_FULL_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
+};
+
+static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd,
+ bool is_advertised)
+{
+ int i;
+
+ if (is_advertised)
+ ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+ else
+ ethtool_link_ksettings_zero_link_mode(cmd, supported);
+
+ for (i = 0; i < ARRAY_SIZE(hns3_lm_map); i++) {
+ if (!(caps & hns3_lm_map[i].hns3_link_mode))
+ continue;
+
+ if (is_advertised)
+ __set_bit(hns3_lm_map[i].ethtool_link_mode,
+ cmd->link_modes.advertising);
+ else
+ __set_bit(hns3_lm_map[i].ethtool_link_mode,
+ cmd->link_modes.supported);
+ }
+}
+
+static int hns3_get_sset_count(struct net_device *netdev, int stringset)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ const struct hnae3_ae_ops *ops = h->ae_algo->ops;
+
+ if (!ops->get_sset_count)
+ return -EOPNOTSUPP;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ return ((HNS3_TQP_STATS_COUNT * h->kinfo.num_tqps) +
+ ops->get_sset_count(h, stringset));
+
+ case ETH_SS_TEST:
+ return ops->get_sset_count(h, stringset);
+ }
+
+ return 0;
+}
+
+static void *hns3_update_strings(u8 *data, const struct hns3_stats *stats,
+ u32 stat_count, u32 num_tqps)
+{
+#define MAX_PREFIX_SIZE (8 + 4)
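+ /* 12 bytes is presumably sized for the "rcb_q%d_" prefix: 6 literal
+ * characters plus up to 5 queue-index digits and a NUL terminator.
+ */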
+ u32 size_left;
+ u32 i, j;
+ u32 n1;
+
+ for (i = 0; i < num_tqps; i++) {
+ for (j = 0; j < stat_count; j++) {
+ data[ETH_GSTRING_LEN - 1] = '\0';
+
+ /* first, prepend the prefix string */
+ n1 = snprintf(data, MAX_PREFIX_SIZE, "rcb_q%d_", i);
+ n1 = min_t(uint, n1, MAX_PREFIX_SIZE - 1);
+ size_left = (ETH_GSTRING_LEN - 1) - n1;
+
+ /* now, concatenate the stats string to it */
+ strncat(data, stats[j].stats_string, size_left);
+ data += ETH_GSTRING_LEN;
+ }
+ }
+
+ return data;
+}
+
+static u8 *hns3_get_strings_tqps(struct hnae3_handle *handle, u8 *data)
+{
+ struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+
+ /* get strings for Tx */
+ data = hns3_update_strings(data, hns3_txq_stats, HNS3_TXQ_STATS_COUNT,
+ kinfo->num_tqps);
+
+ /* get strings for Rx */
+ data = hns3_update_strings(data, hns3_rxq_stats, HNS3_RXQ_STATS_COUNT,
+ kinfo->num_tqps);
+
+ return data;
+}
+
+static void hns3_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ const struct hnae3_ae_ops *ops = h->ae_algo->ops;
+ char *buff = (char *)data;
+
+ if (!ops->get_strings)
+ return;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ buff = hns3_get_strings_tqps(h, buff);
+ h->ae_algo->ops->get_strings(h, stringset, (u8 *)buff);
+ break;
+ case ETH_SS_TEST:
+ ops->get_strings(h, stringset, data);
+ break;
+ }
+}
+
+static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data)
+{
+ struct hns3_nic_priv *nic_priv = (struct hns3_nic_priv *)handle->priv;
+ struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+ struct hns3_enet_ring *ring;
+ u8 *stat;
+ u32 i, j;
+
+ /* get stats for Tx */
+ for (i = 0; i < kinfo->num_tqps; i++) {
+ ring = nic_priv->ring_data[i].ring;
+ for (j = 0; j < HNS3_TXQ_STATS_COUNT; j++) {
+ stat = (u8 *)ring + hns3_txq_stats[j].stats_offset;
+ *data++ = *(u64 *)stat;
+ }
+ }
+
+ /* get stats for Rx */
+ for (i = 0; i < kinfo->num_tqps; i++) {
+ ring = nic_priv->ring_data[i + kinfo->num_tqps].ring;
+ for (j = 0; j < HNS3_RXQ_STATS_COUNT; j++) {
+ stat = (u8 *)ring + hns3_rxq_stats[j].stats_offset;
+ *data++ = *(u64 *)stat;
+ }
+ }
+
+ return data;
+}
+
+/* hns3_get_stats - get detail statistics.
+ * @netdev: net device
+ * @stats: statistics info.
+ * @data: statistics data.
+ */
+void hns3_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ u64 *p = data;
+
+ if (!h->ae_algo->ops->get_stats || !h->ae_algo->ops->update_stats) {
+ netdev_err(netdev, "could not get any statistics\n");
+ return;
+ }
+
+ h->ae_algo->ops->update_stats(h, &netdev->stats);
+
+ /* get per-queue stats */
+ p = hns3_get_stats_tqps(h, p);
+
+ /* get MAC & other misc hardware stats */
+ h->ae_algo->ops->get_stats(h, p);
+}
+
+static void hns3_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ strncpy(drvinfo->version, hns3_driver_version,
+ sizeof(drvinfo->version));
+ drvinfo->version[sizeof(drvinfo->version) - 1] = '\0';
+
+ strncpy(drvinfo->driver, h->pdev->driver->name,
+ sizeof(drvinfo->driver));
+ drvinfo->driver[sizeof(drvinfo->driver) - 1] = '\0';
+
+ strncpy(drvinfo->bus_info, pci_name(h->pdev),
+ sizeof(drvinfo->bus_info));
+ drvinfo->bus_info[ETHTOOL_BUSINFO_LEN - 1] = '\0';
+
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "0x%08x",
+ priv->ae_handle->ae_algo->ops->get_fw_version(h));
+}
+
+static u32 hns3_get_link(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h;
+
+ h = priv->ae_handle;
+
+ if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_status)
+ return h->ae_algo->ops->get_status(h);
+ else
+ return 0;
+}
+
+static void hns3_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *param)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ int queue_num = priv->ae_handle->kinfo.num_tqps;
+
+ param->tx_max_pending = HNS3_RING_MAX_PENDING;
+ param->rx_max_pending = HNS3_RING_MAX_PENDING;
+
+ param->tx_pending = priv->ring_data[0].ring->desc_num;
+ param->rx_pending = priv->ring_data[queue_num].ring->desc_num;
+}
+
+static void hns3_get_pauseparam(struct net_device *netdev,
+ struct ethtool_pauseparam *param)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_pauseparam)
+ h->ae_algo->ops->get_pauseparam(h, &param->autoneg,
+ &param->rx_pause, &param->tx_pause);
+}
+
+static int hns3_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+ u32 supported_caps;
+ u32 advertised_caps;
+ u8 media_type = HNAE3_MEDIA_TYPE_UNKNOWN;
+ u8 link_stat;
+ u8 auto_neg;
+ u8 duplex;
+ u32 speed;
+
+ if (!h->ae_algo || !h->ae_algo->ops)
+ return -EOPNOTSUPP;
+
+ /* 1.auto_neg & speed & duplex from cmd */
+ if (h->ae_algo->ops->get_ksettings_an_result) {
+ h->ae_algo->ops->get_ksettings_an_result(h, &auto_neg,
+ &speed, &duplex);
+ cmd->base.autoneg = auto_neg;
+ cmd->base.speed = speed;
+ cmd->base.duplex = duplex;
+
+ link_stat = hns3_get_link(netdev);
+ if (!link_stat) {
+ cmd->base.speed = (u32)SPEED_UNKNOWN;
+ cmd->base.duplex = DUPLEX_UNKNOWN;
+ }
+ }
+
+ /* 2.media_type get from bios parameter block */
+ if (h->ae_algo->ops->get_media_type) {
+ h->ae_algo->ops->get_media_type(h, &media_type);
+
+ switch (media_type) {
+ case HNAE3_MEDIA_TYPE_FIBER:
+ cmd->base.port = PORT_FIBRE;
+ supported_caps = HNS3_LM_FIBRE_BIT |
+ HNS3_LM_AUTONEG_BIT |
+ HNS3_LM_PAUSE_BIT |
+ HNS3_LM_1000BASET_FULL_BIT;
+
+ advertised_caps = supported_caps;
+ break;
+ case HNAE3_MEDIA_TYPE_COPPER:
+ cmd->base.port = PORT_TP;
+ supported_caps = HNS3_LM_TP_BIT |
+ HNS3_LM_AUTONEG_BIT |
+ HNS3_LM_PAUSE_BIT |
+ HNS3_LM_1000BASET_FULL_BIT |
+ HNS3_LM_100BASET_FULL_BIT |
+ HNS3_LM_100BASET_HALF_BIT |
+ HNS3_LM_10BASET_FULL_BIT |
+ HNS3_LM_10BASET_HALF_BIT;
+ advertised_caps = supported_caps;
+ break;
+ case HNAE3_MEDIA_TYPE_BACKPLANE:
+ cmd->base.port = PORT_NONE;
+ supported_caps = HNS3_LM_BACKPLANE_BIT |
+ HNS3_LM_PAUSE_BIT |
+ HNS3_LM_AUTONEG_BIT |
+ HNS3_LM_1000BASET_FULL_BIT |
+ HNS3_LM_100BASET_FULL_BIT |
+ HNS3_LM_100BASET_HALF_BIT |
+ HNS3_LM_10BASET_FULL_BIT |
+ HNS3_LM_10BASET_HALF_BIT;
+
+ advertised_caps = supported_caps;
+ break;
+ case HNAE3_MEDIA_TYPE_UNKNOWN:
+ default:
+ cmd->base.port = PORT_OTHER;
+ supported_caps = 0;
+ advertised_caps = 0;
+ break;
+ }
+
+ /* now, map driver link modes to ethtool link modes */
+ hns3_driv_to_eth_caps(supported_caps, cmd, false);
+ hns3_driv_to_eth_caps(advertised_caps, cmd, true);
+ }
+
+ /* 3. mdix_ctrl and mdix are read from the PHY registers */
+ if (h->ae_algo->ops->get_mdix_mode)
+ h->ae_algo->ops->get_mdix_mode(h, &cmd->base.eth_tp_mdix_ctrl,
+ &cmd->base.eth_tp_mdix);
+ /* 4. mdio_support */
+ cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C22;
+
+ return 0;
+}
+
+static u32 hns3_get_rss_key_size(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (!h->ae_algo || !h->ae_algo->ops ||
+ !h->ae_algo->ops->get_rss_key_size)
+ return 0;
+
+ return h->ae_algo->ops->get_rss_key_size(h);
+}
+
+static u32 hns3_get_rss_indir_size(struct net_device *netdev)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (!h->ae_algo || !h->ae_algo->ops ||
+ !h->ae_algo->ops->get_rss_indir_size)
+ return 0;
+
+ return h->ae_algo->ops->get_rss_indir_size(h);
+}
+
+static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss)
+ return -EOPNOTSUPP;
+
+ return h->ae_algo->ops->get_rss(h, indir, key, hfunc);
+}
+
+static int hns3_set_rss(struct net_device *netdev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss)
+ return -EOPNOTSUPP;
+
+ /* currently we only support Toeplitz hash */
+ if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && (hfunc != ETH_RSS_HASH_TOP)) {
+ netdev_err(netdev,
+ "hash func not supported (only Toeplitz hash)\n");
+ return -EOPNOTSUPP;
+ }
+ if (!indir) {
+ netdev_err(netdev,
+ "set rss failed for indir is empty\n");
+ return -EOPNOTSUPP;
+ }
+
+ return h->ae_algo->ops->set_rss(h, indir, key, hfunc);
+}
+
+static int hns3_get_rxnfc(struct net_device *netdev,
+ struct ethtool_rxnfc *cmd,
+ u32 *rule_locs)
+{
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hnae3_handle *h = priv->ae_handle;
+
+ if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_tc_size)
+ return -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+ cmd->data = h->ae_algo->ops->get_tc_size(h);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static const struct ethtool_ops hns3_ethtool_ops = {
+ .get_drvinfo = hns3_get_drvinfo,
+ .get_link = hns3_get_link,
+ .get_ringparam = hns3_get_ringparam,
+ .get_pauseparam = hns3_get_pauseparam,
+ .get_strings = hns3_get_strings,
+ .get_ethtool_stats = hns3_get_stats,
+ .get_sset_count = hns3_get_sset_count,
+ .get_rxnfc = hns3_get_rxnfc,
+ .get_rxfh_key_size = hns3_get_rss_key_size,
+ .get_rxfh_indir_size = hns3_get_rss_indir_size,
+ .get_rxfh = hns3_get_rss,
+ .set_rxfh = hns3_set_rss,
+ .get_link_ksettings = hns3_get_link_ksettings,
+};
+
+void hns3_ethtool_set_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &hns3_ethtool_ops;
+}
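+
+/*
+ * Usage sketch (illustrative only): the probe path is expected to attach
+ * these ops before registering the netdev:
+ *
+ * hns3_ethtool_set_ops(netdev);
+ * err = register_netdev(netdev);
+ */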
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
index c6164a9..c8c7ad2 100644
--- a/drivers/net/ethernet/hp/hp100.c
+++ b/drivers/net/ethernet/hp/hp100.c
@@ -194,7 +194,7 @@ static const char *hp100_isa_tbl[] = {
};
#endif
-static struct eisa_device_id hp100_eisa_tbl[] = {
+static const struct eisa_device_id hp100_eisa_tbl[] = {
{ "HWPF180" }, /* HP J2577 rev A */
{ "HWP1920" }, /* HP 27248B */
{ "HWP1940" }, /* HP J2577 */
diff --git a/drivers/net/ethernet/huawei/Kconfig b/drivers/net/ethernet/huawei/Kconfig
new file mode 100644
index 0000000..c1a95ae
--- /dev/null
+++ b/drivers/net/ethernet/huawei/Kconfig
@@ -0,0 +1,19 @@
+#
+# Huawei driver configuration
+#
+
+config NET_VENDOR_HUAWEI
+ bool "Huawei devices"
+ default y
+ ---help---
+ If you have a network (Ethernet) card belonging to this class, say Y.
+ Note that the answer to this question doesn't directly affect the
+ kernel: saying N will just cause the configurator to skip all
+ the questions about Huawei cards. If you say Y, you will be asked
+ for your specific card in the following questions.
+
+if NET_VENDOR_HUAWEI
+
+source "drivers/net/ethernet/huawei/hinic/Kconfig"
+
+endif # NET_VENDOR_HUAWEI
diff --git a/drivers/net/ethernet/huawei/Makefile b/drivers/net/ethernet/huawei/Makefile
new file mode 100644
index 0000000..5c37cc8
--- /dev/null
+++ b/drivers/net/ethernet/huawei/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Huawei device drivers.
+#
+
+obj-$(CONFIG_HINIC) += hinic/
diff --git a/drivers/net/ethernet/huawei/hinic/Kconfig b/drivers/net/ethernet/huawei/hinic/Kconfig
new file mode 100644
index 0000000..08db249
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/Kconfig
@@ -0,0 +1,12 @@
+#
+# Huawei driver configuration
+#
+
+config HINIC
+ tristate "Huawei Intelligent PCIE Network Interface Card"
+ depends on (PCI_MSI && X86)
+ ---help---
+ This driver supports HiNIC PCIe Ethernet cards.
+ To compile this driver as part of the kernel, choose Y here.
+ To compile it as a module (named hinic), choose M.
+ If unsure, choose N.
diff --git a/drivers/net/ethernet/huawei/hinic/Makefile b/drivers/net/ethernet/huawei/hinic/Makefile
new file mode 100644
index 0000000..289ce88b
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/Makefile
@@ -0,0 +1,6 @@
+obj-$(CONFIG_HINIC) += hinic.o
+
+hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \
+ hinic_hw_io.o hinic_hw_qp.o hinic_hw_cmdq.o hinic_hw_wq.o \
+ hinic_hw_mgmt.o hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o \
+ hinic_common.o
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_common.c b/drivers/net/ethernet/huawei/hinic/hinic_common.c
new file mode 100644
index 0000000..02c74fd
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_common.c
@@ -0,0 +1,80 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#include "hinic_common.h"
+
+/**
+ * hinic_cpu_to_be32 - convert data to big endian 32 bit format
+ * @data: the data to convert
+ * @len: length of data to convert
+ **/
+void hinic_cpu_to_be32(void *data, int len)
+{
+ u32 *mem = data;
+ int i;
+
+ len = len / sizeof(u32);
+
+ for (i = 0; i < len; i++) {
+ *mem = cpu_to_be32(*mem);
+ mem++;
+ }
+}
+
+/**
+ * hinic_be32_to_cpu - convert data from big endian 32 bit format
+ * @data: the data to convert
+ * @len: length of data to convert
+ **/
+void hinic_be32_to_cpu(void *data, int len)
+{
+ u32 *mem = data;
+ int i;
+
+ len = len / sizeof(u32);
+
+ for (i = 0; i < len; i++) {
+ *mem = be32_to_cpu(*mem);
+ mem++;
+ }
+}
+
+/**
+ * hinic_set_sge - set dma area in scatter gather entry
+ * @sge: scatter gather entry
+ * @addr: dma address
+ * @len: length of relevant data in the dma address
+ **/
+void hinic_set_sge(struct hinic_sge *sge, dma_addr_t addr, int len)
+{
+ sge->hi_addr = upper_32_bits(addr);
+ sge->lo_addr = lower_32_bits(addr);
+ sge->len = len;
+}
+
+/**
+ * hinic_sge_to_dma - get dma address from scatter gather entry
+ * @sge: scatter gather entry
+ *
+ * Return dma address of sg entry
+ **/
+dma_addr_t hinic_sge_to_dma(struct hinic_sge *sge)
+{
+ return (dma_addr_t)((((u64)sge->hi_addr) << 32) | sge->lo_addr);
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_common.h b/drivers/net/ethernet/huawei/hinic/hinic_common.h
new file mode 100644
index 0000000..2c06b76
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_common.h
@@ -0,0 +1,38 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_COMMON_H
+#define HINIC_COMMON_H
+
+#include <linux/types.h>
+
+#define UPPER_8_BITS(data) (((data) >> 8) & 0xFF)
+#define LOWER_8_BITS(data) ((data) & 0xFF)
+
+struct hinic_sge {
+ u32 hi_addr;
+ u32 lo_addr;
+ u32 len;
+};
+
+void hinic_cpu_to_be32(void *data, int len);
+
+void hinic_be32_to_cpu(void *data, int len);
+
+void hinic_set_sge(struct hinic_sge *sge, dma_addr_t addr, int len);
+
+dma_addr_t hinic_sge_to_dma(struct hinic_sge *sge);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
new file mode 100644
index 0000000..5186cc9
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
@@ -0,0 +1,64 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_DEV_H
+#define HINIC_DEV_H
+
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <linux/bitops.h>
+
+#include "hinic_hw_dev.h"
+#include "hinic_tx.h"
+#include "hinic_rx.h"
+
+#define HINIC_DRV_NAME "hinic"
+
+enum hinic_flags {
+ HINIC_LINK_UP = BIT(0),
+ HINIC_INTF_UP = BIT(1),
+};
+
+struct hinic_rx_mode_work {
+ struct work_struct work;
+ u32 rx_mode;
+};
+
+struct hinic_dev {
+ struct net_device *netdev;
+ struct hinic_hwdev *hwdev;
+
+ u32 msg_enable;
+ unsigned int tx_weight;
+ unsigned int rx_weight;
+
+ unsigned int flags;
+
+ struct semaphore mgmt_lock;
+ unsigned long *vlan_bitmap;
+
+ struct hinic_rx_mode_work rx_mode_work;
+ struct workqueue_struct *workq;
+
+ struct hinic_txq *txqs;
+ struct hinic_rxq *rxqs;
+
+ struct hinic_txq_stats tx_stats;
+ struct hinic_rxq_stats rx_stats;
+};
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
new file mode 100644
index 0000000..c40603a
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
@@ -0,0 +1,978 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/jiffies.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+#include <linux/semaphore.h>
+#include <asm/byteorder.h>
+#include <asm/barrier.h>
+
+#include "hinic_hw_csr.h"
+#include "hinic_hw_if.h"
+#include "hinic_hw_api_cmd.h"
+
+#define API_CHAIN_NUM_CELLS 32
+
+#define API_CMD_CELL_SIZE_SHIFT 6
+#define API_CMD_CELL_SIZE_MIN (BIT(API_CMD_CELL_SIZE_SHIFT))
+
+#define API_CMD_CELL_SIZE(cell_size) \
+ (((cell_size) >= API_CMD_CELL_SIZE_MIN) ? \
+ (1 << (fls(cell_size - 1))) : API_CMD_CELL_SIZE_MIN)
+
+#define API_CMD_CELL_SIZE_VAL(size) \
+ ilog2((size) >> API_CMD_CELL_SIZE_SHIFT)
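+
+/*
+ * Example: cell sizes below the 64 byte minimum are clamped to 64 and
+ * larger sizes are rounded up to the next power of 2, so the 40 byte
+ * hinic_api_cmd_cell yields API_CMD_CELL_SIZE(40) = 64, which
+ * API_CMD_CELL_SIZE_VAL encodes as ilog2(64 >> 6) = 0.
+ */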
+
+#define API_CMD_BUF_SIZE 2048
+
+/* Sizes of the members in hinic_api_cmd_cell */
+#define API_CMD_CELL_DESC_SIZE 8
+#define API_CMD_CELL_DATA_ADDR_SIZE 8
+
+#define API_CMD_CELL_ALIGNMENT 8
+
+#define API_CMD_TIMEOUT 1000
+
+#define MASKED_IDX(chain, idx) ((idx) & ((chain)->num_cells - 1))
+
+#define SIZE_8BYTES(size) (ALIGN((size), 8) >> 3)
+#define SIZE_4BYTES(size) (ALIGN((size), 4) >> 2)
+
+#define RD_DMA_ATTR_DEFAULT 0
+#define WR_DMA_ATTR_DEFAULT 0
+
+enum api_cmd_data_format {
+ SGE_DATA = 1, /* cell data is passed by hw address */
+};
+
+enum api_cmd_type {
+ API_CMD_WRITE = 0,
+};
+
+enum api_cmd_bypass {
+ NO_BYPASS = 0,
+ BYPASS = 1,
+};
+
+enum api_cmd_xor_chk_level {
+ XOR_CHK_DIS = 0,
+
+ XOR_CHK_ALL = 3,
+};
+
+static u8 xor_chksum_set(void *data)
+{
+ int idx;
+ u8 *val, checksum = 0;
+
+ val = data;
+
+ for (idx = 0; idx < 7; idx++)
+ checksum ^= val[idx];
+
+ return checksum;
+}
+
+static void set_prod_idx(struct hinic_api_cmd_chain *chain)
+{
+ enum hinic_api_cmd_chain_type chain_type = chain->chain_type;
+ struct hinic_hwif *hwif = chain->hwif;
+ u32 addr, prod_idx;
+
+ addr = HINIC_CSR_API_CMD_CHAIN_PI_ADDR(chain_type);
+ prod_idx = hinic_hwif_read_reg(hwif, addr);
+
+ prod_idx = HINIC_API_CMD_PI_CLEAR(prod_idx, IDX);
+
+ prod_idx |= HINIC_API_CMD_PI_SET(chain->prod_idx, IDX);
+
+ hinic_hwif_write_reg(hwif, addr, prod_idx);
+}
+
+static u32 get_hw_cons_idx(struct hinic_api_cmd_chain *chain)
+{
+ u32 addr, val;
+
+ addr = HINIC_CSR_API_CMD_STATUS_ADDR(chain->chain_type);
+ val = hinic_hwif_read_reg(chain->hwif, addr);
+
+ return HINIC_API_CMD_STATUS_GET(val, CONS_IDX);
+}
+
+/**
+ * chain_busy - check if the chain is still processing last requests
+ * @chain: chain to check
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int chain_busy(struct hinic_api_cmd_chain *chain)
+{
+ struct hinic_hwif *hwif = chain->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ u32 prod_idx;
+
+ switch (chain->chain_type) {
+ case HINIC_API_CMD_WRITE_TO_MGMT_CPU:
+ chain->cons_idx = get_hw_cons_idx(chain);
+ prod_idx = chain->prod_idx;
+
+ /* check for space for a new command */
+ if (chain->cons_idx == MASKED_IDX(chain, prod_idx + 1)) {
+ dev_err(&pdev->dev, "API CMD chain %d is busy\n",
+ chain->chain_type);
+ return -EBUSY;
+ }
+ break;
+
+ default:
+ dev_err(&pdev->dev, "Unknown API CMD Chain type\n");
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * get_cell_data_size - get the data size of a specific cell type
+ * @type: chain type
+ *
+ * Return the data (descriptor + address) size in the cell
+ **/
+static u8 get_cell_data_size(enum hinic_api_cmd_chain_type type)
+{
+ u8 cell_data_size = 0;
+
+ switch (type) {
+ case HINIC_API_CMD_WRITE_TO_MGMT_CPU:
+ cell_data_size = ALIGN(API_CMD_CELL_DESC_SIZE +
+ API_CMD_CELL_DATA_ADDR_SIZE,
+ API_CMD_CELL_ALIGNMENT);
+ break;
+ default:
+ break;
+ }
+
+ return cell_data_size;
+}
+
+/**
+ * prepare_cell_ctrl - prepare the ctrl of the cell for the command
+ * @cell_ctrl: cell control field to fill with the control value
+ * @data_size: the size of the data in the cell
+ **/
+static void prepare_cell_ctrl(u64 *cell_ctrl, u16 data_size)
+{
+ u8 chksum;
+ u64 ctrl;
+
+ ctrl = HINIC_API_CMD_CELL_CTRL_SET(SIZE_8BYTES(data_size), DATA_SZ) |
+ HINIC_API_CMD_CELL_CTRL_SET(RD_DMA_ATTR_DEFAULT, RD_DMA_ATTR) |
+ HINIC_API_CMD_CELL_CTRL_SET(WR_DMA_ATTR_DEFAULT, WR_DMA_ATTR);
+
+ chksum = xor_chksum_set(&ctrl);
+
+ ctrl |= HINIC_API_CMD_CELL_CTRL_SET(chksum, XOR_CHKSUM);
+
+ /* The data in the HW should be in Big Endian Format */
+ *cell_ctrl = cpu_to_be64(ctrl);
+}
+
+/**
+ * prepare_api_cmd - prepare API CMD command
+ * @chain: chain for the command
+ * @dest: destination node on the card that will receive the command
+ * @cmd: command data
+ * @cmd_size: the command size
+ **/
+static void prepare_api_cmd(struct hinic_api_cmd_chain *chain,
+ enum hinic_node_id dest,
+ void *cmd, u16 cmd_size)
+{
+ struct hinic_api_cmd_cell *cell = chain->curr_node;
+ struct hinic_api_cmd_cell_ctxt *cell_ctxt;
+ struct hinic_hwif *hwif = chain->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ cell_ctxt = &chain->cell_ctxt[chain->prod_idx];
+
+ switch (chain->chain_type) {
+ case HINIC_API_CMD_WRITE_TO_MGMT_CPU:
+ cell->desc = HINIC_API_CMD_DESC_SET(SGE_DATA, API_TYPE) |
+ HINIC_API_CMD_DESC_SET(API_CMD_WRITE, RD_WR) |
+ HINIC_API_CMD_DESC_SET(NO_BYPASS, MGMT_BYPASS);
+ break;
+
+ default:
+ dev_err(&pdev->dev, "unknown Chain type\n");
+ return;
+ }
+
+ cell->desc |= HINIC_API_CMD_DESC_SET(dest, DEST) |
+ HINIC_API_CMD_DESC_SET(SIZE_4BYTES(cmd_size), SIZE);
+
+ cell->desc |= HINIC_API_CMD_DESC_SET(xor_chksum_set(&cell->desc),
+ XOR_CHKSUM);
+
+ /* The data in the HW should be in Big Endian Format */
+ cell->desc = cpu_to_be64(cell->desc);
+
+ memcpy(cell_ctxt->api_cmd_vaddr, cmd, cmd_size);
+}
+
+/**
+ * prepare_cell - prepare cell ctrl and cmd in the current cell
+ * @chain: chain for the command
+ * @dest: destination node on the card that will receive the command
+ * @cmd: command data
+ * @cmd_size: the command size
+ **/
+static void prepare_cell(struct hinic_api_cmd_chain *chain,
+ enum hinic_node_id dest,
+ void *cmd, u16 cmd_size)
+{
+ struct hinic_api_cmd_cell *curr_node = chain->curr_node;
+ u16 data_size = get_cell_data_size(chain->chain_type);
+
+ prepare_cell_ctrl(&curr_node->ctrl, data_size);
+ prepare_api_cmd(chain, dest, cmd, cmd_size);
+}
+
+static inline void cmd_chain_prod_idx_inc(struct hinic_api_cmd_chain *chain)
+{
+ chain->prod_idx = MASKED_IDX(chain, chain->prod_idx + 1);
+}
+
+/**
+ * api_cmd_status_update - update the status in the chain struct
+ * @chain: chain to update
+ **/
+static void api_cmd_status_update(struct hinic_api_cmd_chain *chain)
+{
+ enum hinic_api_cmd_chain_type chain_type;
+ struct hinic_api_cmd_status *wb_status;
+ struct hinic_hwif *hwif = chain->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ u64 status_header;
+ u32 status;
+
+ wb_status = chain->wb_status;
+ status_header = be64_to_cpu(wb_status->header);
+
+ status = be32_to_cpu(wb_status->status);
+ if (HINIC_API_CMD_STATUS_GET(status, CHKSUM_ERR)) {
+ dev_err(&pdev->dev, "API CMD status: Xor check error\n");
+ return;
+ }
+
+ chain_type = HINIC_API_CMD_STATUS_HEADER_GET(status_header, CHAIN_ID);
+ if (chain_type >= HINIC_API_CMD_MAX) {
+ dev_err(&pdev->dev, "unknown API CMD Chain %d\n", chain_type);
+ return;
+ }
+
+ chain->cons_idx = HINIC_API_CMD_STATUS_GET(status, CONS_IDX);
+}
+
+/**
+ * wait_for_status_poll - wait for an API CMD write to complete
+ * @chain: the chain of the command
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int wait_for_status_poll(struct hinic_api_cmd_chain *chain)
+{
+ int err = -ETIMEDOUT;
+ unsigned long end;
+
+ end = jiffies + msecs_to_jiffies(API_CMD_TIMEOUT);
+ do {
+ api_cmd_status_update(chain);
+
+ /* wait for CI to be updated - sign for completion */
+ if (chain->cons_idx == chain->prod_idx) {
+ err = 0;
+ break;
+ }
+
+ msleep(20);
+ } while (time_before(jiffies, end));
+
+ return err;
+}
+
+/**
+ * wait_for_api_cmd_completion - wait for command to complete
+ * @chain: chain for the command
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int wait_for_api_cmd_completion(struct hinic_api_cmd_chain *chain)
+{
+ struct hinic_hwif *hwif = chain->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int err;
+
+ switch (chain->chain_type) {
+ case HINIC_API_CMD_WRITE_TO_MGMT_CPU:
+ err = wait_for_status_poll(chain);
+ if (err) {
+ dev_err(&pdev->dev, "API CMD Poll status timeout\n");
+ break;
+ }
+ break;
+
+ default:
+ dev_err(&pdev->dev, "unknown API CMD Chain type\n");
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * api_cmd - send an API CMD command
+ * @chain: chain for the command
+ * @dest: destination node on the card that will receive the command
+ * @cmd: command data
+ * @cmd_size: the command size
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int api_cmd(struct hinic_api_cmd_chain *chain,
+ enum hinic_node_id dest, u8 *cmd, u16 cmd_size)
+{
+ struct hinic_api_cmd_cell_ctxt *ctxt;
+ int err;
+
+ down(&chain->sem);
+ if (chain_busy(chain)) {
+ up(&chain->sem);
+ return -EBUSY;
+ }
+
+ prepare_cell(chain, dest, cmd, cmd_size);
+ cmd_chain_prod_idx_inc(chain);
+
+ wmb(); /* increment pi before issuing the command */
+
+ set_prod_idx(chain); /* issue the command */
+
+ ctxt = &chain->cell_ctxt[chain->prod_idx];
+
+ chain->curr_node = ctxt->cell_vaddr;
+
+ err = wait_for_api_cmd_completion(chain);
+
+ up(&chain->sem);
+ return err;
+}
+
+/**
+ * hinic_api_cmd_write - Write API CMD command
+ * @chain: chain for write command
+ * @dest: destination node on the card that will receive the command
+ * @cmd: command data
+ * @size: the command size
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_api_cmd_write(struct hinic_api_cmd_chain *chain,
+ enum hinic_node_id dest, u8 *cmd, u16 size)
+{
+ /* Verify the chain type */
+ if (chain->chain_type == HINIC_API_CMD_WRITE_TO_MGMT_CPU)
+ return api_cmd(chain, dest, cmd, size);
+
+ return -EINVAL;
+}
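+
+/*
+ * Usage sketch (illustrative only, assuming the HINIC_NODE_ID_MGMT
+ * destination from hinic_hw_if.h): a management channel caller sends a
+ * prepared command buffer to the MGMT CPU:
+ *
+ * err = hinic_api_cmd_write(chain, HINIC_NODE_ID_MGMT, cmd, cmd_size);
+ * if (err)
+ * return err;
+ */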
+
+/**
+ * api_cmd_hw_restart - restart the chain in the HW
+ * @chain: the API CMD specific chain to restart
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int api_cmd_hw_restart(struct hinic_api_cmd_chain *chain)
+{
+ struct hinic_hwif *hwif = chain->hwif;
+ int err = -ETIMEDOUT;
+ unsigned long end;
+ u32 reg_addr, val;
+
+ /* Read Modify Write */
+ reg_addr = HINIC_CSR_API_CMD_CHAIN_REQ_ADDR(chain->chain_type);
+ val = hinic_hwif_read_reg(hwif, reg_addr);
+
+ val = HINIC_API_CMD_CHAIN_REQ_CLEAR(val, RESTART);
+ val |= HINIC_API_CMD_CHAIN_REQ_SET(1, RESTART);
+
+ hinic_hwif_write_reg(hwif, reg_addr, val);
+
+ end = jiffies + msecs_to_jiffies(API_CMD_TIMEOUT);
+ do {
+ val = hinic_hwif_read_reg(hwif, reg_addr);
+
+ if (!HINIC_API_CMD_CHAIN_REQ_GET(val, RESTART)) {
+ err = 0;
+ break;
+ }
+
+ msleep(20);
+ } while (time_before(jiffies, end));
+
+ return err;
+}
+
+/**
+ * api_cmd_ctrl_init - set the control register of a chain
+ * @chain: the API CMD specific chain to set control register for
+ **/
+static void api_cmd_ctrl_init(struct hinic_api_cmd_chain *chain)
+{
+ struct hinic_hwif *hwif = chain->hwif;
+ u32 addr, ctrl;
+ u16 cell_size;
+
+ /* Read Modify Write */
+ addr = HINIC_CSR_API_CMD_CHAIN_CTRL_ADDR(chain->chain_type);
+
+ cell_size = API_CMD_CELL_SIZE_VAL(chain->cell_size);
+
+ ctrl = hinic_hwif_read_reg(hwif, addr);
+
+ ctrl = HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, RESTART_WB_STAT) &
+ HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, XOR_ERR) &
+ HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, AEQE_EN) &
+ HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, XOR_CHK_EN) &
+ HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, CELL_SIZE);
+
+ ctrl |= HINIC_API_CMD_CHAIN_CTRL_SET(1, XOR_ERR) |
+ HINIC_API_CMD_CHAIN_CTRL_SET(XOR_CHK_ALL, XOR_CHK_EN) |
+ HINIC_API_CMD_CHAIN_CTRL_SET(cell_size, CELL_SIZE);
+
+ hinic_hwif_write_reg(hwif, addr, ctrl);
+}
+
+/**
+ * api_cmd_set_status_addr - set the status address of a chain in the HW
+ * @chain: the API CMD specific chain to set in HW status address for
+ **/
+static void api_cmd_set_status_addr(struct hinic_api_cmd_chain *chain)
+{
+ struct hinic_hwif *hwif = chain->hwif;
+ u32 addr, val;
+
+ addr = HINIC_CSR_API_CMD_STATUS_HI_ADDR(chain->chain_type);
+ val = upper_32_bits(chain->wb_status_paddr);
+ hinic_hwif_write_reg(hwif, addr, val);
+
+ addr = HINIC_CSR_API_CMD_STATUS_LO_ADDR(chain->chain_type);
+ val = lower_32_bits(chain->wb_status_paddr);
+ hinic_hwif_write_reg(hwif, addr, val);
+}
+
+/**
+ * api_cmd_set_num_cells - set the number of cells of a chain in the HW
+ * @chain: the API CMD specific chain to set in HW the number of cells for
+ **/
+static void api_cmd_set_num_cells(struct hinic_api_cmd_chain *chain)
+{
+ struct hinic_hwif *hwif = chain->hwif;
+ u32 addr, val;
+
+ addr = HINIC_CSR_API_CMD_CHAIN_NUM_CELLS_ADDR(chain->chain_type);
+ val = chain->num_cells;
+ hinic_hwif_write_reg(hwif, addr, val);
+}
+
+/**
+ * api_cmd_head_init - set the head of a chain in the HW
+ * @chain: the API CMD specific chain to set in HW the head for
+ **/
+static void api_cmd_head_init(struct hinic_api_cmd_chain *chain)
+{
+ struct hinic_hwif *hwif = chain->hwif;
+ u32 addr, val;
+
+ addr = HINIC_CSR_API_CMD_CHAIN_HEAD_HI_ADDR(chain->chain_type);
+ val = upper_32_bits(chain->head_cell_paddr);
+ hinic_hwif_write_reg(hwif, addr, val);
+
+ addr = HINIC_CSR_API_CMD_CHAIN_HEAD_LO_ADDR(chain->chain_type);
+ val = lower_32_bits(chain->head_cell_paddr);
+ hinic_hwif_write_reg(hwif, addr, val);
+}
+
+/**
+ * api_cmd_chain_hw_clean - clean the HW
+ * @chain: the API CMD specific chain
+ **/
+static void api_cmd_chain_hw_clean(struct hinic_api_cmd_chain *chain)
+{
+ struct hinic_hwif *hwif = chain->hwif;
+ u32 addr, ctrl;
+
+ addr = HINIC_CSR_API_CMD_CHAIN_CTRL_ADDR(chain->chain_type);
+
+ ctrl = hinic_hwif_read_reg(hwif, addr);
+ ctrl = HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, RESTART_WB_STAT) &
+ HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, XOR_ERR) &
+ HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, AEQE_EN) &
+ HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, XOR_CHK_EN) &
+ HINIC_API_CMD_CHAIN_CTRL_CLEAR(ctrl, CELL_SIZE);
+
+ hinic_hwif_write_reg(hwif, addr, ctrl);
+}
+
+/**
+ * api_cmd_chain_hw_init - initialize the chain in the HW
+ * @chain: the API CMD specific chain to initialize in HW
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int api_cmd_chain_hw_init(struct hinic_api_cmd_chain *chain)
+{
+ struct hinic_hwif *hwif = chain->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int err;
+
+ api_cmd_chain_hw_clean(chain);
+
+ api_cmd_set_status_addr(chain);
+
+ err = api_cmd_hw_restart(chain);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to restart API CMD HW\n");
+ return err;
+ }
+
+ api_cmd_ctrl_init(chain);
+ api_cmd_set_num_cells(chain);
+ api_cmd_head_init(chain);
+ return 0;
+}
+
+/**
+ * free_cmd_buf - free the dma buffer of API CMD command
+ * @chain: the API CMD specific chain of the cmd
+ * @cell_idx: the cell index of the cmd
+ **/
+static void free_cmd_buf(struct hinic_api_cmd_chain *chain, int cell_idx)
+{
+ struct hinic_api_cmd_cell_ctxt *cell_ctxt;
+ struct hinic_hwif *hwif = chain->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ cell_ctxt = &chain->cell_ctxt[cell_idx];
+
+ dma_free_coherent(&pdev->dev, API_CMD_BUF_SIZE,
+ cell_ctxt->api_cmd_vaddr,
+ cell_ctxt->api_cmd_paddr);
+}
+
+/**
+ * alloc_cmd_buf - allocate a dma buffer for API CMD command
+ * @chain: the API CMD specific chain for the cmd
+ * @cell: the cell in the HW for the cmd
+ * @cell_idx: the index of the cell
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int alloc_cmd_buf(struct hinic_api_cmd_chain *chain,
+ struct hinic_api_cmd_cell *cell, int cell_idx)
+{
+ struct hinic_api_cmd_cell_ctxt *cell_ctxt;
+ struct hinic_hwif *hwif = chain->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ dma_addr_t cmd_paddr;
+ u8 *cmd_vaddr;
+ int err = 0;
+
+ cmd_vaddr = dma_zalloc_coherent(&pdev->dev, API_CMD_BUF_SIZE,
+ &cmd_paddr, GFP_KERNEL);
+ if (!cmd_vaddr) {
+ dev_err(&pdev->dev, "Failed to allocate API CMD DMA memory\n");
+ return -ENOMEM;
+ }
+
+ cell_ctxt = &chain->cell_ctxt[cell_idx];
+
+ cell_ctxt->api_cmd_vaddr = cmd_vaddr;
+ cell_ctxt->api_cmd_paddr = cmd_paddr;
+
+ /* set the cmd DMA address in the cell */
+ switch (chain->chain_type) {
+ case HINIC_API_CMD_WRITE_TO_MGMT_CPU:
+ /* The data in the HW should be in Big Endian Format */
+ cell->write.hw_cmd_paddr = cpu_to_be64(cmd_paddr);
+ break;
+
+ default:
+ dev_err(&pdev->dev, "Unsupported API CMD chain type\n");
+ free_cmd_buf(chain, cell_idx);
+ err = -EINVAL;
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * api_cmd_create_cell - create API CMD cell for specific chain
+ * @chain: the API CMD specific chain to create its cell
+ * @cell_idx: the index of the cell to create
+ * @pre_node: previous cell
+ * @node_vaddr: the returned virt addr of the cell
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int api_cmd_create_cell(struct hinic_api_cmd_chain *chain,
+ int cell_idx,
+ struct hinic_api_cmd_cell *pre_node,
+ struct hinic_api_cmd_cell **node_vaddr)
+{
+ struct hinic_api_cmd_cell_ctxt *cell_ctxt;
+ struct hinic_hwif *hwif = chain->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_api_cmd_cell *node;
+ dma_addr_t node_paddr;
+ int err;
+
+ node = dma_zalloc_coherent(&pdev->dev, chain->cell_size,
+ &node_paddr, GFP_KERNEL);
+ if (!node) {
+ dev_err(&pdev->dev, "Failed to allocate dma API CMD cell\n");
+ return -ENOMEM;
+ }
+
+ node->read.hw_wb_resp_paddr = 0;
+
+ cell_ctxt = &chain->cell_ctxt[cell_idx];
+ cell_ctxt->cell_vaddr = node;
+ cell_ctxt->cell_paddr = node_paddr;
+
+ if (!pre_node) {
+ chain->head_cell_paddr = node_paddr;
+ chain->head_node = node;
+ } else {
+ /* The data in the HW should be in Big Endian Format */
+ pre_node->next_cell_paddr = cpu_to_be64(node_paddr);
+ }
+
+ switch (chain->chain_type) {
+ case HINIC_API_CMD_WRITE_TO_MGMT_CPU:
+ err = alloc_cmd_buf(chain, node, cell_idx);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate cmd buffer\n");
+ goto err_alloc_cmd_buf;
+ }
+ break;
+
+ default:
+ dev_err(&pdev->dev, "Unsupported API CMD chain type\n");
+ err = -EINVAL;
+ goto err_alloc_cmd_buf;
+ }
+
+ *node_vaddr = node;
+ return 0;
+
+err_alloc_cmd_buf:
+ dma_free_coherent(&pdev->dev, chain->cell_size, node, node_paddr);
+ return err;
+}
+
+/**
+ * api_cmd_destroy_cell - destroy API CMD cell of specific chain
+ * @chain: the API CMD specific chain to destroy its cell
+ * @cell_idx: the cell to destroy
+ **/
+static void api_cmd_destroy_cell(struct hinic_api_cmd_chain *chain,
+ int cell_idx)
+{
+ struct hinic_api_cmd_cell_ctxt *cell_ctxt;
+ struct hinic_hwif *hwif = chain->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_api_cmd_cell *node;
+ dma_addr_t node_paddr;
+ size_t node_size;
+
+ cell_ctxt = &chain->cell_ctxt[cell_idx];
+
+ node = cell_ctxt->cell_vaddr;
+ node_paddr = cell_ctxt->cell_paddr;
+ node_size = chain->cell_size;
+
+ if (cell_ctxt->api_cmd_vaddr) {
+ switch (chain->chain_type) {
+ case HINIC_API_CMD_WRITE_TO_MGMT_CPU:
+ free_cmd_buf(chain, cell_idx);
+ break;
+ default:
+ dev_err(&pdev->dev, "Unsupported API CMD chain type\n");
+ break;
+ }
+
+ dma_free_coherent(&pdev->dev, node_size, node,
+ node_paddr);
+ }
+}
+
+/**
+ * api_cmd_destroy_cells - destroy API CMD cells of specific chain
+ * @chain: the API CMD specific chain to destroy its cells
+ * @num_cells: number of cells to destroy
+ **/
+static void api_cmd_destroy_cells(struct hinic_api_cmd_chain *chain,
+ int num_cells)
+{
+ int cell_idx;
+
+ for (cell_idx = 0; cell_idx < num_cells; cell_idx++)
+ api_cmd_destroy_cell(chain, cell_idx);
+}
+
+/**
+ * api_cmd_create_cells - create API CMD cells for specific chain
+ * @chain: the API CMD specific chain
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int api_cmd_create_cells(struct hinic_api_cmd_chain *chain)
+{
+ struct hinic_api_cmd_cell *node = NULL, *pre_node = NULL;
+ struct hinic_hwif *hwif = chain->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int err, cell_idx;
+
+ for (cell_idx = 0; cell_idx < chain->num_cells; cell_idx++) {
+ err = api_cmd_create_cell(chain, cell_idx, pre_node, &node);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to create API CMD cell\n");
+ goto err_create_cell;
+ }
+
+ pre_node = node;
+ }
+
+ /* set the final node to point back to the start */
+ node->next_cell_paddr = cpu_to_be64(chain->head_cell_paddr);
+
+ /* set the current node to be the head */
+ chain->curr_node = chain->head_node;
+ return 0;
+
+err_create_cell:
+ api_cmd_destroy_cells(chain, cell_idx);
+ return err;
+}
+
+/**
+ * api_chain_init - initialize API CMD specific chain
+ * @chain: the API CMD specific chain to initialize
+ * @attr: attributes to set in the chain
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int api_chain_init(struct hinic_api_cmd_chain *chain,
+ struct hinic_api_cmd_chain_attr *attr)
+{
+ struct hinic_hwif *hwif = attr->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ size_t cell_ctxt_size;
+
+ chain->hwif = hwif;
+ chain->chain_type = attr->chain_type;
+ chain->num_cells = attr->num_cells;
+ chain->cell_size = attr->cell_size;
+
+ chain->prod_idx = 0;
+ chain->cons_idx = 0;
+
+ sema_init(&chain->sem, 1);
+
+ cell_ctxt_size = chain->num_cells * sizeof(*chain->cell_ctxt);
+ chain->cell_ctxt = devm_kzalloc(&pdev->dev, cell_ctxt_size, GFP_KERNEL);
+ if (!chain->cell_ctxt)
+ return -ENOMEM;
+
+ chain->wb_status = dma_zalloc_coherent(&pdev->dev,
+ sizeof(*chain->wb_status),
+ &chain->wb_status_paddr,
+ GFP_KERNEL);
+ if (!chain->wb_status) {
+ dev_err(&pdev->dev, "Failed to allocate DMA wb status\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * api_chain_free - free API CMD specific chain
+ * @chain: the API CMD specific chain to free
+ **/
+static void api_chain_free(struct hinic_api_cmd_chain *chain)
+{
+ struct hinic_hwif *hwif = chain->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ dma_free_coherent(&pdev->dev, sizeof(*chain->wb_status),
+ chain->wb_status, chain->wb_status_paddr);
+}
+
+/**
+ * api_cmd_create_chain - create API CMD specific chain
+ * @attr: attributes to set in the chain
+ *
+ * Return the created chain
+ **/
+static struct hinic_api_cmd_chain *
+ api_cmd_create_chain(struct hinic_api_cmd_chain_attr *attr)
+{
+ struct hinic_hwif *hwif = attr->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_api_cmd_chain *chain;
+ int err;
+
+ if (attr->num_cells & (attr->num_cells - 1)) {
+ dev_err(&pdev->dev, "Invalid number of cells, must be power of 2\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL);
+ if (!chain)
+ return ERR_PTR(-ENOMEM);
+
+ err = api_chain_init(chain, attr);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize chain\n");
+ return ERR_PTR(err);
+ }
+
+ err = api_cmd_create_cells(chain);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to create cells for API CMD chain\n");
+ goto err_create_cells;
+ }
+
+ err = api_cmd_chain_hw_init(chain);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize chain HW\n");
+ goto err_chain_hw_init;
+ }
+
+ return chain;
+
+err_chain_hw_init:
+ api_cmd_destroy_cells(chain, chain->num_cells);
+
+err_create_cells:
+ api_chain_free(chain);
+ return ERR_PTR(err);
+}
+
+/**
+ * api_cmd_destroy_chain - destroy API CMD specific chain
+ * @chain: the API CMD specific chain to destroy
+ **/
+static void api_cmd_destroy_chain(struct hinic_api_cmd_chain *chain)
+{
+ api_cmd_chain_hw_clean(chain);
+ api_cmd_destroy_cells(chain, chain->num_cells);
+ api_chain_free(chain);
+}
+
+/**
+ * hinic_api_cmd_init - Initialize all the API CMD chains
+ * @chain: the API CMD chains that are initialized
+ * @hwif: the hardware interface of a pci function device
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_api_cmd_init(struct hinic_api_cmd_chain **chain,
+ struct hinic_hwif *hwif)
+{
+ enum hinic_api_cmd_chain_type type, chain_type;
+ struct hinic_api_cmd_chain_attr attr;
+ struct pci_dev *pdev = hwif->pdev;
+ size_t hw_cell_sz;
+ int err;
+
+ hw_cell_sz = sizeof(struct hinic_api_cmd_cell);
+
+ attr.hwif = hwif;
+ attr.num_cells = API_CHAIN_NUM_CELLS;
+ attr.cell_size = API_CMD_CELL_SIZE(hw_cell_sz);
+
+ chain_type = HINIC_API_CMD_WRITE_TO_MGMT_CPU;
+ for ( ; chain_type < HINIC_API_CMD_MAX; chain_type++) {
+ attr.chain_type = chain_type;
+
+ if (chain_type != HINIC_API_CMD_WRITE_TO_MGMT_CPU)
+ continue;
+
+ chain[chain_type] = api_cmd_create_chain(&attr);
+ if (IS_ERR(chain[chain_type])) {
+ dev_err(&pdev->dev, "Failed to create chain %d\n",
+ chain_type);
+ err = PTR_ERR(chain[chain_type]);
+ goto err_create_chain;
+ }
+ }
+
+ return 0;
+
+err_create_chain:
+ type = HINIC_API_CMD_WRITE_TO_MGMT_CPU;
+ for ( ; type < chain_type; type++) {
+ if (type != HINIC_API_CMD_WRITE_TO_MGMT_CPU)
+ continue;
+
+ api_cmd_destroy_chain(chain[type]);
+ }
+
+ return err;
+}
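+
+/*
+ * Usage sketch (illustrative only): a caller owns an array of
+ * HINIC_API_CMD_MAX chain pointers and brackets its use with init/free:
+ *
+ * struct hinic_api_cmd_chain *chains[HINIC_API_CMD_MAX];
+ *
+ * err = hinic_api_cmd_init(chains, hwif);
+ * ...
+ * hinic_api_cmd_free(chains);
+ */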
+
+/**
+ * hinic_api_cmd_free - free the API CMD chains
+ * @chain: the API CMD chains that are freed
+ **/
+void hinic_api_cmd_free(struct hinic_api_cmd_chain **chain)
+{
+ enum hinic_api_cmd_chain_type chain_type;
+
+ chain_type = HINIC_API_CMD_WRITE_TO_MGMT_CPU;
+ for ( ; chain_type < HINIC_API_CMD_MAX; chain_type++) {
+ if (chain_type != HINIC_API_CMD_WRITE_TO_MGMT_CPU)
+ continue;
+
+ api_cmd_destroy_chain(chain[chain_type]);
+ }
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h
new file mode 100644
index 0000000..31b94d5
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.h
@@ -0,0 +1,208 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_HW_API_CMD_H
+#define HINIC_HW_API_CMD_H
+
+#include <linux/types.h>
+#include <linux/semaphore.h>
+
+#include "hinic_hw_if.h"
+
+#define HINIC_API_CMD_PI_IDX_SHIFT 0
+
+#define HINIC_API_CMD_PI_IDX_MASK 0xFFFFFF
+
+#define HINIC_API_CMD_PI_SET(val, member) \
+ (((u32)(val) & HINIC_API_CMD_PI_##member##_MASK) << \
+ HINIC_API_CMD_PI_##member##_SHIFT)
+
+#define HINIC_API_CMD_PI_CLEAR(val, member) \
+ ((val) & (~(HINIC_API_CMD_PI_##member##_MASK \
+ << HINIC_API_CMD_PI_##member##_SHIFT)))
+
+#define HINIC_API_CMD_CHAIN_REQ_RESTART_SHIFT 1
+
+#define HINIC_API_CMD_CHAIN_REQ_RESTART_MASK 0x1
+
+#define HINIC_API_CMD_CHAIN_REQ_SET(val, member) \
+ (((u32)(val) & HINIC_API_CMD_CHAIN_REQ_##member##_MASK) << \
+ HINIC_API_CMD_CHAIN_REQ_##member##_SHIFT)
+
+#define HINIC_API_CMD_CHAIN_REQ_GET(val, member) \
+ (((val) >> HINIC_API_CMD_CHAIN_REQ_##member##_SHIFT) & \
+ HINIC_API_CMD_CHAIN_REQ_##member##_MASK)
+
+#define HINIC_API_CMD_CHAIN_REQ_CLEAR(val, member) \
+ ((val) & (~(HINIC_API_CMD_CHAIN_REQ_##member##_MASK \
+ << HINIC_API_CMD_CHAIN_REQ_##member##_SHIFT)))
+
+#define HINIC_API_CMD_CHAIN_CTRL_RESTART_WB_STAT_SHIFT 1
+#define HINIC_API_CMD_CHAIN_CTRL_XOR_ERR_SHIFT 2
+#define HINIC_API_CMD_CHAIN_CTRL_AEQE_EN_SHIFT 4
+#define HINIC_API_CMD_CHAIN_CTRL_AEQ_ID_SHIFT 8
+#define HINIC_API_CMD_CHAIN_CTRL_XOR_CHK_EN_SHIFT 28
+#define HINIC_API_CMD_CHAIN_CTRL_CELL_SIZE_SHIFT 30
+
+#define HINIC_API_CMD_CHAIN_CTRL_RESTART_WB_STAT_MASK 0x1
+#define HINIC_API_CMD_CHAIN_CTRL_XOR_ERR_MASK 0x1
+#define HINIC_API_CMD_CHAIN_CTRL_AEQE_EN_MASK 0x1
+#define HINIC_API_CMD_CHAIN_CTRL_AEQ_ID_MASK 0x3
+#define HINIC_API_CMD_CHAIN_CTRL_XOR_CHK_EN_MASK 0x3
+#define HINIC_API_CMD_CHAIN_CTRL_CELL_SIZE_MASK 0x3
+
+#define HINIC_API_CMD_CHAIN_CTRL_SET(val, member) \
+ (((u32)(val) & HINIC_API_CMD_CHAIN_CTRL_##member##_MASK) << \
+ HINIC_API_CMD_CHAIN_CTRL_##member##_SHIFT)
+
+#define HINIC_API_CMD_CHAIN_CTRL_CLEAR(val, member) \
+ ((val) & (~(HINIC_API_CMD_CHAIN_CTRL_##member##_MASK \
+ << HINIC_API_CMD_CHAIN_CTRL_##member##_SHIFT)))
+
+#define HINIC_API_CMD_CELL_CTRL_DATA_SZ_SHIFT 0
+#define HINIC_API_CMD_CELL_CTRL_RD_DMA_ATTR_SHIFT 16
+#define HINIC_API_CMD_CELL_CTRL_WR_DMA_ATTR_SHIFT 24
+#define HINIC_API_CMD_CELL_CTRL_XOR_CHKSUM_SHIFT 56
+
+#define HINIC_API_CMD_CELL_CTRL_DATA_SZ_MASK 0x3F
+#define HINIC_API_CMD_CELL_CTRL_RD_DMA_ATTR_MASK 0x3F
+#define HINIC_API_CMD_CELL_CTRL_WR_DMA_ATTR_MASK 0x3F
+#define HINIC_API_CMD_CELL_CTRL_XOR_CHKSUM_MASK 0xFF
+
+#define HINIC_API_CMD_CELL_CTRL_SET(val, member) \
+ (((u64)(val) & HINIC_API_CMD_CELL_CTRL_##member##_MASK) << \
+ HINIC_API_CMD_CELL_CTRL_##member##_SHIFT)
+
+#define HINIC_API_CMD_DESC_API_TYPE_SHIFT 0
+#define HINIC_API_CMD_DESC_RD_WR_SHIFT 1
+#define HINIC_API_CMD_DESC_MGMT_BYPASS_SHIFT 2
+#define HINIC_API_CMD_DESC_DEST_SHIFT 32
+#define HINIC_API_CMD_DESC_SIZE_SHIFT 40
+#define HINIC_API_CMD_DESC_XOR_CHKSUM_SHIFT 56
+
+#define HINIC_API_CMD_DESC_API_TYPE_MASK 0x1
+#define HINIC_API_CMD_DESC_RD_WR_MASK 0x1
+#define HINIC_API_CMD_DESC_MGMT_BYPASS_MASK 0x1
+#define HINIC_API_CMD_DESC_DEST_MASK 0x1F
+#define HINIC_API_CMD_DESC_SIZE_MASK 0x7FF
+#define HINIC_API_CMD_DESC_XOR_CHKSUM_MASK 0xFF
+
+#define HINIC_API_CMD_DESC_SET(val, member) \
+ (((u64)(val) & HINIC_API_CMD_DESC_##member##_MASK) << \
+ HINIC_API_CMD_DESC_##member##_SHIFT)
+
+#define HINIC_API_CMD_STATUS_HEADER_CHAIN_ID_SHIFT 16
+
+#define HINIC_API_CMD_STATUS_HEADER_CHAIN_ID_MASK 0xFF
+
+#define HINIC_API_CMD_STATUS_HEADER_GET(val, member) \
+ (((val) >> HINIC_API_CMD_STATUS_HEADER_##member##_SHIFT) & \
+ HINIC_API_CMD_STATUS_HEADER_##member##_MASK)
+
+#define HINIC_API_CMD_STATUS_CONS_IDX_SHIFT 0
+#define HINIC_API_CMD_STATUS_CHKSUM_ERR_SHIFT 28
+
+#define HINIC_API_CMD_STATUS_CONS_IDX_MASK 0xFFFFFF
+#define HINIC_API_CMD_STATUS_CHKSUM_ERR_MASK 0x3
+
+#define HINIC_API_CMD_STATUS_GET(val, member) \
+ (((val) >> HINIC_API_CMD_STATUS_##member##_SHIFT) & \
+ HINIC_API_CMD_STATUS_##member##_MASK)
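+
+/*
+ * Example: with CONS_IDX in bits [23:0] and CHKSUM_ERR in bits [29:28],
+ * HINIC_API_CMD_STATUS_GET(0x30000005, CONS_IDX) yields 5 and
+ * HINIC_API_CMD_STATUS_GET(0x30000005, CHKSUM_ERR) yields 3.
+ */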
+
+enum hinic_api_cmd_chain_type {
+ HINIC_API_CMD_WRITE_TO_MGMT_CPU = 2,
+
+ HINIC_API_CMD_MAX,
+};
+
+struct hinic_api_cmd_chain_attr {
+ struct hinic_hwif *hwif;
+ enum hinic_api_cmd_chain_type chain_type;
+
+ u32 num_cells;
+ u16 cell_size;
+};
+
+struct hinic_api_cmd_status {
+ u64 header;
+ u32 status;
+ u32 rsvd0;
+ u32 rsvd1;
+ u32 rsvd2;
+ u64 rsvd3;
+};
+
+/* HW struct */
+struct hinic_api_cmd_cell {
+ u64 ctrl;
+
+ /* address is 64 bit in HW struct */
+ u64 next_cell_paddr;
+
+ u64 desc;
+
+ /* HW struct */
+ union {
+ struct {
+ u64 hw_cmd_paddr;
+ } write;
+
+ struct {
+ u64 hw_wb_resp_paddr;
+ u64 hw_cmd_paddr;
+ } read;
+ };
+};
+
+struct hinic_api_cmd_cell_ctxt {
+ dma_addr_t cell_paddr;
+ struct hinic_api_cmd_cell *cell_vaddr;
+
+ dma_addr_t api_cmd_paddr;
+ u8 *api_cmd_vaddr;
+};
+
+struct hinic_api_cmd_chain {
+ struct hinic_hwif *hwif;
+ enum hinic_api_cmd_chain_type chain_type;
+
+ u32 num_cells;
+ u16 cell_size;
+
+ /* HW members in 24 bit format */
+ u32 prod_idx;
+ u32 cons_idx;
+
+ struct semaphore sem;
+
+ struct hinic_api_cmd_cell_ctxt *cell_ctxt;
+
+ dma_addr_t wb_status_paddr;
+ struct hinic_api_cmd_status *wb_status;
+
+ dma_addr_t head_cell_paddr;
+ struct hinic_api_cmd_cell *head_node;
+ struct hinic_api_cmd_cell *curr_node;
+};
+
+int hinic_api_cmd_write(struct hinic_api_cmd_chain *chain,
+ enum hinic_node_id dest, u8 *cmd, u16 size);
+
+int hinic_api_cmd_init(struct hinic_api_cmd_chain **chain,
+ struct hinic_hwif *hwif);
+
+void hinic_api_cmd_free(struct hinic_api_cmd_chain **chain);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
new file mode 100644
index 0000000..7d95f08
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -0,0 +1,946 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/spinlock.h>
+#include <linux/sizes.h>
+#include <linux/atomic.h>
+#include <linux/log2.h>
+#include <linux/io.h>
+#include <linux/completion.h>
+#include <linux/err.h>
+#include <asm/byteorder.h>
+#include <asm/barrier.h>
+
+#include "hinic_common.h"
+#include "hinic_hw_if.h"
+#include "hinic_hw_eqs.h"
+#include "hinic_hw_mgmt.h"
+#include "hinic_hw_wqe.h"
+#include "hinic_hw_wq.h"
+#include "hinic_hw_cmdq.h"
+#include "hinic_hw_io.h"
+#include "hinic_hw_dev.h"
+
+#define CMDQ_CEQE_TYPE_SHIFT 0
+
+#define CMDQ_CEQE_TYPE_MASK 0x7
+
+#define CMDQ_CEQE_GET(val, member) \
+ (((val) >> CMDQ_CEQE_##member##_SHIFT) \
+ & CMDQ_CEQE_##member##_MASK)
+
+#define CMDQ_WQE_ERRCODE_VAL_SHIFT 20
+
+#define CMDQ_WQE_ERRCODE_VAL_MASK 0xF
+
+#define CMDQ_WQE_ERRCODE_GET(val, member) \
+ (((val) >> CMDQ_WQE_ERRCODE_##member##_SHIFT) \
+ & CMDQ_WQE_ERRCODE_##member##_MASK)
+
+#define CMDQ_DB_PI_OFF(pi) (((u16)LOWER_8_BITS(pi)) << 3)
+
+#define CMDQ_DB_ADDR(db_base, pi) ((db_base) + CMDQ_DB_PI_OFF(pi))
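+
+/*
+ * Example: only the low 8 bits of the producer index select the doorbell
+ * slot and each slot is 8 bytes, so for pi = 0x1234 the doorbell is
+ * written at db_base + (0x34 << 3) = db_base + 0x1a0.
+ */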
+
+#define CMDQ_WQE_HEADER(wqe) ((struct hinic_cmdq_header *)(wqe))
+
+#define CMDQ_WQE_COMPLETED(ctrl_info) \
+ HINIC_CMDQ_CTRL_GET(ctrl_info, HW_BUSY_BIT)
+
+#define FIRST_DATA_TO_WRITE_LAST sizeof(u64)
+
+#define CMDQ_DB_OFF SZ_2K
+
+#define CMDQ_WQEBB_SIZE 64
+#define CMDQ_WQE_SIZE 64
+#define CMDQ_DEPTH SZ_4K
+
+#define CMDQ_WQ_PAGE_SIZE SZ_4K
+
+#define WQE_LCMD_SIZE 64
+#define WQE_SCMD_SIZE 64
+
+#define COMPLETE_LEN 3
+
+#define CMDQ_TIMEOUT 1000
+
+#define CMDQ_PFN(addr, page_size) ((addr) >> (ilog2(page_size)))
+
+#define cmdq_to_cmdqs(cmdq) container_of((cmdq) - (cmdq)->cmdq_type, \
+ struct hinic_cmdqs, cmdq[0])
+
+#define cmdqs_to_func_to_io(cmdqs) container_of(cmdqs, \
+ struct hinic_func_to_io, \
+ cmdqs)
+
+enum cmdq_wqe_type {
+ WQE_LCMD_TYPE = 0,
+ WQE_SCMD_TYPE = 1,
+};
+
+enum completion_format {
+ COMPLETE_DIRECT = 0,
+ COMPLETE_SGE = 1,
+};
+
+enum data_format {
+ DATA_SGE = 0,
+ DATA_DIRECT = 1,
+};
+
+enum bufdesc_len {
+ BUFDESC_LCMD_LEN = 2, /* 16 bytes = 2 units of 8 bytes */
+ BUFDESC_SCMD_LEN = 3, /* 24 bytes = 3 units of 8 bytes */
+};
+
+enum ctrl_sect_len {
+ CTRL_SECT_LEN = 1, /* 4 bytes (ctrl), rounded up to 1 unit of 8 bytes */
+ CTRL_DIRECT_SECT_LEN = 2, /* 12 bytes (ctrl + rsvd), rounded up to 2 units of 8 bytes */
+};
+
+enum cmdq_scmd_type {
+ CMDQ_SET_ARM_CMD = 2,
+};
+
+enum cmdq_cmd_type {
+ CMDQ_CMD_SYNC_DIRECT_RESP = 0,
+ CMDQ_CMD_SYNC_SGE_RESP = 1,
+};
+
+enum completion_request {
+ NO_CEQ = 0,
+ CEQ_SET = 1,
+};
+
+/**
+ * hinic_alloc_cmdq_buf - alloc buffer for sending command
+ * @cmdqs: the cmdqs
+ * @cmdq_buf: the buffer returned in this struct
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_alloc_cmdq_buf(struct hinic_cmdqs *cmdqs,
+ struct hinic_cmdq_buf *cmdq_buf)
+{
+ struct hinic_hwif *hwif = cmdqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ cmdq_buf->buf = pci_pool_alloc(cmdqs->cmdq_buf_pool, GFP_KERNEL,
+ &cmdq_buf->dma_addr);
+ if (!cmdq_buf->buf) {
+ dev_err(&pdev->dev, "Failed to allocate cmd from the pool\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * hinic_free_cmdq_buf - free buffer
+ * @cmdqs: the cmdqs
+ * @cmdq_buf: the struct holding the buffer to free
+ **/
+void hinic_free_cmdq_buf(struct hinic_cmdqs *cmdqs,
+ struct hinic_cmdq_buf *cmdq_buf)
+{
+ pci_pool_free(cmdqs->cmdq_buf_pool, cmdq_buf->buf, cmdq_buf->dma_addr);
+}
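+
+/*
+ * Usage sketch (illustrative only): command buffers come from the
+ * per-cmdqs PCI pool and every successful allocation must be paired
+ * with a free:
+ *
+ * struct hinic_cmdq_buf buf;
+ *
+ * err = hinic_alloc_cmdq_buf(cmdqs, &buf);
+ * if (err)
+ * return err;
+ * ...
+ * hinic_free_cmdq_buf(cmdqs, &buf);
+ */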
+
+static unsigned int cmdq_wqe_size_from_bdlen(enum bufdesc_len len)
+{
+ unsigned int wqe_size = 0;
+
+ switch (len) {
+ case BUFDESC_LCMD_LEN:
+ wqe_size = WQE_LCMD_SIZE;
+ break;
+ case BUFDESC_SCMD_LEN:
+ wqe_size = WQE_SCMD_SIZE;
+ break;
+ }
+
+ return wqe_size;
+}
+
+static void cmdq_set_sge_completion(struct hinic_cmdq_completion *completion,
+ struct hinic_cmdq_buf *buf_out)
+{
+ struct hinic_sge_resp *sge_resp = &completion->sge_resp;
+
+ hinic_set_sge(&sge_resp->sge, buf_out->dma_addr, buf_out->size);
+}
+
+static void cmdq_prepare_wqe_ctrl(struct hinic_cmdq_wqe *wqe, int wrapped,
+ enum hinic_cmd_ack_type ack_type,
+ enum hinic_mod_type mod, u8 cmd, u16 prod_idx,
+ enum completion_format complete_format,
+ enum data_format data_format,
+ enum bufdesc_len buf_len)
+{
+ struct hinic_cmdq_wqe_lcmd *wqe_lcmd;
+ struct hinic_cmdq_wqe_scmd *wqe_scmd;
+ enum ctrl_sect_len ctrl_len;
+ struct hinic_ctrl *ctrl;
+ u32 saved_data;
+
+ if (data_format == DATA_SGE) {
+ wqe_lcmd = &wqe->wqe_lcmd;
+
+ wqe_lcmd->status.status_info = 0;
+ ctrl = &wqe_lcmd->ctrl;
+ ctrl_len = CTRL_SECT_LEN;
+ } else {
+ wqe_scmd = &wqe->direct_wqe.wqe_scmd;
+
+ wqe_scmd->status.status_info = 0;
+ ctrl = &wqe_scmd->ctrl;
+ ctrl_len = CTRL_DIRECT_SECT_LEN;
+ }
+
+ ctrl->ctrl_info = HINIC_CMDQ_CTRL_SET(prod_idx, PI) |
+ HINIC_CMDQ_CTRL_SET(cmd, CMD) |
+ HINIC_CMDQ_CTRL_SET(mod, MOD) |
+ HINIC_CMDQ_CTRL_SET(ack_type, ACK_TYPE);
+
+ CMDQ_WQE_HEADER(wqe)->header_info =
+ HINIC_CMDQ_WQE_HEADER_SET(buf_len, BUFDESC_LEN) |
+ HINIC_CMDQ_WQE_HEADER_SET(complete_format, COMPLETE_FMT) |
+ HINIC_CMDQ_WQE_HEADER_SET(data_format, DATA_FMT) |
+ HINIC_CMDQ_WQE_HEADER_SET(CEQ_SET, COMPLETE_REQ) |
+ HINIC_CMDQ_WQE_HEADER_SET(COMPLETE_LEN, COMPLETE_SECT_LEN) |
+ HINIC_CMDQ_WQE_HEADER_SET(ctrl_len, CTRL_LEN) |
+ HINIC_CMDQ_WQE_HEADER_SET(wrapped, TOGGLED_WRAPPED);
+
+ saved_data = CMDQ_WQE_HEADER(wqe)->saved_data;
+ saved_data = HINIC_SAVED_DATA_CLEAR(saved_data, ARM);
+
+ if ((cmd == CMDQ_SET_ARM_CMD) && (mod == HINIC_MOD_COMM))
+ CMDQ_WQE_HEADER(wqe)->saved_data |=
+ HINIC_SAVED_DATA_SET(1, ARM);
+ else
+ CMDQ_WQE_HEADER(wqe)->saved_data = saved_data;
+}
+
+static void cmdq_set_lcmd_bufdesc(struct hinic_cmdq_wqe_lcmd *wqe_lcmd,
+ struct hinic_cmdq_buf *buf_in)
+{
+ hinic_set_sge(&wqe_lcmd->buf_desc.sge, buf_in->dma_addr, buf_in->size);
+}
+
+static void cmdq_set_direct_wqe_data(struct hinic_cmdq_direct_wqe *wqe,
+ void *buf_in, u32 in_size)
+{
+ struct hinic_cmdq_wqe_scmd *wqe_scmd = &wqe->wqe_scmd;
+
+ wqe_scmd->buf_desc.buf_len = in_size;
+ memcpy(wqe_scmd->buf_desc.data, buf_in, in_size);
+}
+
+static void cmdq_set_lcmd_wqe(struct hinic_cmdq_wqe *wqe,
+ enum cmdq_cmd_type cmd_type,
+ struct hinic_cmdq_buf *buf_in,
+ struct hinic_cmdq_buf *buf_out, int wrapped,
+ enum hinic_cmd_ack_type ack_type,
+ enum hinic_mod_type mod, u8 cmd, u16 prod_idx)
+{
+ struct hinic_cmdq_wqe_lcmd *wqe_lcmd = &wqe->wqe_lcmd;
+ enum completion_format complete_format;
+
+ switch (cmd_type) {
+ case CMDQ_CMD_SYNC_SGE_RESP:
+ complete_format = COMPLETE_SGE;
+ cmdq_set_sge_completion(&wqe_lcmd->completion, buf_out);
+ break;
+ case CMDQ_CMD_SYNC_DIRECT_RESP:
+ complete_format = COMPLETE_DIRECT;
+ wqe_lcmd->completion.direct_resp = 0;
+ break;
+ }
+
+ cmdq_prepare_wqe_ctrl(wqe, wrapped, ack_type, mod, cmd,
+ prod_idx, complete_format, DATA_SGE,
+ BUFDESC_LCMD_LEN);
+
+ cmdq_set_lcmd_bufdesc(wqe_lcmd, buf_in);
+}
+
+static void cmdq_set_direct_wqe(struct hinic_cmdq_wqe *wqe,
+ enum cmdq_cmd_type cmd_type,
+ void *buf_in, u16 in_size,
+ struct hinic_cmdq_buf *buf_out, int wrapped,
+ enum hinic_cmd_ack_type ack_type,
+ enum hinic_mod_type mod, u8 cmd, u16 prod_idx)
+{
+ struct hinic_cmdq_direct_wqe *direct_wqe = &wqe->direct_wqe;
+ enum completion_format complete_format;
+ struct hinic_cmdq_wqe_scmd *wqe_scmd;
+
+ wqe_scmd = &direct_wqe->wqe_scmd;
+
+ switch (cmd_type) {
+ case CMDQ_CMD_SYNC_SGE_RESP:
+ complete_format = COMPLETE_SGE;
+ cmdq_set_sge_completion(&wqe_scmd->completion, buf_out);
+ break;
+ case CMDQ_CMD_SYNC_DIRECT_RESP:
+ complete_format = COMPLETE_DIRECT;
+ wqe_scmd->completion.direct_resp = 0;
+ break;
+ }
+
+ cmdq_prepare_wqe_ctrl(wqe, wrapped, ack_type, mod, cmd, prod_idx,
+ complete_format, DATA_DIRECT, BUFDESC_SCMD_LEN);
+
+ cmdq_set_direct_wqe_data(direct_wqe, buf_in, in_size);
+}
+
+static void cmdq_wqe_fill(void *dst, void *src)
+{
+ memcpy(dst + FIRST_DATA_TO_WRITE_LAST, src + FIRST_DATA_TO_WRITE_LAST,
+ CMDQ_WQE_SIZE - FIRST_DATA_TO_WRITE_LAST);
+
+ wmb(); /* The first 8 bytes should be written last */
+
+ *(u64 *)dst = *(u64 *)src;
+}
+
+static void cmdq_fill_db(u32 *db_info,
+ enum hinic_cmdq_type cmdq_type, u16 prod_idx)
+{
+ *db_info = HINIC_CMDQ_DB_INFO_SET(UPPER_8_BITS(prod_idx), HI_PROD_IDX) |
+ HINIC_CMDQ_DB_INFO_SET(HINIC_CTRL_PATH, PATH) |
+ HINIC_CMDQ_DB_INFO_SET(cmdq_type, CMDQ_TYPE) |
+ HINIC_CMDQ_DB_INFO_SET(HINIC_DB_CMDQ_TYPE, DB_TYPE);
+}
+
+static void cmdq_set_db(struct hinic_cmdq *cmdq,
+ enum hinic_cmdq_type cmdq_type, u16 prod_idx)
+{
+ u32 db_info;
+
+ cmdq_fill_db(&db_info, cmdq_type, prod_idx);
+
+ /* The data that is written to HW should be in Big Endian Format */
+ db_info = cpu_to_be32(db_info);
+
+ wmb(); /* write all before the doorbell */
+
+ writel(db_info, CMDQ_DB_ADDR(cmdq->db_base, prod_idx));
+}
+
+static int cmdq_sync_cmd_direct_resp(struct hinic_cmdq *cmdq,
+ enum hinic_mod_type mod, u8 cmd,
+ struct hinic_cmdq_buf *buf_in,
+ u64 *resp)
+{
+ struct hinic_cmdq_wqe *curr_cmdq_wqe, cmdq_wqe;
+ u16 curr_prod_idx, next_prod_idx;
+ int errcode, wrapped, num_wqebbs;
+ struct hinic_wq *wq = cmdq->wq;
+ struct hinic_hw_wqe *hw_wqe;
+ struct completion done;
+
+ /* keep the doorbell index correct; _bh because the ceq tasklet also takes this lock */
+ spin_lock_bh(&cmdq->cmdq_lock);
+
+ /* WQE_SIZE == WQEBB_SIZE, so we get the real wq element, not a shadow copy */
+ hw_wqe = hinic_get_wqe(wq, WQE_LCMD_SIZE, &curr_prod_idx);
+ if (IS_ERR(hw_wqe)) {
+ spin_unlock_bh(&cmdq->cmdq_lock);
+ return -EBUSY;
+ }
+
+ curr_cmdq_wqe = &hw_wqe->cmdq_wqe;
+
+ wrapped = cmdq->wrapped;
+
+ num_wqebbs = ALIGN(WQE_LCMD_SIZE, wq->wqebb_size) / wq->wqebb_size;
+ next_prod_idx = curr_prod_idx + num_wqebbs;
+ if (next_prod_idx >= wq->q_depth) {
+ cmdq->wrapped = !cmdq->wrapped;
+ next_prod_idx -= wq->q_depth;
+ }
+
+ cmdq->errcode[curr_prod_idx] = &errcode;
+
+ init_completion(&done);
+ cmdq->done[curr_prod_idx] = &done;
+
+ cmdq_set_lcmd_wqe(&cmdq_wqe, CMDQ_CMD_SYNC_DIRECT_RESP, buf_in, NULL,
+ wrapped, HINIC_CMD_ACK_TYPE_CMDQ, mod, cmd,
+ curr_prod_idx);
+
+ /* The data that is written to HW should be in Big Endian Format */
+ hinic_cpu_to_be32(&cmdq_wqe, WQE_LCMD_SIZE);
+
+ /* CMDQ WQE is not shadow, therefore wqe will be written to wq */
+ cmdq_wqe_fill(curr_cmdq_wqe, &cmdq_wqe);
+
+ cmdq_set_db(cmdq, HINIC_CMDQ_SYNC, next_prod_idx);
+
+ spin_unlock_bh(&cmdq->cmdq_lock);
+
+ if (!wait_for_completion_timeout(&done, msecs_to_jiffies(CMDQ_TIMEOUT))) {
+ spin_lock_bh(&cmdq->cmdq_lock);
+
+ if (cmdq->errcode[curr_prod_idx] == &errcode)
+ cmdq->errcode[curr_prod_idx] = NULL;
+
+ if (cmdq->done[curr_prod_idx] == &done)
+ cmdq->done[curr_prod_idx] = NULL;
+
+ spin_unlock_bh(&cmdq->cmdq_lock);
+
+ return -ETIMEDOUT;
+ }
+
+ smp_rmb(); /* read error code after completion */
+
+ if (resp) {
+ struct hinic_cmdq_wqe_lcmd *wqe_lcmd = &curr_cmdq_wqe->wqe_lcmd;
+
+ *resp = be64_to_cpu(wqe_lcmd->completion.direct_resp);
+ }
+
+ if (errcode != 0)
+ return -EFAULT;
+
+ return 0;
+}
+
+static int cmdq_set_arm_bit(struct hinic_cmdq *cmdq, void *buf_in,
+ u16 in_size)
+{
+ struct hinic_cmdq_wqe *curr_cmdq_wqe, cmdq_wqe;
+ u16 curr_prod_idx, next_prod_idx;
+ struct hinic_wq *wq = cmdq->wq;
+ struct hinic_hw_wqe *hw_wqe;
+ int wrapped, num_wqebbs;
+
+ /* Keep doorbell index correct */
+ spin_lock(&cmdq->cmdq_lock);
+
+ /* WQE_SIZE == WQEBB_SIZE, so we get the real wq element, not a shadow copy */
+ hw_wqe = hinic_get_wqe(wq, WQE_SCMD_SIZE, &curr_prod_idx);
+ if (IS_ERR(hw_wqe)) {
+ spin_unlock(&cmdq->cmdq_lock);
+ return -EBUSY;
+ }
+
+ curr_cmdq_wqe = &hw_wqe->cmdq_wqe;
+
+ wrapped = cmdq->wrapped;
+
+ num_wqebbs = ALIGN(WQE_SCMD_SIZE, wq->wqebb_size) / wq->wqebb_size;
+ next_prod_idx = curr_prod_idx + num_wqebbs;
+ if (next_prod_idx >= wq->q_depth) {
+ cmdq->wrapped = !cmdq->wrapped;
+ next_prod_idx -= wq->q_depth;
+ }
+
+ cmdq_set_direct_wqe(&cmdq_wqe, CMDQ_CMD_SYNC_DIRECT_RESP, buf_in,
+ in_size, NULL, wrapped, HINIC_CMD_ACK_TYPE_CMDQ,
+ HINIC_MOD_COMM, CMDQ_SET_ARM_CMD, curr_prod_idx);
+
+ /* The data that is written to HW should be in Big Endian Format */
+ hinic_cpu_to_be32(&cmdq_wqe, WQE_SCMD_SIZE);
+
+ /* cmdq wqe is not shadow, therefore wqe will be written to wq */
+ cmdq_wqe_fill(curr_cmdq_wqe, &cmdq_wqe);
+
+ cmdq_set_db(cmdq, HINIC_CMDQ_SYNC, next_prod_idx);
+
+ spin_unlock(&cmdq->cmdq_lock);
+ return 0;
+}
+
+static int cmdq_params_valid(struct hinic_cmdq_buf *buf_in)
+{
+ if (buf_in->size > HINIC_CMDQ_MAX_DATA_SIZE)
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * hinic_cmdq_direct_resp - send command with direct data as resp
+ * @cmdqs: the cmdqs
+ * @mod: module on the card that will handle the command
+ * @cmd: the command
+ * @buf_in: the buffer for the command
+ * @resp: the response to return
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_cmdq_direct_resp(struct hinic_cmdqs *cmdqs,
+ enum hinic_mod_type mod, u8 cmd,
+ struct hinic_cmdq_buf *buf_in, u64 *resp)
+{
+ struct hinic_hwif *hwif = cmdqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int err;
+
+ err = cmdq_params_valid(buf_in);
+ if (err) {
+ dev_err(&pdev->dev, "Invalid CMDQ parameters\n");
+ return err;
+ }
+
+ return cmdq_sync_cmd_direct_resp(&cmdqs->cmdq[HINIC_CMDQ_SYNC],
+ mod, cmd, buf_in, resp);
+}
+
+/**
+ * hinic_set_arm_bit - set arm bit to re-enable the interrupt
+ * @cmdqs: the cmdqs
+ * @q_type: type of queue to set the arm bit for
+ * @q_id: the queue number
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_set_arm_bit(struct hinic_cmdqs *cmdqs,
+ enum hinic_set_arm_qtype q_type, u32 q_id)
+{
+ struct hinic_cmdq *cmdq = &cmdqs->cmdq[HINIC_CMDQ_SYNC];
+ struct hinic_hwif *hwif = cmdqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_cmdq_arm_bit arm_bit;
+ int err;
+
+ arm_bit.q_type = q_type;
+ arm_bit.q_id = q_id;
+
+ err = cmdq_set_arm_bit(cmdq, &arm_bit, sizeof(arm_bit));
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set arm for qid %d\n", q_id);
+ return err;
+ }
+
+ return 0;
+}
+
+static void clear_wqe_complete_bit(struct hinic_cmdq *cmdq,
+ struct hinic_cmdq_wqe *wqe)
+{
+ u32 header_info = be32_to_cpu(CMDQ_WQE_HEADER(wqe)->header_info);
+ unsigned int bufdesc_len, wqe_size;
+ struct hinic_ctrl *ctrl;
+
+ bufdesc_len = HINIC_CMDQ_WQE_HEADER_GET(header_info, BUFDESC_LEN);
+ wqe_size = cmdq_wqe_size_from_bdlen(bufdesc_len);
+ if (wqe_size == WQE_LCMD_SIZE) {
+ struct hinic_cmdq_wqe_lcmd *wqe_lcmd = &wqe->wqe_lcmd;
+
+ ctrl = &wqe_lcmd->ctrl;
+ } else {
+ struct hinic_cmdq_direct_wqe *direct_wqe = &wqe->direct_wqe;
+ struct hinic_cmdq_wqe_scmd *wqe_scmd;
+
+ wqe_scmd = &direct_wqe->wqe_scmd;
+ ctrl = &wqe_scmd->ctrl;
+ }
+
+ /* clear HW busy bit */
+ ctrl->ctrl_info = 0;
+
+ wmb(); /* make sure the ctrl_info clear is visible before the wqe is reused */
+}
+
+/**
+ * cmdq_arm_ceq_handler - cmdq completion event handler for arm command
+ * @cmdq: the cmdq of the arm command
+ * @wqe: the wqe of the arm command
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int cmdq_arm_ceq_handler(struct hinic_cmdq *cmdq,
+ struct hinic_cmdq_wqe *wqe)
+{
+ struct hinic_cmdq_direct_wqe *direct_wqe = &wqe->direct_wqe;
+ struct hinic_cmdq_wqe_scmd *wqe_scmd;
+ struct hinic_ctrl *ctrl;
+ u32 ctrl_info;
+
+ wqe_scmd = &direct_wqe->wqe_scmd;
+ ctrl = &wqe_scmd->ctrl;
+ ctrl_info = be32_to_cpu(ctrl->ctrl_info);
+
+ /* HW should toggle the HW BUSY BIT */
+ if (!CMDQ_WQE_COMPLETED(ctrl_info))
+ return -EBUSY;
+
+ clear_wqe_complete_bit(cmdq, wqe);
+
+ hinic_put_wqe(cmdq->wq, WQE_SCMD_SIZE);
+ return 0;
+}
+
+static void cmdq_update_errcode(struct hinic_cmdq *cmdq, u16 prod_idx,
+ int errcode)
+{
+ if (cmdq->errcode[prod_idx])
+ *cmdq->errcode[prod_idx] = errcode;
+}
+
+/**
+ * cmdq_sync_cmd_handler - cmdq completion event handler for sync command
+ * @cmdq: the cmdq of the command
+ * @cons_idx: the consumer index to update the error code for
+ * @errcode: the error code
+ **/
+static void cmdq_sync_cmd_handler(struct hinic_cmdq *cmdq, u16 cons_idx,
+ int errcode)
+{
+ u16 prod_idx = cons_idx;
+
+ spin_lock(&cmdq->cmdq_lock);
+ cmdq_update_errcode(cmdq, prod_idx, errcode);
+
+ wmb(); /* write the errcode before waking the requester */
+
+ if (cmdq->done[prod_idx])
+ complete(cmdq->done[prod_idx]);
+ spin_unlock(&cmdq->cmdq_lock);
+}
+
+static int cmdq_cmd_ceq_handler(struct hinic_cmdq *cmdq, u16 ci,
+ struct hinic_cmdq_wqe *cmdq_wqe)
+{
+ struct hinic_cmdq_wqe_lcmd *wqe_lcmd = &cmdq_wqe->wqe_lcmd;
+ struct hinic_status *status = &wqe_lcmd->status;
+ struct hinic_ctrl *ctrl = &wqe_lcmd->ctrl;
+ int errcode;
+
+ if (!CMDQ_WQE_COMPLETED(be32_to_cpu(ctrl->ctrl_info)))
+ return -EBUSY;
+
+ errcode = CMDQ_WQE_ERRCODE_GET(be32_to_cpu(status->status_info), VAL);
+
+ cmdq_sync_cmd_handler(cmdq, ci, errcode);
+
+ clear_wqe_complete_bit(cmdq, cmdq_wqe);
+ hinic_put_wqe(cmdq->wq, WQE_LCMD_SIZE);
+ return 0;
+}
+
+/**
+ * cmdq_ceq_handler - cmdq completion event handler
+ * @handle: private data for the handler(cmdqs)
+ * @ceqe_data: ceq element data
+ **/
+static void cmdq_ceq_handler(void *handle, u32 ceqe_data)
+{
+ enum hinic_cmdq_type cmdq_type = CMDQ_CEQE_GET(ceqe_data, TYPE);
+ struct hinic_cmdqs *cmdqs = (struct hinic_cmdqs *)handle;
+ struct hinic_cmdq *cmdq = &cmdqs->cmdq[cmdq_type];
+ struct hinic_cmdq_header *header;
+ struct hinic_hw_wqe *hw_wqe;
+ int err, set_arm = 0;
+ u32 saved_data;
+ u16 ci;
+
+ /* Read with the smallest wqe size first to determine the actual wqe size */
+ while ((hw_wqe = hinic_read_wqe(cmdq->wq, WQE_SCMD_SIZE, &ci))) {
+ if (IS_ERR(hw_wqe))
+ break;
+
+ header = CMDQ_WQE_HEADER(&hw_wqe->cmdq_wqe);
+ saved_data = be32_to_cpu(header->saved_data);
+
+ if (HINIC_SAVED_DATA_GET(saved_data, ARM)) {
+ /* this wqe is the arm command itself - the queue was armed up to here */
+ set_arm = 0;
+
+ if (cmdq_arm_ceq_handler(cmdq, &hw_wqe->cmdq_wqe))
+ break;
+ } else {
+ set_arm = 1;
+
+ hw_wqe = hinic_read_wqe(cmdq->wq, WQE_LCMD_SIZE, &ci);
+ if (IS_ERR(hw_wqe))
+ break;
+
+ if (cmdq_cmd_ceq_handler(cmdq, ci, &hw_wqe->cmdq_wqe))
+ break;
+ }
+ }
+
+ if (set_arm) {
+ struct hinic_hwif *hwif = cmdqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ err = hinic_set_arm_bit(cmdqs, HINIC_SET_ARM_CMDQ, cmdq_type);
+ if (err)
+ dev_err(&pdev->dev, "Failed to set arm for CMDQ\n");
+ }
+}
+
+/**
+ * cmdq_init_queue_ctxt - init the queue ctxt of a cmdq
+ * @cmdq_ctxt: cmdq ctxt to initialize
+ * @cmdq: the cmdq
+ * @cmdq_pages: the memory of the queue
+ **/
+static void cmdq_init_queue_ctxt(struct hinic_cmdq_ctxt *cmdq_ctxt,
+ struct hinic_cmdq *cmdq,
+ struct hinic_cmdq_pages *cmdq_pages)
+{
+ struct hinic_cmdq_ctxt_info *ctxt_info = &cmdq_ctxt->ctxt_info;
+ u64 wq_first_page_paddr, cmdq_first_block_paddr, pfn;
+ struct hinic_cmdqs *cmdqs = cmdq_to_cmdqs(cmdq);
+ struct hinic_wq *wq = cmdq->wq;
+
+ /* The data in the HW is in Big Endian Format */
+ wq_first_page_paddr = be64_to_cpu(*wq->block_vaddr);
+
+ pfn = CMDQ_PFN(wq_first_page_paddr, wq->wq_page_size);
+
+ ctxt_info->curr_wqe_page_pfn =
+ HINIC_CMDQ_CTXT_PAGE_INFO_SET(pfn, CURR_WQE_PAGE_PFN) |
+ HINIC_CMDQ_CTXT_PAGE_INFO_SET(HINIC_CEQ_ID_CMDQ, EQ_ID) |
+ HINIC_CMDQ_CTXT_PAGE_INFO_SET(1, CEQ_ARM) |
+ HINIC_CMDQ_CTXT_PAGE_INFO_SET(1, CEQ_EN) |
+ HINIC_CMDQ_CTXT_PAGE_INFO_SET(cmdq->wrapped, WRAPPED);
+
+ /* block PFN - Read Modify Write */
+ cmdq_first_block_paddr = cmdq_pages->page_paddr;
+
+ pfn = CMDQ_PFN(cmdq_first_block_paddr, wq->wq_page_size);
+
+ ctxt_info->wq_block_pfn =
+ HINIC_CMDQ_CTXT_BLOCK_INFO_SET(pfn, WQ_BLOCK_PFN) |
+ HINIC_CMDQ_CTXT_BLOCK_INFO_SET(atomic_read(&wq->cons_idx), CI);
+
+ cmdq_ctxt->func_idx = HINIC_HWIF_FUNC_IDX(cmdqs->hwif);
+ cmdq_ctxt->cmdq_type = cmdq->cmdq_type;
+}
+
+/**
+ * init_cmdq - initialize cmdq
+ * @cmdq: the cmdq
+ * @wq: the wq attached to the cmdq
+ * @q_type: the cmdq type of the cmdq
+ * @db_area: doorbell area for the cmdq
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int init_cmdq(struct hinic_cmdq *cmdq, struct hinic_wq *wq,
+ enum hinic_cmdq_type q_type, void __iomem *db_area)
+{
+ int err;
+
+ cmdq->wq = wq;
+ cmdq->cmdq_type = q_type;
+ cmdq->wrapped = 1;
+
+ spin_lock_init(&cmdq->cmdq_lock);
+
+ cmdq->done = vzalloc(wq->q_depth * sizeof(*cmdq->done));
+ if (!cmdq->done)
+ return -ENOMEM;
+
+ cmdq->errcode = vzalloc(wq->q_depth * sizeof(*cmdq->errcode));
+ if (!cmdq->errcode) {
+ err = -ENOMEM;
+ goto err_errcode;
+ }
+
+ cmdq->db_base = db_area + CMDQ_DB_OFF;
+ return 0;
+
+err_errcode:
+ vfree(cmdq->done);
+ return err;
+}
+
+/**
+ * free_cmdq - Free cmdq
+ * @cmdq: the cmdq to free
+ **/
+static void free_cmdq(struct hinic_cmdq *cmdq)
+{
+ vfree(cmdq->errcode);
+ vfree(cmdq->done);
+}
+
+/**
+ * init_cmdqs_ctxt - write the cmdq ctxts to HW after initializing all cmdqs
+ * @hwdev: the NIC HW device
+ * @cmdqs: cmdqs to write the ctxts for
+ * @db_area: db_area for all the cmdqs
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int init_cmdqs_ctxt(struct hinic_hwdev *hwdev,
+ struct hinic_cmdqs *cmdqs, void __iomem **db_area)
+{
+ struct hinic_hwif *hwif = hwdev->hwif;
+ enum hinic_cmdq_type type, cmdq_type;
+ struct hinic_cmdq_ctxt *cmdq_ctxts;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_pfhwdev *pfhwdev;
+ size_t cmdq_ctxts_size;
+ int err;
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "Unsupported PCI function type\n");
+ return -EINVAL;
+ }
+
+ cmdq_ctxts_size = HINIC_MAX_CMDQ_TYPES * sizeof(*cmdq_ctxts);
+ cmdq_ctxts = devm_kzalloc(&pdev->dev, cmdq_ctxts_size, GFP_KERNEL);
+ if (!cmdq_ctxts)
+ return -ENOMEM;
+
+ pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+ cmdq_type = HINIC_CMDQ_SYNC;
+ for (; cmdq_type < HINIC_MAX_CMDQ_TYPES; cmdq_type++) {
+ err = init_cmdq(&cmdqs->cmdq[cmdq_type],
+ &cmdqs->saved_wqs[cmdq_type], cmdq_type,
+ db_area[cmdq_type]);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize cmdq\n");
+ goto err_init_cmdq;
+ }
+
+ cmdq_init_queue_ctxt(&cmdq_ctxts[cmdq_type],
+ &cmdqs->cmdq[cmdq_type],
+ &cmdqs->cmdq_pages);
+ }
+
+ /* Write the CMDQ ctxts */
+ cmdq_type = HINIC_CMDQ_SYNC;
+ for (; cmdq_type < HINIC_MAX_CMDQ_TYPES; cmdq_type++) {
+ err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
+ HINIC_COMM_CMD_CMDQ_CTXT_SET,
+ &cmdq_ctxts[cmdq_type],
+ sizeof(cmdq_ctxts[cmdq_type]),
+ NULL, NULL, HINIC_MGMT_MSG_SYNC);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set CMDQ CTXT type = %d\n",
+ cmdq_type);
+ goto err_write_cmdq_ctxt;
+ }
+ }
+
+ devm_kfree(&pdev->dev, cmdq_ctxts);
+ return 0;
+
+err_write_cmdq_ctxt:
+ cmdq_type = HINIC_MAX_CMDQ_TYPES;
+
+err_init_cmdq:
+ for (type = HINIC_CMDQ_SYNC; type < cmdq_type; type++)
+ free_cmdq(&cmdqs->cmdq[type]);
+
+ devm_kfree(&pdev->dev, cmdq_ctxts);
+ return err;
+}
+
+/**
+ * hinic_init_cmdqs - init all cmdqs
+ * @cmdqs: cmdqs to init
+ * @hwif: HW interface for accessing cmdqs
+ * @db_area: doorbell areas for all the cmdqs
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_init_cmdqs(struct hinic_cmdqs *cmdqs, struct hinic_hwif *hwif,
+ void __iomem **db_area)
+{
+ struct hinic_func_to_io *func_to_io = cmdqs_to_func_to_io(cmdqs);
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_hwdev *hwdev;
+ size_t saved_wqs_size;
+ u16 max_wqe_size;
+ int err;
+
+ cmdqs->hwif = hwif;
+ cmdqs->cmdq_buf_pool = pci_pool_create("hinic_cmdq", pdev,
+ HINIC_CMDQ_BUF_SIZE,
+ HINIC_CMDQ_BUF_SIZE, 0);
+ if (!cmdqs->cmdq_buf_pool)
+ return -ENOMEM;
+
+ saved_wqs_size = HINIC_MAX_CMDQ_TYPES * sizeof(struct hinic_wq);
+ cmdqs->saved_wqs = devm_kzalloc(&pdev->dev, saved_wqs_size, GFP_KERNEL);
+ if (!cmdqs->saved_wqs) {
+ err = -ENOMEM;
+ goto err_saved_wqs;
+ }
+
+ max_wqe_size = WQE_LCMD_SIZE;
+ err = hinic_wqs_cmdq_alloc(&cmdqs->cmdq_pages, cmdqs->saved_wqs, hwif,
+ HINIC_MAX_CMDQ_TYPES, CMDQ_WQEBB_SIZE,
+ CMDQ_WQ_PAGE_SIZE, CMDQ_DEPTH, max_wqe_size);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate CMDQ wqs\n");
+ goto err_cmdq_wqs;
+ }
+
+ hwdev = container_of(func_to_io, struct hinic_hwdev, func_to_io);
+ err = init_cmdqs_ctxt(hwdev, cmdqs, db_area);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to write cmdq ctxt\n");
+ goto err_cmdq_ctxt;
+ }
+
+ hinic_ceq_register_cb(&func_to_io->ceqs, HINIC_CEQ_CMDQ, cmdqs,
+ cmdq_ceq_handler);
+ return 0;
+
+err_cmdq_ctxt:
+ hinic_wqs_cmdq_free(&cmdqs->cmdq_pages, cmdqs->saved_wqs,
+ HINIC_MAX_CMDQ_TYPES);
+
+err_cmdq_wqs:
+ devm_kfree(&pdev->dev, cmdqs->saved_wqs);
+
+err_saved_wqs:
+ pci_pool_destroy(cmdqs->cmdq_buf_pool);
+ return err;
+}
+
+/**
+ * hinic_free_cmdqs - free all cmdqs
+ * @cmdqs: cmdqs to free
+ **/
+void hinic_free_cmdqs(struct hinic_cmdqs *cmdqs)
+{
+ struct hinic_func_to_io *func_to_io = cmdqs_to_func_to_io(cmdqs);
+ struct hinic_hwif *hwif = cmdqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ enum hinic_cmdq_type cmdq_type;
+
+ hinic_ceq_unregister_cb(&func_to_io->ceqs, HINIC_CEQ_CMDQ);
+
+ cmdq_type = HINIC_CMDQ_SYNC;
+ for (; cmdq_type < HINIC_MAX_CMDQ_TYPES; cmdq_type++)
+ free_cmdq(&cmdqs->cmdq[cmdq_type]);
+
+ hinic_wqs_cmdq_free(&cmdqs->cmdq_pages, cmdqs->saved_wqs,
+ HINIC_MAX_CMDQ_TYPES);
+
+ devm_kfree(&pdev->dev, cmdqs->saved_wqs);
+
+ pci_pool_destroy(cmdqs->cmdq_buf_pool);
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h
new file mode 100644
index 0000000..b355834
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h
@@ -0,0 +1,187 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_CMDQ_H
+#define HINIC_CMDQ_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/pci.h>
+
+#include "hinic_hw_if.h"
+#include "hinic_hw_wq.h"
+
+#define HINIC_CMDQ_CTXT_CURR_WQE_PAGE_PFN_SHIFT 0
+#define HINIC_CMDQ_CTXT_EQ_ID_SHIFT 56
+#define HINIC_CMDQ_CTXT_CEQ_ARM_SHIFT 61
+#define HINIC_CMDQ_CTXT_CEQ_EN_SHIFT 62
+#define HINIC_CMDQ_CTXT_WRAPPED_SHIFT 63
+
+#define HINIC_CMDQ_CTXT_CURR_WQE_PAGE_PFN_MASK 0xFFFFFFFFFFFFF
+#define HINIC_CMDQ_CTXT_EQ_ID_MASK 0x1F
+#define HINIC_CMDQ_CTXT_CEQ_ARM_MASK 0x1
+#define HINIC_CMDQ_CTXT_CEQ_EN_MASK 0x1
+#define HINIC_CMDQ_CTXT_WRAPPED_MASK 0x1
+
+#define HINIC_CMDQ_CTXT_PAGE_INFO_SET(val, member) \
+ (((u64)(val) & HINIC_CMDQ_CTXT_##member##_MASK) \
+ << HINIC_CMDQ_CTXT_##member##_SHIFT)
+
+#define HINIC_CMDQ_CTXT_PAGE_INFO_CLEAR(val, member) \
+ ((val) & (~((u64)HINIC_CMDQ_CTXT_##member##_MASK \
+ << HINIC_CMDQ_CTXT_##member##_SHIFT)))
+
+#define HINIC_CMDQ_CTXT_WQ_BLOCK_PFN_SHIFT 0
+#define HINIC_CMDQ_CTXT_CI_SHIFT 52
+
+#define HINIC_CMDQ_CTXT_WQ_BLOCK_PFN_MASK 0xFFFFFFFFFFFFF
+#define HINIC_CMDQ_CTXT_CI_MASK 0xFFF
+
+#define HINIC_CMDQ_CTXT_BLOCK_INFO_SET(val, member) \
+ (((u64)(val) & HINIC_CMDQ_CTXT_##member##_MASK) \
+ << HINIC_CMDQ_CTXT_##member##_SHIFT)
+
+#define HINIC_CMDQ_CTXT_BLOCK_INFO_CLEAR(val, member) \
+ ((val) & (~((u64)HINIC_CMDQ_CTXT_##member##_MASK \
+ << HINIC_CMDQ_CTXT_##member##_SHIFT)))
+
+#define HINIC_SAVED_DATA_ARM_SHIFT 31
+
+#define HINIC_SAVED_DATA_ARM_MASK 0x1
+
+#define HINIC_SAVED_DATA_SET(val, member) \
+ (((u32)(val) & HINIC_SAVED_DATA_##member##_MASK) \
+ << HINIC_SAVED_DATA_##member##_SHIFT)
+
+#define HINIC_SAVED_DATA_GET(val, member) \
+ (((val) >> HINIC_SAVED_DATA_##member##_SHIFT) \
+ & HINIC_SAVED_DATA_##member##_MASK)
+
+#define HINIC_SAVED_DATA_CLEAR(val, member) \
+ ((val) & (~(HINIC_SAVED_DATA_##member##_MASK \
+ << HINIC_SAVED_DATA_##member##_SHIFT)))
+
+#define HINIC_CMDQ_DB_INFO_HI_PROD_IDX_SHIFT 0
+#define HINIC_CMDQ_DB_INFO_PATH_SHIFT 23
+#define HINIC_CMDQ_DB_INFO_CMDQ_TYPE_SHIFT 24
+#define HINIC_CMDQ_DB_INFO_DB_TYPE_SHIFT 27
+
+#define HINIC_CMDQ_DB_INFO_HI_PROD_IDX_MASK 0xFF
+#define HINIC_CMDQ_DB_INFO_PATH_MASK 0x1
+#define HINIC_CMDQ_DB_INFO_CMDQ_TYPE_MASK 0x7
+#define HINIC_CMDQ_DB_INFO_DB_TYPE_MASK 0x1F
+
+#define HINIC_CMDQ_DB_INFO_SET(val, member) \
+ (((u32)(val) & HINIC_CMDQ_DB_INFO_##member##_MASK) \
+ << HINIC_CMDQ_DB_INFO_##member##_SHIFT)
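+
+/*
+ * Illustrative example: HINIC_CMDQ_DB_INFO_SET(HINIC_CMDQ_SYNC, CMDQ_TYPE)
+ * places the cmdq type in bits [26:24] of the doorbell info word, while
+ * HI_PROD_IDX carries the upper 8 bits of the producer index (the low
+ * bits select the doorbell address via CMDQ_DB_ADDR).
+ */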
+
+#define HINIC_CMDQ_BUF_SIZE 2048
+
+#define HINIC_CMDQ_BUF_HW_RSVD 8
+#define HINIC_CMDQ_MAX_DATA_SIZE (HINIC_CMDQ_BUF_SIZE - \
+ HINIC_CMDQ_BUF_HW_RSVD)
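+
+/* i.e. 2048-byte buffers minus 8 bytes reserved for HW leave 2040 usable */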
+
+enum hinic_cmdq_type {
+ HINIC_CMDQ_SYNC,
+
+ HINIC_MAX_CMDQ_TYPES,
+};
+
+enum hinic_set_arm_qtype {
+ HINIC_SET_ARM_CMDQ,
+};
+
+enum hinic_cmd_ack_type {
+ HINIC_CMD_ACK_TYPE_CMDQ,
+};
+
+struct hinic_cmdq_buf {
+ void *buf;
+ dma_addr_t dma_addr;
+ size_t size;
+};
+
+struct hinic_cmdq_arm_bit {
+ u32 q_type;
+ u32 q_id;
+};
+
+struct hinic_cmdq_ctxt_info {
+ u64 curr_wqe_page_pfn;
+ u64 wq_block_pfn;
+};
+
+struct hinic_cmdq_ctxt {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u8 cmdq_type;
+ u8 rsvd1[1];
+
+ u8 rsvd2[4];
+
+ struct hinic_cmdq_ctxt_info ctxt_info;
+};
+
+struct hinic_cmdq {
+ struct hinic_wq *wq;
+
+ enum hinic_cmdq_type cmdq_type;
+ int wrapped;
+
+ /* Lock for keeping the doorbell order */
+ spinlock_t cmdq_lock;
+
+ struct completion **done;
+ int **errcode;
+
+ /* doorbell area */
+ void __iomem *db_base;
+};
+
+struct hinic_cmdqs {
+ struct hinic_hwif *hwif;
+
+ struct pci_pool *cmdq_buf_pool;
+
+ struct hinic_wq *saved_wqs;
+
+ struct hinic_cmdq_pages cmdq_pages;
+
+ struct hinic_cmdq cmdq[HINIC_MAX_CMDQ_TYPES];
+};
+
+int hinic_alloc_cmdq_buf(struct hinic_cmdqs *cmdqs,
+ struct hinic_cmdq_buf *cmdq_buf);
+
+void hinic_free_cmdq_buf(struct hinic_cmdqs *cmdqs,
+ struct hinic_cmdq_buf *cmdq_buf);
+
+int hinic_cmdq_direct_resp(struct hinic_cmdqs *cmdqs,
+ enum hinic_mod_type mod, u8 cmd,
+ struct hinic_cmdq_buf *buf_in, u64 *resp);
+
+int hinic_set_arm_bit(struct hinic_cmdqs *cmdqs,
+ enum hinic_set_arm_qtype q_type, u32 q_id);
+
+int hinic_init_cmdqs(struct hinic_cmdqs *cmdqs, struct hinic_hwif *hwif,
+ void __iomem **db_area);
+
+void hinic_free_cmdqs(struct hinic_cmdqs *cmdqs);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
new file mode 100644
index 0000000..f39b184
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
@@ -0,0 +1,149 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_HW_CSR_H
+#define HINIC_HW_CSR_H
+
+/* HW interface registers */
+#define HINIC_CSR_FUNC_ATTR0_ADDR 0x0
+#define HINIC_CSR_FUNC_ATTR1_ADDR 0x4
+
+#define HINIC_CSR_FUNC_ATTR4_ADDR 0x10
+#define HINIC_CSR_FUNC_ATTR5_ADDR 0x14
+
+#define HINIC_DMA_ATTR_BASE 0xC80
+#define HINIC_ELECTION_BASE 0x4200
+
+#define HINIC_DMA_ATTR_STRIDE 0x4
+#define HINIC_CSR_DMA_ATTR_ADDR(idx) \
+ (HINIC_DMA_ATTR_BASE + (idx) * HINIC_DMA_ATTR_STRIDE)
+
+#define HINIC_PPF_ELECTION_STRIDE 0x4
+#define HINIC_CSR_MAX_PORTS 4
+
+#define HINIC_CSR_PPF_ELECTION_ADDR(idx) \
+ (HINIC_ELECTION_BASE + (idx) * HINIC_PPF_ELECTION_STRIDE)
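+
+/* e.g. HINIC_CSR_PPF_ELECTION_ADDR(1) = 0x4200 + 1 * 0x4 = 0x4204 */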
+
+/* API CMD registers */
+#define HINIC_CSR_API_CMD_BASE 0xF000
+
+#define HINIC_CSR_API_CMD_STRIDE 0x100
+
+#define HINIC_CSR_API_CMD_CHAIN_HEAD_HI_ADDR(idx) \
+ (HINIC_CSR_API_CMD_BASE + 0x0 + (idx) * HINIC_CSR_API_CMD_STRIDE)
+
+#define HINIC_CSR_API_CMD_CHAIN_HEAD_LO_ADDR(idx) \
+ (HINIC_CSR_API_CMD_BASE + 0x4 + (idx) * HINIC_CSR_API_CMD_STRIDE)
+
+#define HINIC_CSR_API_CMD_STATUS_HI_ADDR(idx) \
+ (HINIC_CSR_API_CMD_BASE + 0x8 + (idx) * HINIC_CSR_API_CMD_STRIDE)
+
+#define HINIC_CSR_API_CMD_STATUS_LO_ADDR(idx) \
+ (HINIC_CSR_API_CMD_BASE + 0xC + (idx) * HINIC_CSR_API_CMD_STRIDE)
+
+#define HINIC_CSR_API_CMD_CHAIN_NUM_CELLS_ADDR(idx) \
+ (HINIC_CSR_API_CMD_BASE + 0x10 + (idx) * HINIC_CSR_API_CMD_STRIDE)
+
+#define HINIC_CSR_API_CMD_CHAIN_CTRL_ADDR(idx) \
+ (HINIC_CSR_API_CMD_BASE + 0x14 + (idx) * HINIC_CSR_API_CMD_STRIDE)
+
+#define HINIC_CSR_API_CMD_CHAIN_PI_ADDR(idx) \
+ (HINIC_CSR_API_CMD_BASE + 0x1C + (idx) * HINIC_CSR_API_CMD_STRIDE)
+
+#define HINIC_CSR_API_CMD_CHAIN_REQ_ADDR(idx) \
+ (HINIC_CSR_API_CMD_BASE + 0x20 + (idx) * HINIC_CSR_API_CMD_STRIDE)
+
+#define HINIC_CSR_API_CMD_STATUS_ADDR(idx) \
+ (HINIC_CSR_API_CMD_BASE + 0x30 + (idx) * HINIC_CSR_API_CMD_STRIDE)
+
+/* MSI-X registers */
+#define HINIC_CSR_MSIX_CTRL_BASE 0x2000
+#define HINIC_CSR_MSIX_CNT_BASE 0x2004
+
+#define HINIC_CSR_MSIX_STRIDE 0x8
+
+#define HINIC_CSR_MSIX_CTRL_ADDR(idx) \
+ (HINIC_CSR_MSIX_CTRL_BASE + (idx) * HINIC_CSR_MSIX_STRIDE)
+
+#define HINIC_CSR_MSIX_CNT_ADDR(idx) \
+ (HINIC_CSR_MSIX_CNT_BASE + (idx) * HINIC_CSR_MSIX_STRIDE)
+
+/* EQ registers */
+#define HINIC_AEQ_MTT_OFF_BASE_ADDR 0x200
+#define HINIC_CEQ_MTT_OFF_BASE_ADDR 0x400
+
+#define HINIC_EQ_MTT_OFF_STRIDE 0x40
+
+#define HINIC_CSR_AEQ_MTT_OFF(id) \
+ (HINIC_AEQ_MTT_OFF_BASE_ADDR + (id) * HINIC_EQ_MTT_OFF_STRIDE)
+
+#define HINIC_CSR_CEQ_MTT_OFF(id) \
+ (HINIC_CEQ_MTT_OFF_BASE_ADDR + (id) * HINIC_EQ_MTT_OFF_STRIDE)
+
+#define HINIC_CSR_EQ_PAGE_OFF_STRIDE 8
+
+#define HINIC_CSR_AEQ_HI_PHYS_ADDR_REG(q_id, pg_num) \
+ (HINIC_CSR_AEQ_MTT_OFF(q_id) + \
+ (pg_num) * HINIC_CSR_EQ_PAGE_OFF_STRIDE)
+
+#define HINIC_CSR_CEQ_HI_PHYS_ADDR_REG(q_id, pg_num) \
+ (HINIC_CSR_CEQ_MTT_OFF(q_id) + \
+ (pg_num) * HINIC_CSR_EQ_PAGE_OFF_STRIDE)
+
+#define HINIC_CSR_AEQ_LO_PHYS_ADDR_REG(q_id, pg_num) \
+ (HINIC_CSR_AEQ_MTT_OFF(q_id) + \
+ (pg_num) * HINIC_CSR_EQ_PAGE_OFF_STRIDE + 4)
+
+#define HINIC_CSR_CEQ_LO_PHYS_ADDR_REG(q_id, pg_num) \
+ (HINIC_CSR_CEQ_MTT_OFF(q_id) + \
+ (pg_num) * HINIC_CSR_EQ_PAGE_OFF_STRIDE + 4)
+
+#define HINIC_AEQ_CTRL_0_ADDR_BASE 0xE00
+#define HINIC_AEQ_CTRL_1_ADDR_BASE 0xE04
+#define HINIC_AEQ_CONS_IDX_ADDR_BASE 0xE08
+#define HINIC_AEQ_PROD_IDX_ADDR_BASE 0xE0C
+
+#define HINIC_CEQ_CTRL_0_ADDR_BASE 0x1000
+#define HINIC_CEQ_CTRL_1_ADDR_BASE 0x1004
+#define HINIC_CEQ_CONS_IDX_ADDR_BASE 0x1008
+#define HINIC_CEQ_PROD_IDX_ADDR_BASE 0x100C
+
+#define HINIC_EQ_OFF_STRIDE 0x80
+
+#define HINIC_CSR_AEQ_CTRL_0_ADDR(idx) \
+ (HINIC_AEQ_CTRL_0_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#define HINIC_CSR_AEQ_CTRL_1_ADDR(idx) \
+ (HINIC_AEQ_CTRL_1_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#define HINIC_CSR_AEQ_CONS_IDX_ADDR(idx) \
+ (HINIC_AEQ_CONS_IDX_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#define HINIC_CSR_AEQ_PROD_IDX_ADDR(idx) \
+ (HINIC_AEQ_PROD_IDX_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#define HINIC_CSR_CEQ_CTRL_0_ADDR(idx) \
+ (HINIC_CEQ_CTRL_0_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#define HINIC_CSR_CEQ_CTRL_1_ADDR(idx) \
+ (HINIC_CEQ_CTRL_1_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#define HINIC_CSR_CEQ_CONS_IDX_ADDR(idx) \
+ (HINIC_CEQ_CONS_IDX_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#define HINIC_CSR_CEQ_PROD_IDX_ADDR(idx) \
+ (HINIC_CEQ_PROD_IDX_ADDR_BASE + (idx) * HINIC_EQ_OFF_STRIDE)
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
new file mode 100644
index 0000000..79b5674
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -0,0 +1,1013 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/log2.h>
+#include <linux/err.h>
+
+#include "hinic_hw_if.h"
+#include "hinic_hw_eqs.h"
+#include "hinic_hw_mgmt.h"
+#include "hinic_hw_qp_ctxt.h"
+#include "hinic_hw_qp.h"
+#include "hinic_hw_io.h"
+#include "hinic_hw_dev.h"
+
+#define IO_STATUS_TIMEOUT 100
+#define OUTBOUND_STATE_TIMEOUT 100
+#define DB_STATE_TIMEOUT 100
+
+#define MAX_IRQS(max_qps, num_aeqs, num_ceqs) \
+ (2 * (max_qps) + (num_aeqs) + (num_ceqs))
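+
+/* e.g. 16 QPs with 4 AEQs and 4 CEQs need 2 * 16 + 4 + 4 = 40 vectors */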
+
+#define ADDR_IN_4BYTES(addr) ((addr) >> 2)
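+
+/* HW takes the CI DMA address in 4-byte units, hence the shift by 2 */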
+
+enum intr_type {
+ INTR_MSIX_TYPE,
+};
+
+enum io_status {
+ IO_STOPPED = 0,
+ IO_RUNNING = 1,
+};
+
+enum hw_ioctxt_set_cmdq_depth {
+ HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT,
+};
+
+/* HW struct */
+struct hinic_dev_cap {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u8 rsvd1[5];
+ u8 intr_type;
+ u8 rsvd2[66];
+ u16 max_sqs;
+ u16 max_rqs;
+ u8 rsvd3[208];
+};
+
+/**
+ * get_capability - convert device capabilities to NIC capabilities
+ * @hwdev: the HW device to set and convert device capabilities for
+ * @dev_cap: device capabilities from FW
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int get_capability(struct hinic_hwdev *hwdev,
+ struct hinic_dev_cap *dev_cap)
+{
+ struct hinic_cap *nic_cap = &hwdev->nic_cap;
+ int num_aeqs, num_ceqs, num_irqs;
+
+ if (!HINIC_IS_PF(hwdev->hwif) && !HINIC_IS_PPF(hwdev->hwif))
+ return -EINVAL;
+
+ if (dev_cap->intr_type != INTR_MSIX_TYPE)
+ return -EFAULT;
+
+ num_aeqs = HINIC_HWIF_NUM_AEQS(hwdev->hwif);
+ num_ceqs = HINIC_HWIF_NUM_CEQS(hwdev->hwif);
+ num_irqs = HINIC_HWIF_NUM_IRQS(hwdev->hwif);
+
+ /* Each QP has its own (SQ + RQ) interrupts */
+ nic_cap->num_qps = (num_irqs - (num_aeqs + num_ceqs)) / 2;
+
+ if (nic_cap->num_qps > HINIC_Q_CTXT_MAX)
+ nic_cap->num_qps = HINIC_Q_CTXT_MAX;
+
+ /* num_qps must be power of 2 */
+ nic_cap->num_qps = BIT(fls(nic_cap->num_qps) - 1);
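+
+ /*
+  * Illustrative example: 32 IRQs with 4 AEQs and 4 CEQs give
+  * (32 - 8) / 2 = 12 qps, rounded down above to 8, the nearest
+  * power of 2.
+  */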
+
+ nic_cap->max_qps = dev_cap->max_sqs + 1;
+ if (nic_cap->max_qps != (dev_cap->max_rqs + 1))
+ return -EFAULT;
+
+ if (nic_cap->num_qps > nic_cap->max_qps)
+ nic_cap->num_qps = nic_cap->max_qps;
+
+ return 0;
+}
+
+/**
+ * get_cap_from_fw - get device capabilities from FW
+ * @pfhwdev: the PF HW device to get capabilities for
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int get_cap_from_fw(struct hinic_pfhwdev *pfhwdev)
+{
+ struct hinic_hwdev *hwdev = &pfhwdev->hwdev;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_dev_cap dev_cap;
+ u16 in_len, out_len;
+ int err;
+
+ in_len = 0;
+ out_len = sizeof(dev_cap);
+
+ err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_CFGM,
+ HINIC_CFG_NIC_CAP, &dev_cap, in_len, &dev_cap,
+ &out_len, HINIC_MGMT_MSG_SYNC);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to get capability from FW\n");
+ return err;
+ }
+
+ return get_capability(hwdev, &dev_cap);
+}
+
+/**
+ * get_dev_cap - get device capabilities
+ * @hwdev: the NIC HW device to get capabilities for
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int get_dev_cap(struct hinic_hwdev *hwdev)
+{
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_pfhwdev *pfhwdev;
+ int err;
+
+ switch (HINIC_FUNC_TYPE(hwif)) {
+ case HINIC_PPF:
+ case HINIC_PF:
+ pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+ err = get_cap_from_fw(pfhwdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to get capability from FW\n");
+ return err;
+ }
+ break;
+
+ default:
+ dev_err(&pdev->dev, "Unsupported PCI Function type\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * init_msix - enable the msix and save the entries
+ * @hwdev: the NIC HW device
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int init_msix(struct hinic_hwdev *hwdev)
+{
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int nr_irqs, num_aeqs, num_ceqs;
+ size_t msix_entries_size;
+ int i, err;
+
+ num_aeqs = HINIC_HWIF_NUM_AEQS(hwif);
+ num_ceqs = HINIC_HWIF_NUM_CEQS(hwif);
+ nr_irqs = MAX_IRQS(HINIC_MAX_QPS, num_aeqs, num_ceqs);
+ if (nr_irqs > HINIC_HWIF_NUM_IRQS(hwif))
+ nr_irqs = HINIC_HWIF_NUM_IRQS(hwif);
+
+ msix_entries_size = nr_irqs * sizeof(*hwdev->msix_entries);
+ hwdev->msix_entries = devm_kzalloc(&pdev->dev, msix_entries_size,
+ GFP_KERNEL);
+ if (!hwdev->msix_entries)
+ return -ENOMEM;
+
+ for (i = 0; i < nr_irqs; i++)
+ hwdev->msix_entries[i].entry = i;
+
+ err = pci_enable_msix_exact(pdev, hwdev->msix_entries, nr_irqs);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to enable pci msix\n");
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * disable_msix - disable the msix
+ * @hwdev: the NIC HW device
+ **/
+static void disable_msix(struct hinic_hwdev *hwdev)
+{
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ pci_disable_msix(pdev);
+}
+
+/**
+ * hinic_port_msg_cmd - send port msg to mgmt
+ * @hwdev: the NIC HW device
+ * @cmd: the port command
+ * @buf_in: input buffer
+ * @in_size: input size
+ * @buf_out: output buffer
+ * @out_size: returned output size
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_msg_cmd(struct hinic_hwdev *hwdev, enum hinic_port_cmd cmd,
+ void *buf_in, u16 in_size, void *buf_out, u16 *out_size)
+{
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_pfhwdev *pfhwdev;
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "unsupported PCI Function type\n");
+ return -EINVAL;
+ }
+
+ pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+ return hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_L2NIC, cmd,
+ buf_in, in_size, buf_out, out_size,
+ HINIC_MGMT_MSG_SYNC);
+}
+
+/**
+ * init_fw_ctxt - init firmware tables before network mgmt and IO operations
+ * @hwdev: the NIC HW device
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int init_fw_ctxt(struct hinic_hwdev *hwdev)
+{
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_cmd_fw_ctxt fw_ctxt;
+ struct hinic_pfhwdev *pfhwdev;
+ u16 out_size;
+ int err;
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "Unsupported PCI Function type\n");
+ return -EINVAL;
+ }
+
+ fw_ctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+ fw_ctxt.rx_buf_sz = HINIC_RX_BUF_SZ;
+
+ pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+ err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_FWCTXT_INIT,
+ &fw_ctxt, sizeof(fw_ctxt),
+ &fw_ctxt, &out_size);
+ if (err || (out_size != sizeof(fw_ctxt)) || fw_ctxt.status) {
+ dev_err(&pdev->dev, "Failed to init FW ctxt, ret = %d\n",
+ fw_ctxt.status);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * set_hw_ioctxt - set the shape of the IO queues in FW
+ * @hwdev: the NIC HW device
+ * @rq_depth: rq depth
+ * @sq_depth: sq depth
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth,
+ unsigned int sq_depth)
+{
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct hinic_cmd_hw_ioctxt hw_ioctxt;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_pfhwdev *pfhwdev;
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "Unsupported PCI Function type\n");
+ return -EINVAL;
+ }
+
+ hw_ioctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+
+ hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT;
+ hw_ioctxt.cmdq_depth = 0;
+
+ hw_ioctxt.rq_depth = ilog2(rq_depth);
+
+ hw_ioctxt.rx_buf_sz_idx = HINIC_RX_BUF_SZ_IDX;
+
+ hw_ioctxt.sq_depth = ilog2(sq_depth);
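+
+ /* depths are passed to HW as log2 values, e.g. 4096 entries -> 12 */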
+
+ pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+ return hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
+ HINIC_COMM_CMD_HWCTXT_SET,
+ &hw_ioctxt, sizeof(hw_ioctxt), NULL,
+ NULL, HINIC_MGMT_MSG_SYNC);
+}
+
+static int wait_for_outbound_state(struct hinic_hwdev *hwdev)
+{
+ enum hinic_outbound_state outbound_state;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ unsigned long end;
+
+ end = jiffies + msecs_to_jiffies(OUTBOUND_STATE_TIMEOUT);
+ do {
+ outbound_state = hinic_outbound_state_get(hwif);
+
+ if (outbound_state == HINIC_OUTBOUND_ENABLE)
+ return 0;
+
+ msleep(20);
+ } while (time_before(jiffies, end));
+
+ dev_err(&pdev->dev, "Wait for OUTBOUND - Timeout\n");
+ return -EFAULT;
+}
+
+static int wait_for_db_state(struct hinic_hwdev *hwdev)
+{
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ enum hinic_db_state db_state;
+ unsigned long end;
+
+ end = jiffies + msecs_to_jiffies(DB_STATE_TIMEOUT);
+ do {
+ db_state = hinic_db_state_get(hwif);
+
+ if (db_state == HINIC_DB_ENABLE)
+ return 0;
+
+ msleep(20);
+ } while (time_before(jiffies, end));
+
+ dev_err(&pdev->dev, "Wait for DB - Timeout\n");
+ return -EFAULT;
+}
+
+static int wait_for_io_stopped(struct hinic_hwdev *hwdev)
+{
+ struct hinic_cmd_io_status cmd_io_status;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_pfhwdev *pfhwdev;
+ unsigned long end;
+ u16 out_size;
+ int err;
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "Unsupported PCI Function type\n");
+ return -EINVAL;
+ }
+
+ pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+ cmd_io_status.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+
+ end = jiffies + msecs_to_jiffies(IO_STATUS_TIMEOUT);
+ do {
+ err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
+ HINIC_COMM_CMD_IO_STATUS_GET,
+ &cmd_io_status, sizeof(cmd_io_status),
+ &cmd_io_status, &out_size,
+ HINIC_MGMT_MSG_SYNC);
+ if (err || (out_size != sizeof(cmd_io_status))) {
+ dev_err(&pdev->dev, "Failed to get IO status, ret = %d\n",
+ err);
+ return err ? err : -EFAULT;
+ }
+
+ if (cmd_io_status.status == IO_STOPPED) {
+ dev_info(&pdev->dev, "IO stopped\n");
+ return 0;
+ }
+
+ msleep(20);
+ } while (time_before(jiffies, end));
+
+ dev_err(&pdev->dev, "Wait for IO stopped - Timeout\n");
+ return -ETIMEDOUT;
+}
+
+/**
+ * clear_io_resources - set the IO resources as not active in the NIC
+ * @hwdev: the NIC HW device
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int clear_io_resources(struct hinic_hwdev *hwdev)
+{
+ struct hinic_cmd_clear_io_res cmd_clear_io_res;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_pfhwdev *pfhwdev;
+ int err;
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "Unsupported PCI Function type\n");
+ return -EINVAL;
+ }
+
+ err = wait_for_io_stopped(hwdev);
+ if (err) {
+ dev_err(&pdev->dev, "IO has not stopped yet\n");
+ return err;
+ }
+
+ cmd_clear_io_res.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+
+ pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+ err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
+ HINIC_COMM_CMD_IO_RES_CLEAR, &cmd_clear_io_res,
+ sizeof(cmd_clear_io_res), NULL, NULL,
+ HINIC_MGMT_MSG_SYNC);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to clear IO resources\n");
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * set_resources_state - set the state of the resources in the NIC
+ * @hwdev: the NIC HW device
+ * @state: the state to set
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int set_resources_state(struct hinic_hwdev *hwdev,
+ enum hinic_res_state state)
+{
+ struct hinic_cmd_set_res_state res_state;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_pfhwdev *pfhwdev;
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "Unsupported PCI Function type\n");
+ return -EINVAL;
+ }
+
+ res_state.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+ res_state.state = state;
+
+ pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+ return hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt,
+ HINIC_MOD_COMM,
+ HINIC_COMM_CMD_RES_STATE_SET,
+ &res_state, sizeof(res_state), NULL,
+ NULL, HINIC_MGMT_MSG_SYNC);
+}
+
+/**
+ * get_base_qpn - get the first qp number
+ * @hwdev: the NIC HW device
+ * @base_qpn: returned qp number
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int get_base_qpn(struct hinic_hwdev *hwdev, u16 *base_qpn)
+{
+ struct hinic_cmd_base_qpn cmd_base_qpn;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ u16 out_size;
+ int err;
+
+ cmd_base_qpn.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+
+ err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_GLOBAL_QPN,
+ &cmd_base_qpn, sizeof(cmd_base_qpn),
+ &cmd_base_qpn, &out_size);
+ if (err || (out_size != sizeof(cmd_base_qpn)) || cmd_base_qpn.status) {
+ dev_err(&pdev->dev, "Failed to get base qpn, status = %d\n",
+ cmd_base_qpn.status);
+ return -EFAULT;
+ }
+
+ *base_qpn = cmd_base_qpn.qpn;
+ return 0;
+}
+
+/**
+ * hinic_hwdev_ifup - Preparing the HW for passing IO
+ * @hwdev: the NIC HW device
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_hwdev_ifup(struct hinic_hwdev *hwdev)
+{
+ struct hinic_func_to_io *func_to_io = &hwdev->func_to_io;
+ struct hinic_cap *nic_cap = &hwdev->nic_cap;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ int err, num_aeqs, num_ceqs, num_qps;
+ struct msix_entry *ceq_msix_entries;
+ struct msix_entry *sq_msix_entries;
+ struct msix_entry *rq_msix_entries;
+ struct pci_dev *pdev = hwif->pdev;
+ u16 base_qpn;
+
+ err = get_base_qpn(hwdev, &base_qpn);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to get global base qp number\n");
+ return err;
+ }
+
+ num_aeqs = HINIC_HWIF_NUM_AEQS(hwif);
+ num_ceqs = HINIC_HWIF_NUM_CEQS(hwif);
+
+ ceq_msix_entries = &hwdev->msix_entries[num_aeqs];
+
+ err = hinic_io_init(func_to_io, hwif, nic_cap->max_qps, num_ceqs,
+ ceq_msix_entries);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init IO channel\n");
+ return err;
+ }
+
+ num_qps = nic_cap->num_qps;
+ sq_msix_entries = &hwdev->msix_entries[num_aeqs + num_ceqs];
+ rq_msix_entries = &hwdev->msix_entries[num_aeqs + num_ceqs + num_qps];
+
+ err = hinic_io_create_qps(func_to_io, base_qpn, num_qps,
+ sq_msix_entries, rq_msix_entries);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to create QPs\n");
+ goto err_create_qps;
+ }
+
+ err = wait_for_db_state(hwdev);
+ if (err) {
+ dev_warn(&pdev->dev, "db - disabled, try again\n");
+ hinic_db_state_set(hwif, HINIC_DB_ENABLE);
+ }
+
+ err = set_hw_ioctxt(hwdev, HINIC_RQ_DEPTH, HINIC_SQ_DEPTH);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set HW IO ctxt\n");
+ goto err_hw_ioctxt;
+ }
+
+ return 0;
+
+err_hw_ioctxt:
+ hinic_io_destroy_qps(func_to_io, num_qps);
+
+err_create_qps:
+ hinic_io_free(func_to_io);
+ return err;
+}
+
+/**
+ * hinic_hwdev_ifdown - Closing the HW for passing IO
+ * @hwdev: the NIC HW device
+ **/
+void hinic_hwdev_ifdown(struct hinic_hwdev *hwdev)
+{
+ struct hinic_func_to_io *func_to_io = &hwdev->func_to_io;
+ struct hinic_cap *nic_cap = &hwdev->nic_cap;
+
+ clear_io_resources(hwdev);
+
+ hinic_io_destroy_qps(func_to_io, nic_cap->num_qps);
+ hinic_io_free(func_to_io);
+}
+
+/**
+ * hinic_hwdev_cb_register - register callback handler for MGMT events
+ * @hwdev: the NIC HW device
+ * @cmd: the mgmt event
+ * @handle: private data for the handler
+ * @handler: event handler
+ **/
+void hinic_hwdev_cb_register(struct hinic_hwdev *hwdev,
+ enum hinic_mgmt_msg_cmd cmd, void *handle,
+ void (*handler)(void *handle, void *buf_in,
+ u16 in_size, void *buf_out,
+ u16 *out_size))
+{
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_pfhwdev *pfhwdev;
+ struct hinic_nic_cb *nic_cb;
+ u8 cmd_cb;
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "unsupported PCI Function type\n");
+ return;
+ }
+
+ pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+ cmd_cb = cmd - HINIC_MGMT_MSG_CMD_BASE;
+ nic_cb = &pfhwdev->nic_cb[cmd_cb];
+
+ nic_cb->handler = handler;
+ nic_cb->handle = handle;
+ nic_cb->cb_state = HINIC_CB_ENABLED;
+}
+
+/**
+ * hinic_hwdev_cb_unregister - unregister callback handler for MGMT events
+ * @hwdev: the NIC HW device
+ * @cmd: the mgmt event
+ **/
+void hinic_hwdev_cb_unregister(struct hinic_hwdev *hwdev,
+ enum hinic_mgmt_msg_cmd cmd)
+{
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_pfhwdev *pfhwdev;
+ struct hinic_nic_cb *nic_cb;
+ u8 cmd_cb;
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "unsupported PCI Function type\n");
+ return;
+ }
+
+ pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+ cmd_cb = cmd - HINIC_MGMT_MSG_CMD_BASE;
+ nic_cb = &pfhwdev->nic_cb[cmd_cb];
+
+ nic_cb->cb_state &= ~HINIC_CB_ENABLED;
+
+ while (nic_cb->cb_state & HINIC_CB_RUNNING)
+ schedule();
+
+ nic_cb->handler = NULL;
+}
+
+/**
+ * nic_mgmt_msg_handler - nic mgmt event handler
+ * @handle: private data for the handler
+ * @cmd: the message command
+ * @buf_in: input buffer
+ * @in_size: input size
+ * @buf_out: output buffer
+ * @out_size: returned output size
+ **/
+static void nic_mgmt_msg_handler(void *handle, u8 cmd, void *buf_in,
+ u16 in_size, void *buf_out, u16 *out_size)
+{
+ struct hinic_pfhwdev *pfhwdev = handle;
+ enum hinic_cb_state cb_state;
+ struct hinic_nic_cb *nic_cb;
+ struct hinic_hwdev *hwdev;
+ struct hinic_hwif *hwif;
+ struct pci_dev *pdev;
+ u8 cmd_cb;
+
+ hwdev = &pfhwdev->hwdev;
+ hwif = hwdev->hwif;
+ pdev = hwif->pdev;
+
+ if ((cmd < HINIC_MGMT_MSG_CMD_BASE) ||
+ (cmd >= HINIC_MGMT_MSG_CMD_MAX)) {
+ dev_err(&pdev->dev, "unknown L2NIC event, cmd = %d\n", cmd);
+ return;
+ }
+
+ cmd_cb = cmd - HINIC_MGMT_MSG_CMD_BASE;
+
+ nic_cb = &pfhwdev->nic_cb[cmd_cb];
+
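+ /*
+  * Atomically mark the callback as running only while it is still
+  * enabled; hinic_hwdev_cb_unregister spins until RUNNING clears,
+  * so the handler cannot be torn down while it is executing.
+  */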
+ cb_state = cmpxchg(&nic_cb->cb_state,
+ HINIC_CB_ENABLED,
+ HINIC_CB_ENABLED | HINIC_CB_RUNNING);
+
+ if ((cb_state == HINIC_CB_ENABLED) && (nic_cb->handler))
+ nic_cb->handler(nic_cb->handle, buf_in,
+ in_size, buf_out, out_size);
+ else
+ dev_err(&pdev->dev, "Unhandled NIC Event %d\n", cmd);
+
+ nic_cb->cb_state &= ~HINIC_CB_RUNNING;
+}
+
+/**
+ * init_pfhwdev - Initialize the extended components of PF
+ * @pfhwdev: the HW device for PF
+ *
+ * Return 0 - success, negative - failure
+ **/
+static int init_pfhwdev(struct hinic_pfhwdev *pfhwdev)
+{
+ struct hinic_hwdev *hwdev = &pfhwdev->hwdev;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int err;
+
+ err = hinic_pf_to_mgmt_init(&pfhwdev->pf_to_mgmt, hwif);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize PF to MGMT channel\n");
+ return err;
+ }
+
+ hinic_register_mgmt_msg_cb(&pfhwdev->pf_to_mgmt, HINIC_MOD_L2NIC,
+ pfhwdev, nic_mgmt_msg_handler);
+
+ hinic_set_pf_action(hwif, HINIC_PF_MGMT_ACTIVE);
+ return 0;
+}
+
+/**
+ * free_pfhwdev - Free the extended components of PF
+ * @pfhwdev: the HW device for PF
+ **/
+static void free_pfhwdev(struct hinic_pfhwdev *pfhwdev)
+{
+ struct hinic_hwdev *hwdev = &pfhwdev->hwdev;
+
+ hinic_set_pf_action(hwdev->hwif, HINIC_PF_MGMT_INIT);
+
+ hinic_unregister_mgmt_msg_cb(&pfhwdev->pf_to_mgmt, HINIC_MOD_L2NIC);
+
+ hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt);
+}
+
+/**
+ * hinic_init_hwdev - Initialize the NIC HW
+ * @pdev: the NIC pci device
+ *
+ * Return initialized NIC HW device
+ *
+ * Initialize the NIC HW device and return a pointer to it
+ **/
+struct hinic_hwdev *hinic_init_hwdev(struct pci_dev *pdev)
+{
+ struct hinic_pfhwdev *pfhwdev;
+ struct hinic_hwdev *hwdev;
+ struct hinic_hwif *hwif;
+ int err, num_aeqs;
+
+ hwif = devm_kzalloc(&pdev->dev, sizeof(*hwif), GFP_KERNEL);
+ if (!hwif)
+ return ERR_PTR(-ENOMEM);
+
+ err = hinic_init_hwif(hwif, pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init HW interface\n");
+ return ERR_PTR(err);
+ }
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "Unsupported PCI Function type\n");
+ err = -EFAULT;
+ goto err_func_type;
+ }
+
+ pfhwdev = devm_kzalloc(&pdev->dev, sizeof(*pfhwdev), GFP_KERNEL);
+ if (!pfhwdev) {
+ err = -ENOMEM;
+ goto err_pfhwdev_alloc;
+ }
+
+ hwdev = &pfhwdev->hwdev;
+ hwdev->hwif = hwif;
+
+ err = init_msix(hwdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init msix\n");
+ goto err_init_msix;
+ }
+
+ err = wait_for_outbound_state(hwdev);
+ if (err) {
+ dev_warn(&pdev->dev, "outbound - disabled, try again\n");
+ hinic_outbound_state_set(hwif, HINIC_OUTBOUND_ENABLE);
+ }
+
+ num_aeqs = HINIC_HWIF_NUM_AEQS(hwif);
+
+ err = hinic_aeqs_init(&hwdev->aeqs, hwif, num_aeqs,
+ HINIC_DEFAULT_AEQ_LEN, HINIC_EQ_PAGE_SIZE,
+ hwdev->msix_entries);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init async event queues\n");
+ goto err_aeqs_init;
+ }
+
+ err = init_pfhwdev(pfhwdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init PF HW device\n");
+ goto err_init_pfhwdev;
+ }
+
+ err = get_dev_cap(hwdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to get device capabilities\n");
+ goto err_dev_cap;
+ }
+
+ err = init_fw_ctxt(hwdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init function table\n");
+ goto err_init_fw_ctxt;
+ }
+
+ err = set_resources_state(hwdev, HINIC_RES_ACTIVE);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set resources state\n");
+ goto err_resources_state;
+ }
+
+ return hwdev;
+
+err_resources_state:
+err_init_fw_ctxt:
+err_dev_cap:
+ free_pfhwdev(pfhwdev);
+
+err_init_pfhwdev:
+ hinic_aeqs_free(&hwdev->aeqs);
+
+err_aeqs_init:
+ disable_msix(hwdev);
+
+err_init_msix:
+err_pfhwdev_alloc:
+err_func_type:
+ hinic_free_hwif(hwif);
+ return ERR_PTR(err);
+}
+
+/**
+ * hinic_free_hwdev - Free the NIC HW device
+ * @hwdev: the NIC HW device
+ **/
+void hinic_free_hwdev(struct hinic_hwdev *hwdev)
+{
+ struct hinic_pfhwdev *pfhwdev = container_of(hwdev,
+ struct hinic_pfhwdev,
+ hwdev);
+
+ set_resources_state(hwdev, HINIC_RES_CLEAN);
+
+ free_pfhwdev(pfhwdev);
+
+ hinic_aeqs_free(&hwdev->aeqs);
+
+ disable_msix(hwdev);
+
+ hinic_free_hwif(hwdev->hwif);
+}
+
+/**
+ * hinic_hwdev_num_qps - return the number of QPs available for use
+ * @hwdev: the NIC HW device
+ *
+ * Return number of QPs available for use
+ **/
+int hinic_hwdev_num_qps(struct hinic_hwdev *hwdev)
+{
+ struct hinic_cap *nic_cap = &hwdev->nic_cap;
+
+ return nic_cap->num_qps;
+}
+
+/**
+ * hinic_hwdev_get_sq - get SQ
+ * @hwdev: the NIC HW device
+ * @i: the position of the SQ
+ *
+ * Return: the SQ in the i position
+ **/
+struct hinic_sq *hinic_hwdev_get_sq(struct hinic_hwdev *hwdev, int i)
+{
+ struct hinic_func_to_io *func_to_io = &hwdev->func_to_io;
+
+ if (i >= hinic_hwdev_num_qps(hwdev))
+ return NULL;
+
+ return &func_to_io->qps[i].sq;
+}
+
+/**
+ * hinic_hwdev_get_rq - get RQ
+ * @hwdev: the NIC HW device
+ * @i: the position of the RQ
+ *
+ * Return: the RQ in the i position
+ **/
+struct hinic_rq *hinic_hwdev_get_rq(struct hinic_hwdev *hwdev, int i)
+{
+ struct hinic_func_to_io *func_to_io = &hwdev->func_to_io;
+
+ if (i >= hinic_hwdev_num_qps(hwdev))
+ return NULL;
+
+ return &func_to_io->qps[i].rq;
+}
+
+/**
+ * hinic_hwdev_msix_cnt_set - clear message attribute counters for msix entry
+ * @hwdev: the NIC HW device
+ * @msix_index: msix_index
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_hwdev_msix_cnt_set(struct hinic_hwdev *hwdev, u16 msix_index)
+{
+ return hinic_msix_attr_cnt_clear(hwdev->hwif, msix_index);
+}
+
+/**
+ * hinic_hwdev_msix_set - set message attribute for msix entry
+ * @hwdev: the NIC HW device
+ * @msix_index: msix_index
+ * @pending_limit: the maximum pending interrupt events (unit 8)
+ * @coalesc_timer: coalesc period for interrupt (unit 8 us)
+ * @lli_timer_cfg: replenishing period for low latency credit (unit 8 us)
+ * @lli_credit_limit: maximum credits for low latency msix messages (unit 8)
+ * @resend_timer: maximum wait for resending msix (unit coalesc period)
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_hwdev_msix_set(struct hinic_hwdev *hwdev, u16 msix_index,
+ u8 pending_limit, u8 coalesc_timer,
+ u8 lli_timer_cfg, u8 lli_credit_limit,
+ u8 resend_timer)
+{
+ return hinic_msix_attr_set(hwdev->hwif, msix_index,
+ pending_limit, coalesc_timer,
+ lli_timer_cfg, lli_credit_limit,
+ resend_timer);
+}
+
+/**
+ * hinic_hwdev_hw_ci_addr_set - set cons idx addr and attributes in HW for sq
+ * @hwdev: the NIC HW device
+ * @sq: send queue
+ * @pending_limit: the maximum pending update ci events (unit 8)
+ * @coalesc_timer: coalesc period for update ci (unit 8 us)
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_hwdev_hw_ci_addr_set(struct hinic_hwdev *hwdev, struct hinic_sq *sq,
+ u8 pending_limit, u8 coalesc_timer)
+{
+ struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_pfhwdev *pfhwdev;
+ struct hinic_cmd_hw_ci hw_ci;
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "Unsupported PCI Function type\n");
+ return -EINVAL;
+ }
+
+ hw_ci.dma_attr_off = 0;
+ hw_ci.pending_limit = pending_limit;
+ hw_ci.coalesc_timer = coalesc_timer;
+
+ hw_ci.msix_en = 1;
+ hw_ci.msix_entry_idx = sq->msix_entry;
+
+ hw_ci.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+
+ hw_ci.sq_id = qp->q_id;
+
+ hw_ci.ci_addr = ADDR_IN_4BYTES(sq->hw_ci_dma_addr);
+
+ pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+ return hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt,
+ HINIC_MOD_COMM,
+ HINIC_COMM_CMD_SQ_HI_CI_SET,
+ &hw_ci, sizeof(hw_ci), NULL,
+ NULL, HINIC_MGMT_MSG_SYNC);
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
new file mode 100644
index 0000000..0f5563f
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -0,0 +1,239 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_HW_DEV_H
+#define HINIC_HW_DEV_H
+
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hinic_hw_if.h"
+#include "hinic_hw_eqs.h"
+#include "hinic_hw_mgmt.h"
+#include "hinic_hw_qp.h"
+#include "hinic_hw_io.h"
+
+#define HINIC_MAX_QPS 32
+
+#define HINIC_MGMT_NUM_MSG_CMD (HINIC_MGMT_MSG_CMD_MAX - \
+ HINIC_MGMT_MSG_CMD_BASE)
+
+struct hinic_cap {
+ u16 max_qps;
+ u16 num_qps;
+};
+
+enum hinic_port_cmd {
+ HINIC_PORT_CMD_CHANGE_MTU = 2,
+
+ HINIC_PORT_CMD_ADD_VLAN = 3,
+ HINIC_PORT_CMD_DEL_VLAN = 4,
+
+ HINIC_PORT_CMD_SET_MAC = 9,
+ HINIC_PORT_CMD_GET_MAC = 10,
+ HINIC_PORT_CMD_DEL_MAC = 11,
+
+ HINIC_PORT_CMD_SET_RX_MODE = 12,
+
+ HINIC_PORT_CMD_GET_LINK_STATE = 24,
+
+ HINIC_PORT_CMD_SET_PORT_STATE = 41,
+
+ HINIC_PORT_CMD_FWCTXT_INIT = 69,
+
+ HINIC_PORT_CMD_SET_FUNC_STATE = 93,
+
+ HINIC_PORT_CMD_GET_GLOBAL_QPN = 102,
+
+ HINIC_PORT_CMD_GET_CAP = 170,
+};
+
+enum hinic_mgmt_msg_cmd {
+ HINIC_MGMT_MSG_CMD_BASE = 160,
+
+ HINIC_MGMT_MSG_CMD_LINK_STATUS = 160,
+
+ HINIC_MGMT_MSG_CMD_MAX,
+};
+
+enum hinic_cb_state {
+ HINIC_CB_ENABLED = BIT(0),
+ HINIC_CB_RUNNING = BIT(1),
+};
+
+enum hinic_res_state {
+ HINIC_RES_CLEAN = 0,
+ HINIC_RES_ACTIVE = 1,
+};
+
+struct hinic_cmd_fw_ctxt {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u16 rx_buf_sz;
+
+ u32 rsvd1;
+};
+
+struct hinic_cmd_hw_ioctxt {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+
+ u16 rsvd1;
+
+ u8 set_cmdq_depth;
+ u8 cmdq_depth;
+
+ u8 rsvd2;
+ u8 rsvd3;
+ u8 rsvd4;
+ u8 rsvd5;
+
+ u16 rq_depth;
+ u16 rx_buf_sz_idx;
+ u16 sq_depth;
+};
+
+struct hinic_cmd_io_status {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u8 rsvd1;
+ u8 rsvd2;
+ u32 io_status;
+};
+
+struct hinic_cmd_clear_io_res {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u8 rsvd1;
+ u8 rsvd2;
+};
+
+struct hinic_cmd_set_res_state {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u8 state;
+ u8 rsvd1;
+ u32 rsvd2;
+};
+
+struct hinic_cmd_base_qpn {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u16 qpn;
+};
+
+struct hinic_cmd_hw_ci {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+
+ u8 dma_attr_off;
+ u8 pending_limit;
+ u8 coalesc_timer;
+
+ u8 msix_en;
+ u16 msix_entry_idx;
+
+ u32 sq_id;
+ u32 rsvd1;
+ u64 ci_addr;
+};
+
+struct hinic_hwdev {
+ struct hinic_hwif *hwif;
+ struct msix_entry *msix_entries;
+
+ struct hinic_aeqs aeqs;
+ struct hinic_func_to_io func_to_io;
+
+ struct hinic_cap nic_cap;
+};
+
+struct hinic_nic_cb {
+ void (*handler)(void *handle, void *buf_in,
+ u16 in_size, void *buf_out,
+ u16 *out_size);
+
+ void *handle;
+ unsigned long cb_state;
+};
+
+struct hinic_pfhwdev {
+ struct hinic_hwdev hwdev;
+
+ struct hinic_pf_to_mgmt pf_to_mgmt;
+
+ struct hinic_nic_cb nic_cb[HINIC_MGMT_NUM_MSG_CMD];
+};
+
+void hinic_hwdev_cb_register(struct hinic_hwdev *hwdev,
+ enum hinic_mgmt_msg_cmd cmd, void *handle,
+ void (*handler)(void *handle, void *buf_in,
+ u16 in_size, void *buf_out,
+ u16 *out_size));
+
+void hinic_hwdev_cb_unregister(struct hinic_hwdev *hwdev,
+ enum hinic_mgmt_msg_cmd cmd);
+
+int hinic_port_msg_cmd(struct hinic_hwdev *hwdev, enum hinic_port_cmd cmd,
+ void *buf_in, u16 in_size, void *buf_out,
+ u16 *out_size);
+
+int hinic_hwdev_ifup(struct hinic_hwdev *hwdev);
+
+void hinic_hwdev_ifdown(struct hinic_hwdev *hwdev);
+
+struct hinic_hwdev *hinic_init_hwdev(struct pci_dev *pdev);
+
+void hinic_free_hwdev(struct hinic_hwdev *hwdev);
+
+int hinic_hwdev_num_qps(struct hinic_hwdev *hwdev);
+
+struct hinic_sq *hinic_hwdev_get_sq(struct hinic_hwdev *hwdev, int i);
+
+struct hinic_rq *hinic_hwdev_get_rq(struct hinic_hwdev *hwdev, int i);
+
+int hinic_hwdev_msix_cnt_set(struct hinic_hwdev *hwdev, u16 msix_index);
+
+int hinic_hwdev_msix_set(struct hinic_hwdev *hwdev, u16 msix_index,
+ u8 pending_limit, u8 coalesc_timer,
+ u8 lli_timer_cfg, u8 lli_credit_limit,
+ u8 resend_timer);
+
+int hinic_hwdev_hw_ci_addr_set(struct hinic_hwdev *hwdev, struct hinic_sq *sq,
+ u8 pending_limit, u8 coalesc_timer);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
new file mode 100644
index 0000000..7cb8b9b9
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
@@ -0,0 +1,886 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+#include <linux/log2.h>
+#include <asm/byteorder.h>
+#include <asm/barrier.h>
+
+#include "hinic_hw_csr.h"
+#include "hinic_hw_if.h"
+#include "hinic_hw_eqs.h"
+
+#define HINIC_EQS_WQ_NAME "hinic_eqs"
+
+#define GET_EQ_NUM_PAGES(eq, pg_size) \
+ (ALIGN((eq)->q_len * (eq)->elem_size, pg_size) / (pg_size))
+
+#define GET_EQ_NUM_ELEMS_IN_PG(eq, pg_size) ((pg_size) / (eq)->elem_size)
+
+#define EQ_CONS_IDX_REG_ADDR(eq) (((eq)->type == HINIC_AEQ) ? \
+ HINIC_CSR_AEQ_CONS_IDX_ADDR((eq)->q_id) : \
+ HINIC_CSR_CEQ_CONS_IDX_ADDR((eq)->q_id))
+
+#define EQ_PROD_IDX_REG_ADDR(eq) (((eq)->type == HINIC_AEQ) ? \
+ HINIC_CSR_AEQ_PROD_IDX_ADDR((eq)->q_id) : \
+ HINIC_CSR_CEQ_PROD_IDX_ADDR((eq)->q_id))
+
+#define EQ_HI_PHYS_ADDR_REG(eq, pg_num) (((eq)->type == HINIC_AEQ) ? \
+ HINIC_CSR_AEQ_HI_PHYS_ADDR_REG((eq)->q_id, pg_num) : \
+ HINIC_CSR_CEQ_HI_PHYS_ADDR_REG((eq)->q_id, pg_num))
+
+#define EQ_LO_PHYS_ADDR_REG(eq, pg_num) (((eq)->type == HINIC_AEQ) ? \
+ HINIC_CSR_AEQ_LO_PHYS_ADDR_REG((eq)->q_id, pg_num) : \
+ HINIC_CSR_CEQ_LO_PHYS_ADDR_REG((eq)->q_id, pg_num))
+
+#define GET_EQ_ELEMENT(eq, idx) \
+ ((eq)->virt_addr[(idx) / (eq)->num_elem_in_pg] + \
+ (((idx) & ((eq)->num_elem_in_pg - 1)) * (eq)->elem_size))
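+
+/*
+ * Illustrative example: with 32-byte elements and 4 KiB pages there are
+ * 128 elements per page, so element 130 lives in page 1 at offset 2 * 32.
+ */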
+
+#define GET_AEQ_ELEM(eq, idx) ((struct hinic_aeq_elem *) \
+ GET_EQ_ELEMENT(eq, idx))
+
+#define GET_CEQ_ELEM(eq, idx) ((u32 *) \
+ GET_EQ_ELEMENT(eq, idx))
+
+#define GET_CURR_AEQ_ELEM(eq) GET_AEQ_ELEM(eq, (eq)->cons_idx)
+
+#define GET_CURR_CEQ_ELEM(eq) GET_CEQ_ELEM(eq, (eq)->cons_idx)
+
+#define PAGE_IN_4K(page_size) ((page_size) >> 12)
+#define EQ_SET_HW_PAGE_SIZE_VAL(eq) (ilog2(PAGE_IN_4K((eq)->page_size)))
+
+#define ELEMENT_SIZE_IN_32B(eq) (((eq)->elem_size) >> 5)
+#define EQ_SET_HW_ELEM_SIZE_VAL(eq) (ilog2(ELEMENT_SIZE_IN_32B(eq)))
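As a worked example of the two encodings above: with a 4 KB queue page, EQ_SET_HW_PAGE_SIZE_VAL yields ilog2(4096 >> 12) = 0, and with the 64-byte AEQ element, EQ_SET_HW_ELEM_SIZE_VAL yields ilog2(64 >> 5) = 1.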
+
+#define EQ_MAX_PAGES 8
+
+#define CEQE_TYPE_SHIFT 23
+#define CEQE_TYPE_MASK 0x7
+
+#define CEQE_TYPE(ceqe) (((ceqe) >> CEQE_TYPE_SHIFT) & \
+ CEQE_TYPE_MASK)
+
+#define CEQE_DATA_MASK 0x3FFFFFF
+#define CEQE_DATA(ceqe) ((ceqe) & CEQE_DATA_MASK)
+
+#define aeq_to_aeqs(eq) \
+ container_of((eq) - (eq)->q_id, struct hinic_aeqs, aeq[0])
+
+#define ceq_to_ceqs(eq) \
+ container_of((eq) - (eq)->q_id, struct hinic_ceqs, ceq[0])
+
+#define work_to_aeq_work(work) \
+ container_of(work, struct hinic_eq_work, work)
+
+#define DMA_ATTR_AEQ_DEFAULT 0
+#define DMA_ATTR_CEQ_DEFAULT 0
+
+/* No coalescence */
+#define THRESH_CEQ_DEFAULT 0
+
+enum eq_int_mode {
+ EQ_INT_MODE_ARMED,
+ EQ_INT_MODE_ALWAYS
+};
+
+enum eq_arm_state {
+ EQ_NOT_ARMED,
+ EQ_ARMED
+};
+
+/**
+ * hinic_aeq_register_hw_cb - register AEQ callback for specific event
+ * @aeqs: pointer to Async eqs of the chip
+ * @event: aeq event to register a callback for
+ * @handle: private data to be passed to the callback
+ * @hwe_handler: callback function
+ **/
+void hinic_aeq_register_hw_cb(struct hinic_aeqs *aeqs,
+ enum hinic_aeq_type event, void *handle,
+ void (*hwe_handler)(void *handle, void *data,
+ u8 size))
+{
+ struct hinic_hw_event_cb *hwe_cb = &aeqs->hwe_cb[event];
+
+ hwe_cb->hwe_handler = hwe_handler;
+ hwe_cb->handle = handle;
+ hwe_cb->hwe_state = HINIC_EQE_ENABLED;
+}
+
+/**
+ * hinic_aeq_unregister_hw_cb - unregister the AEQ callback for specific event
+ * @aeqs: pointer to Async eqs of the chip
+ * @event: aeq event to unregister the callback for
+ **/
+void hinic_aeq_unregister_hw_cb(struct hinic_aeqs *aeqs,
+ enum hinic_aeq_type event)
+{
+ struct hinic_hw_event_cb *hwe_cb = &aeqs->hwe_cb[event];
+
+ hwe_cb->hwe_state &= ~HINIC_EQE_ENABLED;
+
+ while (hwe_cb->hwe_state & HINIC_EQE_RUNNING)
+ schedule();
+
+ hwe_cb->hwe_handler = NULL;
+}
+
+/**
+ * hinic_ceq_register_cb - register CEQ callback for specific event
+ * @ceqs: pointer to Completion eqs part of the chip
+ * @event: ceq event to register a callback for
+ * @handle: private data to be passed to the callback
+ * @handler: callback function
+ **/
+void hinic_ceq_register_cb(struct hinic_ceqs *ceqs,
+ enum hinic_ceq_type event, void *handle,
+ void (*handler)(void *handle, u32 ceqe_data))
+{
+ struct hinic_ceq_cb *ceq_cb = &ceqs->ceq_cb[event];
+
+ ceq_cb->handler = handler;
+ ceq_cb->handle = handle;
+ ceq_cb->ceqe_state = HINIC_EQE_ENABLED;
+}
+
+/**
+ * hinic_ceq_unregister_cb - unregister the CEQ callback for specific event
+ * @ceqs: pointer to Completion eqs part of the chip
+ * @event: ceq event to unregister the callback for
+ **/
+void hinic_ceq_unregister_cb(struct hinic_ceqs *ceqs,
+ enum hinic_ceq_type event)
+{
+ struct hinic_ceq_cb *ceq_cb = &ceqs->ceq_cb[event];
+
+ ceq_cb->ceqe_state &= ~HINIC_EQE_ENABLED;
+
+ while (ceq_cb->ceqe_state & HINIC_EQE_RUNNING)
+ schedule();
+
+ ceq_cb->handler = NULL;
+}
+
+static u8 eq_cons_idx_checksum_set(u32 val)
+{
+ u8 checksum = 0;
+ int idx;
+
+ for (idx = 0; idx < 32; idx += 4)
+ checksum ^= ((val >> idx) & 0xF);
+
+ return (checksum & 0xF);
+}
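The CI register write below embeds a 4-bit XOR checksum over the word being written; the fold above simply XORs the eight nibbles of the 32-bit value together. A minimal standalone sketch of the same fold, assuming only the C standard library (names are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    /* Standalone model of the nibble-XOR fold in eq_cons_idx_checksum_set() */
    static uint8_t xor4_fold(uint32_t val)
    {
            uint8_t checksum = 0;
            int idx;

            for (idx = 0; idx < 32; idx += 4)
                    checksum ^= (val >> idx) & 0xF;

            return checksum & 0xF;
    }

    int main(void)
    {
            /* 0x1 ^ 0x2 ^ 0x3 ^ 0x4 ^ 0x5 ^ 0x6 ^ 0x7 ^ 0x8 == 0x8 */
            printf("%#x\n", (unsigned int)xor4_fold(0x12345678));
            return 0;
    }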
+
+/**
+ * eq_update_ci - update the HW cons idx of the event queue
+ * @eq: the event queue to update the cons idx for
+ **/
+static void eq_update_ci(struct hinic_eq *eq)
+{
+ u32 val, addr = EQ_CONS_IDX_REG_ADDR(eq);
+
+ /* Read Modify Write */
+ val = hinic_hwif_read_reg(eq->hwif, addr);
+
+ val = HINIC_EQ_CI_CLEAR(val, IDX) &
+ HINIC_EQ_CI_CLEAR(val, WRAPPED) &
+ HINIC_EQ_CI_CLEAR(val, INT_ARMED) &
+ HINIC_EQ_CI_CLEAR(val, XOR_CHKSUM);
+
+ val |= HINIC_EQ_CI_SET(eq->cons_idx, IDX) |
+ HINIC_EQ_CI_SET(eq->wrapped, WRAPPED) |
+ HINIC_EQ_CI_SET(EQ_ARMED, INT_ARMED);
+
+ val |= HINIC_EQ_CI_SET(eq_cons_idx_checksum_set(val), XOR_CHKSUM);
+
+ hinic_hwif_write_reg(eq->hwif, addr, val);
+}
+
+/**
+ * aeq_irq_handler - handler for the AEQ event
+ * @eq: the Async Event Queue that received the event
+ **/
+static void aeq_irq_handler(struct hinic_eq *eq)
+{
+ struct hinic_aeqs *aeqs = aeq_to_aeqs(eq);
+ struct hinic_hwif *hwif = aeqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_aeq_elem *aeqe_curr;
+ struct hinic_hw_event_cb *hwe_cb;
+ enum hinic_aeq_type event;
+ unsigned long eqe_state;
+ u32 aeqe_desc;
+ int i, size;
+
+ for (i = 0; i < eq->q_len; i++) {
+ aeqe_curr = GET_CURR_AEQ_ELEM(eq);
+
+ /* Data in HW is in big-endian format */
+ aeqe_desc = be32_to_cpu(aeqe_curr->desc);
+
+ /* HW toggles the wrapped bit, when it adds eq element */
+ if (HINIC_EQ_ELEM_DESC_GET(aeqe_desc, WRAPPED) == eq->wrapped)
+ break;
+
+ event = HINIC_EQ_ELEM_DESC_GET(aeqe_desc, TYPE);
+ if (event >= HINIC_MAX_AEQ_EVENTS) {
+ dev_err(&pdev->dev, "Unknown AEQ Event %d\n", event);
+ return;
+ }
+
+ if (!HINIC_EQ_ELEM_DESC_GET(aeqe_desc, SRC)) {
+ hwe_cb = &aeqs->hwe_cb[event];
+
+ size = HINIC_EQ_ELEM_DESC_GET(aeqe_desc, SIZE);
+
+ eqe_state = cmpxchg(&hwe_cb->hwe_state,
+ HINIC_EQE_ENABLED,
+ HINIC_EQE_ENABLED |
+ HINIC_EQE_RUNNING);
+ if ((eqe_state == HINIC_EQE_ENABLED) &&
+ (hwe_cb->hwe_handler))
+ hwe_cb->hwe_handler(hwe_cb->handle,
+ aeqe_curr->data, size);
+ else
+ dev_err(&pdev->dev, "Unhandled AEQ Event %d\n",
+ event);
+
+ hwe_cb->hwe_state &= ~HINIC_EQE_RUNNING;
+ }
+
+ eq->cons_idx++;
+
+ if (eq->cons_idx == eq->q_len) {
+ eq->cons_idx = 0;
+ eq->wrapped = !eq->wrapped;
+ }
+ }
+}
+
+/**
+ * ceq_event_handler - handler for the ceq events
+ * @ceqs: ceqs part of the chip
+ * @ceqe: ceq element that describes the event
+ **/
+static void ceq_event_handler(struct hinic_ceqs *ceqs, u32 ceqe)
+{
+ struct hinic_hwif *hwif = ceqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_ceq_cb *ceq_cb;
+ enum hinic_ceq_type event;
+ unsigned long eqe_state;
+
+ event = CEQE_TYPE(ceqe);
+ if (event >= HINIC_MAX_CEQ_EVENTS) {
+ dev_err(&pdev->dev, "Unknown CEQ event, event = %d\n", event);
+ return;
+ }
+
+ ceq_cb = &ceqs->ceq_cb[event];
+
+ eqe_state = cmpxchg(&ceq_cb->ceqe_state,
+ HINIC_EQE_ENABLED,
+ HINIC_EQE_ENABLED | HINIC_EQE_RUNNING);
+
+ if ((eqe_state == HINIC_EQE_ENABLED) && (ceq_cb->handler))
+ ceq_cb->handler(ceq_cb->handle, CEQE_DATA(ceqe));
+ else
+ dev_err(&pdev->dev, "Unhandled CEQ Event %d\n", event);
+
+ ceq_cb->ceqe_state &= ~HINIC_EQE_RUNNING;
+}
+
+/**
+ * ceq_irq_handler - handler for the CEQ event
+ * @eq: the Completion Event Queue that received the event
+ **/
+static void ceq_irq_handler(struct hinic_eq *eq)
+{
+ struct hinic_ceqs *ceqs = ceq_to_ceqs(eq);
+ u32 ceqe;
+ int i;
+
+ for (i = 0; i < eq->q_len; i++) {
+ ceqe = *(GET_CURR_CEQ_ELEM(eq));
+
+ /* Data in HW is in big-endian format */
+ ceqe = be32_to_cpu(ceqe);
+
+ /* HW toggles the wrapped bit, when it adds eq element event */
+ if (HINIC_EQ_ELEM_DESC_GET(ceqe, WRAPPED) == eq->wrapped)
+ break;
+
+ ceq_event_handler(ceqs, ceqe);
+
+ eq->cons_idx++;
+
+ if (eq->cons_idx == eq->q_len) {
+ eq->cons_idx = 0;
+ eq->wrapped = !eq->wrapped;
+ }
+ }
+}
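Both ring walkers above depend on the same validity handshake: the HW toggles the WRAPPED bit each time it rewrites a slot, so an element is fresh only while its WRAPPED bit differs from the consumer's phase flag, and the phase flips on every wrap-around. A minimal model of that handshake, with illustrative names (not driver code):

    /* Minimal model of the wrapped-bit handshake (illustrative only) */
    struct ring_model {
            unsigned int cons_idx;
            unsigned int q_len;
            int phase;                      /* mirrors eq->wrapped */
    };

    /* an element is fresh while its wrapped bit differs from our phase */
    static int elem_is_fresh(const struct ring_model *r, int elem_wrapped)
    {
            return elem_wrapped != r->phase;
    }

    static void consume_one(struct ring_model *r)
    {
            if (++r->cons_idx == r->q_len) {
                    r->cons_idx = 0;
                    r->phase = !r->phase;   /* flip phase on wrap-around */
            }
    }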
+
+/**
+ * eq_irq_handler - handler for the EQ event
+ * @data: the Event Queue that received the event
+ **/
+static void eq_irq_handler(void *data)
+{
+ struct hinic_eq *eq = data;
+
+ if (eq->type == HINIC_AEQ)
+ aeq_irq_handler(eq);
+ else if (eq->type == HINIC_CEQ)
+ ceq_irq_handler(eq);
+
+ eq_update_ci(eq);
+}
+
+/**
+ * eq_irq_work - the work of the EQ that received the event
+ * @work: the work struct that is associated with the EQ
+ **/
+static void eq_irq_work(struct work_struct *work)
+{
+ struct hinic_eq_work *aeq_work = work_to_aeq_work(work);
+ struct hinic_eq *aeq;
+
+ aeq = aeq_work->data;
+ eq_irq_handler(aeq);
+}
+
+/**
+ * ceq_tasklet - the tasklet of the EQ that received the event
+ * @ceq_data: the Completion Event Queue, passed as an unsigned long
+ **/
+static void ceq_tasklet(unsigned long ceq_data)
+{
+ struct hinic_eq *ceq = (struct hinic_eq *)ceq_data;
+
+ eq_irq_handler(ceq);
+}
+
+/**
+ * aeq_interrupt - aeq interrupt handler
+ * @irq: irq number
+ * @data: the Async Event Queue that collected the event
+ **/
+static irqreturn_t aeq_interrupt(int irq, void *data)
+{
+ struct hinic_eq_work *aeq_work;
+ struct hinic_eq *aeq = data;
+ struct hinic_aeqs *aeqs;
+
+ /* clear resend timer cnt register */
+ hinic_msix_attr_cnt_clear(aeq->hwif, aeq->msix_entry.entry);
+
+ aeq_work = &aeq->aeq_work;
+ aeq_work->data = aeq;
+
+ aeqs = aeq_to_aeqs(aeq);
+ queue_work(aeqs->workq, &aeq_work->work);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * ceq_interrupt - ceq interrupt handler
+ * @irq: irq number
+ * @data: the Completion Event Queue that collected the event
+ **/
+static irqreturn_t ceq_interrupt(int irq, void *data)
+{
+ struct hinic_eq *ceq = data;
+
+ /* clear resend timer cnt register */
+ hinic_msix_attr_cnt_clear(ceq->hwif, ceq->msix_entry.entry);
+
+ tasklet_schedule(&ceq->ceq_tasklet);
+
+ return IRQ_HANDLED;
+}
+
+static void set_ctrl0(struct hinic_eq *eq)
+{
+ struct msix_entry *msix_entry = &eq->msix_entry;
+ enum hinic_eq_type type = eq->type;
+ u32 addr, val, ctrl0;
+
+ if (type == HINIC_AEQ) {
+ /* RMW Ctrl0 */
+ addr = HINIC_CSR_AEQ_CTRL_0_ADDR(eq->q_id);
+
+ val = hinic_hwif_read_reg(eq->hwif, addr);
+
+ val = HINIC_AEQ_CTRL_0_CLEAR(val, INT_IDX) &
+ HINIC_AEQ_CTRL_0_CLEAR(val, DMA_ATTR) &
+ HINIC_AEQ_CTRL_0_CLEAR(val, PCI_INTF_IDX) &
+ HINIC_AEQ_CTRL_0_CLEAR(val, INT_MODE);
+
+ ctrl0 = HINIC_AEQ_CTRL_0_SET(msix_entry->entry, INT_IDX) |
+ HINIC_AEQ_CTRL_0_SET(DMA_ATTR_AEQ_DEFAULT, DMA_ATTR) |
+ HINIC_AEQ_CTRL_0_SET(HINIC_HWIF_PCI_INTF(eq->hwif),
+ PCI_INTF_IDX) |
+ HINIC_AEQ_CTRL_0_SET(EQ_INT_MODE_ARMED, INT_MODE);
+
+ val |= ctrl0;
+
+ hinic_hwif_write_reg(eq->hwif, addr, val);
+ } else if (type == HINIC_CEQ) {
+ /* RMW Ctrl0 */
+ addr = HINIC_CSR_CEQ_CTRL_0_ADDR(eq->q_id);
+
+ val = hinic_hwif_read_reg(eq->hwif, addr);
+
+ val = HINIC_CEQ_CTRL_0_CLEAR(val, INTR_IDX) &
+ HINIC_CEQ_CTRL_0_CLEAR(val, DMA_ATTR) &
+ HINIC_CEQ_CTRL_0_CLEAR(val, KICK_THRESH) &
+ HINIC_CEQ_CTRL_0_CLEAR(val, PCI_INTF_IDX) &
+ HINIC_CEQ_CTRL_0_CLEAR(val, INTR_MODE);
+
+ ctrl0 = HINIC_CEQ_CTRL_0_SET(msix_entry->entry, INTR_IDX) |
+ HINIC_CEQ_CTRL_0_SET(DMA_ATTR_CEQ_DEFAULT, DMA_ATTR) |
+ HINIC_CEQ_CTRL_0_SET(THRESH_CEQ_DEFAULT, KICK_THRESH) |
+ HINIC_CEQ_CTRL_0_SET(HINIC_HWIF_PCI_INTF(eq->hwif),
+ PCI_INTF_IDX) |
+ HINIC_CEQ_CTRL_0_SET(EQ_INT_MODE_ARMED, INTR_MODE);
+
+ val |= ctrl0;
+
+ hinic_hwif_write_reg(eq->hwif, addr, val);
+ }
+}
+
+static void set_ctrl1(struct hinic_eq *eq)
+{
+ enum hinic_eq_type type = eq->type;
+ u32 page_size_val, elem_size;
+ u32 addr, val, ctrl1;
+
+ if (type == HINIC_AEQ) {
+ /* RMW Ctrl1 */
+ addr = HINIC_CSR_AEQ_CTRL_1_ADDR(eq->q_id);
+
+ page_size_val = EQ_SET_HW_PAGE_SIZE_VAL(eq);
+ elem_size = EQ_SET_HW_ELEM_SIZE_VAL(eq);
+
+ val = hinic_hwif_read_reg(eq->hwif, addr);
+
+ val = HINIC_AEQ_CTRL_1_CLEAR(val, LEN) &
+ HINIC_AEQ_CTRL_1_CLEAR(val, ELEM_SIZE) &
+ HINIC_AEQ_CTRL_1_CLEAR(val, PAGE_SIZE);
+
+ ctrl1 = HINIC_AEQ_CTRL_1_SET(eq->q_len, LEN) |
+ HINIC_AEQ_CTRL_1_SET(elem_size, ELEM_SIZE) |
+ HINIC_AEQ_CTRL_1_SET(page_size_val, PAGE_SIZE);
+
+ val |= ctrl1;
+
+ hinic_hwif_write_reg(eq->hwif, addr, val);
+ } else if (type == HINIC_CEQ) {
+ /* RMW Ctrl1 */
+ addr = HINIC_CSR_CEQ_CTRL_1_ADDR(eq->q_id);
+
+ page_size_val = EQ_SET_HW_PAGE_SIZE_VAL(eq);
+
+ val = hinic_hwif_read_reg(eq->hwif, addr);
+
+ val = HINIC_CEQ_CTRL_1_CLEAR(val, LEN) &
+ HINIC_CEQ_CTRL_1_CLEAR(val, PAGE_SIZE);
+
+ ctrl1 = HINIC_CEQ_CTRL_1_SET(eq->q_len, LEN) |
+ HINIC_CEQ_CTRL_1_SET(page_size_val, PAGE_SIZE);
+
+ val |= ctrl1;
+
+ hinic_hwif_write_reg(eq->hwif, addr, val);
+ }
+}
+
+/**
+ * set_eq_ctrls - set the ctrl registers of the eq
+ * @eq: the event queue to set the ctrl registers for
+ **/
+static void set_eq_ctrls(struct hinic_eq *eq)
+{
+ set_ctrl0(eq);
+ set_ctrl1(eq);
+}
+
+/**
+ * aeq_elements_init - initialize all the elements in the aeq
+ * @eq: the Async Event Queue
+ * @init_val: value to initialize the elements with
+ **/
+static void aeq_elements_init(struct hinic_eq *eq, u32 init_val)
+{
+ struct hinic_aeq_elem *aeqe;
+ int i;
+
+ for (i = 0; i < eq->q_len; i++) {
+ aeqe = GET_AEQ_ELEM(eq, i);
+ aeqe->desc = cpu_to_be32(init_val);
+ }
+
+ wmb(); /* Write the initialization values */
+}
+
+/**
+ * ceq_elements_init - Initialize all the elements in the ceq
+ * @eq: the event queue
+ * @init_val: value to initialize the elements with
+ **/
+static void ceq_elements_init(struct hinic_eq *eq, u32 init_val)
+{
+ u32 *ceqe;
+ int i;
+
+ for (i = 0; i < eq->q_len; i++) {
+ ceqe = GET_CEQ_ELEM(eq, i);
+ *(ceqe) = cpu_to_be32(init_val);
+ }
+
+ wmb(); /* Write the initialization values */
+}
+
+/**
+ * alloc_eq_pages - allocate the pages for the queue
+ * @eq: the event queue
+ *
+ * Return 0 - Success, Negative - Failure
+ **/
+static int alloc_eq_pages(struct hinic_eq *eq)
+{
+ struct hinic_hwif *hwif = eq->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ u32 init_val, addr, val;
+ size_t addr_size;
+ int err, pg;
+
+ addr_size = eq->num_pages * sizeof(*eq->dma_addr);
+ eq->dma_addr = devm_kzalloc(&pdev->dev, addr_size, GFP_KERNEL);
+ if (!eq->dma_addr)
+ return -ENOMEM;
+
+ addr_size = eq->num_pages * sizeof(*eq->virt_addr);
+ eq->virt_addr = devm_kzalloc(&pdev->dev, addr_size, GFP_KERNEL);
+ if (!eq->virt_addr) {
+ err = -ENOMEM;
+ goto err_virt_addr_alloc;
+ }
+
+ for (pg = 0; pg < eq->num_pages; pg++) {
+ eq->virt_addr[pg] = dma_zalloc_coherent(&pdev->dev,
+ eq->page_size,
+ &eq->dma_addr[pg],
+ GFP_KERNEL);
+ if (!eq->virt_addr[pg]) {
+ err = -ENOMEM;
+ goto err_dma_alloc;
+ }
+
+ addr = EQ_HI_PHYS_ADDR_REG(eq, pg);
+ val = upper_32_bits(eq->dma_addr[pg]);
+
+ hinic_hwif_write_reg(hwif, addr, val);
+
+ addr = EQ_LO_PHYS_ADDR_REG(eq, pg);
+ val = lower_32_bits(eq->dma_addr[pg]);
+
+ hinic_hwif_write_reg(hwif, addr, val);
+ }
+
+ init_val = HINIC_EQ_ELEM_DESC_SET(eq->wrapped, WRAPPED);
+
+ if (eq->type == HINIC_AEQ)
+ aeq_elements_init(eq, init_val);
+ else if (eq->type == HINIC_CEQ)
+ ceq_elements_init(eq, init_val);
+
+ return 0;
+
+err_dma_alloc:
+ while (--pg >= 0)
+ dma_free_coherent(&pdev->dev, eq->page_size,
+ eq->virt_addr[pg],
+ eq->dma_addr[pg]);
+
+ devm_kfree(&pdev->dev, eq->virt_addr);
+
+err_virt_addr_alloc:
+ devm_kfree(&pdev->dev, eq->dma_addr);
+ return err;
+}
+
+/**
+ * free_eq_pages - free the pages of the queue
+ * @eq: the Event Queue
+ **/
+static void free_eq_pages(struct hinic_eq *eq)
+{
+ struct hinic_hwif *hwif = eq->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int pg;
+
+ for (pg = 0; pg < eq->num_pages; pg++)
+ dma_free_coherent(&pdev->dev, eq->page_size,
+ eq->virt_addr[pg],
+ eq->dma_addr[pg]);
+
+ devm_kfree(&pdev->dev, eq->virt_addr);
+ devm_kfree(&pdev->dev, eq->dma_addr);
+}
+
+/**
+ * init_eq - initialize Event Queue
+ * @eq: the event queue
+ * @hwif: the HW interface of a PCI function device
+ * @type: the type of the event queue, aeq or ceq
+ * @q_id: Queue id number
+ * @q_len: the number of EQ elements
+ * @page_size: the page size of the pages in the event queue
+ * @entry: msix entry associated with the event queue
+ *
+ * Return 0 - Success, Negative - Failure
+ **/
+static int init_eq(struct hinic_eq *eq, struct hinic_hwif *hwif,
+ enum hinic_eq_type type, int q_id, u32 q_len, u32 page_size,
+ struct msix_entry entry)
+{
+ struct pci_dev *pdev = hwif->pdev;
+ int err;
+
+ eq->hwif = hwif;
+ eq->type = type;
+ eq->q_id = q_id;
+ eq->q_len = q_len;
+ eq->page_size = page_size;
+
+ /* Clear PI and CI, also clear the ARM bit */
+ hinic_hwif_write_reg(eq->hwif, EQ_CONS_IDX_REG_ADDR(eq), 0);
+ hinic_hwif_write_reg(eq->hwif, EQ_PROD_IDX_REG_ADDR(eq), 0);
+
+ eq->cons_idx = 0;
+ eq->wrapped = 0;
+
+ if (type == HINIC_AEQ) {
+ eq->elem_size = HINIC_AEQE_SIZE;
+ } else if (type == HINIC_CEQ) {
+ eq->elem_size = HINIC_CEQE_SIZE;
+ } else {
+ dev_err(&pdev->dev, "Invalid EQ type\n");
+ return -EINVAL;
+ }
+
+ eq->num_pages = GET_EQ_NUM_PAGES(eq, page_size);
+ eq->num_elem_in_pg = GET_EQ_NUM_ELEMS_IN_PG(eq, page_size);
+
+ eq->msix_entry = entry;
+
+ if (eq->num_elem_in_pg & (eq->num_elem_in_pg - 1)) {
+ dev_err(&pdev->dev, "num elements in eq page != power of 2\n");
+ return -EINVAL;
+ }
+
+ if (eq->num_pages > EQ_MAX_PAGES) {
+ dev_err(&pdev->dev, "too many pages for eq\n");
+ return -EINVAL;
+ }
+
+ set_eq_ctrls(eq);
+ eq_update_ci(eq);
+
+ err = alloc_eq_pages(eq);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate pages for eq\n");
+ return err;
+ }
+
+ if (type == HINIC_AEQ) {
+ struct hinic_eq_work *aeq_work = &eq->aeq_work;
+
+ INIT_WORK(&aeq_work->work, eq_irq_work);
+ } else if (type == HINIC_CEQ) {
+ tasklet_init(&eq->ceq_tasklet, ceq_tasklet,
+ (unsigned long)eq);
+ }
+
+ /* set the attributes of the msix entry */
+ hinic_msix_attr_set(eq->hwif, eq->msix_entry.entry,
+ HINIC_EQ_MSIX_PENDING_LIMIT_DEFAULT,
+ HINIC_EQ_MSIX_COALESC_TIMER_DEFAULT,
+ HINIC_EQ_MSIX_LLI_TIMER_DEFAULT,
+ HINIC_EQ_MSIX_LLI_CREDIT_LIMIT_DEFAULT,
+ HINIC_EQ_MSIX_RESEND_TIMER_DEFAULT);
+
+ if (type == HINIC_AEQ)
+ err = request_irq(entry.vector, aeq_interrupt, 0,
+ "hinic_aeq", eq);
+ else if (type == HINIC_CEQ)
+ err = request_irq(entry.vector, ceq_interrupt, 0,
+ "hinic_ceq", eq);
+
+ if (err) {
+ dev_err(&pdev->dev, "Failed to request irq for the EQ\n");
+ goto err_req_irq;
+ }
+
+ return 0;
+
+err_req_irq:
+ free_eq_pages(eq);
+ return err;
+}
+
+/**
+ * remove_eq - remove Event Queue
+ * @eq: the event queue
+ **/
+static void remove_eq(struct hinic_eq *eq)
+{
+ struct msix_entry *entry = &eq->msix_entry;
+
+ free_irq(entry->vector, eq);
+
+ if (eq->type == HINIC_AEQ) {
+ struct hinic_eq_work *aeq_work = &eq->aeq_work;
+
+ cancel_work_sync(&aeq_work->work);
+ } else if (eq->type == HINIC_CEQ) {
+ tasklet_kill(&eq->ceq_tasklet);
+ }
+
+ free_eq_pages(eq);
+}
+
+/**
+ * hinic_aeqs_init - initialize all the aeqs
+ * @aeqs: pointer to Async eqs of the chip
+ * @hwif: the HW interface of a PCI function device
+ * @num_aeqs: number of AEQs
+ * @q_len: number of EQ elements
+ * @page_size: the page size of the pages in the event queue
+ * @msix_entries: msix entries associated with the event queues
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_aeqs_init(struct hinic_aeqs *aeqs, struct hinic_hwif *hwif,
+ int num_aeqs, u32 q_len, u32 page_size,
+ struct msix_entry *msix_entries)
+{
+ struct pci_dev *pdev = hwif->pdev;
+ int err, i, q_id;
+
+ aeqs->workq = create_singlethread_workqueue(HINIC_EQS_WQ_NAME);
+ if (!aeqs->workq)
+ return -ENOMEM;
+
+ aeqs->hwif = hwif;
+ aeqs->num_aeqs = num_aeqs;
+
+ for (q_id = 0; q_id < num_aeqs; q_id++) {
+ err = init_eq(&aeqs->aeq[q_id], hwif, HINIC_AEQ, q_id, q_len,
+ page_size, msix_entries[q_id]);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init aeq %d\n", q_id);
+ goto err_init_aeq;
+ }
+ }
+
+ return 0;
+
+err_init_aeq:
+ for (i = 0; i < q_id; i++)
+ remove_eq(&aeqs->aeq[i]);
+
+ destroy_workqueue(aeqs->workq);
+ return err;
+}
+
+/**
+ * hinic_aeqs_free - free all the aeqs
+ * @aeqs: pointer to Async eqs of the chip
+ **/
+void hinic_aeqs_free(struct hinic_aeqs *aeqs)
+{
+ int q_id;
+
+ for (q_id = 0; q_id < aeqs->num_aeqs; q_id++)
+ remove_eq(&aeqs->aeq[q_id]);
+
+ destroy_workqueue(aeqs->workq);
+}
+
+/**
+ * hinic_ceqs_init - init all the ceqs
+ * @ceqs: ceqs part of the chip
+ * @hwif: the hardware interface of a pci function device
+ * @num_ceqs: number of CEQs
+ * @q_len: number of EQ elements
+ * @page_size: the page size of the event queue
+ * @msix_entries: msix entries associated with the event queues
+ *
+ * Return 0 - Success, Negative - Failure
+ **/
+int hinic_ceqs_init(struct hinic_ceqs *ceqs, struct hinic_hwif *hwif,
+ int num_ceqs, u32 q_len, u32 page_size,
+ struct msix_entry *msix_entries)
+{
+ struct pci_dev *pdev = hwif->pdev;
+ int i, q_id, err;
+
+ ceqs->hwif = hwif;
+ ceqs->num_ceqs = num_ceqs;
+
+ for (q_id = 0; q_id < num_ceqs; q_id++) {
+ err = init_eq(&ceqs->ceq[q_id], hwif, HINIC_CEQ, q_id, q_len,
+ page_size, msix_entries[q_id]);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init ceq %d\n", q_id);
+ goto err_init_ceq;
+ }
+ }
+
+ return 0;
+
+err_init_ceq:
+ for (i = 0; i < q_id; i++)
+ remove_eq(&ceqs->ceq[i]);
+
+ return err;
+}
+
+/**
+ * hinic_ceqs_free - free all the ceqs
+ * @ceqs: ceqs part of the chip
+ **/
+void hinic_ceqs_free(struct hinic_ceqs *ceqs)
+{
+ int q_id;
+
+ for (q_id = 0; q_id < ceqs->num_ceqs; q_id++)
+ remove_eq(&ceqs->ceq[q_id]);
+}
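A hedged sketch of how a consumer is expected to use the callback API implemented above, between hinic_aeqs_init() and hinic_aeqs_free(); the handler and hook names are hypothetical:

    /* Hypothetical consumer of the AEQ callback API */
    static void mgmt_msg_handler(void *handle, void *data, u8 size)
    {
            /* process the AEQ element payload: size bytes at data */
    }

    static void example_hook(struct hinic_aeqs *aeqs, void *priv)
    {
            hinic_aeq_register_hw_cb(aeqs, HINIC_MSG_FROM_MGMT_CPU,
                                     priv, mgmt_msg_handler);

            /* ... events are now delivered from the AEQ workqueue ... */

            hinic_aeq_unregister_hw_cb(aeqs, HINIC_MSG_FROM_MGMT_CPU);
    }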
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h
new file mode 100644
index 0000000..ecb9c2b
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h
@@ -0,0 +1,265 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_HW_EQS_H
+#define HINIC_HW_EQS_H
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+
+#include "hinic_hw_if.h"
+
+#define HINIC_AEQ_CTRL_0_INT_IDX_SHIFT 0
+#define HINIC_AEQ_CTRL_0_DMA_ATTR_SHIFT 12
+#define HINIC_AEQ_CTRL_0_PCI_INTF_IDX_SHIFT 20
+#define HINIC_AEQ_CTRL_0_INT_MODE_SHIFT 31
+
+#define HINIC_AEQ_CTRL_0_INT_IDX_MASK 0x3FF
+#define HINIC_AEQ_CTRL_0_DMA_ATTR_MASK 0x3F
+#define HINIC_AEQ_CTRL_0_PCI_INTF_IDX_MASK 0x3
+#define HINIC_AEQ_CTRL_0_INT_MODE_MASK 0x1
+
+#define HINIC_AEQ_CTRL_0_SET(val, member) \
+ (((u32)(val) & HINIC_AEQ_CTRL_0_##member##_MASK) << \
+ HINIC_AEQ_CTRL_0_##member##_SHIFT)
+
+#define HINIC_AEQ_CTRL_0_CLEAR(val, member) \
+ ((val) & (~(HINIC_AEQ_CTRL_0_##member##_MASK \
+ << HINIC_AEQ_CTRL_0_##member##_SHIFT)))
+
+#define HINIC_AEQ_CTRL_1_LEN_SHIFT 0
+#define HINIC_AEQ_CTRL_1_ELEM_SIZE_SHIFT 24
+#define HINIC_AEQ_CTRL_1_PAGE_SIZE_SHIFT 28
+
+#define HINIC_AEQ_CTRL_1_LEN_MASK 0x1FFFFF
+#define HINIC_AEQ_CTRL_1_ELEM_SIZE_MASK 0x3
+#define HINIC_AEQ_CTRL_1_PAGE_SIZE_MASK 0xF
+
+#define HINIC_AEQ_CTRL_1_SET(val, member) \
+ (((u32)(val) & HINIC_AEQ_CTRL_1_##member##_MASK) << \
+ HINIC_AEQ_CTRL_1_##member##_SHIFT)
+
+#define HINIC_AEQ_CTRL_1_CLEAR(val, member) \
+ ((val) & (~(HINIC_AEQ_CTRL_1_##member##_MASK \
+ << HINIC_AEQ_CTRL_1_##member##_SHIFT)))
+
+#define HINIC_CEQ_CTRL_0_INTR_IDX_SHIFT 0
+#define HINIC_CEQ_CTRL_0_DMA_ATTR_SHIFT 12
+#define HINIC_CEQ_CTRL_0_KICK_THRESH_SHIFT 20
+#define HINIC_CEQ_CTRL_0_PCI_INTF_IDX_SHIFT 24
+#define HINIC_CEQ_CTRL_0_INTR_MODE_SHIFT 31
+
+#define HINIC_CEQ_CTRL_0_INTR_IDX_MASK 0x3FF
+#define HINIC_CEQ_CTRL_0_DMA_ATTR_MASK 0x3F
+#define HINIC_CEQ_CTRL_0_KICK_THRESH_MASK 0xF
+#define HINIC_CEQ_CTRL_0_PCI_INTF_IDX_MASK 0x3
+#define HINIC_CEQ_CTRL_0_INTR_MODE_MASK 0x1
+
+#define HINIC_CEQ_CTRL_0_SET(val, member) \
+ (((u32)(val) & HINIC_CEQ_CTRL_0_##member##_MASK) << \
+ HINIC_CEQ_CTRL_0_##member##_SHIFT)
+
+#define HINIC_CEQ_CTRL_0_CLEAR(val, member) \
+ ((val) & (~(HINIC_CEQ_CTRL_0_##member##_MASK \
+ << HINIC_CEQ_CTRL_0_##member##_SHIFT)))
+
+#define HINIC_CEQ_CTRL_1_LEN_SHIFT 0
+#define HINIC_CEQ_CTRL_1_PAGE_SIZE_SHIFT 28
+
+#define HINIC_CEQ_CTRL_1_LEN_MASK 0x1FFFFF
+#define HINIC_CEQ_CTRL_1_PAGE_SIZE_MASK 0xF
+
+#define HINIC_CEQ_CTRL_1_SET(val, member) \
+ (((u32)(val) & HINIC_CEQ_CTRL_1_##member##_MASK) << \
+ HINIC_CEQ_CTRL_1_##member##_SHIFT)
+
+#define HINIC_CEQ_CTRL_1_CLEAR(val, member) \
+ ((val) & (~(HINIC_CEQ_CTRL_1_##member##_MASK \
+ << HINIC_CEQ_CTRL_1_##member##_SHIFT)))
+
+#define HINIC_EQ_ELEM_DESC_TYPE_SHIFT 0
+#define HINIC_EQ_ELEM_DESC_SRC_SHIFT 7
+#define HINIC_EQ_ELEM_DESC_SIZE_SHIFT 8
+#define HINIC_EQ_ELEM_DESC_WRAPPED_SHIFT 31
+
+#define HINIC_EQ_ELEM_DESC_TYPE_MASK 0x7F
+#define HINIC_EQ_ELEM_DESC_SRC_MASK 0x1
+#define HINIC_EQ_ELEM_DESC_SIZE_MASK 0xFF
+#define HINIC_EQ_ELEM_DESC_WRAPPED_MASK 0x1
+
+#define HINIC_EQ_ELEM_DESC_SET(val, member) \
+ (((u32)(val) & HINIC_EQ_ELEM_DESC_##member##_MASK) << \
+ HINIC_EQ_ELEM_DESC_##member##_SHIFT)
+
+#define HINIC_EQ_ELEM_DESC_GET(val, member) \
+ (((val) >> HINIC_EQ_ELEM_DESC_##member##_SHIFT) & \
+ HINIC_EQ_ELEM_DESC_##member##_MASK)
+
+#define HINIC_EQ_CI_IDX_SHIFT 0
+#define HINIC_EQ_CI_WRAPPED_SHIFT 20
+#define HINIC_EQ_CI_XOR_CHKSUM_SHIFT 24
+#define HINIC_EQ_CI_INT_ARMED_SHIFT 31
+
+#define HINIC_EQ_CI_IDX_MASK 0xFFFFF
+#define HINIC_EQ_CI_WRAPPED_MASK 0x1
+#define HINIC_EQ_CI_XOR_CHKSUM_MASK 0xF
+#define HINIC_EQ_CI_INT_ARMED_MASK 0x1
+
+#define HINIC_EQ_CI_SET(val, member) \
+ (((u32)(val) & HINIC_EQ_CI_##member##_MASK) << \
+ HINIC_EQ_CI_##member##_SHIFT)
+
+#define HINIC_EQ_CI_CLEAR(val, member) \
+ ((val) & (~(HINIC_EQ_CI_##member##_MASK \
+ << HINIC_EQ_CI_##member##_SHIFT)))
+
+#define HINIC_MAX_AEQS 4
+#define HINIC_MAX_CEQS 32
+
+#define HINIC_AEQE_SIZE 64
+#define HINIC_CEQE_SIZE 4
+
+#define HINIC_AEQE_DESC_SIZE 4
+#define HINIC_AEQE_DATA_SIZE \
+ (HINIC_AEQE_SIZE - HINIC_AEQE_DESC_SIZE)
+
+#define HINIC_DEFAULT_AEQ_LEN 64
+#define HINIC_DEFAULT_CEQ_LEN 1024
+
+#define HINIC_EQ_PAGE_SIZE SZ_4K
+
+#define HINIC_CEQ_ID_CMDQ 0
+
+enum hinic_eq_type {
+ HINIC_AEQ,
+ HINIC_CEQ,
+};
+
+enum hinic_aeq_type {
+ HINIC_MSG_FROM_MGMT_CPU = 2,
+
+ HINIC_MAX_AEQ_EVENTS,
+};
+
+enum hinic_ceq_type {
+ HINIC_CEQ_CMDQ = 3,
+
+ HINIC_MAX_CEQ_EVENTS,
+};
+
+enum hinic_eqe_state {
+ HINIC_EQE_ENABLED = BIT(0),
+ HINIC_EQE_RUNNING = BIT(1),
+};
+
+struct hinic_aeq_elem {
+ u8 data[HINIC_AEQE_DATA_SIZE];
+ u32 desc;
+};
+
+struct hinic_eq_work {
+ struct work_struct work;
+ void *data;
+};
+
+struct hinic_eq {
+ struct hinic_hwif *hwif;
+
+ enum hinic_eq_type type;
+ int q_id;
+ u32 q_len;
+ u32 page_size;
+
+ u32 cons_idx;
+ int wrapped;
+
+ size_t elem_size;
+ int num_pages;
+ int num_elem_in_pg;
+
+ struct msix_entry msix_entry;
+
+ dma_addr_t *dma_addr;
+ void **virt_addr;
+
+ struct hinic_eq_work aeq_work;
+
+ struct tasklet_struct ceq_tasklet;
+};
+
+struct hinic_hw_event_cb {
+ void (*hwe_handler)(void *handle, void *data, u8 size);
+ void *handle;
+ unsigned long hwe_state;
+};
+
+struct hinic_aeqs {
+ struct hinic_hwif *hwif;
+
+ struct hinic_eq aeq[HINIC_MAX_AEQS];
+ int num_aeqs;
+
+ struct hinic_hw_event_cb hwe_cb[HINIC_MAX_AEQ_EVENTS];
+
+ struct workqueue_struct *workq;
+};
+
+struct hinic_ceq_cb {
+ void (*handler)(void *handle, u32 ceqe_data);
+ void *handle;
+ enum hinic_eqe_state ceqe_state;
+};
+
+struct hinic_ceqs {
+ struct hinic_hwif *hwif;
+
+ struct hinic_eq ceq[HINIC_MAX_CEQS];
+ int num_ceqs;
+
+ struct hinic_ceq_cb ceq_cb[HINIC_MAX_CEQ_EVENTS];
+};
+
+void hinic_aeq_register_hw_cb(struct hinic_aeqs *aeqs,
+ enum hinic_aeq_type event, void *handle,
+ void (*hwe_handler)(void *handle, void *data,
+ u8 size));
+
+void hinic_aeq_unregister_hw_cb(struct hinic_aeqs *aeqs,
+ enum hinic_aeq_type event);
+
+void hinic_ceq_register_cb(struct hinic_ceqs *ceqs,
+ enum hinic_ceq_type event, void *handle,
+ void (*ceq_cb)(void *handle, u32 ceqe_data));
+
+void hinic_ceq_unregister_cb(struct hinic_ceqs *ceqs,
+ enum hinic_ceq_type event);
+
+int hinic_aeqs_init(struct hinic_aeqs *aeqs, struct hinic_hwif *hwif,
+ int num_aeqs, u32 q_len, u32 page_size,
+ struct msix_entry *msix_entries);
+
+void hinic_aeqs_free(struct hinic_aeqs *aeqs);
+
+int hinic_ceqs_init(struct hinic_ceqs *ceqs, struct hinic_hwif *hwif,
+ int num_ceqs, u32 q_len, u32 page_size,
+ struct msix_entry *msix_entries);
+
+void hinic_ceqs_free(struct hinic_ceqs *ceqs);
+
+#endif
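All of the SHIFT/MASK pairs in this header follow one pattern: SET masks a value and shifts it into its field, GET extracts a field, and CLEAR strips a field before it is re-encoded. A short illustration with arbitrary values (desc stands in for a fetched element descriptor):

    u32 desc = 0;   /* stands in for a fetched element descriptor */
    u32 ci, wrapped;

    /* encode: mask each value, then shift it into its field */
    ci = HINIC_EQ_CI_SET(5, IDX) | HINIC_EQ_CI_SET(1, WRAPPED);

    /* read-modify-write one field: strip it, then re-encode it */
    ci = HINIC_EQ_CI_CLEAR(ci, IDX);
    ci |= HINIC_EQ_CI_SET(6, IDX);

    /* decode a field out of an element descriptor */
    wrapped = HINIC_EQ_ELEM_DESC_GET(desc, WRAPPED);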
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
new file mode 100644
index 0000000..823a170
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
@@ -0,0 +1,351 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hinic_hw_csr.h"
+#include "hinic_hw_if.h"
+
+#define PCIE_ATTR_ENTRY 0
+
+#define VALID_MSIX_IDX(attr, msix_index) ((msix_index) < (attr)->num_irqs)
+
+/**
+ * hinic_msix_attr_set - set message attribute for msix entry
+ * @hwif: the HW interface of a pci function device
+ * @msix_index: msix_index
+ * @pending_limit: the maximum pending interrupt events (unit 8)
+ * @coalesc_timer: coalesc period for interrupt (unit 8 us)
+ * @lli_timer: replenishing period for low latency credit (unit 8 us)
+ * @lli_credit_limit: maximum credits for low latency msix messages (unit 8)
+ * @resend_timer: maximum wait for resending msix (unit coalesc period)
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_msix_attr_set(struct hinic_hwif *hwif, u16 msix_index,
+ u8 pending_limit, u8 coalesc_timer,
+ u8 lli_timer, u8 lli_credit_limit,
+ u8 resend_timer)
+{
+ u32 msix_ctrl, addr;
+
+ if (!VALID_MSIX_IDX(&hwif->attr, msix_index))
+ return -EINVAL;
+
+ msix_ctrl = HINIC_MSIX_ATTR_SET(pending_limit, PENDING_LIMIT) |
+ HINIC_MSIX_ATTR_SET(coalesc_timer, COALESC_TIMER) |
+ HINIC_MSIX_ATTR_SET(lli_timer, LLI_TIMER) |
+ HINIC_MSIX_ATTR_SET(lli_credit_limit, LLI_CREDIT) |
+ HINIC_MSIX_ATTR_SET(resend_timer, RESEND_TIMER);
+
+ addr = HINIC_CSR_MSIX_CTRL_ADDR(msix_index);
+
+ hinic_hwif_write_reg(hwif, addr, msix_ctrl);
+ return 0;
+}
+
+/**
+ * hinic_msix_attr_get - get message attribute of msix entry
+ * @hwif: the HW interface of a pci function device
+ * @msix_index: msix_index
+ * @pending_limit: the maximum pending interrupt events (unit 8)
+ * @coalesc_timer: coalesc period for interrupt (unit 8 us)
+ * @lli_timer: replenishing period for low latency credit (unit 8 us)
+ * @lli_credit_limit: maximum credits for low latency msix messages (unit 8)
+ * @resend_timer: maximum wait for resending msix (unit coalesc period)
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_msix_attr_get(struct hinic_hwif *hwif, u16 msix_index,
+ u8 *pending_limit, u8 *coalesc_timer,
+ u8 *lli_timer, u8 *lli_credit_limit,
+ u8 *resend_timer)
+{
+ u32 addr, val;
+
+ if (!VALID_MSIX_IDX(&hwif->attr, msix_index))
+ return -EINVAL;
+
+ addr = HINIC_CSR_MSIX_CTRL_ADDR(msix_index);
+ val = hinic_hwif_read_reg(hwif, addr);
+
+ *pending_limit = HINIC_MSIX_ATTR_GET(val, PENDING_LIMIT);
+ *coalesc_timer = HINIC_MSIX_ATTR_GET(val, COALESC_TIMER);
+ *lli_timer = HINIC_MSIX_ATTR_GET(val, LLI_TIMER);
+ *lli_credit_limit = HINIC_MSIX_ATTR_GET(val, LLI_CREDIT);
+ *resend_timer = HINIC_MSIX_ATTR_GET(val, RESEND_TIMER);
+ return 0;
+}
+
+/**
+ * hinic_msix_attr_cnt_clear - clear message attribute counters for msix entry
+ * @hwif: the HW interface of a pci function device
+ * @msix_index: msix_index
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_msix_attr_cnt_clear(struct hinic_hwif *hwif, u16 msix_index)
+{
+ u32 msix_ctrl, addr;
+
+ if (!VALID_MSIX_IDX(&hwif->attr, msix_index))
+ return -EINVAL;
+
+ msix_ctrl = HINIC_MSIX_CNT_SET(1, RESEND_TIMER);
+ addr = HINIC_CSR_MSIX_CNT_ADDR(msix_index);
+
+ hinic_hwif_write_reg(hwif, addr, msix_ctrl);
+ return 0;
+}
+
+/**
+ * hinic_set_pf_action - set action on pf channel
+ * @hwif: the HW interface of a pci function device
+ * @action: action on pf channel
+ **/
+void hinic_set_pf_action(struct hinic_hwif *hwif, enum hinic_pf_action action)
+{
+ u32 attr5 = hinic_hwif_read_reg(hwif, HINIC_CSR_FUNC_ATTR5_ADDR);
+
+ attr5 = HINIC_FA5_CLEAR(attr5, PF_ACTION);
+ attr5 |= HINIC_FA5_SET(action, PF_ACTION);
+
+ hinic_hwif_write_reg(hwif, HINIC_CSR_FUNC_ATTR5_ADDR, attr5);
+}
+
+enum hinic_outbound_state hinic_outbound_state_get(struct hinic_hwif *hwif)
+{
+ u32 attr4 = hinic_hwif_read_reg(hwif, HINIC_CSR_FUNC_ATTR4_ADDR);
+
+ return HINIC_FA4_GET(attr4, OUTBOUND_STATE);
+}
+
+void hinic_outbound_state_set(struct hinic_hwif *hwif,
+ enum hinic_outbound_state outbound_state)
+{
+ u32 attr4 = hinic_hwif_read_reg(hwif, HINIC_CSR_FUNC_ATTR4_ADDR);
+
+ attr4 = HINIC_FA4_CLEAR(attr4, OUTBOUND_STATE);
+ attr4 |= HINIC_FA4_SET(outbound_state, OUTBOUND_STATE);
+
+ hinic_hwif_write_reg(hwif, HINIC_CSR_FUNC_ATTR4_ADDR, attr4);
+}
+
+enum hinic_db_state hinic_db_state_get(struct hinic_hwif *hwif)
+{
+ u32 attr4 = hinic_hwif_read_reg(hwif, HINIC_CSR_FUNC_ATTR4_ADDR);
+
+ return HINIC_FA4_GET(attr4, DB_STATE);
+}
+
+void hinic_db_state_set(struct hinic_hwif *hwif,
+ enum hinic_db_state db_state)
+{
+ u32 attr4 = hinic_hwif_read_reg(hwif, HINIC_CSR_FUNC_ATTR4_ADDR);
+
+ attr4 = HINIC_FA4_CLEAR(attr4, DB_STATE);
+ attr4 |= HINIC_FA4_SET(db_state, DB_STATE);
+
+ hinic_hwif_write_reg(hwif, HINIC_CSR_FUNC_ATTR4_ADDR, attr4);
+}
+
+/**
+ * hwif_ready - test if the HW is ready for use
+ * @hwif: the HW interface of a pci function device
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int hwif_ready(struct hinic_hwif *hwif)
+{
+ struct pci_dev *pdev = hwif->pdev;
+ u32 addr, attr1;
+
+ addr = HINIC_CSR_FUNC_ATTR1_ADDR;
+ attr1 = hinic_hwif_read_reg(hwif, addr);
+
+ if (!HINIC_FA1_GET(attr1, INIT_STATUS)) {
+ dev_err(&pdev->dev, "hwif status is not ready\n");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * set_hwif_attr - store the attributes read from the HW in the hwif members
+ * @hwif: the HW interface of a pci function device
+ * @attr0: the first attribute that was read from the hw
+ * @attr1: the second attribute that was read from the hw
+ **/
+static void set_hwif_attr(struct hinic_hwif *hwif, u32 attr0, u32 attr1)
+{
+ hwif->attr.func_idx = HINIC_FA0_GET(attr0, FUNC_IDX);
+ hwif->attr.pf_idx = HINIC_FA0_GET(attr0, PF_IDX);
+ hwif->attr.pci_intf_idx = HINIC_FA0_GET(attr0, PCI_INTF_IDX);
+ hwif->attr.func_type = HINIC_FA0_GET(attr0, FUNC_TYPE);
+
+ hwif->attr.num_aeqs = BIT(HINIC_FA1_GET(attr1, AEQS_PER_FUNC));
+ hwif->attr.num_ceqs = BIT(HINIC_FA1_GET(attr1, CEQS_PER_FUNC));
+ hwif->attr.num_irqs = BIT(HINIC_FA1_GET(attr1, IRQS_PER_FUNC));
+ hwif->attr.num_dma_attr = BIT(HINIC_FA1_GET(attr1, DMA_ATTR_PER_FUNC));
+}
+
+/**
+ * read_hwif_attr - read the attributes and set members in hwif
+ * @hwif: the HW interface of a pci function device
+ **/
+static void read_hwif_attr(struct hinic_hwif *hwif)
+{
+ u32 addr, attr0, attr1;
+
+ addr = HINIC_CSR_FUNC_ATTR0_ADDR;
+ attr0 = hinic_hwif_read_reg(hwif, addr);
+
+ addr = HINIC_CSR_FUNC_ATTR1_ADDR;
+ attr1 = hinic_hwif_read_reg(hwif, addr);
+
+ set_hwif_attr(hwif, attr0, attr1);
+}
+
+/**
+ * set_ppf - try to elect the function as ppf and, on success, update the hwif type
+ * @hwif: the HW interface of a pci function device
+ **/
+static void set_ppf(struct hinic_hwif *hwif)
+{
+ struct hinic_func_attr *attr = &hwif->attr;
+ u32 addr, val, ppf_election;
+
+ /* Read Modify Write */
+ addr = HINIC_CSR_PPF_ELECTION_ADDR(HINIC_HWIF_PCI_INTF(hwif));
+
+ val = hinic_hwif_read_reg(hwif, addr);
+ val = HINIC_PPF_ELECTION_CLEAR(val, IDX);
+
+ ppf_election = HINIC_PPF_ELECTION_SET(HINIC_HWIF_FUNC_IDX(hwif), IDX);
+
+ val |= ppf_election;
+ hinic_hwif_write_reg(hwif, addr, val);
+
+ /* check PPF */
+ val = hinic_hwif_read_reg(hwif, addr);
+
+ attr->ppf_idx = HINIC_PPF_ELECTION_GET(val, IDX);
+ if (attr->ppf_idx == HINIC_HWIF_FUNC_IDX(hwif))
+ attr->func_type = HINIC_PPF;
+}
+
+/**
+ * set_dma_attr - set the dma attributes in the HW
+ * @hwif: the HW interface of a pci function device
+ * @entry_idx: the entry index in the dma table
+ * @st: PCIE TLP steering tag
+ * @at: PCIE TLP AT field
+ * @ph: PCIE TLP Processing Hint field
+ * @no_snooping: PCIE TLP No snooping
+ * @tph_en: PCIE TLP Processing Hint Enable
+ **/
+static void set_dma_attr(struct hinic_hwif *hwif, u32 entry_idx,
+ u8 st, u8 at, u8 ph,
+ enum hinic_pcie_nosnoop no_snooping,
+ enum hinic_pcie_tph tph_en)
+{
+ u32 addr, val, dma_attr_entry;
+
+ /* Read Modify Write */
+ addr = HINIC_CSR_DMA_ATTR_ADDR(entry_idx);
+
+ val = hinic_hwif_read_reg(hwif, addr);
+ val = HINIC_DMA_ATTR_CLEAR(val, ST) &
+ HINIC_DMA_ATTR_CLEAR(val, AT) &
+ HINIC_DMA_ATTR_CLEAR(val, PH) &
+ HINIC_DMA_ATTR_CLEAR(val, NO_SNOOPING) &
+ HINIC_DMA_ATTR_CLEAR(val, TPH_EN);
+
+ dma_attr_entry = HINIC_DMA_ATTR_SET(st, ST) |
+ HINIC_DMA_ATTR_SET(at, AT) |
+ HINIC_DMA_ATTR_SET(ph, PH) |
+ HINIC_DMA_ATTR_SET(no_snooping, NO_SNOOPING) |
+ HINIC_DMA_ATTR_SET(tph_en, TPH_EN);
+
+ val |= dma_attr_entry;
+ hinic_hwif_write_reg(hwif, addr, val);
+}
+
+/**
+ * dma_attr_init - initialize the default dma attributes
+ * @hwif: the HW interface of a pci function device
+ **/
+static void dma_attr_init(struct hinic_hwif *hwif)
+{
+ set_dma_attr(hwif, PCIE_ATTR_ENTRY, HINIC_PCIE_ST_DISABLE,
+ HINIC_PCIE_AT_DISABLE, HINIC_PCIE_PH_DISABLE,
+ HINIC_PCIE_SNOOP, HINIC_PCIE_TPH_DISABLE);
+}
+
+/**
+ * hinic_init_hwif - initialize the hw interface
+ * @hwif: the HW interface of a pci function device
+ * @pdev: the pci device for accessing PCI resources
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_init_hwif(struct hinic_hwif *hwif, struct pci_dev *pdev)
+{
+ int err;
+
+ hwif->pdev = pdev;
+
+ hwif->cfg_regs_bar = pci_ioremap_bar(pdev, HINIC_PCI_CFG_REGS_BAR);
+ if (!hwif->cfg_regs_bar) {
+ dev_err(&pdev->dev, "Failed to map configuration regs\n");
+ return -ENOMEM;
+ }
+
+ err = hwif_ready(hwif);
+ if (err) {
+ dev_err(&pdev->dev, "HW interface is not ready\n");
+ goto err_hwif_ready;
+ }
+
+ read_hwif_attr(hwif);
+
+ if (HINIC_IS_PF(hwif))
+ set_ppf(hwif);
+
+ /* No transactions before DMA is initialized */
+ dma_attr_init(hwif);
+ return 0;
+
+err_hwif_ready:
+ iounmap(hwif->cfg_regs_bar);
+ return err;
+}
+
+/**
+ * hinic_free_hwif - free the HW interface
+ * @hwif: the HW interface of a pci function device
+ **/
+void hinic_free_hwif(struct hinic_hwif *hwif)
+{
+ iounmap(hwif->cfg_regs_bar);
+}
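As a usage note, the MSI-X attribute units documented above are multiples of 8, so programming coalesc_timer = 4 requests a 32 us coalescing period. A hedged set/read-back sketch with illustrative values:

    u8 pending, coalesc, lli_timer, lli_credit, resend;
    int err;

    /* coalesc_timer = 4, i.e. 4 * 8 us = 32 us; everything else disabled */
    err = hinic_msix_attr_set(hwif, 0, 0, 4, 0, 0, 0);
    if (!err)
            err = hinic_msix_attr_get(hwif, 0, &pending, &coalesc,
                                      &lli_timer, &lli_credit, &resend);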
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
new file mode 100644
index 0000000..5b4760c
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
@@ -0,0 +1,272 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_HW_IF_H
+#define HINIC_HW_IF_H
+
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#define HINIC_DMA_ATTR_ST_SHIFT 0
+#define HINIC_DMA_ATTR_AT_SHIFT 8
+#define HINIC_DMA_ATTR_PH_SHIFT 10
+#define HINIC_DMA_ATTR_NO_SNOOPING_SHIFT 12
+#define HINIC_DMA_ATTR_TPH_EN_SHIFT 13
+
+#define HINIC_DMA_ATTR_ST_MASK 0xFF
+#define HINIC_DMA_ATTR_AT_MASK 0x3
+#define HINIC_DMA_ATTR_PH_MASK 0x3
+#define HINIC_DMA_ATTR_NO_SNOOPING_MASK 0x1
+#define HINIC_DMA_ATTR_TPH_EN_MASK 0x1
+
+#define HINIC_DMA_ATTR_SET(val, member) \
+ (((u32)(val) & HINIC_DMA_ATTR_##member##_MASK) << \
+ HINIC_DMA_ATTR_##member##_SHIFT)
+
+#define HINIC_DMA_ATTR_CLEAR(val, member) \
+ ((val) & (~(HINIC_DMA_ATTR_##member##_MASK \
+ << HINIC_DMA_ATTR_##member##_SHIFT)))
+
+#define HINIC_FA0_FUNC_IDX_SHIFT 0
+#define HINIC_FA0_PF_IDX_SHIFT 10
+#define HINIC_FA0_PCI_INTF_IDX_SHIFT 14
+/* reserved members - off 16 */
+#define HINIC_FA0_FUNC_TYPE_SHIFT 24
+
+#define HINIC_FA0_FUNC_IDX_MASK 0x3FF
+#define HINIC_FA0_PF_IDX_MASK 0xF
+#define HINIC_FA0_PCI_INTF_IDX_MASK 0x3
+#define HINIC_FA0_FUNC_TYPE_MASK 0x1
+
+#define HINIC_FA0_GET(val, member) \
+ (((val) >> HINIC_FA0_##member##_SHIFT) & HINIC_FA0_##member##_MASK)
+
+#define HINIC_FA1_AEQS_PER_FUNC_SHIFT 8
+/* reserved members - off 10 */
+#define HINIC_FA1_CEQS_PER_FUNC_SHIFT 12
+/* reserved members - off 15 */
+#define HINIC_FA1_IRQS_PER_FUNC_SHIFT 20
+#define HINIC_FA1_DMA_ATTR_PER_FUNC_SHIFT 24
+/* reserved members - off 27 */
+#define HINIC_FA1_INIT_STATUS_SHIFT 30
+
+#define HINIC_FA1_AEQS_PER_FUNC_MASK 0x3
+#define HINIC_FA1_CEQS_PER_FUNC_MASK 0x7
+#define HINIC_FA1_IRQS_PER_FUNC_MASK 0xF
+#define HINIC_FA1_DMA_ATTR_PER_FUNC_MASK 0x7
+#define HINIC_FA1_INIT_STATUS_MASK 0x1
+
+#define HINIC_FA1_GET(val, member) \
+ (((val) >> HINIC_FA1_##member##_SHIFT) & HINIC_FA1_##member##_MASK)
+
+#define HINIC_FA4_OUTBOUND_STATE_SHIFT 0
+#define HINIC_FA4_DB_STATE_SHIFT 1
+
+#define HINIC_FA4_OUTBOUND_STATE_MASK 0x1
+#define HINIC_FA4_DB_STATE_MASK 0x1
+
+#define HINIC_FA4_GET(val, member) \
+ (((val) >> HINIC_FA4_##member##_SHIFT) & HINIC_FA4_##member##_MASK)
+
+#define HINIC_FA4_SET(val, member) \
+ (((u32)(val) & HINIC_FA4_##member##_MASK) << HINIC_FA4_##member##_SHIFT)
+
+#define HINIC_FA4_CLEAR(val, member) \
+ ((val) & (~(HINIC_FA4_##member##_MASK << HINIC_FA4_##member##_SHIFT)))
+
+#define HINIC_FA5_PF_ACTION_SHIFT 0
+#define HINIC_FA5_PF_ACTION_MASK 0xFFFF
+
+#define HINIC_FA5_SET(val, member) \
+ (((u32)(val) & HINIC_FA5_##member##_MASK) << HINIC_FA5_##member##_SHIFT)
+
+#define HINIC_FA5_CLEAR(val, member) \
+ ((val) & (~(HINIC_FA5_##member##_MASK << HINIC_FA5_##member##_SHIFT)))
+
+#define HINIC_PPF_ELECTION_IDX_SHIFT 0
+#define HINIC_PPF_ELECTION_IDX_MASK 0x1F
+
+#define HINIC_PPF_ELECTION_SET(val, member) \
+ (((u32)(val) & HINIC_PPF_ELECTION_##member##_MASK) << \
+ HINIC_PPF_ELECTION_##member##_SHIFT)
+
+#define HINIC_PPF_ELECTION_GET(val, member) \
+ (((val) >> HINIC_PPF_ELECTION_##member##_SHIFT) & \
+ HINIC_PPF_ELECTION_##member##_MASK)
+
+#define HINIC_PPF_ELECTION_CLEAR(val, member) \
+ ((val) & (~(HINIC_PPF_ELECTION_##member##_MASK \
+ << HINIC_PPF_ELECTION_##member##_SHIFT)))
+
+#define HINIC_MSIX_PENDING_LIMIT_SHIFT 0
+#define HINIC_MSIX_COALESC_TIMER_SHIFT 8
+#define HINIC_MSIX_LLI_TIMER_SHIFT 16
+#define HINIC_MSIX_LLI_CREDIT_SHIFT 24
+#define HINIC_MSIX_RESEND_TIMER_SHIFT 29
+
+#define HINIC_MSIX_PENDING_LIMIT_MASK 0xFF
+#define HINIC_MSIX_COALESC_TIMER_MASK 0xFF
+#define HINIC_MSIX_LLI_TIMER_MASK 0xFF
+#define HINIC_MSIX_LLI_CREDIT_MASK 0x1F
+#define HINIC_MSIX_RESEND_TIMER_MASK 0x7
+
+#define HINIC_MSIX_ATTR_SET(val, member) \
+ (((u32)(val) & HINIC_MSIX_##member##_MASK) << \
+ HINIC_MSIX_##member##_SHIFT)
+
+#define HINIC_MSIX_ATTR_GET(val, member) \
+ (((val) >> HINIC_MSIX_##member##_SHIFT) & \
+ HINIC_MSIX_##member##_MASK)
+
+#define HINIC_MSIX_CNT_RESEND_TIMER_SHIFT 29
+
+#define HINIC_MSIX_CNT_RESEND_TIMER_MASK 0x1
+
+#define HINIC_MSIX_CNT_SET(val, member) \
+ (((u32)(val) & HINIC_MSIX_CNT_##member##_MASK) << \
+ HINIC_MSIX_CNT_##member##_SHIFT)
+
+#define HINIC_HWIF_NUM_AEQS(hwif) ((hwif)->attr.num_aeqs)
+#define HINIC_HWIF_NUM_CEQS(hwif) ((hwif)->attr.num_ceqs)
+#define HINIC_HWIF_NUM_IRQS(hwif) ((hwif)->attr.num_irqs)
+#define HINIC_HWIF_FUNC_IDX(hwif) ((hwif)->attr.func_idx)
+#define HINIC_HWIF_PCI_INTF(hwif) ((hwif)->attr.pci_intf_idx)
+#define HINIC_HWIF_PF_IDX(hwif) ((hwif)->attr.pf_idx)
+
+#define HINIC_FUNC_TYPE(hwif) ((hwif)->attr.func_type)
+#define HINIC_IS_PF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PF)
+#define HINIC_IS_PPF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PPF)
+
+#define HINIC_PCI_CFG_REGS_BAR 0
+#define HINIC_PCI_DB_BAR 4
+
+#define HINIC_PCIE_ST_DISABLE 0
+#define HINIC_PCIE_AT_DISABLE 0
+#define HINIC_PCIE_PH_DISABLE 0
+
+#define HINIC_EQ_MSIX_PENDING_LIMIT_DEFAULT 0 /* Disabled */
+#define HINIC_EQ_MSIX_COALESC_TIMER_DEFAULT 0xFF /* max */
+#define HINIC_EQ_MSIX_LLI_TIMER_DEFAULT 0 /* Disabled */
+#define HINIC_EQ_MSIX_LLI_CREDIT_LIMIT_DEFAULT 0 /* Disabled */
+#define HINIC_EQ_MSIX_RESEND_TIMER_DEFAULT 7 /* max */
+
+enum hinic_pcie_nosnoop {
+ HINIC_PCIE_SNOOP = 0,
+ HINIC_PCIE_NO_SNOOP = 1,
+};
+
+enum hinic_pcie_tph {
+ HINIC_PCIE_TPH_DISABLE = 0,
+ HINIC_PCIE_TPH_ENABLE = 1,
+};
+
+enum hinic_func_type {
+ HINIC_PF = 0,
+ HINIC_PPF = 2,
+};
+
+enum hinic_mod_type {
+ HINIC_MOD_COMM = 0, /* HW communication module */
+ HINIC_MOD_L2NIC = 1, /* L2NIC module */
+ HINIC_MOD_CFGM = 7, /* Configuration module */
+
+ HINIC_MOD_MAX = 15
+};
+
+enum hinic_node_id {
+ HINIC_NODE_ID_MGMT = 21,
+};
+
+enum hinic_pf_action {
+ HINIC_PF_MGMT_INIT = 0x0,
+
+ HINIC_PF_MGMT_ACTIVE = 0x11,
+};
+
+enum hinic_outbound_state {
+ HINIC_OUTBOUND_ENABLE = 0,
+ HINIC_OUTBOUND_DISABLE = 1,
+};
+
+enum hinic_db_state {
+ HINIC_DB_ENABLE = 0,
+ HINIC_DB_DISABLE = 1,
+};
+
+struct hinic_func_attr {
+ u16 func_idx;
+ u8 pf_idx;
+ u8 pci_intf_idx;
+
+ enum hinic_func_type func_type;
+
+ u8 ppf_idx;
+
+ u16 num_irqs;
+ u8 num_aeqs;
+ u8 num_ceqs;
+
+ u8 num_dma_attr;
+};
+
+struct hinic_hwif {
+ struct pci_dev *pdev;
+ void __iomem *cfg_regs_bar;
+
+ struct hinic_func_attr attr;
+};
+
+static inline u32 hinic_hwif_read_reg(struct hinic_hwif *hwif, u32 reg)
+{
+ return be32_to_cpu(readl(hwif->cfg_regs_bar + reg));
+}
+
+static inline void hinic_hwif_write_reg(struct hinic_hwif *hwif, u32 reg,
+ u32 val)
+{
+ writel(cpu_to_be32(val), hwif->cfg_regs_bar + reg);
+}
+
+int hinic_msix_attr_set(struct hinic_hwif *hwif, u16 msix_index,
+ u8 pending_limit, u8 coalesc_timer,
+ u8 lli_timer_cfg, u8 lli_credit_limit,
+ u8 resend_timer);
+
+int hinic_msix_attr_get(struct hinic_hwif *hwif, u16 msix_index,
+ u8 *pending_limit, u8 *coalesc_timer_cfg,
+ u8 *lli_timer, u8 *lli_credit_limit,
+ u8 *resend_timer);
+
+int hinic_msix_attr_cnt_clear(struct hinic_hwif *hwif, u16 msix_index);
+
+void hinic_set_pf_action(struct hinic_hwif *hwif, enum hinic_pf_action action);
+
+enum hinic_outbound_state hinic_outbound_state_get(struct hinic_hwif *hwif);
+
+void hinic_outbound_state_set(struct hinic_hwif *hwif,
+ enum hinic_outbound_state outbound_state);
+
+enum hinic_db_state hinic_db_state_get(struct hinic_hwif *hwif);
+
+void hinic_db_state_set(struct hinic_hwif *hwif,
+ enum hinic_db_state db_state);
+
+int hinic_init_hwif(struct hinic_hwif *hwif, struct pci_dev *pdev);
+
+void hinic_free_hwif(struct hinic_hwif *hwif);
+
+#endif
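A probe-time sketch of the hwif lifecycle declared above; the function name is hypothetical and the HINIC_PF_MGMT_ACTIVE step is an assumption about intended use:

    static int example_probe(struct pci_dev *pdev, struct hinic_hwif *hwif)
    {
            int err;

            err = hinic_init_hwif(hwif, pdev);
            if (err)
                    return err;

            /* assumption: the PF announces itself on the mgmt channel */
            if (HINIC_IS_PF(hwif))
                    hinic_set_pf_action(hwif, HINIC_PF_MGMT_ACTIVE);

            /* ... use the device ... */

            hinic_free_hwif(hwif);
            return 0;
    }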
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
new file mode 100644
index 0000000..8e58976
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
@@ -0,0 +1,533 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/semaphore.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/err.h>
+
+#include "hinic_hw_if.h"
+#include "hinic_hw_eqs.h"
+#include "hinic_hw_wqe.h"
+#include "hinic_hw_wq.h"
+#include "hinic_hw_cmdq.h"
+#include "hinic_hw_qp_ctxt.h"
+#include "hinic_hw_qp.h"
+#include "hinic_hw_io.h"
+
+#define CI_Q_ADDR_SIZE sizeof(u32)
+
+#define CI_ADDR(base_addr, q_id) ((base_addr) + \
+ (q_id) * CI_Q_ADDR_SIZE)
+
+#define CI_TABLE_SIZE(num_qps) ((num_qps) * CI_Q_ADDR_SIZE)
+
+#define DB_IDX(db, db_base) \
+ (((unsigned long)(db) - (unsigned long)(db_base)) / HINIC_DB_PAGE_SIZE)
+
+enum io_cmd {
+ IO_CMD_MODIFY_QUEUE_CTXT = 0,
+};
+
+static void init_db_area_idx(struct hinic_free_db_area *free_db_area)
+{
+ int i;
+
+ for (i = 0; i < HINIC_DB_MAX_AREAS; i++)
+ free_db_area->db_idx[i] = i;
+
+ free_db_area->alloc_pos = 0;
+ free_db_area->return_pos = HINIC_DB_MAX_AREAS;
+
+ free_db_area->num_free = HINIC_DB_MAX_AREAS;
+
+ sema_init(&free_db_area->idx_lock, 1);
+}
+
+static void __iomem *get_db_area(struct hinic_func_to_io *func_to_io)
+{
+ struct hinic_free_db_area *free_db_area = &func_to_io->free_db_area;
+ int pos, idx;
+
+ down(&free_db_area->idx_lock);
+
+ free_db_area->num_free--;
+
+ if (free_db_area->num_free < 0) {
+ free_db_area->num_free++;
+ up(&free_db_area->idx_lock);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ pos = free_db_area->alloc_pos++;
+ pos &= HINIC_DB_MAX_AREAS - 1;
+
+ idx = free_db_area->db_idx[pos];
+
+ free_db_area->db_idx[pos] = -1;
+
+ up(&free_db_area->idx_lock);
+
+ return func_to_io->db_base + idx * HINIC_DB_PAGE_SIZE;
+}
+
+static void return_db_area(struct hinic_func_to_io *func_to_io,
+ void __iomem *db_base)
+{
+ struct hinic_free_db_area *free_db_area = &func_to_io->free_db_area;
+ int pos, idx = DB_IDX(db_base, func_to_io->db_base);
+
+ down(&free_db_area->idx_lock);
+
+ pos = free_db_area->return_pos++;
+ pos &= HINIC_DB_MAX_AREAS - 1;
+
+ free_db_area->db_idx[pos] = idx;
+
+ free_db_area->num_free++;
+
+ up(&free_db_area->idx_lock);
+}
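get_db_area() and return_db_area() above implement a semaphore-guarded circular free list of doorbell page indexes: alloc_pos and return_pos chase each other modulo HINIC_DB_MAX_AREAS (the masking assumes it is a power of two), and num_free gates allocation. A minimal single-threaded model of the same bookkeeping, with illustrative names:

    #define NAREAS 8        /* stands in for HINIC_DB_MAX_AREAS, power of two */

    struct db_free_list {
            int idx[NAREAS];
            unsigned int alloc_pos, return_pos;
            int num_free;
    };

    static int fl_get(struct db_free_list *fl)
    {
            if (fl->num_free == 0)
                    return -1;              /* all areas in use */
            fl->num_free--;
            return fl->idx[fl->alloc_pos++ & (NAREAS - 1)];
    }

    static void fl_put(struct db_free_list *fl, int idx)
    {
            fl->idx[fl->return_pos++ & (NAREAS - 1)] = idx;
            fl->num_free++;
    }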
+
+static int write_sq_ctxts(struct hinic_func_to_io *func_to_io, u16 base_qpn,
+ u16 num_sqs)
+{
+ struct hinic_hwif *hwif = func_to_io->hwif;
+ struct hinic_sq_ctxt_block *sq_ctxt_block;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_cmdq_buf cmdq_buf;
+ struct hinic_sq_ctxt *sq_ctxt;
+ struct hinic_qp *qp;
+ u64 out_param;
+ int err, i;
+
+ err = hinic_alloc_cmdq_buf(&func_to_io->cmdqs, &cmdq_buf);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate cmdq buf\n");
+ return err;
+ }
+
+ sq_ctxt_block = cmdq_buf.buf;
+ sq_ctxt = sq_ctxt_block->sq_ctxt;
+
+ hinic_qp_prepare_header(&sq_ctxt_block->hdr, HINIC_QP_CTXT_TYPE_SQ,
+ num_sqs, func_to_io->max_qps);
+ for (i = 0; i < num_sqs; i++) {
+ qp = &func_to_io->qps[i];
+
+ hinic_sq_prepare_ctxt(&sq_ctxt[i], &qp->sq,
+ base_qpn + qp->q_id);
+ }
+
+ cmdq_buf.size = HINIC_SQ_CTXT_SIZE(num_sqs);
+
+ err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC,
+ IO_CMD_MODIFY_QUEUE_CTXT, &cmdq_buf,
+ &out_param);
+ if ((err) || (out_param != 0)) {
+ dev_err(&pdev->dev, "Failed to set SQ ctxts\n");
+ err = -EFAULT;
+ }
+
+ hinic_free_cmdq_buf(&func_to_io->cmdqs, &cmdq_buf);
+ return err;
+}
+
+static int write_rq_ctxts(struct hinic_func_to_io *func_to_io, u16 base_qpn,
+ u16 num_rqs)
+{
+ struct hinic_hwif *hwif = func_to_io->hwif;
+ struct hinic_rq_ctxt_block *rq_ctxt_block;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_cmdq_buf cmdq_buf;
+ struct hinic_rq_ctxt *rq_ctxt;
+ struct hinic_qp *qp;
+ u64 out_param;
+ int err, i;
+
+ err = hinic_alloc_cmdq_buf(&func_to_io->cmdqs, &cmdq_buf);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate cmdq buf\n");
+ return err;
+ }
+
+ rq_ctxt_block = cmdq_buf.buf;
+ rq_ctxt = rq_ctxt_block->rq_ctxt;
+
+ hinic_qp_prepare_header(&rq_ctxt_block->hdr, HINIC_QP_CTXT_TYPE_RQ,
+ num_rqs, func_to_io->max_qps);
+ for (i = 0; i < num_rqs; i++) {
+ qp = &func_to_io->qps[i];
+
+ hinic_rq_prepare_ctxt(&rq_ctxt[i], &qp->rq,
+ base_qpn + qp->q_id);
+ }
+
+ cmdq_buf.size = HINIC_RQ_CTXT_SIZE(num_rqs);
+
+ err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC,
+ IO_CMD_MODIFY_QUEUE_CTXT, &cmdq_buf,
+ &out_param);
+ if ((err) || (out_param != 0)) {
+ dev_err(&pdev->dev, "Failed to set RQ ctxts\n");
+ err = -EFAULT;
+ }
+
+ hinic_free_cmdq_buf(&func_to_io->cmdqs, &cmdq_buf);
+ return err;
+}
+
+/**
+ * write_qp_ctxts - write the qp ctxt to HW
+ * @func_to_io: func to io channel that holds the IO components
+ * @base_qpn: first qp number
+ * @num_qps: number of qps to write
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int write_qp_ctxts(struct hinic_func_to_io *func_to_io, u16 base_qpn,
+ u16 num_qps)
+{
+ return (write_sq_ctxts(func_to_io, base_qpn, num_qps) ||
+ write_rq_ctxts(func_to_io, base_qpn, num_qps));
+}
+
+/**
+ * init_qp - Initialize a Queue Pair
+ * @func_to_io: func to io channel that holds the IO components
+ * @qp: pointer to the qp to initialize
+ * @q_id: the id of the qp
+ * @sq_msix_entry: msix entry for sq
+ * @rq_msix_entry: msix entry for rq
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int init_qp(struct hinic_func_to_io *func_to_io,
+ struct hinic_qp *qp, int q_id,
+ struct msix_entry *sq_msix_entry,
+ struct msix_entry *rq_msix_entry)
+{
+ struct hinic_hwif *hwif = func_to_io->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ void __iomem *db_base;
+ int err;
+
+ qp->q_id = q_id;
+
+ err = hinic_wq_allocate(&func_to_io->wqs, &func_to_io->sq_wq[q_id],
+ HINIC_SQ_WQEBB_SIZE, HINIC_SQ_PAGE_SIZE,
+ HINIC_SQ_DEPTH, HINIC_SQ_WQE_MAX_SIZE);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate WQ for SQ\n");
+ return err;
+ }
+
+ err = hinic_wq_allocate(&func_to_io->wqs, &func_to_io->rq_wq[q_id],
+ HINIC_RQ_WQEBB_SIZE, HINIC_RQ_PAGE_SIZE,
+ HINIC_RQ_DEPTH, HINIC_RQ_WQE_SIZE);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate WQ for RQ\n");
+ goto err_rq_alloc;
+ }
+
+ db_base = get_db_area(func_to_io);
+ if (IS_ERR(db_base)) {
+ dev_err(&pdev->dev, "Failed to get DB area for SQ\n");
+ err = PTR_ERR(db_base);
+ goto err_get_db;
+ }
+
+ func_to_io->sq_db[q_id] = db_base;
+
+ err = hinic_init_sq(&qp->sq, hwif, &func_to_io->sq_wq[q_id],
+ sq_msix_entry,
+ CI_ADDR(func_to_io->ci_addr_base, q_id),
+ CI_ADDR(func_to_io->ci_dma_base, q_id), db_base);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init SQ\n");
+ goto err_sq_init;
+ }
+
+ err = hinic_init_rq(&qp->rq, hwif, &func_to_io->rq_wq[q_id],
+ rq_msix_entry);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init RQ\n");
+ goto err_rq_init;
+ }
+
+ return 0;
+
+err_rq_init:
+ hinic_clean_sq(&qp->sq);
+
+err_sq_init:
+ return_db_area(func_to_io, db_base);
+
+err_get_db:
+ hinic_wq_free(&func_to_io->wqs, &func_to_io->rq_wq[q_id]);
+
+err_rq_alloc:
+ hinic_wq_free(&func_to_io->wqs, &func_to_io->sq_wq[q_id]);
+ return err;
+}
+
+/**
+ * destroy_qp - Clean the resources of a Queue Pair
+ * @func_to_io: func to io channel that holds the IO components
+ * @qp: pointer to the qp to clean
+ **/
+static void destroy_qp(struct hinic_func_to_io *func_to_io,
+ struct hinic_qp *qp)
+{
+ int q_id = qp->q_id;
+
+ hinic_clean_rq(&qp->rq);
+ hinic_clean_sq(&qp->sq);
+
+ return_db_area(func_to_io, func_to_io->sq_db[q_id]);
+
+ hinic_wq_free(&func_to_io->wqs, &func_to_io->rq_wq[q_id]);
+ hinic_wq_free(&func_to_io->wqs, &func_to_io->sq_wq[q_id]);
+}
+
+/**
+ * hinic_io_create_qps - Create Queue Pairs
+ * @func_to_io: func to io channel that holds the IO components
+ * @base_qpn: base qp number
+ * @num_qps: number of queue pairs to create
+ * @sq_msix_entries: msix entries for the sqs
+ * @rq_msix_entries: msix entries for the rqs
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_io_create_qps(struct hinic_func_to_io *func_to_io,
+ u16 base_qpn, int num_qps,
+ struct msix_entry *sq_msix_entries,
+ struct msix_entry *rq_msix_entries)
+{
+ struct hinic_hwif *hwif = func_to_io->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ size_t qps_size, wq_size, db_size;
+ void *ci_addr_base;
+ int i, j, err;
+
+ qps_size = num_qps * sizeof(*func_to_io->qps);
+ func_to_io->qps = devm_kzalloc(&pdev->dev, qps_size, GFP_KERNEL);
+ if (!func_to_io->qps)
+ return -ENOMEM;
+
+ wq_size = num_qps * sizeof(*func_to_io->sq_wq);
+ func_to_io->sq_wq = devm_kzalloc(&pdev->dev, wq_size, GFP_KERNEL);
+ if (!func_to_io->sq_wq) {
+ err = -ENOMEM;
+ goto err_sq_wq;
+ }
+
+ wq_size = num_qps * sizeof(*func_to_io->rq_wq);
+ func_to_io->rq_wq = devm_kzalloc(&pdev->dev, wq_size, GFP_KERNEL);
+ if (!func_to_io->rq_wq) {
+ err = -ENOMEM;
+ goto err_rq_wq;
+ }
+
+ db_size = num_qps * sizeof(*func_to_io->sq_db);
+ func_to_io->sq_db = devm_kzalloc(&pdev->dev, db_size, GFP_KERNEL);
+ if (!func_to_io->sq_db) {
+ err = -ENOMEM;
+ goto err_sq_db;
+ }
+
+ ci_addr_base = dma_zalloc_coherent(&pdev->dev, CI_TABLE_SIZE(num_qps),
+ &func_to_io->ci_dma_base,
+ GFP_KERNEL);
+ if (!ci_addr_base) {
+ dev_err(&pdev->dev, "Failed to allocate CI area\n");
+ err = -ENOMEM;
+ goto err_ci_base;
+ }
+
+ func_to_io->ci_addr_base = ci_addr_base;
+
+ for (i = 0; i < num_qps; i++) {
+ err = init_qp(func_to_io, &func_to_io->qps[i], i,
+ &sq_msix_entries[i], &rq_msix_entries[i]);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to create QP %d\n", i);
+ goto err_init_qp;
+ }
+ }
+
+ err = write_qp_ctxts(func_to_io, base_qpn, num_qps);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init QP ctxts\n");
+ goto err_write_qp_ctxts;
+ }
+
+ return 0;
+
+err_write_qp_ctxts:
+err_init_qp:
+ for (j = 0; j < i; j++)
+ destroy_qp(func_to_io, &func_to_io->qps[j]);
+
+ dma_free_coherent(&pdev->dev, CI_TABLE_SIZE(num_qps),
+ func_to_io->ci_addr_base, func_to_io->ci_dma_base);
+
+err_ci_base:
+ devm_kfree(&pdev->dev, func_to_io->sq_db);
+
+err_sq_db:
+ devm_kfree(&pdev->dev, func_to_io->rq_wq);
+
+err_rq_wq:
+ devm_kfree(&pdev->dev, func_to_io->sq_wq);
+
+err_sq_wq:
+ devm_kfree(&pdev->dev, func_to_io->qps);
+ return err;
+}
+
+/**
+ * hinic_io_destroy_qps - Destroy the IO Queue Pairs
+ * @func_to_io: func to io channel that holds the IO components
+ * @num_qps: number of queue pairs to destroy
+ **/
+void hinic_io_destroy_qps(struct hinic_func_to_io *func_to_io, int num_qps)
+{
+ struct hinic_hwif *hwif = func_to_io->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ size_t ci_table_size;
+ int i;
+
+ ci_table_size = CI_TABLE_SIZE(num_qps);
+
+ for (i = 0; i < num_qps; i++)
+ destroy_qp(func_to_io, &func_to_io->qps[i]);
+
+ dma_free_coherent(&pdev->dev, ci_table_size, func_to_io->ci_addr_base,
+ func_to_io->ci_dma_base);
+
+ devm_kfree(&pdev->dev, func_to_io->sq_db);
+
+ devm_kfree(&pdev->dev, func_to_io->rq_wq);
+ devm_kfree(&pdev->dev, func_to_io->sq_wq);
+
+ devm_kfree(&pdev->dev, func_to_io->qps);
+}
+
+/**
+ * hinic_io_init - Initialize the IO components
+ * @func_to_io: func to io channel that holds the IO components
+ * @hwif: HW interface for accessing IO
+ * @max_qps: maximum QPs in HW
+ * @num_ceqs: number of completion event queues
+ * @ceq_msix_entries: msix entries for ceqs
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_io_init(struct hinic_func_to_io *func_to_io,
+ struct hinic_hwif *hwif, u16 max_qps, int num_ceqs,
+ struct msix_entry *ceq_msix_entries)
+{
+ struct pci_dev *pdev = hwif->pdev;
+ enum hinic_cmdq_type cmdq, type;
+ void __iomem *db_area;
+ int err;
+
+ func_to_io->hwif = hwif;
+ func_to_io->qps = NULL;
+ func_to_io->max_qps = max_qps;
+
+ err = hinic_ceqs_init(&func_to_io->ceqs, hwif, num_ceqs,
+ HINIC_DEFAULT_CEQ_LEN, HINIC_EQ_PAGE_SIZE,
+ ceq_msix_entries);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init CEQs\n");
+ return err;
+ }
+
+ err = hinic_wqs_alloc(&func_to_io->wqs, 2 * max_qps, hwif);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate WQS for IO\n");
+ goto err_wqs_alloc;
+ }
+
+ func_to_io->db_base = pci_ioremap_bar(pdev, HINIC_PCI_DB_BAR);
+ if (!func_to_io->db_base) {
+ dev_err(&pdev->dev, "Failed to remap IO DB area\n");
+ err = -ENOMEM;
+ goto err_db_ioremap;
+ }
+
+ init_db_area_idx(&func_to_io->free_db_area);
+
+ for (cmdq = HINIC_CMDQ_SYNC; cmdq < HINIC_MAX_CMDQ_TYPES; cmdq++) {
+ db_area = get_db_area(func_to_io);
+ if (IS_ERR(db_area)) {
+ dev_err(&pdev->dev, "Failed to get cmdq db area\n");
+ err = PTR_ERR(db_area);
+ goto err_db_area;
+ }
+
+ func_to_io->cmdq_db_area[cmdq] = db_area;
+ }
+
+ err = hinic_init_cmdqs(&func_to_io->cmdqs, hwif,
+ func_to_io->cmdq_db_area);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize cmdqs\n");
+ goto err_init_cmdqs;
+ }
+
+ return 0;
+
+err_init_cmdqs:
+err_db_area:
+ for (type = HINIC_CMDQ_SYNC; type < cmdq; type++)
+ return_db_area(func_to_io, func_to_io->cmdq_db_area[type]);
+
+ iounmap(func_to_io->db_base);
+
+err_db_ioremap:
+ hinic_wqs_free(&func_to_io->wqs);
+
+err_wqs_alloc:
+ hinic_ceqs_free(&func_to_io->ceqs);
+ return err;
+}
+
+/**
+ * hinic_io_free - Free the IO components
+ * @func_to_io: func to io channel that holds the IO components
+ **/
+void hinic_io_free(struct hinic_func_to_io *func_to_io)
+{
+ enum hinic_cmdq_type cmdq;
+
+ hinic_free_cmdqs(&func_to_io->cmdqs);
+
+ for (cmdq = HINIC_CMDQ_SYNC; cmdq < HINIC_MAX_CMDQ_TYPES; cmdq++)
+ return_db_area(func_to_io, func_to_io->cmdq_db_area[cmdq]);
+
+ iounmap(func_to_io->db_base);
+ hinic_wqs_free(&func_to_io->wqs);
+ hinic_ceqs_free(&func_to_io->ceqs);
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
new file mode 100644
index 0000000..adb6417
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
@@ -0,0 +1,97 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_HW_IO_H
+#define HINIC_HW_IO_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/semaphore.h>
+#include <linux/sizes.h>
+
+#include "hinic_hw_if.h"
+#include "hinic_hw_eqs.h"
+#include "hinic_hw_wq.h"
+#include "hinic_hw_cmdq.h"
+#include "hinic_hw_qp.h"
+
+#define HINIC_DB_PAGE_SIZE SZ_4K
+#define HINIC_DB_SIZE SZ_4M
+
+#define HINIC_DB_MAX_AREAS (HINIC_DB_SIZE / HINIC_DB_PAGE_SIZE)
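+
+/*
+ * Worked example of the arithmetic above: a 4M doorbell BAR split into 4K
+ * pages gives HINIC_DB_MAX_AREAS = SZ_4M / SZ_4K = 1024 areas. A sketch of
+ * the address of area idx (illustrative only; the driver hands areas out
+ * through free_db_area below):
+ *
+ * void __iomem *db = func_to_io->db_base + idx * HINIC_DB_PAGE_SIZE;
+ */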
+
+enum hinic_db_type {
+ HINIC_DB_CMDQ_TYPE,
+ HINIC_DB_SQ_TYPE,
+};
+
+enum hinic_io_path {
+ HINIC_CTRL_PATH,
+ HINIC_DATA_PATH,
+};
+
+struct hinic_free_db_area {
+ int db_idx[HINIC_DB_MAX_AREAS];
+
+ int alloc_pos;
+ int return_pos;
+
+ int num_free;
+
+ /* Lock for getting db area */
+ struct semaphore idx_lock;
+};
+
+struct hinic_func_to_io {
+ struct hinic_hwif *hwif;
+
+ struct hinic_ceqs ceqs;
+
+ struct hinic_wqs wqs;
+
+ struct hinic_wq *sq_wq;
+ struct hinic_wq *rq_wq;
+
+ struct hinic_qp *qps;
+ u16 max_qps;
+
+ void __iomem **sq_db;
+ void __iomem *db_base;
+
+ void *ci_addr_base;
+ dma_addr_t ci_dma_base;
+
+ struct hinic_free_db_area free_db_area;
+
+ void __iomem *cmdq_db_area[HINIC_MAX_CMDQ_TYPES];
+
+ struct hinic_cmdqs cmdqs;
+};
+
+int hinic_io_create_qps(struct hinic_func_to_io *func_to_io,
+ u16 base_qpn, int num_qps,
+ struct msix_entry *sq_msix_entries,
+ struct msix_entry *rq_msix_entries);
+
+void hinic_io_destroy_qps(struct hinic_func_to_io *func_to_io,
+ int num_qps);
+
+int hinic_io_init(struct hinic_func_to_io *func_to_io,
+ struct hinic_hwif *hwif, u16 max_qps, int num_ceqs,
+ struct msix_entry *ceq_msix_entries);
+
+void hinic_io_free(struct hinic_func_to_io *func_to_io);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
new file mode 100644
index 0000000..278dc13
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
@@ -0,0 +1,597 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/semaphore.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <asm/barrier.h>
+
+#include "hinic_hw_if.h"
+#include "hinic_hw_eqs.h"
+#include "hinic_hw_api_cmd.h"
+#include "hinic_hw_mgmt.h"
+#include "hinic_hw_dev.h"
+
+#define SYNC_MSG_ID_MASK 0x1FF
+
+#define SYNC_MSG_ID(pf_to_mgmt) ((pf_to_mgmt)->sync_msg_id)
+
+#define SYNC_MSG_ID_INC(pf_to_mgmt) (SYNC_MSG_ID(pf_to_mgmt) = \
+ ((SYNC_MSG_ID(pf_to_mgmt) + 1) & \
+ SYNC_MSG_ID_MASK))
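+
+/*
+ * Illustrative use of the sync message id counter (not a driver entry
+ * point): the id advances modulo SYNC_MSG_ID_MASK + 1.
+ *
+ * u16 id = SYNC_MSG_ID(pf_to_mgmt);
+ * SYNC_MSG_ID_INC(pf_to_mgmt); // id becomes (id + 1) & 0x1FF
+ */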
+
+#define MSG_SZ_IS_VALID(in_size) ((in_size) <= MAX_MSG_LEN)
+
+#define MGMT_MSG_LEN_MIN 20
+#define MGMT_MSG_LEN_STEP 16
+#define MGMT_MSG_RSVD_FOR_DEV 8
+
+#define SEGMENT_LEN 48
+
+#define MAX_PF_MGMT_BUF_SIZE 2048
+
+/* Message data must be aligned to SEGMENT_LEN */
+#define MAX_MSG_LEN 2016
+
+#define MSG_NOT_RESP 0xFFFF
+
+#define MGMT_MSG_TIMEOUT 1000
+
+#define mgmt_to_pfhwdev(pf_mgmt) \
+ container_of(pf_mgmt, struct hinic_pfhwdev, pf_to_mgmt)
+
+enum msg_segment_type {
+ NOT_LAST_SEGMENT = 0,
+ LAST_SEGMENT = 1,
+};
+
+enum mgmt_direction_type {
+ MGMT_DIRECT_SEND = 0,
+ MGMT_RESP = 1,
+};
+
+enum msg_ack_type {
+ MSG_ACK = 0,
+ MSG_NO_ACK = 1,
+};
+
+/**
+ * hinic_register_mgmt_msg_cb - register msg handler for a msg from a module
+ * @pf_to_mgmt: PF to MGMT channel
+ * @mod: module in the chip that this handler will handle its messages
+ * @handle: private data for the callback
+ * @callback: the handler that will handle messages
+ **/
+void hinic_register_mgmt_msg_cb(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ enum hinic_mod_type mod,
+ void *handle,
+ void (*callback)(void *handle,
+ u8 cmd, void *buf_in,
+ u16 in_size, void *buf_out,
+ u16 *out_size))
+{
+ struct hinic_mgmt_cb *mgmt_cb = &pf_to_mgmt->mgmt_cb[mod];
+
+ mgmt_cb->cb = callback;
+ mgmt_cb->handle = handle;
+ mgmt_cb->state = HINIC_MGMT_CB_ENABLED;
+}
+
+/**
+ * hinic_unregister_mgmt_msg_cb - unregister msg handler for a msg from a module
+ * @pf_to_mgmt: PF to MGMT channel
+ * @mod: module in the chip that this handler handles its messages
+ **/
+void hinic_unregister_mgmt_msg_cb(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ enum hinic_mod_type mod)
+{
+ struct hinic_mgmt_cb *mgmt_cb = &pf_to_mgmt->mgmt_cb[mod];
+
+ mgmt_cb->state &= ~HINIC_MGMT_CB_ENABLED;
+
+ while (mgmt_cb->state & HINIC_MGMT_CB_RUNNING)
+ schedule();
+
+ mgmt_cb->cb = NULL;
+}
+
+/**
+ * prepare_header - prepare the header of the message
+ * @pf_to_mgmt: PF to MGMT channel
+ * @msg_len: the length of the message
+ * @mod: module in the chip that will get the message
+ * @ack_type: whether the message requires a response (ACK)
+ * @direction: the direction of the message
+ * @cmd: command of the message
+ * @msg_id: message id
+ *
+ * Return the prepared header value
+ **/
+static u64 prepare_header(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ u16 msg_len, enum hinic_mod_type mod,
+ enum msg_ack_type ack_type,
+ enum mgmt_direction_type direction,
+ u16 cmd, u16 msg_id)
+{
+ struct hinic_hwif *hwif = pf_to_mgmt->hwif;
+
+ return HINIC_MSG_HEADER_SET(msg_len, MSG_LEN) |
+ HINIC_MSG_HEADER_SET(mod, MODULE) |
+ HINIC_MSG_HEADER_SET(SEGMENT_LEN, SEG_LEN) |
+ HINIC_MSG_HEADER_SET(ack_type, NO_ACK) |
+ HINIC_MSG_HEADER_SET(0, ASYNC_MGMT_TO_PF) |
+ HINIC_MSG_HEADER_SET(0, SEQID) |
+ HINIC_MSG_HEADER_SET(LAST_SEGMENT, LAST) |
+ HINIC_MSG_HEADER_SET(direction, DIRECTION) |
+ HINIC_MSG_HEADER_SET(cmd, CMD) |
+ HINIC_MSG_HEADER_SET(HINIC_HWIF_PCI_INTF(hwif), PCI_INTF) |
+ HINIC_MSG_HEADER_SET(HINIC_HWIF_PF_IDX(hwif), PF_IDX) |
+ HINIC_MSG_HEADER_SET(msg_id, MSG_ID);
+}
+
+/**
+ * prepare_mgmt_cmd - prepare the mgmt command
+ * @mgmt_cmd: pointer to the command to prepare
+ * @header: pointer to the header for the message
+ * @msg: the data of the message
+ * @msg_len: the length of the message
+ **/
+static void prepare_mgmt_cmd(u8 *mgmt_cmd, u64 *header, u8 *msg, u16 msg_len)
+{
+ memset(mgmt_cmd, 0, MGMT_MSG_RSVD_FOR_DEV);
+
+ mgmt_cmd += MGMT_MSG_RSVD_FOR_DEV;
+ memcpy(mgmt_cmd, header, sizeof(*header));
+
+ mgmt_cmd += sizeof(*header);
+ memcpy(mgmt_cmd, msg, msg_len);
+}
+
+/**
+ * mgmt_msg_len - calculate the total message length
+ * @msg_data_len: the length of the message data
+ *
+ * Return the total message length
+ **/
+static u16 mgmt_msg_len(u16 msg_data_len)
+{
+ /* RSVD + HEADER_SIZE + DATA_LEN */
+ u16 msg_len = MGMT_MSG_RSVD_FOR_DEV + sizeof(u64) + msg_data_len;
+
+ if (msg_len > MGMT_MSG_LEN_MIN)
+ msg_len = MGMT_MSG_LEN_MIN +
+ ALIGN((msg_len - MGMT_MSG_LEN_MIN),
+ MGMT_MSG_LEN_STEP);
+ else
+ msg_len = MGMT_MSG_LEN_MIN;
+
+ return msg_len;
+}
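+
+/*
+ * Worked example: for msg_data_len = 5, msg_len = 8 (RSVD) + 8 (header)
+ * + 5 = 21 bytes, which exceeds MGMT_MSG_LEN_MIN (20), so the result is
+ * 20 + ALIGN(21 - 20, 16) = 36 bytes.
+ */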
+
+/**
+ * send_msg_to_mgmt - send message to mgmt by API CMD
+ * @pf_to_mgmt: PF to MGMT channel
+ * @mod: module in the chip that will get the message
+ * @cmd: command of the message
+ * @data: the msg data
+ * @data_len: the msg data length
+ * @ack_type: whether the message requires a response (ACK)
+ * @direction: the direction of the original message
+ * @resp_msg_id: msg id to respond to, when sending a response
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int send_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ enum hinic_mod_type mod, u8 cmd,
+ u8 *data, u16 data_len,
+ enum msg_ack_type ack_type,
+ enum mgmt_direction_type direction,
+ u16 resp_msg_id)
+{
+ struct hinic_api_cmd_chain *chain;
+ u64 header;
+ u16 msg_id;
+
+ msg_id = SYNC_MSG_ID(pf_to_mgmt);
+
+ if (direction == MGMT_RESP) {
+ header = prepare_header(pf_to_mgmt, data_len, mod, ack_type,
+ direction, cmd, resp_msg_id);
+ } else {
+ SYNC_MSG_ID_INC(pf_to_mgmt);
+ header = prepare_header(pf_to_mgmt, data_len, mod, ack_type,
+ direction, cmd, msg_id);
+ }
+
+ prepare_mgmt_cmd(pf_to_mgmt->sync_msg_buf, &header, data, data_len);
+
+ chain = pf_to_mgmt->cmd_chain[HINIC_API_CMD_WRITE_TO_MGMT_CPU];
+ return hinic_api_cmd_write(chain, HINIC_NODE_ID_MGMT,
+ pf_to_mgmt->sync_msg_buf,
+ mgmt_msg_len(data_len));
+}
+
+/**
+ * msg_to_mgmt_sync - send sync message to mgmt
+ * @pf_to_mgmt: PF to MGMT channel
+ * @mod: module in the chip that will get the message
+ * @cmd: command of the message
+ * @buf_in: the msg data
+ * @in_size: the msg data length
+ * @buf_out: response
+ * @out_size: response length
+ * @direction: the direction of the original message
+ * @resp_msg_id: msg id to respond to (MSG_NOT_RESP for a new message)
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int msg_to_mgmt_sync(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ enum hinic_mod_type mod, u8 cmd,
+ u8 *buf_in, u16 in_size,
+ u8 *buf_out, u16 *out_size,
+ enum mgmt_direction_type direction,
+ u16 resp_msg_id)
+{
+ struct hinic_hwif *hwif = pf_to_mgmt->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_recv_msg *recv_msg;
+ struct completion *recv_done;
+ u16 msg_id;
+ int err;
+
+ /* Lock the sync_msg_buf */
+ down(&pf_to_mgmt->sync_msg_lock);
+
+ recv_msg = &pf_to_mgmt->recv_resp_msg_from_mgmt;
+ recv_done = &recv_msg->recv_done;
+
+ if (resp_msg_id == MSG_NOT_RESP)
+ msg_id = SYNC_MSG_ID(pf_to_mgmt);
+ else
+ msg_id = resp_msg_id;
+
+ init_completion(recv_done);
+
+ err = send_msg_to_mgmt(pf_to_mgmt, mod, cmd, buf_in, in_size,
+ MSG_ACK, direction, resp_msg_id);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to send sync msg to mgmt\n");
+ goto unlock_sync_msg;
+ }
+
+ if (!wait_for_completion_timeout(recv_done,
+ msecs_to_jiffies(MGMT_MSG_TIMEOUT))) {
+ dev_err(&pdev->dev, "MGMT timeout, MSG id = %d\n", msg_id);
+ err = -ETIMEDOUT;
+ goto unlock_sync_msg;
+ }
+
+ smp_rmb(); /* read the response only after the completion is seen */
+
+ if (recv_msg->msg_id != msg_id) {
+ dev_err(&pdev->dev, "incorrect MSG for id = %d\n", msg_id);
+ err = -EFAULT;
+ goto unlock_sync_msg;
+ }
+
+ if (buf_out && recv_msg->msg_len <= MAX_PF_MGMT_BUF_SIZE) {
+ memcpy(buf_out, recv_msg->msg, recv_msg->msg_len);
+ *out_size = recv_msg->msg_len;
+ }
+
+unlock_sync_msg:
+ up(&pf_to_mgmt->sync_msg_lock);
+ return err;
+}
+
+/**
+ * msg_to_mgmt_async - send message to mgmt without response
+ * @pf_to_mgmt: PF to MGMT channel
+ * @mod: module in the chip that will get the message
+ * @cmd: command of the message
+ * @buf_in: the msg data
+ * @in_size: the msg data length
+ * @direction: the direction of the original message
+ * @resp_msg_id: msg id to respond to, when sending a response
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int msg_to_mgmt_async(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ enum hinic_mod_type mod, u8 cmd,
+ u8 *buf_in, u16 in_size,
+ enum mgmt_direction_type direction,
+ u16 resp_msg_id)
+{
+ int err;
+
+ /* Lock the sync_msg_buf */
+ down(&pf_to_mgmt->sync_msg_lock);
+
+ err = send_msg_to_mgmt(pf_to_mgmt, mod, cmd, buf_in, in_size,
+ MSG_NO_ACK, direction, resp_msg_id);
+
+ up(&pf_to_mgmt->sync_msg_lock);
+ return err;
+}
+
+/**
+ * hinic_msg_to_mgmt - send message to mgmt
+ * @pf_to_mgmt: PF to MGMT channel
+ * @mod: module in the chip that will get the message
+ * @cmd: command of the message
+ * @buf_in: the msg data
+ * @in_size: the msg data length
+ * @buf_out: response
+ * @out_size: returned response length
+ * @sync: sync msg or async msg
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ enum hinic_mod_type mod, u8 cmd,
+ void *buf_in, u16 in_size, void *buf_out, u16 *out_size,
+ enum hinic_mgmt_msg_type sync)
+{
+ struct hinic_hwif *hwif = pf_to_mgmt->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ if (sync != HINIC_MGMT_MSG_SYNC) {
+ dev_err(&pdev->dev, "Invalid MGMT msg type\n");
+ return -EINVAL;
+ }
+
+ if (!MSG_SZ_IS_VALID(in_size)) {
+ dev_err(&pdev->dev, "Invalid MGMT msg buffer size\n");
+ return -EINVAL;
+ }
+
+ return msg_to_mgmt_sync(pf_to_mgmt, mod, cmd, buf_in, in_size,
+ buf_out, out_size, MGMT_DIRECT_SEND,
+ MSG_NOT_RESP);
+}
+
+/**
+ * mgmt_recv_msg_handler - handler for message from mgmt cpu
+ * @pf_to_mgmt: PF to MGMT channel
+ * @recv_msg: received message details
+ **/
+static void mgmt_recv_msg_handler(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ struct hinic_recv_msg *recv_msg)
+{
+ struct hinic_hwif *hwif = pf_to_mgmt->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ u8 *buf_out = recv_msg->buf_out;
+ struct hinic_mgmt_cb *mgmt_cb;
+ unsigned long cb_state;
+ u16 out_size = 0;
+
+ if (recv_msg->mod >= HINIC_MOD_MAX) {
+ dev_err(&pdev->dev, "Unknown MGMT MSG module = %d\n",
+ recv_msg->mod);
+ return;
+ }
+
+ mgmt_cb = &pf_to_mgmt->mgmt_cb[recv_msg->mod];
+
+ cb_state = cmpxchg(&mgmt_cb->state,
+ HINIC_MGMT_CB_ENABLED,
+ HINIC_MGMT_CB_ENABLED | HINIC_MGMT_CB_RUNNING);
+
+ if (cb_state == HINIC_MGMT_CB_ENABLED && mgmt_cb->cb)
+ mgmt_cb->cb(mgmt_cb->handle, recv_msg->cmd,
+ recv_msg->msg, recv_msg->msg_len,
+ buf_out, &out_size);
+ else
+ dev_err(&pdev->dev, "No MGMT msg handler, mod = %d\n",
+ recv_msg->mod);
+
+ mgmt_cb->state &= ~HINIC_MGMT_CB_RUNNING;
+
+ if (!recv_msg->async_mgmt_to_pf)
+ /* MGMT sent sync msg, send the response */
+ msg_to_mgmt_async(pf_to_mgmt, recv_msg->mod, recv_msg->cmd,
+ buf_out, out_size, MGMT_RESP,
+ recv_msg->msg_id);
+}
+
+/**
+ * mgmt_resp_msg_handler - handler for a response message from mgmt cpu
+ * @pf_to_mgmt: PF to MGMT channel
+ * @recv_msg: received message details
+ **/
+static void mgmt_resp_msg_handler(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ struct hinic_recv_msg *recv_msg)
+{
+ wmb(); /* ensure the message is fully written before completing */
+
+ complete(&recv_msg->recv_done);
+}
+
+/**
+ * recv_mgmt_msg_handler - handler for a message from mgmt cpu
+ * @pf_to_mgmt: PF to MGMT channel
+ * @header: the header of the message
+ * @recv_msg: received message details
+ **/
+static void recv_mgmt_msg_handler(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ u64 *header, struct hinic_recv_msg *recv_msg)
+{
+ struct hinic_hwif *hwif = pf_to_mgmt->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int seq_id, seg_len;
+ u8 *msg_body;
+
+ seq_id = HINIC_MSG_HEADER_GET(*header, SEQID);
+ seg_len = HINIC_MSG_HEADER_GET(*header, SEG_LEN);
+
+ if (seq_id >= (MAX_MSG_LEN / SEGMENT_LEN)) {
+ dev_err(&pdev->dev, "recv big mgmt msg\n");
+ return;
+ }
+
+ msg_body = (u8 *)header + sizeof(*header);
+ memcpy(recv_msg->msg + seq_id * SEGMENT_LEN, msg_body, seg_len);
+
+ if (!HINIC_MSG_HEADER_GET(*header, LAST))
+ return;
+
+ recv_msg->cmd = HINIC_MSG_HEADER_GET(*header, CMD);
+ recv_msg->mod = HINIC_MSG_HEADER_GET(*header, MODULE);
+ recv_msg->async_mgmt_to_pf = HINIC_MSG_HEADER_GET(*header,
+ ASYNC_MGMT_TO_PF);
+ recv_msg->msg_len = HINIC_MSG_HEADER_GET(*header, MSG_LEN);
+ recv_msg->msg_id = HINIC_MSG_HEADER_GET(*header, MSG_ID);
+
+ if (HINIC_MSG_HEADER_GET(*header, DIRECTION) == MGMT_RESP)
+ mgmt_resp_msg_handler(pf_to_mgmt, recv_msg);
+ else
+ mgmt_recv_msg_handler(pf_to_mgmt, recv_msg);
+}
+
+/**
+ * mgmt_msg_aeqe_handler - handler for a mgmt message event
+ * @handle: PF to MGMT channel
+ * @data: the header of the message
+ * @size: unused
+ **/
+static void mgmt_msg_aeqe_handler(void *handle, void *data, u8 size)
+{
+ struct hinic_pf_to_mgmt *pf_to_mgmt = handle;
+ struct hinic_recv_msg *recv_msg;
+ u64 *header = (u64 *)data;
+
+ recv_msg = HINIC_MSG_HEADER_GET(*header, DIRECTION) ==
+ MGMT_DIRECT_SEND ?
+ &pf_to_mgmt->recv_msg_from_mgmt :
+ &pf_to_mgmt->recv_resp_msg_from_mgmt;
+
+ recv_mgmt_msg_handler(pf_to_mgmt, header, recv_msg);
+}
+
+/**
+ * alloc_recv_msg - allocate receive message memory
+ * @pf_to_mgmt: PF to MGMT channel
+ * @recv_msg: pointer that will hold the allocated data
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int alloc_recv_msg(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ struct hinic_recv_msg *recv_msg)
+{
+ struct hinic_hwif *hwif = pf_to_mgmt->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ recv_msg->msg = devm_kzalloc(&pdev->dev, MAX_PF_MGMT_BUF_SIZE,
+ GFP_KERNEL);
+ if (!recv_msg->msg)
+ return -ENOMEM;
+
+ recv_msg->buf_out = devm_kzalloc(&pdev->dev, MAX_PF_MGMT_BUF_SIZE,
+ GFP_KERNEL);
+ if (!recv_msg->buf_out)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * alloc_msg_buf - allocate all the message buffers of PF to MGMT channel
+ * @pf_to_mgmt: PF to MGMT channel
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int alloc_msg_buf(struct hinic_pf_to_mgmt *pf_to_mgmt)
+{
+ struct hinic_hwif *hwif = pf_to_mgmt->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int err;
+
+ err = alloc_recv_msg(pf_to_mgmt,
+ &pf_to_mgmt->recv_msg_from_mgmt);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate recv msg\n");
+ return err;
+ }
+
+ err = alloc_recv_msg(pf_to_mgmt,
+ &pf_to_mgmt->recv_resp_msg_from_mgmt);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate resp recv msg\n");
+ return err;
+ }
+
+ pf_to_mgmt->sync_msg_buf = devm_kzalloc(&pdev->dev,
+ MAX_PF_MGMT_BUF_SIZE,
+ GFP_KERNEL);
+ if (!pf_to_mgmt->sync_msg_buf)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * hinic_pf_to_mgmt_init - initialize PF to MGMT channel
+ * @pf_to_mgmt: PF to MGMT channel
+ * @hwif: HW interface the PF to MGMT will use for accessing HW
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_pf_to_mgmt_init(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ struct hinic_hwif *hwif)
+{
+ struct hinic_pfhwdev *pfhwdev = mgmt_to_pfhwdev(pf_to_mgmt);
+ struct hinic_hwdev *hwdev = &pfhwdev->hwdev;
+ struct pci_dev *pdev = hwif->pdev;
+ int err;
+
+ pf_to_mgmt->hwif = hwif;
+
+ sema_init(&pf_to_mgmt->sync_msg_lock, 1);
+ pf_to_mgmt->sync_msg_id = 0;
+
+ err = alloc_msg_buf(pf_to_mgmt);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate msg buffers\n");
+ return err;
+ }
+
+ err = hinic_api_cmd_init(pf_to_mgmt->cmd_chain, hwif);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize cmd chains\n");
+ return err;
+ }
+
+ hinic_aeq_register_hw_cb(&hwdev->aeqs, HINIC_MSG_FROM_MGMT_CPU,
+ pf_to_mgmt,
+ mgmt_msg_aeqe_handler);
+ return 0;
+}
+
+/**
+ * hinic_pf_to_mgmt_free - free PF to MGMT channel
+ * @pf_to_mgmt: PF to MGMT channel
+ **/
+void hinic_pf_to_mgmt_free(struct hinic_pf_to_mgmt *pf_to_mgmt)
+{
+ struct hinic_pfhwdev *pfhwdev = mgmt_to_pfhwdev(pf_to_mgmt);
+ struct hinic_hwdev *hwdev = &pfhwdev->hwdev;
+
+ hinic_aeq_unregister_hw_cb(&hwdev->aeqs, HINIC_MSG_FROM_MGMT_CPU);
+ hinic_api_cmd_free(pf_to_mgmt->cmd_chain);
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h
new file mode 100644
index 0000000..320711e
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h
@@ -0,0 +1,153 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_HW_MGMT_H
+#define HINIC_HW_MGMT_H
+
+#include <linux/types.h>
+#include <linux/semaphore.h>
+#include <linux/completion.h>
+#include <linux/bitops.h>
+
+#include "hinic_hw_if.h"
+#include "hinic_hw_api_cmd.h"
+
+#define HINIC_MSG_HEADER_MSG_LEN_SHIFT 0
+#define HINIC_MSG_HEADER_MODULE_SHIFT 11
+#define HINIC_MSG_HEADER_SEG_LEN_SHIFT 16
+#define HINIC_MSG_HEADER_NO_ACK_SHIFT 22
+#define HINIC_MSG_HEADER_ASYNC_MGMT_TO_PF_SHIFT 23
+#define HINIC_MSG_HEADER_SEQID_SHIFT 24
+#define HINIC_MSG_HEADER_LAST_SHIFT 30
+#define HINIC_MSG_HEADER_DIRECTION_SHIFT 31
+#define HINIC_MSG_HEADER_CMD_SHIFT 32
+#define HINIC_MSG_HEADER_ZEROS_SHIFT 40
+#define HINIC_MSG_HEADER_PCI_INTF_SHIFT 48
+#define HINIC_MSG_HEADER_PF_IDX_SHIFT 50
+#define HINIC_MSG_HEADER_MSG_ID_SHIFT 54
+
+#define HINIC_MSG_HEADER_MSG_LEN_MASK 0x7FF
+#define HINIC_MSG_HEADER_MODULE_MASK 0x1F
+#define HINIC_MSG_HEADER_SEG_LEN_MASK 0x3F
+#define HINIC_MSG_HEADER_NO_ACK_MASK 0x1
+#define HINIC_MSG_HEADER_ASYNC_MGMT_TO_PF_MASK 0x1
+#define HINIC_MSG_HEADER_SEQID_MASK 0x3F
+#define HINIC_MSG_HEADER_LAST_MASK 0x1
+#define HINIC_MSG_HEADER_DIRECTION_MASK 0x1
+#define HINIC_MSG_HEADER_CMD_MASK 0xFF
+#define HINIC_MSG_HEADER_ZEROS_MASK 0xFF
+#define HINIC_MSG_HEADER_PCI_INTF_MASK 0x3
+#define HINIC_MSG_HEADER_PF_IDX_MASK 0xF
+#define HINIC_MSG_HEADER_MSG_ID_MASK 0x3FF
+
+#define HINIC_MSG_HEADER_SET(val, member) \
+ ((u64)((val) & HINIC_MSG_HEADER_##member##_MASK) << \
+ HINIC_MSG_HEADER_##member##_SHIFT)
+
+#define HINIC_MSG_HEADER_GET(val, member) \
+ (((val) >> HINIC_MSG_HEADER_##member##_SHIFT) & \
+ HINIC_MSG_HEADER_##member##_MASK)
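+
+/*
+ * Illustrative round trip through the accessors above (hdr is a local
+ * example variable, not part of this interface):
+ *
+ * u64 hdr = HINIC_MSG_HEADER_SET(128, MSG_LEN) |
+ * HINIC_MSG_HEADER_SET(HINIC_MOD_COMM, MODULE);
+ * u16 len = HINIC_MSG_HEADER_GET(hdr, MSG_LEN); // len == 128
+ */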
+
+enum hinic_mgmt_msg_type {
+ HINIC_MGMT_MSG_SYNC = 1,
+};
+
+enum hinic_cfg_cmd {
+ HINIC_CFG_NIC_CAP = 0,
+};
+
+enum hinic_comm_cmd {
+ HINIC_COMM_CMD_IO_STATUS_GET = 0x3,
+
+ HINIC_COMM_CMD_CMDQ_CTXT_SET = 0x10,
+ HINIC_COMM_CMD_CMDQ_CTXT_GET = 0x11,
+
+ HINIC_COMM_CMD_HWCTXT_SET = 0x12,
+ HINIC_COMM_CMD_HWCTXT_GET = 0x13,
+
+ HINIC_COMM_CMD_SQ_HI_CI_SET = 0x14,
+
+ HINIC_COMM_CMD_RES_STATE_SET = 0x24,
+
+ HINIC_COMM_CMD_IO_RES_CLEAR = 0x29,
+
+ HINIC_COMM_CMD_MAX = 0x32,
+};
+
+enum hinic_mgmt_cb_state {
+ HINIC_MGMT_CB_ENABLED = BIT(0),
+ HINIC_MGMT_CB_RUNNING = BIT(1),
+};
+
+struct hinic_recv_msg {
+ u8 *msg;
+ u8 *buf_out;
+
+ struct completion recv_done;
+
+ u16 cmd;
+ enum hinic_mod_type mod;
+ int async_mgmt_to_pf;
+
+ u16 msg_len;
+ u16 msg_id;
+};
+
+struct hinic_mgmt_cb {
+ void (*cb)(void *handle, u8 cmd,
+ void *buf_in, u16 in_size,
+ void *buf_out, u16 *out_size);
+
+ void *handle;
+ unsigned long state;
+};
+
+struct hinic_pf_to_mgmt {
+ struct hinic_hwif *hwif;
+
+ struct semaphore sync_msg_lock;
+ u16 sync_msg_id;
+ u8 *sync_msg_buf;
+
+ struct hinic_recv_msg recv_resp_msg_from_mgmt;
+ struct hinic_recv_msg recv_msg_from_mgmt;
+
+ struct hinic_api_cmd_chain *cmd_chain[HINIC_API_CMD_MAX];
+
+ struct hinic_mgmt_cb mgmt_cb[HINIC_MOD_MAX];
+};
+
+void hinic_register_mgmt_msg_cb(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ enum hinic_mod_type mod,
+ void *handle,
+ void (*callback)(void *handle,
+ u8 cmd, void *buf_in,
+ u16 in_size, void *buf_out,
+ u16 *out_size));
+
+void hinic_unregister_mgmt_msg_cb(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ enum hinic_mod_type mod);
+
+int hinic_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ enum hinic_mod_type mod, u8 cmd,
+ void *buf_in, u16 in_size, void *buf_out, u16 *out_size,
+ enum hinic_mgmt_msg_type sync);
+
+int hinic_pf_to_mgmt_init(struct hinic_pf_to_mgmt *pf_to_mgmt,
+ struct hinic_hwif *hwif);
+
+void hinic_pf_to_mgmt_free(struct hinic_pf_to_mgmt *pf_to_mgmt);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
new file mode 100644
index 0000000..b9db6d6
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
@@ -0,0 +1,887 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/errno.h>
+#include <linux/sizes.h>
+#include <linux/atomic.h>
+#include <linux/skbuff.h>
+#include <linux/io.h>
+#include <asm/barrier.h>
+#include <asm/byteorder.h>
+
+#include "hinic_common.h"
+#include "hinic_hw_if.h"
+#include "hinic_hw_wqe.h"
+#include "hinic_hw_wq.h"
+#include "hinic_hw_qp_ctxt.h"
+#include "hinic_hw_qp.h"
+#include "hinic_hw_io.h"
+
+#define SQ_DB_OFF SZ_2K
+
+/* Number of cache lines to prefetch until the threshold is reached */
+#define WQ_PREFETCH_MAX 2
+/* Number of cache lines to prefetch after the threshold is reached */
+#define WQ_PREFETCH_MIN 1
+/* Prefetch threshold */
+#define WQ_PREFETCH_THRESHOLD 256
+
+/* sizes of the SQ/RQ ctxt */
+#define Q_CTXT_SIZE 48
+#define CTXT_RSVD 240
+
+#define SQ_CTXT_OFFSET(max_sqs, max_rqs, q_id) \
+ (((max_rqs) + (max_sqs)) * CTXT_RSVD + (q_id) * Q_CTXT_SIZE)
+
+#define RQ_CTXT_OFFSET(max_sqs, max_rqs, q_id) \
+ (((max_rqs) + (max_sqs)) * CTXT_RSVD + \
+ ((max_sqs) + (q_id)) * Q_CTXT_SIZE)
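+
+/*
+ * Worked example for the offsets above: with max_sqs = max_rqs = 16 and
+ * q_id = 2, SQ_CTXT_OFFSET = 32 * 240 + 2 * 48 = 7776 bytes and
+ * RQ_CTXT_OFFSET = 32 * 240 + 18 * 48 = 8544 bytes.
+ */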
+
+#define SIZE_16BYTES(size) (ALIGN(size, 16) >> 4)
+#define SIZE_8BYTES(size) (ALIGN(size, 8) >> 3)
+#define SECT_SIZE_FROM_8BYTES(size) ((size) << 3)
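+
+/*
+ * Example: a 20-byte section is SIZE_8BYTES(20) = ALIGN(20, 8) >> 3 = 3
+ * units, and SECT_SIZE_FROM_8BYTES(3) gives 24 bytes back; sizes are
+ * rounded up to whole units.
+ */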
+
+#define SQ_DB_PI_HI_SHIFT 8
+#define SQ_DB_PI_HI(prod_idx) ((prod_idx) >> SQ_DB_PI_HI_SHIFT)
+
+#define SQ_DB_PI_LOW_MASK 0xFF
+#define SQ_DB_PI_LOW(prod_idx) ((prod_idx) & SQ_DB_PI_LOW_MASK)
+
+#define SQ_DB_ADDR(sq, pi) ((u64 *)((sq)->db_base) + SQ_DB_PI_LOW(pi))
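+
+/*
+ * Example: prod_idx = 0x1A7 splits into SQ_DB_PI_HI(0x1A7) = 0x1, which
+ * travels in the doorbell payload, and SQ_DB_PI_LOW(0x1A7) = 0xA7, which
+ * selects the doorbell address via SQ_DB_ADDR().
+ */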
+
+#define SQ_MASKED_IDX(sq, idx) ((idx) & (sq)->wq->mask)
+#define RQ_MASKED_IDX(rq, idx) ((idx) & (rq)->wq->mask)
+
+#define TX_MAX_MSS_DEFAULT 0x3E00
+
+enum sq_wqe_type {
+ SQ_NORMAL_WQE = 0,
+};
+
+enum rq_completion_fmt {
+ RQ_COMPLETE_SGE = 1
+};
+
+void hinic_qp_prepare_header(struct hinic_qp_ctxt_header *qp_ctxt_hdr,
+ enum hinic_qp_ctxt_type ctxt_type,
+ u16 num_queues, u16 max_queues)
+{
+ u16 max_sqs = max_queues;
+ u16 max_rqs = max_queues;
+
+ qp_ctxt_hdr->num_queues = num_queues;
+ qp_ctxt_hdr->queue_type = ctxt_type;
+
+ if (ctxt_type == HINIC_QP_CTXT_TYPE_SQ)
+ qp_ctxt_hdr->addr_offset = SQ_CTXT_OFFSET(max_sqs, max_rqs, 0);
+ else
+ qp_ctxt_hdr->addr_offset = RQ_CTXT_OFFSET(max_sqs, max_rqs, 0);
+
+ qp_ctxt_hdr->addr_offset = SIZE_16BYTES(qp_ctxt_hdr->addr_offset);
+
+ hinic_cpu_to_be32(qp_ctxt_hdr, sizeof(*qp_ctxt_hdr));
+}
+
+void hinic_sq_prepare_ctxt(struct hinic_sq_ctxt *sq_ctxt,
+ struct hinic_sq *sq, u16 global_qid)
+{
+ u32 wq_page_pfn_hi, wq_page_pfn_lo, wq_block_pfn_hi, wq_block_pfn_lo;
+ u64 wq_page_addr, wq_page_pfn, wq_block_pfn;
+ u16 pi_start, ci_start;
+ struct hinic_wq *wq;
+
+ wq = sq->wq;
+ ci_start = atomic_read(&wq->cons_idx);
+ pi_start = atomic_read(&wq->prod_idx);
+
+ /* Read the first page paddr from the WQ page paddr ptrs */
+ wq_page_addr = be64_to_cpu(*wq->block_vaddr);
+
+ wq_page_pfn = HINIC_WQ_PAGE_PFN(wq_page_addr);
+ wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
+ wq_page_pfn_lo = lower_32_bits(wq_page_pfn);
+
+ wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
+ wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
+ wq_block_pfn_lo = lower_32_bits(wq_block_pfn);
+
+ sq_ctxt->ceq_attr = HINIC_SQ_CTXT_CEQ_ATTR_SET(global_qid,
+ GLOBAL_SQ_ID) |
+ HINIC_SQ_CTXT_CEQ_ATTR_SET(0, EN);
+
+ sq_ctxt->ci_wrapped = HINIC_SQ_CTXT_CI_SET(ci_start, IDX) |
+ HINIC_SQ_CTXT_CI_SET(1, WRAPPED);
+
+ sq_ctxt->wq_hi_pfn_pi =
+ HINIC_SQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi, HI_PFN) |
+ HINIC_SQ_CTXT_WQ_PAGE_SET(pi_start, PI);
+
+ sq_ctxt->wq_lo_pfn = wq_page_pfn_lo;
+
+ sq_ctxt->pref_cache =
+ HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
+ HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
+ HINIC_SQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);
+
+ sq_ctxt->pref_wrapped = 1;
+
+ sq_ctxt->pref_wq_hi_pfn_ci =
+ HINIC_SQ_CTXT_PREF_SET(ci_start, CI) |
+ HINIC_SQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_HI_PFN);
+
+ sq_ctxt->pref_wq_lo_pfn = wq_page_pfn_lo;
+
+ sq_ctxt->wq_block_hi_pfn =
+ HINIC_SQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, HI_PFN);
+
+ sq_ctxt->wq_block_lo_pfn = wq_block_pfn_lo;
+
+ hinic_cpu_to_be32(sq_ctxt, sizeof(*sq_ctxt));
+}
+
+void hinic_rq_prepare_ctxt(struct hinic_rq_ctxt *rq_ctxt,
+ struct hinic_rq *rq, u16 global_qid)
+{
+ u32 wq_page_pfn_hi, wq_page_pfn_lo, wq_block_pfn_hi, wq_block_pfn_lo;
+ u64 wq_page_addr, wq_page_pfn, wq_block_pfn;
+ u16 pi_start, ci_start;
+ struct hinic_wq *wq;
+
+ wq = rq->wq;
+ ci_start = atomic_read(&wq->cons_idx);
+ pi_start = atomic_read(&wq->prod_idx);
+
+ /* Read the first page paddr from the WQ page paddr ptrs */
+ wq_page_addr = be64_to_cpu(*wq->block_vaddr);
+
+ wq_page_pfn = HINIC_WQ_PAGE_PFN(wq_page_addr);
+ wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
+ wq_page_pfn_lo = lower_32_bits(wq_page_pfn);
+
+ wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
+ wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
+ wq_block_pfn_lo = lower_32_bits(wq_block_pfn);
+
+ rq_ctxt->ceq_attr = HINIC_RQ_CTXT_CEQ_ATTR_SET(0, EN) |
+ HINIC_RQ_CTXT_CEQ_ATTR_SET(1, WRAPPED);
+
+ rq_ctxt->pi_intr_attr = HINIC_RQ_CTXT_PI_SET(pi_start, IDX) |
+ HINIC_RQ_CTXT_PI_SET(rq->msix_entry, INTR);
+
+ rq_ctxt->wq_hi_pfn_ci = HINIC_RQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi,
+ HI_PFN) |
+ HINIC_RQ_CTXT_WQ_PAGE_SET(ci_start, CI);
+
+ rq_ctxt->wq_lo_pfn = wq_page_pfn_lo;
+
+ rq_ctxt->pref_cache =
+ HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MIN, CACHE_MIN) |
+ HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_MAX, CACHE_MAX) |
+ HINIC_RQ_CTXT_PREF_SET(WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD);
+
+ rq_ctxt->pref_wrapped = 1;
+
+ rq_ctxt->pref_wq_hi_pfn_ci =
+ HINIC_RQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_HI_PFN) |
+ HINIC_RQ_CTXT_PREF_SET(ci_start, CI);
+
+ rq_ctxt->pref_wq_lo_pfn = wq_page_pfn_lo;
+
+ rq_ctxt->pi_paddr_hi = upper_32_bits(rq->pi_dma_addr);
+ rq_ctxt->pi_paddr_lo = lower_32_bits(rq->pi_dma_addr);
+
+ rq_ctxt->wq_block_hi_pfn =
+ HINIC_RQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, HI_PFN);
+
+ rq_ctxt->wq_block_lo_pfn = wq_block_pfn_lo;
+
+ hinic_cpu_to_be32(rq_ctxt, sizeof(*rq_ctxt));
+}
+
+/**
+ * alloc_sq_skb_arr - allocate sq array for saved skb
+ * @sq: HW Send Queue
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int alloc_sq_skb_arr(struct hinic_sq *sq)
+{
+ struct hinic_wq *wq = sq->wq;
+ size_t skb_arr_size;
+
+ skb_arr_size = wq->q_depth * sizeof(*sq->saved_skb);
+ sq->saved_skb = vzalloc(skb_arr_size);
+ if (!sq->saved_skb)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * free_sq_skb_arr - free sq array for saved skb
+ * @sq: HW Send Queue
+ **/
+static void free_sq_skb_arr(struct hinic_sq *sq)
+{
+ vfree(sq->saved_skb);
+}
+
+/**
+ * alloc_rq_skb_arr - allocate rq array for saved skb
+ * @rq: HW Receive Queue
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int alloc_rq_skb_arr(struct hinic_rq *rq)
+{
+ struct hinic_wq *wq = rq->wq;
+ size_t skb_arr_size;
+
+ skb_arr_size = wq->q_depth * sizeof(*rq->saved_skb);
+ rq->saved_skb = vzalloc(skb_arr_size);
+ if (!rq->saved_skb)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * free_rq_skb_arr - free rq array for saved skb
+ * @rq: HW Receive Queue
+ **/
+static void free_rq_skb_arr(struct hinic_rq *rq)
+{
+ vfree(rq->saved_skb);
+}
+
+/**
+ * hinic_init_sq - Initialize HW Send Queue
+ * @sq: HW Send Queue
+ * @hwif: HW Interface for accessing HW
+ * @wq: Work Queue for the data of the SQ
+ * @entry: msix entry for sq
+ * @ci_addr: address for reading the current HW consumer index
+ * @ci_dma_addr: dma address for reading the current HW consumer index
+ * @db_base: doorbell base address
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_init_sq(struct hinic_sq *sq, struct hinic_hwif *hwif,
+ struct hinic_wq *wq, struct msix_entry *entry,
+ void *ci_addr, dma_addr_t ci_dma_addr,
+ void __iomem *db_base)
+{
+ sq->hwif = hwif;
+
+ sq->wq = wq;
+
+ sq->irq = entry->vector;
+ sq->msix_entry = entry->entry;
+
+ sq->hw_ci_addr = ci_addr;
+ sq->hw_ci_dma_addr = ci_dma_addr;
+
+ sq->db_base = db_base + SQ_DB_OFF;
+
+ return alloc_sq_skb_arr(sq);
+}
+
+/**
+ * hinic_clean_sq - Clean HW Send Queue's Resources
+ * @sq: Send Queue
+ **/
+void hinic_clean_sq(struct hinic_sq *sq)
+{
+ free_sq_skb_arr(sq);
+}
+
+/**
+ * alloc_rq_cqe - allocate rq completion queue elements
+ * @rq: HW Receive Queue
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int alloc_rq_cqe(struct hinic_rq *rq)
+{
+ struct hinic_hwif *hwif = rq->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ size_t cqe_dma_size, cqe_size;
+ struct hinic_wq *wq = rq->wq;
+ int j, i;
+
+ cqe_size = wq->q_depth * sizeof(*rq->cqe);
+ rq->cqe = vzalloc(cqe_size);
+ if (!rq->cqe)
+ return -ENOMEM;
+
+ cqe_dma_size = wq->q_depth * sizeof(*rq->cqe_dma);
+ rq->cqe_dma = vzalloc(cqe_dma_size);
+ if (!rq->cqe_dma)
+ goto err_cqe_dma_arr_alloc;
+
+ for (i = 0; i < wq->q_depth; i++) {
+ rq->cqe[i] = dma_zalloc_coherent(&pdev->dev,
+ sizeof(*rq->cqe[i]),
+ &rq->cqe_dma[i], GFP_KERNEL);
+ if (!rq->cqe[i])
+ goto err_cqe_alloc;
+ }
+
+ return 0;
+
+err_cqe_alloc:
+ for (j = 0; j < i; j++)
+ dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[j]), rq->cqe[j],
+ rq->cqe_dma[j]);
+
+ vfree(rq->cqe_dma);
+
+err_cqe_dma_arr_alloc:
+ vfree(rq->cqe);
+ return -ENOMEM;
+}
+
+/**
+ * free_rq_cqe - free rq completion queue elements
+ * @rq: HW Receive Queue
+ **/
+static void free_rq_cqe(struct hinic_rq *rq)
+{
+ struct hinic_hwif *hwif = rq->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_wq *wq = rq->wq;
+ int i;
+
+ for (i = 0; i < wq->q_depth; i++)
+ dma_free_coherent(&pdev->dev, sizeof(*rq->cqe[i]), rq->cqe[i],
+ rq->cqe_dma[i]);
+
+ vfree(rq->cqe_dma);
+ vfree(rq->cqe);
+}
+
+/**
+ * hinic_init_rq - Initialize HW Receive Queue
+ * @rq: HW Receive Queue
+ * @hwif: HW Interface for accessing HW
+ * @wq: Work Queue for the data of the RQ
+ * @entry: msix entry for rq
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_init_rq(struct hinic_rq *rq, struct hinic_hwif *hwif,
+ struct hinic_wq *wq, struct msix_entry *entry)
+{
+ struct pci_dev *pdev = hwif->pdev;
+ size_t pi_size;
+ int err;
+
+ rq->hwif = hwif;
+
+ rq->wq = wq;
+
+ rq->irq = entry->vector;
+ rq->msix_entry = entry->entry;
+
+ rq->buf_sz = HINIC_RX_BUF_SZ;
+
+ err = alloc_rq_skb_arr(rq);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate rq priv data\n");
+ return err;
+ }
+
+ err = alloc_rq_cqe(rq);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate rq cqe\n");
+ goto err_alloc_rq_cqe;
+ }
+
+ /* HW requirement: must be at least 32 bits */
+ pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
+ rq->pi_virt_addr = dma_zalloc_coherent(&pdev->dev, pi_size,
+ &rq->pi_dma_addr, GFP_KERNEL);
+ if (!rq->pi_virt_addr) {
+ dev_err(&pdev->dev, "Failed to allocate PI address\n");
+ err = -ENOMEM;
+ goto err_pi_virt;
+ }
+
+ return 0;
+
+err_pi_virt:
+ free_rq_cqe(rq);
+
+err_alloc_rq_cqe:
+ free_rq_skb_arr(rq);
+ return err;
+}
+
+/**
+ * hinic_clean_rq - Clean HW Receive Queue's Resources
+ * @rq: HW Receive Queue
+ **/
+void hinic_clean_rq(struct hinic_rq *rq)
+{
+ struct hinic_hwif *hwif = rq->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ size_t pi_size;
+
+ pi_size = ALIGN(sizeof(*rq->pi_virt_addr), sizeof(u32));
+ dma_free_coherent(&pdev->dev, pi_size, rq->pi_virt_addr,
+ rq->pi_dma_addr);
+
+ free_rq_cqe(rq);
+ free_rq_skb_arr(rq);
+}
+
+/**
+ * hinic_get_sq_free_wqebbs - return number of free wqebbs for use
+ * @sq: send queue
+ *
+ * Return number of free wqebbs
+ **/
+int hinic_get_sq_free_wqebbs(struct hinic_sq *sq)
+{
+ struct hinic_wq *wq = sq->wq;
+
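+ /* one wqebb is kept in reserve, hence the -1 */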
+ return atomic_read(&wq->delta) - 1;
+}
+
+/**
+ * hinic_get_rq_free_wqebbs - return number of free wqebbs for use
+ * @rq: recv queue
+ *
+ * Return number of free wqebbs
+ **/
+int hinic_get_rq_free_wqebbs(struct hinic_rq *rq)
+{
+ struct hinic_wq *wq = rq->wq;
+
+ return atomic_read(&wq->delta) - 1;
+}
+
+static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx,
+ int nr_descs)
+{
+ u32 ctrl_size, task_size, bufdesc_size;
+
+ ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
+ task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
+ bufdesc_size = nr_descs * sizeof(struct hinic_sq_bufdesc);
+ bufdesc_size = SIZE_8BYTES(bufdesc_size);
+
+ ctrl->ctrl_info = HINIC_SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
+ HINIC_SQ_CTRL_SET(task_size, TASKSECT_LEN) |
+ HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
+ HINIC_SQ_CTRL_SET(ctrl_size, LEN);
+
+ ctrl->queue_info = HINIC_SQ_CTRL_SET(TX_MAX_MSS_DEFAULT,
+ QUEUE_INFO_MSS);
+}
+
+static void sq_prepare_task(struct hinic_sq_task *task)
+{
+ task->pkt_info0 =
+ HINIC_SQ_TASK_INFO0_SET(0, L2HDR_LEN) |
+ HINIC_SQ_TASK_INFO0_SET(HINIC_L4_OFF_DISABLE, L4_OFFLOAD) |
+ HINIC_SQ_TASK_INFO0_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
+ INNER_L3TYPE) |
+ HINIC_SQ_TASK_INFO0_SET(HINIC_VLAN_OFF_DISABLE,
+ VLAN_OFFLOAD) |
+ HINIC_SQ_TASK_INFO0_SET(HINIC_PKT_NOT_PARSED, PARSE_FLAG);
+
+ task->pkt_info1 =
+ HINIC_SQ_TASK_INFO1_SET(HINIC_MEDIA_UNKNOWN, MEDIA_TYPE) |
+ HINIC_SQ_TASK_INFO1_SET(0, INNER_L4_LEN) |
+ HINIC_SQ_TASK_INFO1_SET(0, INNER_L3_LEN);
+
+ task->pkt_info2 =
+ HINIC_SQ_TASK_INFO2_SET(0, TUNNEL_L4_LEN) |
+ HINIC_SQ_TASK_INFO2_SET(0, OUTER_L3_LEN) |
+ HINIC_SQ_TASK_INFO2_SET(HINIC_TUNNEL_L4TYPE_UNKNOWN,
+ TUNNEL_L4TYPE) |
+ HINIC_SQ_TASK_INFO2_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
+ OUTER_L3TYPE);
+
+ task->ufo_v6_identify = 0;
+
+ task->pkt_info4 = HINIC_SQ_TASK_INFO4_SET(HINIC_L2TYPE_ETH, L2TYPE);
+
+ task->zero_pad = 0;
+}
+
+/**
+ * hinic_sq_prepare_wqe - prepare wqe before insert to the queue
+ * @sq: send queue
+ * @prod_idx: pi value
+ * @sq_wqe: wqe to prepare
+ * @sges: sges holding the send buffer addresses for the wqe
+ * @nr_sges: number of sges
+ **/
+void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
+ struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
+ int nr_sges)
+{
+ int i;
+
+ sq_prepare_ctrl(&sq_wqe->ctrl, prod_idx, nr_sges);
+
+ sq_prepare_task(&sq_wqe->task);
+
+ for (i = 0; i < nr_sges; i++)
+ sq_wqe->buf_descs[i].sge = sges[i];
+}
+
+/**
+ * sq_prepare_db - prepare doorbell to write
+ * @sq: send queue
+ * @prod_idx: pi value for the doorbell
+ * @cos: cos of the doorbell
+ *
+ * Return db value
+ **/
+static u32 sq_prepare_db(struct hinic_sq *sq, u16 prod_idx, unsigned int cos)
+{
+ struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
+ u8 hi_prod_idx = SQ_DB_PI_HI(SQ_MASKED_IDX(sq, prod_idx));
+
+ /* Data should be written to HW in Big Endian Format */
+ return cpu_to_be32(HINIC_SQ_DB_INFO_SET(hi_prod_idx, PI_HI) |
+ HINIC_SQ_DB_INFO_SET(HINIC_DB_SQ_TYPE, TYPE) |
+ HINIC_SQ_DB_INFO_SET(HINIC_DATA_PATH, PATH) |
+ HINIC_SQ_DB_INFO_SET(cos, COS) |
+ HINIC_SQ_DB_INFO_SET(qp->q_id, QID));
+}
+
+/**
+ * hinic_sq_write_db - write doorbell
+ * @sq: send queue
+ * @prod_idx: pi value for the doorbell
+ * @wqe_size: wqe size
+ * @cos: cos of the wqe
+ **/
+void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
+ unsigned int cos)
+{
+ struct hinic_wq *wq = sq->wq;
+
+ /* advance prod_idx past the wqe just written */
+ prod_idx += ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+
+ wmb(); /* Write all before the doorbell */
+
+ writel(sq_prepare_db(sq, prod_idx, cos), SQ_DB_ADDR(sq, prod_idx));
+}
+
+/**
+ * hinic_sq_get_wqe - get wqe ptr in the current pi and update the pi
+ * @sq: sq to get wqe from
+ * @wqe_size: wqe size
+ * @prod_idx: returned pi
+ *
+ * Return wqe pointer
+ **/
+struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
+ unsigned int wqe_size, u16 *prod_idx)
+{
+ struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(sq->wq, wqe_size,
+ prod_idx);
+
+ if (IS_ERR(hw_wqe))
+ return NULL;
+
+ return &hw_wqe->sq_wqe;
+}
+
+/**
+ * hinic_sq_write_wqe - write the wqe to the sq
+ * @sq: send queue
+ * @prod_idx: pi of the wqe
+ * @sq_wqe: the wqe to write
+ * @skb: skb to save
+ * @wqe_size: the size of the wqe
+ **/
+void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
+ struct hinic_sq_wqe *sq_wqe,
+ struct sk_buff *skb, unsigned int wqe_size)
+{
+ struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)sq_wqe;
+
+ sq->saved_skb[prod_idx] = skb;
+
+ /* The data in the HW should be in Big Endian Format */
+ hinic_cpu_to_be32(sq_wqe, wqe_size);
+
+ hinic_write_wqe(sq->wq, hw_wqe, wqe_size);
+}
+
+/**
+ * hinic_sq_read_wqe - read wqe ptr in the current ci and update the ci
+ * @sq: send queue
+ * @skb: return skb that was saved
+ * @wqe_size: the size of the wqe
+ * @cons_idx: consumer index of the wqe
+ *
+ * Return wqe in ci position
+ **/
+struct hinic_sq_wqe *hinic_sq_read_wqe(struct hinic_sq *sq,
+ struct sk_buff **skb,
+ unsigned int *wqe_size, u16 *cons_idx)
+{
+ struct hinic_hw_wqe *hw_wqe;
+ struct hinic_sq_wqe *sq_wqe;
+ struct hinic_sq_ctrl *ctrl;
+ unsigned int buf_sect_len;
+ u32 ctrl_info;
+
+ /* read the ctrl section to get the wqe size */
+ hw_wqe = hinic_read_wqe(sq->wq, sizeof(*ctrl), cons_idx);
+ if (IS_ERR(hw_wqe))
+ return NULL;
+
+ sq_wqe = &hw_wqe->sq_wqe;
+ ctrl = &sq_wqe->ctrl;
+ ctrl_info = be32_to_cpu(ctrl->ctrl_info);
+ buf_sect_len = HINIC_SQ_CTRL_GET(ctrl_info, BUFDESC_SECT_LEN);
+
+ *wqe_size = sizeof(*ctrl) + sizeof(sq_wqe->task);
+ *wqe_size += SECT_SIZE_FROM_8BYTES(buf_sect_len);
+
+ *skb = sq->saved_skb[*cons_idx];
+
+ /* re-read the wqe using its real size */
+ hw_wqe = hinic_read_wqe(sq->wq, *wqe_size, cons_idx);
+
+ return &hw_wqe->sq_wqe;
+}
+
+/**
+ * hinic_sq_put_wqe - release the ci for new wqes
+ * @sq: send queue
+ * @wqe_size: the size of the wqe
+ **/
+void hinic_sq_put_wqe(struct hinic_sq *sq, unsigned int wqe_size)
+{
+ hinic_put_wqe(sq->wq, wqe_size);
+}
+
+/**
+ * hinic_sq_get_sges - get sges from the wqe
+ * @sq_wqe: wqe holding the buffer addresses to return as sges
+ * @sges: returned sges
+ * @nr_sges: number of sges to return
+ **/
+void hinic_sq_get_sges(struct hinic_sq_wqe *sq_wqe, struct hinic_sge *sges,
+ int nr_sges)
+{
+ int i;
+
+ for (i = 0; i < nr_sges && i < HINIC_MAX_SQ_BUFDESCS; i++) {
+ sges[i] = sq_wqe->buf_descs[i].sge;
+ hinic_be32_to_cpu(&sges[i], sizeof(sges[i]));
+ }
+}
+
+/**
+ * hinic_rq_get_wqe - get wqe ptr in the current pi and update the pi
+ * @rq: rq to get wqe from
+ * @wqe_size: wqe size
+ * @prod_idx: returned pi
+ *
+ * Return wqe pointer
+ **/
+struct hinic_rq_wqe *hinic_rq_get_wqe(struct hinic_rq *rq,
+ unsigned int wqe_size, u16 *prod_idx)
+{
+ struct hinic_hw_wqe *hw_wqe = hinic_get_wqe(rq->wq, wqe_size,
+ prod_idx);
+
+ if (IS_ERR(hw_wqe))
+ return NULL;
+
+ return &hw_wqe->rq_wqe;
+}
+
+/**
+ * hinic_rq_write_wqe - write the wqe to the rq
+ * @rq: recv queue
+ * @prod_idx: pi of the wqe
+ * @rq_wqe: the wqe to write
+ * @skb: skb to save
+ **/
+void hinic_rq_write_wqe(struct hinic_rq *rq, u16 prod_idx,
+ struct hinic_rq_wqe *rq_wqe, struct sk_buff *skb)
+{
+ struct hinic_hw_wqe *hw_wqe = (struct hinic_hw_wqe *)rq_wqe;
+
+ rq->saved_skb[prod_idx] = skb;
+
+ /* The data in the HW should be in Big Endian Format */
+ hinic_cpu_to_be32(rq_wqe, sizeof(*rq_wqe));
+
+ hinic_write_wqe(rq->wq, hw_wqe, sizeof(*rq_wqe));
+}
+
+/**
+ * hinic_rq_read_wqe - read wqe ptr in the current ci and update the ci
+ * @rq: recv queue
+ * @wqe_size: the size of the wqe
+ * @skb: return saved skb
+ * @cons_idx: consumer index of the wqe
+ *
+ * Return wqe in ci position
+ **/
+struct hinic_rq_wqe *hinic_rq_read_wqe(struct hinic_rq *rq,
+ unsigned int wqe_size,
+ struct sk_buff **skb, u16 *cons_idx)
+{
+ struct hinic_hw_wqe *hw_wqe;
+ struct hinic_rq_cqe *cqe;
+ int rx_done;
+ u32 status;
+
+ hw_wqe = hinic_read_wqe(rq->wq, wqe_size, cons_idx);
+ if (IS_ERR(hw_wqe))
+ return NULL;
+
+ cqe = rq->cqe[*cons_idx];
+
+ status = be32_to_cpu(cqe->status);
+
+ rx_done = HINIC_RQ_CQE_STATUS_GET(status, RXDONE);
+ if (!rx_done)
+ return NULL;
+
+ *skb = rq->saved_skb[*cons_idx];
+
+ return &hw_wqe->rq_wqe;
+}
+
+/**
+ * hinic_rq_read_next_wqe - increment ci and read the wqe in ci position
+ * @rq: recv queue
+ * @wqe_size: the size of the wqe
+ * @skb: return saved skb
+ * @cons_idx: consumer index in the wq
+ *
+ * Return wqe in incremented ci position
+ **/
+struct hinic_rq_wqe *hinic_rq_read_next_wqe(struct hinic_rq *rq,
+ unsigned int wqe_size,
+ struct sk_buff **skb,
+ u16 *cons_idx)
+{
+ struct hinic_wq *wq = rq->wq;
+ struct hinic_hw_wqe *hw_wqe;
+ unsigned int num_wqebbs;
+
+ wqe_size = ALIGN(wqe_size, wq->wqebb_size);
+ num_wqebbs = wqe_size / wq->wqebb_size;
+
+ *cons_idx = RQ_MASKED_IDX(rq, *cons_idx + num_wqebbs);
+
+ *skb = rq->saved_skb[*cons_idx];
+
+ hw_wqe = hinic_read_wqe_direct(wq, *cons_idx);
+
+ return &hw_wqe->rq_wqe;
+}
+
+/**
+ * hinic_rq_put_wqe - release the ci for new wqes
+ * @rq: recv queue
+ * @cons_idx: consumer index of the wqe
+ * @wqe_size: the size of the wqe
+ **/
+void hinic_rq_put_wqe(struct hinic_rq *rq, u16 cons_idx,
+ unsigned int wqe_size)
+{
+ struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
+ u32 status = be32_to_cpu(cqe->status);
+
+ status = HINIC_RQ_CQE_STATUS_CLEAR(status, RXDONE);
+
+ /* Rx WQE size is 1 WQEBB, no wq shadow */
+ cqe->status = cpu_to_be32(status);
+
+ wmb(); /* clear done flag */
+
+ hinic_put_wqe(rq->wq, wqe_size);
+}
+
+/**
+ * hinic_rq_get_sge - get sge from the wqe
+ * @rq: recv queue
+ * @rq_wqe: wqe that holds the buffer address for the sge
+ * @cons_idx: consumer index
+ * @sge: returned sge
+ **/
+void hinic_rq_get_sge(struct hinic_rq *rq, struct hinic_rq_wqe *rq_wqe,
+ u16 cons_idx, struct hinic_sge *sge)
+{
+ struct hinic_rq_cqe *cqe = rq->cqe[cons_idx];
+ u32 len = be32_to_cpu(cqe->len);
+
+ sge->hi_addr = be32_to_cpu(rq_wqe->buf_desc.hi_addr);
+ sge->lo_addr = be32_to_cpu(rq_wqe->buf_desc.lo_addr);
+ sge->len = HINIC_RQ_CQE_SGE_GET(len, LEN);
+}
+
+/**
+ * hinic_rq_prepare_wqe - prepare wqe before insert to the queue
+ * @rq: recv queue
+ * @prod_idx: pi value
+ * @rq_wqe: the wqe
+ * @sge: sge for use by the wqe for recv buf address
+ **/
+void hinic_rq_prepare_wqe(struct hinic_rq *rq, u16 prod_idx,
+ struct hinic_rq_wqe *rq_wqe, struct hinic_sge *sge)
+{
+ struct hinic_rq_cqe_sect *cqe_sect = &rq_wqe->cqe_sect;
+ struct hinic_rq_bufdesc *buf_desc = &rq_wqe->buf_desc;
+ struct hinic_rq_cqe *cqe = rq->cqe[prod_idx];
+ struct hinic_rq_ctrl *ctrl = &rq_wqe->ctrl;
+ dma_addr_t cqe_dma = rq->cqe_dma[prod_idx];
+
+ ctrl->ctrl_info =
+ HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*ctrl)), LEN) |
+ HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*cqe_sect)),
+ COMPLETE_LEN) |
+ HINIC_RQ_CTRL_SET(SIZE_8BYTES(sizeof(*buf_desc)),
+ BUFDESC_SECT_LEN) |
+ HINIC_RQ_CTRL_SET(RQ_COMPLETE_SGE, COMPLETE_FORMAT);
+
+ hinic_set_sge(&cqe_sect->sge, cqe_dma, sizeof(*cqe));
+
+ buf_desc->hi_addr = sge->hi_addr;
+ buf_desc->lo_addr = sge->lo_addr;
+}
+
+/**
+ * hinic_rq_update - update pi of the rq
+ * @rq: recv queue
+ * @prod_idx: pi value
+ **/
+void hinic_rq_update(struct hinic_rq *rq, u16 prod_idx)
+{
+ *rq->pi_virt_addr = cpu_to_be16(RQ_MASKED_IDX(rq, prod_idx + 1));
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
new file mode 100644
index 0000000..df729a1
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
@@ -0,0 +1,201 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_HW_QP_H
+#define HINIC_HW_QP_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/pci.h>
+#include <linux/skbuff.h>
+
+#include "hinic_common.h"
+#include "hinic_hw_if.h"
+#include "hinic_hw_wqe.h"
+#include "hinic_hw_wq.h"
+#include "hinic_hw_qp_ctxt.h"
+
+#define HINIC_SQ_DB_INFO_PI_HI_SHIFT 0
+#define HINIC_SQ_DB_INFO_QID_SHIFT 8
+#define HINIC_SQ_DB_INFO_PATH_SHIFT 23
+#define HINIC_SQ_DB_INFO_COS_SHIFT 24
+#define HINIC_SQ_DB_INFO_TYPE_SHIFT 27
+
+#define HINIC_SQ_DB_INFO_PI_HI_MASK 0xFF
+#define HINIC_SQ_DB_INFO_QID_MASK 0x3FF
+#define HINIC_SQ_DB_INFO_PATH_MASK 0x1
+#define HINIC_SQ_DB_INFO_COS_MASK 0x7
+#define HINIC_SQ_DB_INFO_TYPE_MASK 0x1F
+
+#define HINIC_SQ_DB_INFO_SET(val, member) \
+ (((u32)(val) & HINIC_SQ_DB_INFO_##member##_MASK) \
+ << HINIC_SQ_DB_INFO_##member##_SHIFT)
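+
+/*
+ * Illustrative packing of a doorbell word with the macro above (mirrors
+ * what sq_prepare_db() does in hinic_hw_qp.c; values are examples):
+ *
+ * u32 db = HINIC_SQ_DB_INFO_SET(pi_hi, PI_HI) |
+ * HINIC_SQ_DB_INFO_SET(q_id, QID) |
+ * HINIC_SQ_DB_INFO_SET(cos, COS);
+ */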
+
+#define HINIC_SQ_WQEBB_SIZE 64
+#define HINIC_RQ_WQEBB_SIZE 32
+
+#define HINIC_SQ_PAGE_SIZE SZ_4K
+#define HINIC_RQ_PAGE_SIZE SZ_4K
+
+#define HINIC_SQ_DEPTH SZ_4K
+#define HINIC_RQ_DEPTH SZ_4K
+
+/* If HINIC_RX_BUF_SZ is changed, HINIC_RX_BUF_SZ_IDX must be updated to match */
+#define HINIC_RX_BUF_SZ 2048
+#define HINIC_RX_BUF_SZ_IDX HINIC_RX_BUF_SZ_2048_IDX
+
+#define HINIC_MIN_TX_WQE_SIZE(wq) \
+ ALIGN(HINIC_SQ_WQE_SIZE(1), (wq)->wqebb_size)
+
+#define HINIC_MIN_TX_NUM_WQEBBS(sq) \
+ (HINIC_MIN_TX_WQE_SIZE((sq)->wq) / (sq)->wq->wqebb_size)
+
+enum hinic_rx_buf_sz_idx {
+ HINIC_RX_BUF_SZ_32_IDX,
+ HINIC_RX_BUF_SZ_64_IDX,
+ HINIC_RX_BUF_SZ_96_IDX,
+ HINIC_RX_BUF_SZ_128_IDX,
+ HINIC_RX_BUF_SZ_192_IDX,
+ HINIC_RX_BUF_SZ_256_IDX,
+ HINIC_RX_BUF_SZ_384_IDX,
+ HINIC_RX_BUF_SZ_512_IDX,
+ HINIC_RX_BUF_SZ_768_IDX,
+ HINIC_RX_BUF_SZ_1024_IDX,
+ HINIC_RX_BUF_SZ_1536_IDX,
+ HINIC_RX_BUF_SZ_2048_IDX,
+ HINIC_RX_BUF_SZ_3072_IDX,
+ HINIC_RX_BUF_SZ_4096_IDX,
+ HINIC_RX_BUF_SZ_8192_IDX,
+ HINIC_RX_BUF_SZ_16384_IDX,
+};
+
+struct hinic_sq {
+ struct hinic_hwif *hwif;
+
+ struct hinic_wq *wq;
+
+ u32 irq;
+ u16 msix_entry;
+
+ void *hw_ci_addr;
+ dma_addr_t hw_ci_dma_addr;
+
+ void __iomem *db_base;
+
+ struct sk_buff **saved_skb;
+};
+
+struct hinic_rq {
+ struct hinic_hwif *hwif;
+
+ struct hinic_wq *wq;
+
+ u32 irq;
+ u16 msix_entry;
+
+ size_t buf_sz;
+
+ struct sk_buff **saved_skb;
+
+ struct hinic_rq_cqe **cqe;
+ dma_addr_t *cqe_dma;
+
+ u16 *pi_virt_addr;
+ dma_addr_t pi_dma_addr;
+};
+
+struct hinic_qp {
+ struct hinic_sq sq;
+ struct hinic_rq rq;
+
+ u16 q_id;
+};
+
+void hinic_qp_prepare_header(struct hinic_qp_ctxt_header *qp_ctxt_hdr,
+ enum hinic_qp_ctxt_type ctxt_type,
+ u16 num_queues, u16 max_queues);
+
+void hinic_sq_prepare_ctxt(struct hinic_sq_ctxt *sq_ctxt,
+ struct hinic_sq *sq, u16 global_qid);
+
+void hinic_rq_prepare_ctxt(struct hinic_rq_ctxt *rq_ctxt,
+ struct hinic_rq *rq, u16 global_qid);
+
+int hinic_init_sq(struct hinic_sq *sq, struct hinic_hwif *hwif,
+ struct hinic_wq *wq, struct msix_entry *entry, void *ci_addr,
+ dma_addr_t ci_dma_addr, void __iomem *db_base);
+
+void hinic_clean_sq(struct hinic_sq *sq);
+
+int hinic_init_rq(struct hinic_rq *rq, struct hinic_hwif *hwif,
+ struct hinic_wq *wq, struct msix_entry *entry);
+
+void hinic_clean_rq(struct hinic_rq *rq);
+
+int hinic_get_sq_free_wqebbs(struct hinic_sq *sq);
+
+int hinic_get_rq_free_wqebbs(struct hinic_rq *rq);
+
+void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
+ struct hinic_sq_wqe *wqe, struct hinic_sge *sges,
+ int nr_sges);
+
+void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
+ unsigned int cos);
+
+struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
+ unsigned int wqe_size, u16 *prod_idx);
+
+void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
+ struct hinic_sq_wqe *wqe, struct sk_buff *skb,
+ unsigned int wqe_size);
+
+struct hinic_sq_wqe *hinic_sq_read_wqe(struct hinic_sq *sq,
+ struct sk_buff **skb,
+ unsigned int *wqe_size, u16 *cons_idx);
+
+void hinic_sq_put_wqe(struct hinic_sq *sq, unsigned int wqe_size);
+
+void hinic_sq_get_sges(struct hinic_sq_wqe *wqe, struct hinic_sge *sges,
+ int nr_sges);
+
+struct hinic_rq_wqe *hinic_rq_get_wqe(struct hinic_rq *rq,
+ unsigned int wqe_size, u16 *prod_idx);
+
+void hinic_rq_write_wqe(struct hinic_rq *rq, u16 prod_idx,
+ struct hinic_rq_wqe *wqe, struct sk_buff *skb);
+
+struct hinic_rq_wqe *hinic_rq_read_wqe(struct hinic_rq *rq,
+ unsigned int wqe_size,
+ struct sk_buff **skb, u16 *cons_idx);
+
+struct hinic_rq_wqe *hinic_rq_read_next_wqe(struct hinic_rq *rq,
+ unsigned int wqe_size,
+ struct sk_buff **skb,
+ u16 *cons_idx);
+
+void hinic_rq_put_wqe(struct hinic_rq *rq, u16 cons_idx,
+ unsigned int wqe_size);
+
+void hinic_rq_get_sge(struct hinic_rq *rq, struct hinic_rq_wqe *wqe,
+ u16 cons_idx, struct hinic_sge *sge);
+
+void hinic_rq_prepare_wqe(struct hinic_rq *rq, u16 prod_idx,
+ struct hinic_rq_wqe *wqe, struct hinic_sge *sge);
+
+void hinic_rq_update(struct hinic_rq *rq, u16 prod_idx);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h
new file mode 100644
index 0000000..376abf0
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h
@@ -0,0 +1,214 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_HW_QP_CTXT_H
+#define HINIC_HW_QP_CTXT_H
+
+#include <linux/types.h>
+
+#include "hinic_hw_cmdq.h"
+
+#define HINIC_SQ_CTXT_CEQ_ATTR_GLOBAL_SQ_ID_SHIFT 13
+#define HINIC_SQ_CTXT_CEQ_ATTR_EN_SHIFT 23
+
+#define HINIC_SQ_CTXT_CEQ_ATTR_GLOBAL_SQ_ID_MASK 0x3FF
+#define HINIC_SQ_CTXT_CEQ_ATTR_EN_MASK 0x1
+
+#define HINIC_SQ_CTXT_CEQ_ATTR_SET(val, member) \
+ (((u32)(val) & HINIC_SQ_CTXT_CEQ_ATTR_##member##_MASK) \
+ << HINIC_SQ_CTXT_CEQ_ATTR_##member##_SHIFT)
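+
+/*
+ * Usage sketch (illustrative values, not from the HW spec): the SET
+ * macros mask a field value and shift it into position, so a ceq_attr
+ * word for global SQ id 3 with the CEQ enabled would be built as
+ *
+ *	attr = HINIC_SQ_CTXT_CEQ_ATTR_SET(3, GLOBAL_SQ_ID) |
+ *	       HINIC_SQ_CTXT_CEQ_ATTR_SET(1, EN);
+ *
+ * i.e. ((3 & 0x3FF) << 13) | ((1 & 0x1) << 23).
+ */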
+
+#define HINIC_SQ_CTXT_CI_IDX_SHIFT 11
+#define HINIC_SQ_CTXT_CI_WRAPPED_SHIFT 23
+
+#define HINIC_SQ_CTXT_CI_IDX_MASK 0xFFF
+#define HINIC_SQ_CTXT_CI_WRAPPED_MASK 0x1
+
+#define HINIC_SQ_CTXT_CI_SET(val, member) \
+ (((u32)(val) & HINIC_SQ_CTXT_CI_##member##_MASK) \
+ << HINIC_SQ_CTXT_CI_##member##_SHIFT)
+
+#define HINIC_SQ_CTXT_WQ_PAGE_HI_PFN_SHIFT 0
+#define HINIC_SQ_CTXT_WQ_PAGE_PI_SHIFT 20
+
+#define HINIC_SQ_CTXT_WQ_PAGE_HI_PFN_MASK 0xFFFFF
+#define HINIC_SQ_CTXT_WQ_PAGE_PI_MASK 0xFFF
+
+#define HINIC_SQ_CTXT_WQ_PAGE_SET(val, member) \
+ (((u32)(val) & HINIC_SQ_CTXT_WQ_PAGE_##member##_MASK) \
+ << HINIC_SQ_CTXT_WQ_PAGE_##member##_SHIFT)
+
+#define HINIC_SQ_CTXT_PREF_CACHE_THRESHOLD_SHIFT 0
+#define HINIC_SQ_CTXT_PREF_CACHE_MAX_SHIFT 14
+#define HINIC_SQ_CTXT_PREF_CACHE_MIN_SHIFT 25
+
+#define HINIC_SQ_CTXT_PREF_CACHE_THRESHOLD_MASK 0x3FFF
+#define HINIC_SQ_CTXT_PREF_CACHE_MAX_MASK 0x7FF
+#define HINIC_SQ_CTXT_PREF_CACHE_MIN_MASK 0x7F
+
+#define HINIC_SQ_CTXT_PREF_WQ_HI_PFN_SHIFT 0
+#define HINIC_SQ_CTXT_PREF_CI_SHIFT 20
+
+#define HINIC_SQ_CTXT_PREF_WQ_HI_PFN_MASK 0xFFFFF
+#define HINIC_SQ_CTXT_PREF_CI_MASK 0xFFF
+
+#define HINIC_SQ_CTXT_PREF_SET(val, member) \
+ (((u32)(val) & HINIC_SQ_CTXT_PREF_##member##_MASK) \
+ << HINIC_SQ_CTXT_PREF_##member##_SHIFT)
+
+#define HINIC_SQ_CTXT_WQ_BLOCK_HI_PFN_SHIFT 0
+
+#define HINIC_SQ_CTXT_WQ_BLOCK_HI_PFN_MASK 0x7FFFFF
+
+#define HINIC_SQ_CTXT_WQ_BLOCK_SET(val, member) \
+ (((u32)(val) & HINIC_SQ_CTXT_WQ_BLOCK_##member##_MASK) \
+ << HINIC_SQ_CTXT_WQ_BLOCK_##member##_SHIFT)
+
+#define HINIC_RQ_CTXT_CEQ_ATTR_EN_SHIFT 0
+#define HINIC_RQ_CTXT_CEQ_ATTR_WRAPPED_SHIFT 1
+
+#define HINIC_RQ_CTXT_CEQ_ATTR_EN_MASK 0x1
+#define HINIC_RQ_CTXT_CEQ_ATTR_WRAPPED_MASK 0x1
+
+#define HINIC_RQ_CTXT_CEQ_ATTR_SET(val, member) \
+ (((u32)(val) & HINIC_RQ_CTXT_CEQ_ATTR_##member##_MASK) \
+ << HINIC_RQ_CTXT_CEQ_ATTR_##member##_SHIFT)
+
+#define HINIC_RQ_CTXT_PI_IDX_SHIFT 0
+#define HINIC_RQ_CTXT_PI_INTR_SHIFT 22
+
+#define HINIC_RQ_CTXT_PI_IDX_MASK 0xFFF
+#define HINIC_RQ_CTXT_PI_INTR_MASK 0x3FF
+
+#define HINIC_RQ_CTXT_PI_SET(val, member) \
+ (((u32)(val) & HINIC_RQ_CTXT_PI_##member##_MASK) << \
+ HINIC_RQ_CTXT_PI_##member##_SHIFT)
+
+#define HINIC_RQ_CTXT_WQ_PAGE_HI_PFN_SHIFT 0
+#define HINIC_RQ_CTXT_WQ_PAGE_CI_SHIFT 20
+
+#define HINIC_RQ_CTXT_WQ_PAGE_HI_PFN_MASK 0xFFFFF
+#define HINIC_RQ_CTXT_WQ_PAGE_CI_MASK 0xFFF
+
+#define HINIC_RQ_CTXT_WQ_PAGE_SET(val, member) \
+ (((u32)(val) & HINIC_RQ_CTXT_WQ_PAGE_##member##_MASK) << \
+ HINIC_RQ_CTXT_WQ_PAGE_##member##_SHIFT)
+
+#define HINIC_RQ_CTXT_PREF_CACHE_THRESHOLD_SHIFT 0
+#define HINIC_RQ_CTXT_PREF_CACHE_MAX_SHIFT 14
+#define HINIC_RQ_CTXT_PREF_CACHE_MIN_SHIFT 25
+
+#define HINIC_RQ_CTXT_PREF_CACHE_THRESHOLD_MASK 0x3FFF
+#define HINIC_RQ_CTXT_PREF_CACHE_MAX_MASK 0x7FF
+#define HINIC_RQ_CTXT_PREF_CACHE_MIN_MASK 0x7F
+
+#define HINIC_RQ_CTXT_PREF_WQ_HI_PFN_SHIFT 0
+#define HINIC_RQ_CTXT_PREF_CI_SHIFT 20
+
+#define HINIC_RQ_CTXT_PREF_WQ_HI_PFN_MASK 0xFFFFF
+#define HINIC_RQ_CTXT_PREF_CI_MASK 0xFFF
+
+#define HINIC_RQ_CTXT_PREF_SET(val, member) \
+ (((u32)(val) & HINIC_RQ_CTXT_PREF_##member##_MASK) << \
+ HINIC_RQ_CTXT_PREF_##member##_SHIFT)
+
+#define HINIC_RQ_CTXT_WQ_BLOCK_HI_PFN_SHIFT 0
+
+#define HINIC_RQ_CTXT_WQ_BLOCK_HI_PFN_MASK 0x7FFFFF
+
+#define HINIC_RQ_CTXT_WQ_BLOCK_SET(val, member) \
+ (((u32)(val) & HINIC_RQ_CTXT_WQ_BLOCK_##member##_MASK) << \
+ HINIC_RQ_CTXT_WQ_BLOCK_##member##_SHIFT)
+
+#define HINIC_SQ_CTXT_SIZE(num_sqs) (sizeof(struct hinic_qp_ctxt_header) \
+ + (num_sqs) * sizeof(struct hinic_sq_ctxt))
+
+#define HINIC_RQ_CTXT_SIZE(num_rqs) (sizeof(struct hinic_qp_ctxt_header) \
+ + (num_rqs) * sizeof(struct hinic_rq_ctxt))
+
+#define HINIC_WQ_PAGE_PFN_SHIFT 12
+#define HINIC_WQ_BLOCK_PFN_SHIFT 9
+
+#define HINIC_WQ_PAGE_PFN(page_addr) ((page_addr) >> HINIC_WQ_PAGE_PFN_SHIFT)
+#define HINIC_WQ_BLOCK_PFN(page_addr) ((page_addr) >> \
+ HINIC_WQ_BLOCK_PFN_SHIFT)
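+
+/*
+ * PFN sketch (illustrative address): a WQ page at DMA address
+ * 0x12345000 has HINIC_WQ_PAGE_PFN() = 0x12345 (address >> 12) and
+ * HINIC_WQ_BLOCK_PFN() = 0x91a28 (address >> 9); the HW consumes
+ * these page-frame numbers rather than raw addresses.
+ */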
+
+#define HINIC_Q_CTXT_MAX \
+ ((HINIC_CMDQ_BUF_SIZE - sizeof(struct hinic_qp_ctxt_header)) \
+ / sizeof(struct hinic_sq_ctxt))
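+
+/*
+ * Sizing sketch: HINIC_Q_CTXT_MAX is how many queue contexts fit in
+ * one command buffer after the header. Assuming HINIC_CMDQ_BUF_SIZE
+ * is 2048 bytes (see hinic_hw_cmdq.h), the 8-byte header and 48-byte
+ * SQ contexts give (2048 - 8) / 48 = 42 contexts per command.
+ */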
+
+enum hinic_qp_ctxt_type {
+ HINIC_QP_CTXT_TYPE_SQ,
+ HINIC_QP_CTXT_TYPE_RQ
+};
+
+struct hinic_qp_ctxt_header {
+ u16 num_queues;
+ u16 queue_type;
+ u32 addr_offset;
+};
+
+struct hinic_sq_ctxt {
+ u32 ceq_attr;
+
+ u32 ci_wrapped;
+
+ u32 wq_hi_pfn_pi;
+ u32 wq_lo_pfn;
+
+ u32 pref_cache;
+ u32 pref_wrapped;
+ u32 pref_wq_hi_pfn_ci;
+ u32 pref_wq_lo_pfn;
+
+ u32 rsvd0;
+ u32 rsvd1;
+
+ u32 wq_block_hi_pfn;
+ u32 wq_block_lo_pfn;
+};
+
+struct hinic_rq_ctxt {
+ u32 ceq_attr;
+
+ u32 pi_intr_attr;
+
+ u32 wq_hi_pfn_ci;
+ u32 wq_lo_pfn;
+
+ u32 pref_cache;
+ u32 pref_wrapped;
+
+ u32 pref_wq_hi_pfn_ci;
+ u32 pref_wq_lo_pfn;
+
+ u32 pi_paddr_hi;
+ u32 pi_paddr_lo;
+
+ u32 wq_block_hi_pfn;
+ u32 wq_block_lo_pfn;
+};
+
+struct hinic_sq_ctxt_block {
+ struct hinic_qp_ctxt_header hdr;
+ struct hinic_sq_ctxt sq_ctxt[HINIC_Q_CTXT_MAX];
+};
+
+struct hinic_rq_ctxt_block {
+ struct hinic_qp_ctxt_header hdr;
+ struct hinic_rq_ctxt rq_ctxt[HINIC_Q_CTXT_MAX];
+};
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
new file mode 100644
index 0000000..3e3181c08
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
@@ -0,0 +1,878 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/semaphore.h>
+#include <linux/errno.h>
+#include <linux/vmalloc.h>
+#include <linux/err.h>
+#include <asm/byteorder.h>
+
+#include "hinic_hw_if.h"
+#include "hinic_hw_wqe.h"
+#include "hinic_hw_wq.h"
+#include "hinic_hw_cmdq.h"
+
+#define WQS_BLOCKS_PER_PAGE 4
+
+#define WQ_BLOCK_SIZE 4096
+#define WQS_PAGE_SIZE (WQS_BLOCKS_PER_PAGE * WQ_BLOCK_SIZE)
+
+#define WQS_MAX_NUM_BLOCKS 128
+#define WQS_FREE_BLOCKS_SIZE(wqs) (WQS_MAX_NUM_BLOCKS * \
+ sizeof((wqs)->free_blocks[0]))
+
+#define WQ_SIZE(wq) ((wq)->q_depth * (wq)->wqebb_size)
+
+#define WQ_PAGE_ADDR_SIZE sizeof(u64)
+#define WQ_MAX_PAGES (WQ_BLOCK_SIZE / WQ_PAGE_ADDR_SIZE)
+
+#define CMDQ_BLOCK_SIZE 512
+#define CMDQ_PAGE_SIZE 4096
+
+#define CMDQ_WQ_MAX_PAGES (CMDQ_BLOCK_SIZE / WQ_PAGE_ADDR_SIZE)
+
+#define WQ_BASE_VADDR(wqs, wq) \
+ ((void *)((wqs)->page_vaddr[(wq)->page_idx]) \
+ + (wq)->block_idx * WQ_BLOCK_SIZE)
+
+#define WQ_BASE_PADDR(wqs, wq) \
+ ((wqs)->page_paddr[(wq)->page_idx] \
+ + (wq)->block_idx * WQ_BLOCK_SIZE)
+
+#define WQ_BASE_ADDR(wqs, wq) \
+ ((void *)((wqs)->shadow_page_vaddr[(wq)->page_idx]) \
+ + (wq)->block_idx * WQ_BLOCK_SIZE)
+
+#define CMDQ_BASE_VADDR(cmdq_pages, wq) \
+ ((void *)((cmdq_pages)->page_vaddr) \
+ + (wq)->block_idx * CMDQ_BLOCK_SIZE)
+
+#define CMDQ_BASE_PADDR(cmdq_pages, wq) \
+ ((cmdq_pages)->page_paddr \
+ + (wq)->block_idx * CMDQ_BLOCK_SIZE)
+
+#define CMDQ_BASE_ADDR(cmdq_pages, wq) \
+ ((void *)((cmdq_pages)->shadow_page_vaddr) \
+ + (wq)->block_idx * CMDQ_BLOCK_SIZE)
+
+#define WQE_PAGE_OFF(wq, idx) (((idx) & ((wq)->num_wqebbs_per_page - 1)) * \
+ (wq)->wqebb_size)
+
+#define WQE_PAGE_NUM(wq, idx) (((idx) / ((wq)->num_wqebbs_per_page)) \
+ & ((wq)->num_q_pages - 1))
+
+#define WQ_PAGE_ADDR(wq, idx) \
+ ((wq)->shadow_block_vaddr[WQE_PAGE_NUM(wq, idx)])
+
+#define MASKED_WQE_IDX(wq, idx) ((idx) & (wq)->mask)
+
+#define WQE_IN_RANGE(wqe, start, end) \
+ (((unsigned long)(wqe) >= (unsigned long)(start)) && \
+ ((unsigned long)(wqe) < (unsigned long)(end)))
+
+#define WQE_SHADOW_PAGE(wq, wqe) \
+ (((unsigned long)(wqe) - (unsigned long)(wq)->shadow_wqe) \
+ / (wq)->max_wqe_size)
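+
+/*
+ * Index math sketch (illustrative sizes): with wqebb_size = 64 and
+ * num_wqebbs_per_page = 64, wqe index 70 lands in queue page
+ * WQE_PAGE_NUM = (70 / 64) & (num_q_pages - 1) = 1 at byte offset
+ * WQE_PAGE_OFF = (70 & 63) * 64 = 384. Both macros rely on the
+ * power-of-2 checks enforced at WQ allocation time.
+ */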
+
+/**
+ * queue_alloc_page - allocate a DMA page for a queue
+ * @hwif: HW interface used for the DMA allocation
+ * @vaddr: returned virtual address of the allocated page
+ * @paddr: returned physical address of the allocated page
+ * @shadow_vaddr: returned VM area that will hold the WQ page addresses
+ * @page_sz: size of each WQ page
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int queue_alloc_page(struct hinic_hwif *hwif, u64 **vaddr, u64 *paddr,
+ void ***shadow_vaddr, size_t page_sz)
+{
+ struct pci_dev *pdev = hwif->pdev;
+ dma_addr_t dma_addr;
+
+ *vaddr = dma_zalloc_coherent(&pdev->dev, page_sz, &dma_addr,
+ GFP_KERNEL);
+ if (!*vaddr) {
+ dev_err(&pdev->dev, "Failed to allocate dma for wqs page\n");
+ return -ENOMEM;
+ }
+
+ *paddr = (u64)dma_addr;
+
+ /* use vzalloc for big mem */
+ *shadow_vaddr = vzalloc(page_sz);
+ if (!*shadow_vaddr)
+ goto err_shadow_vaddr;
+
+ return 0;
+
+err_shadow_vaddr:
+ dma_free_coherent(&pdev->dev, page_sz, *vaddr, dma_addr);
+ return -ENOMEM;
+}
+
+/**
+ * wqs_allocate_page - allocate page for WQ set
+ * @wqs: Work Queue Set
+ * @page_idx: index of the page to allocate
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int wqs_allocate_page(struct hinic_wqs *wqs, int page_idx)
+{
+ return queue_alloc_page(wqs->hwif, &wqs->page_vaddr[page_idx],
+ &wqs->page_paddr[page_idx],
+ &wqs->shadow_page_vaddr[page_idx],
+ WQS_PAGE_SIZE);
+}
+
+/**
+ * wqs_free_page - free page of WQ set
+ * @wqs: Work Queue Set
+ * @page_idx: index of the page to free
+ **/
+static void wqs_free_page(struct hinic_wqs *wqs, int page_idx)
+{
+ struct hinic_hwif *hwif = wqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ dma_free_coherent(&pdev->dev, WQS_PAGE_SIZE,
+ wqs->page_vaddr[page_idx],
+ (dma_addr_t)wqs->page_paddr[page_idx]);
+ vfree(wqs->shadow_page_vaddr[page_idx]);
+}
+
+/**
+ * cmdq_allocate_page - allocate page for cmdq
+ * @cmdq_pages: cmdq pages struct that will hold the allocated page
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int cmdq_allocate_page(struct hinic_cmdq_pages *cmdq_pages)
+{
+ return queue_alloc_page(cmdq_pages->hwif, &cmdq_pages->page_vaddr,
+ &cmdq_pages->page_paddr,
+ &cmdq_pages->shadow_page_vaddr,
+ CMDQ_PAGE_SIZE);
+}
+
+/**
+ * cmdq_free_page - free page from cmdq
+ * @cmdq_pages: the pages of the cmdq queue struct that hold the page
+ **/
+static void cmdq_free_page(struct hinic_cmdq_pages *cmdq_pages)
+{
+ struct hinic_hwif *hwif = cmdq_pages->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ dma_free_coherent(&pdev->dev, CMDQ_PAGE_SIZE,
+ cmdq_pages->page_vaddr,
+ (dma_addr_t)cmdq_pages->page_paddr);
+ vfree(cmdq_pages->shadow_page_vaddr);
+}
+
+static int alloc_page_arrays(struct hinic_wqs *wqs)
+{
+ struct hinic_hwif *hwif = wqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ size_t size;
+
+ size = wqs->num_pages * sizeof(*wqs->page_paddr);
+ wqs->page_paddr = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+ if (!wqs->page_paddr)
+ return -ENOMEM;
+
+ size = wqs->num_pages * sizeof(*wqs->page_vaddr);
+ wqs->page_vaddr = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+ if (!wqs->page_vaddr)
+ goto err_page_vaddr;
+
+ size = wqs->num_pages * sizeof(*wqs->shadow_page_vaddr);
+ wqs->shadow_page_vaddr = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+ if (!wqs->shadow_page_vaddr)
+ goto err_page_shadow_vaddr;
+
+ return 0;
+
+err_page_shadow_vaddr:
+ devm_kfree(&pdev->dev, wqs->page_vaddr);
+
+err_page_vaddr:
+ devm_kfree(&pdev->dev, wqs->page_paddr);
+ return -ENOMEM;
+}
+
+static void free_page_arrays(struct hinic_wqs *wqs)
+{
+ struct hinic_hwif *hwif = wqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ devm_kfree(&pdev->dev, wqs->shadow_page_vaddr);
+ devm_kfree(&pdev->dev, wqs->page_vaddr);
+ devm_kfree(&pdev->dev, wqs->page_paddr);
+}
+
+static int wqs_next_block(struct hinic_wqs *wqs, int *page_idx,
+ int *block_idx)
+{
+ int pos;
+
+ down(&wqs->alloc_blocks_lock);
+
+ wqs->num_free_blks--;
+
+ if (wqs->num_free_blks < 0) {
+ wqs->num_free_blks++;
+ up(&wqs->alloc_blocks_lock);
+ return -ENOMEM;
+ }
+
+ pos = wqs->alloc_blk_pos++;
+ pos &= WQS_MAX_NUM_BLOCKS - 1;
+
+ *page_idx = wqs->free_blocks[pos].page_idx;
+ *block_idx = wqs->free_blocks[pos].block_idx;
+
+ wqs->free_blocks[pos].page_idx = -1;
+ wqs->free_blocks[pos].block_idx = -1;
+
+ up(&wqs->alloc_blocks_lock);
+ return 0;
+}
+
+static void wqs_return_block(struct hinic_wqs *wqs, int page_idx,
+ int block_idx)
+{
+ int pos;
+
+ down(&wqs->alloc_blocks_lock);
+
+ pos = wqs->return_blk_pos++;
+ pos &= WQS_MAX_NUM_BLOCKS - 1;
+
+ wqs->free_blocks[pos].page_idx = page_idx;
+ wqs->free_blocks[pos].block_idx = block_idx;
+
+ wqs->num_free_blks++;
+
+ up(&wqs->alloc_blocks_lock);
+}
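+
+/*
+ * The free blocks array behaves as a circular queue: wqs_next_block()
+ * consumes entries at alloc_blk_pos and wqs_return_block() publishes
+ * them back at return_blk_pos, both masked by WQS_MAX_NUM_BLOCKS - 1
+ * and serialized by alloc_blocks_lock.
+ */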
+
+static void init_wqs_blocks_arr(struct hinic_wqs *wqs)
+{
+ int page_idx, blk_idx, pos = 0;
+
+ for (page_idx = 0; page_idx < wqs->num_pages; page_idx++) {
+ for (blk_idx = 0; blk_idx < WQS_BLOCKS_PER_PAGE; blk_idx++) {
+ wqs->free_blocks[pos].page_idx = page_idx;
+ wqs->free_blocks[pos].block_idx = blk_idx;
+ pos++;
+ }
+ }
+
+ wqs->alloc_blk_pos = 0;
+ wqs->return_blk_pos = pos;
+ wqs->num_free_blks = pos;
+
+ sema_init(&wqs->alloc_blocks_lock, 1);
+}
+
+/**
+ * hinic_wqs_alloc - allocate Work Queues set
+ * @wqs: Work Queue Set
+ * @max_wqs: maximum wqs to allocate
+ * @hwif: HW interface for use for the allocation
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_wqs_alloc(struct hinic_wqs *wqs, int max_wqs,
+ struct hinic_hwif *hwif)
+{
+ struct pci_dev *pdev = hwif->pdev;
+ int err, i, page_idx;
+
+ max_wqs = ALIGN(max_wqs, WQS_BLOCKS_PER_PAGE);
+ if (max_wqs > WQS_MAX_NUM_BLOCKS) {
+ dev_err(&pdev->dev, "Invalid max_wqs = %d\n", max_wqs);
+ return -EINVAL;
+ }
+
+ wqs->hwif = hwif;
+ wqs->num_pages = max_wqs / WQS_BLOCKS_PER_PAGE;
+
+ if (alloc_page_arrays(wqs)) {
+ dev_err(&pdev->dev,
+ "Failed to allocate mem for page addresses\n");
+ return -ENOMEM;
+ }
+
+ for (page_idx = 0; page_idx < wqs->num_pages; page_idx++) {
+ err = wqs_allocate_page(wqs, page_idx);
+ if (err) {
+ dev_err(&pdev->dev, "Failed wq page allocation\n");
+ goto err_wq_allocate_page;
+ }
+ }
+
+ wqs->free_blocks = devm_kzalloc(&pdev->dev, WQS_FREE_BLOCKS_SIZE(wqs),
+ GFP_KERNEL);
+ if (!wqs->free_blocks) {
+ err = -ENOMEM;
+ goto err_alloc_blocks;
+ }
+
+ init_wqs_blocks_arr(wqs);
+ return 0;
+
+err_alloc_blocks:
+err_wq_allocate_page:
+ for (i = 0; i < page_idx; i++)
+ wqs_free_page(wqs, i);
+
+ free_page_arrays(wqs);
+ return err;
+}
+
+/**
+ * hinic_wqs_free - free Work Queues set
+ * @wqs: Work Queue Set
+ **/
+void hinic_wqs_free(struct hinic_wqs *wqs)
+{
+ struct hinic_hwif *hwif = wqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int page_idx;
+
+ devm_kfree(&pdev->dev, wqs->free_blocks);
+
+ for (page_idx = 0; page_idx < wqs->num_pages; page_idx++)
+ wqs_free_page(wqs, page_idx);
+
+ free_page_arrays(wqs);
+}
+
+/**
+ * alloc_wqes_shadow - allocate WQE shadows for WQ
+ * @wq: WQ to allocate shadows for
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int alloc_wqes_shadow(struct hinic_wq *wq)
+{
+ struct hinic_hwif *hwif = wq->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ size_t size;
+
+ size = wq->num_q_pages * wq->max_wqe_size;
+ wq->shadow_wqe = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+ if (!wq->shadow_wqe)
+ return -ENOMEM;
+
+ size = wq->num_q_pages * sizeof(wq->prod_idx);
+ wq->shadow_idx = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+ if (!wq->shadow_idx)
+ goto err_shadow_idx;
+
+ return 0;
+
+err_shadow_idx:
+ devm_kfree(&pdev->dev, wq->shadow_wqe);
+ return -ENOMEM;
+}
+
+/**
+ * free_wqes_shadow - free WQE shadows of WQ
+ * @wq: WQ to free shadows from
+ **/
+static void free_wqes_shadow(struct hinic_wq *wq)
+{
+ struct hinic_hwif *hwif = wq->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ devm_kfree(&pdev->dev, wq->shadow_idx);
+ devm_kfree(&pdev->dev, wq->shadow_wqe);
+}
+
+/**
+ * free_wq_pages - free pages of WQ
+ * @hwif: HW interface for releasing dma addresses
+ * @wq: WQ to free pages from
+ * @num_q_pages: number of pages to free
+ **/
+static void free_wq_pages(struct hinic_wq *wq, struct hinic_hwif *hwif,
+ int num_q_pages)
+{
+ struct pci_dev *pdev = hwif->pdev;
+ int i;
+
+ for (i = 0; i < num_q_pages; i++) {
+ void **vaddr = &wq->shadow_block_vaddr[i];
+ u64 *paddr = &wq->block_vaddr[i];
+ dma_addr_t dma_addr;
+
+ dma_addr = (dma_addr_t)be64_to_cpu(*paddr);
+ dma_free_coherent(&pdev->dev, wq->wq_page_size, *vaddr,
+ dma_addr);
+ }
+
+ free_wqes_shadow(wq);
+}
+
+/**
+ * alloc_wq_pages - alloc pages for WQ
+ * @hwif: HW interface for allocating dma addresses
+ * @wq: WQ to allocate pages for
+ * @max_pages: maximum pages allowed
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int alloc_wq_pages(struct hinic_wq *wq, struct hinic_hwif *hwif,
+ int max_pages)
+{
+ struct pci_dev *pdev = hwif->pdev;
+ int i, err, num_q_pages;
+
+ num_q_pages = ALIGN(WQ_SIZE(wq), wq->wq_page_size) / wq->wq_page_size;
+ if (num_q_pages > max_pages) {
+ dev_err(&pdev->dev, "Number wq pages exceeds the limit\n");
+ return -EINVAL;
+ }
+
+ if (num_q_pages & (num_q_pages - 1)) {
+ dev_err(&pdev->dev, "Number wq pages must be power of 2\n");
+ return -EINVAL;
+ }
+
+ wq->num_q_pages = num_q_pages;
+
+ err = alloc_wqes_shadow(wq);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate wqe shadow\n");
+ return err;
+ }
+
+ for (i = 0; i < num_q_pages; i++) {
+ void **vaddr = &wq->shadow_block_vaddr[i];
+ u64 *paddr = &wq->block_vaddr[i];
+ dma_addr_t dma_addr;
+
+ *vaddr = dma_zalloc_coherent(&pdev->dev, wq->wq_page_size,
+ &dma_addr, GFP_KERNEL);
+ if (!*vaddr) {
+ dev_err(&pdev->dev, "Failed to allocate wq page\n");
+ goto err_alloc_wq_pages;
+ }
+
+ /* HW uses Big Endian Format */
+ *paddr = cpu_to_be64(dma_addr);
+ }
+
+ return 0;
+
+err_alloc_wq_pages:
+ free_wq_pages(wq, hwif, i);
+ return -ENOMEM;
+}
+
+/**
+ * hinic_wq_allocate - Allocate the WQ resources from the WQS
+ * @wqs: WQ set from which to allocate the WQ resources
+ * @wq: WQ to allocate resources for from the WQ set
+ * @wqebb_size: Work Queue Block Byte Size
+ * @wq_page_size: the page size in the Work Queue
+ * @q_depth: number of wqebbs in WQ
+ * @max_wqe_size: maximum WQE size that will be used in the WQ
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_wq_allocate(struct hinic_wqs *wqs, struct hinic_wq *wq,
+ u16 wqebb_size, u16 wq_page_size, u16 q_depth,
+ u16 max_wqe_size)
+{
+ struct hinic_hwif *hwif = wqs->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ u16 num_wqebbs_per_page;
+ int err;
+
+ if (wqebb_size == 0) {
+ dev_err(&pdev->dev, "wqebb_size must be > 0\n");
+ return -EINVAL;
+ }
+
+ if (wq_page_size == 0) {
+ dev_err(&pdev->dev, "wq_page_size must be > 0\n");
+ return -EINVAL;
+ }
+
+ if (q_depth & (q_depth - 1)) {
+ dev_err(&pdev->dev, "WQ q_depth must be power of 2\n");
+ return -EINVAL;
+ }
+
+ num_wqebbs_per_page = ALIGN(wq_page_size, wqebb_size) / wqebb_size;
+
+ if (num_wqebbs_per_page & (num_wqebbs_per_page - 1)) {
+ dev_err(&pdev->dev, "num wqebbs per page must be power of 2\n");
+ return -EINVAL;
+ }
+
+ wq->hwif = hwif;
+
+ err = wqs_next_block(wqs, &wq->page_idx, &wq->block_idx);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to get free wqs next block\n");
+ return err;
+ }
+
+ wq->wqebb_size = wqebb_size;
+ wq->wq_page_size = wq_page_size;
+ wq->q_depth = q_depth;
+ wq->max_wqe_size = max_wqe_size;
+ wq->num_wqebbs_per_page = num_wqebbs_per_page;
+
+ wq->block_vaddr = WQ_BASE_VADDR(wqs, wq);
+ wq->shadow_block_vaddr = WQ_BASE_ADDR(wqs, wq);
+ wq->block_paddr = WQ_BASE_PADDR(wqs, wq);
+
+ err = alloc_wq_pages(wq, wqs->hwif, WQ_MAX_PAGES);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate wq pages\n");
+ goto err_alloc_wq_pages;
+ }
+
+ atomic_set(&wq->cons_idx, 0);
+ atomic_set(&wq->prod_idx, 0);
+ atomic_set(&wq->delta, q_depth);
+ wq->mask = q_depth - 1;
+
+ return 0;
+
+err_alloc_wq_pages:
+ wqs_return_block(wqs, wq->page_idx, wq->block_idx);
+ return err;
+}
+
+/**
+ * hinic_wq_free - Free the WQ resources to the WQS
+ * @wqs: WQ set to return the WQ resources to
+ * @wq: WQ whose resources are returned to the WQ set
+ **/
+void hinic_wq_free(struct hinic_wqs *wqs, struct hinic_wq *wq)
+{
+ free_wq_pages(wq, wqs->hwif, wq->num_q_pages);
+
+ wqs_return_block(wqs, wq->page_idx, wq->block_idx);
+}
+
+/**
+ * hinic_wqs_cmdq_alloc - Allocate wqs for cmdqs
+ * @cmdq_pages: will hold the pages of the cmdq
+ * @wq: returned wqs
+ * @hwif: HW interface
+ * @cmdq_blocks: number of cmdq blocks/wq to allocate
+ * @wqebb_size: Work Queue Block Byte Size
+ * @wq_page_size: the page size in the Work Queue
+ * @q_depth: number of wqebbs in WQ
+ * @max_wqe_size: maximum WQE size that will be used in the WQ
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_wqs_cmdq_alloc(struct hinic_cmdq_pages *cmdq_pages,
+ struct hinic_wq *wq, struct hinic_hwif *hwif,
+ int cmdq_blocks, u16 wqebb_size, u16 wq_page_size,
+ u16 q_depth, u16 max_wqe_size)
+{
+ struct pci_dev *pdev = hwif->pdev;
+ u16 num_wqebbs_per_page;
+ int i, j, err = -ENOMEM;
+
+ if (wqebb_size == 0) {
+ dev_err(&pdev->dev, "wqebb_size must be > 0\n");
+ return -EINVAL;
+ }
+
+ if (wq_page_size == 0) {
+ dev_err(&pdev->dev, "wq_page_size must be > 0\n");
+ return -EINVAL;
+ }
+
+ if (q_depth & (q_depth - 1)) {
+ dev_err(&pdev->dev, "WQ q_depth must be power of 2\n");
+ return -EINVAL;
+ }
+
+ num_wqebbs_per_page = ALIGN(wq_page_size, wqebb_size) / wqebb_size;
+
+ if (num_wqebbs_per_page & (num_wqebbs_per_page - 1)) {
+ dev_err(&pdev->dev, "num wqebbs per page must be power of 2\n");
+ return -EINVAL;
+ }
+
+ cmdq_pages->hwif = hwif;
+
+ err = cmdq_allocate_page(cmdq_pages);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate CMDQ page\n");
+ return err;
+ }
+
+ for (i = 0; i < cmdq_blocks; i++) {
+ wq[i].hwif = hwif;
+ wq[i].page_idx = 0;
+ wq[i].block_idx = i;
+
+ wq[i].wqebb_size = wqebb_size;
+ wq[i].wq_page_size = wq_page_size;
+ wq[i].q_depth = q_depth;
+ wq[i].max_wqe_size = max_wqe_size;
+ wq[i].num_wqebbs_per_page = num_wqebbs_per_page;
+
+ wq[i].block_vaddr = CMDQ_BASE_VADDR(cmdq_pages, &wq[i]);
+ wq[i].shadow_block_vaddr = CMDQ_BASE_ADDR(cmdq_pages, &wq[i]);
+ wq[i].block_paddr = CMDQ_BASE_PADDR(cmdq_pages, &wq[i]);
+
+ err = alloc_wq_pages(&wq[i], cmdq_pages->hwif,
+ CMDQ_WQ_MAX_PAGES);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to alloc CMDQ blocks\n");
+ goto err_cmdq_block;
+ }
+
+ atomic_set(&wq[i].cons_idx, 0);
+ atomic_set(&wq[i].prod_idx, 0);
+ atomic_set(&wq[i].delta, q_depth);
+ wq[i].mask = q_depth - 1;
+ }
+
+ return 0;
+
+err_cmdq_block:
+ for (j = 0; j < i; j++)
+ free_wq_pages(&wq[j], cmdq_pages->hwif, wq[j].num_q_pages);
+
+ cmdq_free_page(cmdq_pages);
+ return err;
+}
+
+/**
+ * hinic_wqs_cmdq_free - Free wqs from cmdqs
+ * @cmdq_pages: hold the pages of the cmdq
+ * @wq: wqs to free
+ * @cmdq_blocks: number of wqs to free
+ **/
+void hinic_wqs_cmdq_free(struct hinic_cmdq_pages *cmdq_pages,
+ struct hinic_wq *wq, int cmdq_blocks)
+{
+ int i;
+
+ for (i = 0; i < cmdq_blocks; i++)
+ free_wq_pages(&wq[i], cmdq_pages->hwif, wq[i].num_q_pages);
+
+ cmdq_free_page(cmdq_pages);
+}
+
+static void copy_wqe_to_shadow(struct hinic_wq *wq, void *shadow_addr,
+ int num_wqebbs, u16 idx)
+{
+ void *wqebb_addr;
+ int i;
+
+ for (i = 0; i < num_wqebbs; i++, idx++) {
+ idx = MASKED_WQE_IDX(wq, idx);
+ wqebb_addr = WQ_PAGE_ADDR(wq, idx) +
+ WQE_PAGE_OFF(wq, idx);
+
+ memcpy(shadow_addr, wqebb_addr, wq->wqebb_size);
+
+ shadow_addr += wq->wqebb_size;
+ }
+}
+
+static void copy_wqe_from_shadow(struct hinic_wq *wq, void *shadow_addr,
+ int num_wqebbs, u16 idx)
+{
+ void *wqebb_addr;
+ int i;
+
+ for (i = 0; i < num_wqebbs; i++, idx++) {
+ idx = MASKED_WQE_IDX(wq, idx);
+ wqebb_addr = WQ_PAGE_ADDR(wq, idx) +
+ WQE_PAGE_OFF(wq, idx);
+
+ memcpy(wqebb_addr, shadow_addr, wq->wqebb_size);
+ shadow_addr += wq->wqebb_size;
+ }
+}
+
+/**
+ * hinic_get_wqe - get a wqe pointer at the current pi and advance the pi
+ * @wq: wq to get wqe from
+ * @wqe_size: wqe size
+ * @prod_idx: returned pi
+ *
+ * Return wqe pointer
+ **/
+struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
+ u16 *prod_idx)
+{
+ int curr_pg, end_pg, num_wqebbs;
+ u16 curr_prod_idx, end_prod_idx;
+
+ *prod_idx = MASKED_WQE_IDX(wq, atomic_read(&wq->prod_idx));
+
+ num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+
+ if (atomic_sub_return(num_wqebbs, &wq->delta) <= 0) {
+ atomic_add(num_wqebbs, &wq->delta);
+ return ERR_PTR(-EBUSY);
+ }
+
+ end_prod_idx = atomic_add_return(num_wqebbs, &wq->prod_idx);
+
+ end_prod_idx = MASKED_WQE_IDX(wq, end_prod_idx);
+ curr_prod_idx = end_prod_idx - num_wqebbs;
+ curr_prod_idx = MASKED_WQE_IDX(wq, curr_prod_idx);
+
+ /* end prod index points to the next wqebb, therefore minus 1 */
+ end_prod_idx = MASKED_WQE_IDX(wq, end_prod_idx - 1);
+
+ curr_pg = WQE_PAGE_NUM(wq, curr_prod_idx);
+ end_pg = WQE_PAGE_NUM(wq, end_prod_idx);
+
+ *prod_idx = curr_prod_idx;
+
+ if (curr_pg != end_pg) {
+ void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size];
+
+ copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *prod_idx);
+
+ wq->shadow_idx[curr_pg] = *prod_idx;
+ return shadow_addr;
+ }
+
+ return WQ_PAGE_ADDR(wq, *prod_idx) + WQE_PAGE_OFF(wq, *prod_idx);
+}
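+
+/*
+ * Producer flow sketch (error handling trimmed): reserve wqebbs at the
+ * pi, fill the returned wqe - which may point into shadow_wqe when it
+ * crosses a page boundary - then commit it with hinic_write_wqe(),
+ * which copies a shadow wqe back into the queue pages:
+ *
+ *	u16 pi;
+ *	struct hinic_hw_wqe *wqe = hinic_get_wqe(wq, wqe_size, &pi);
+ *
+ *	if (IS_ERR(wqe))
+ *		return PTR_ERR(wqe);
+ *	... build the wqe ...
+ *	hinic_write_wqe(wq, wqe, wqe_size);
+ */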
+
+/**
+ * hinic_put_wqe - release the wqebbs of a wqe so the space can be reused
+ * @wq: wq to return wqe
+ * @wqe_size: wqe size
+ **/
+void hinic_put_wqe(struct hinic_wq *wq, unsigned int wqe_size)
+{
+ int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+
+ atomic_add(num_wqebbs, &wq->cons_idx);
+
+ atomic_add(num_wqebbs, &wq->delta);
+}
+
+/**
+ * hinic_read_wqe - get a wqe pointer at the current ci
+ * @wq: wq to read from
+ * @wqe_size: wqe size
+ * @cons_idx: returned ci
+ *
+ * Return wqe pointer
+ **/
+struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size,
+ u16 *cons_idx)
+{
+ int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+ u16 curr_cons_idx, end_cons_idx;
+ int curr_pg, end_pg;
+
+ if ((atomic_read(&wq->delta) + num_wqebbs) > wq->q_depth)
+ return ERR_PTR(-EBUSY);
+
+ curr_cons_idx = atomic_read(&wq->cons_idx);
+
+ curr_cons_idx = MASKED_WQE_IDX(wq, curr_cons_idx);
+ end_cons_idx = MASKED_WQE_IDX(wq, curr_cons_idx + num_wqebbs - 1);
+
+ curr_pg = WQE_PAGE_NUM(wq, curr_cons_idx);
+ end_pg = WQE_PAGE_NUM(wq, end_cons_idx);
+
+ *cons_idx = curr_cons_idx;
+
+ if (curr_pg != end_pg) {
+ void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size];
+
+ copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *cons_idx);
+ return shadow_addr;
+ }
+
+ return WQ_PAGE_ADDR(wq, *cons_idx) + WQE_PAGE_OFF(wq, *cons_idx);
+}
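+
+/*
+ * Consumer flow sketch: peek at the wqe at the current ci, process it,
+ * then release its wqebbs so producers can reuse the space:
+ *
+ *	u16 ci;
+ *	struct hinic_hw_wqe *wqe = hinic_read_wqe(wq, wqe_size, &ci);
+ *
+ *	if (!IS_ERR(wqe)) {
+ *		... handle the completed wqe ...
+ *		hinic_put_wqe(wq, wqe_size);
+ *	}
+ */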
+
+/**
+ * hinic_read_wqe_direct - read wqe directly from ci position
+ * @wq: wq
+ * @cons_idx: ci position
+ *
+ * Return wqe
+ **/
+struct hinic_hw_wqe *hinic_read_wqe_direct(struct hinic_wq *wq, u16 cons_idx)
+{
+ return WQ_PAGE_ADDR(wq, cons_idx) + WQE_PAGE_OFF(wq, cons_idx);
+}
+
+/**
+ * wqe_shadow - check if a wqe is a shadow wqe
+ * @wq: wq of the wqe
+ * @wqe: the wqe to check
+ *
+ * Return true - shadow, false - Not shadow
+ **/
+static inline bool wqe_shadow(struct hinic_wq *wq, struct hinic_hw_wqe *wqe)
+{
+ size_t wqe_shadow_size = wq->num_q_pages * wq->max_wqe_size;
+
+ return WQE_IN_RANGE(wqe, wq->shadow_wqe,
+ &wq->shadow_wqe[wqe_shadow_size]);
+}
+
+/**
+ * hinic_write_wqe - write the wqe to the wq
+ * @wq: wq to write wqe to
+ * @wqe: wqe to write
+ * @wqe_size: wqe size
+ **/
+void hinic_write_wqe(struct hinic_wq *wq, struct hinic_hw_wqe *wqe,
+ unsigned int wqe_size)
+{
+ int curr_pg, num_wqebbs;
+ void *shadow_addr;
+ u16 prod_idx;
+
+ if (wqe_shadow(wq, wqe)) {
+ curr_pg = WQE_SHADOW_PAGE(wq, wqe);
+
+ prod_idx = wq->shadow_idx[curr_pg];
+ num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+ shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size];
+
+ copy_wqe_from_shadow(wq, shadow_addr, num_wqebbs, prod_idx);
+ }
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
new file mode 100644
index 0000000..9c030a0
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
@@ -0,0 +1,117 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_HW_WQ_H
+#define HINIC_HW_WQ_H
+
+#include <linux/types.h>
+#include <linux/semaphore.h>
+#include <linux/atomic.h>
+
+#include "hinic_hw_if.h"
+#include "hinic_hw_wqe.h"
+
+struct hinic_free_block {
+ int page_idx;
+ int block_idx;
+};
+
+struct hinic_wq {
+ struct hinic_hwif *hwif;
+
+ int page_idx;
+ int block_idx;
+
+ u16 wqebb_size;
+ u16 wq_page_size;
+ u16 q_depth;
+ u16 max_wqe_size;
+ u16 num_wqebbs_per_page;
+
+ /* The addresses are 64 bit in the HW */
+ u64 block_paddr;
+ void **shadow_block_vaddr;
+ u64 *block_vaddr;
+
+ int num_q_pages;
+ u8 *shadow_wqe;
+ u16 *shadow_idx;
+
+ atomic_t cons_idx;
+ atomic_t prod_idx;
+ atomic_t delta;
+ u16 mask;
+};
+
+struct hinic_wqs {
+ struct hinic_hwif *hwif;
+ int num_pages;
+
+ /* The addresses are 64 bit in the HW */
+ u64 *page_paddr;
+ u64 **page_vaddr;
+ void ***shadow_page_vaddr;
+
+ struct hinic_free_block *free_blocks;
+ int alloc_blk_pos;
+ int return_blk_pos;
+ int num_free_blks;
+
+ /* Lock for getting a free block from the WQ set */
+ struct semaphore alloc_blocks_lock;
+};
+
+struct hinic_cmdq_pages {
+ /* The addresses are 64 bit in the HW */
+ u64 page_paddr;
+ u64 *page_vaddr;
+ void **shadow_page_vaddr;
+
+ struct hinic_hwif *hwif;
+};
+
+int hinic_wqs_cmdq_alloc(struct hinic_cmdq_pages *cmdq_pages,
+ struct hinic_wq *wq, struct hinic_hwif *hwif,
+ int cmdq_blocks, u16 wqebb_size, u16 wq_page_size,
+ u16 q_depth, u16 max_wqe_size);
+
+void hinic_wqs_cmdq_free(struct hinic_cmdq_pages *cmdq_pages,
+ struct hinic_wq *wq, int cmdq_blocks);
+
+int hinic_wqs_alloc(struct hinic_wqs *wqs, int num_wqs,
+ struct hinic_hwif *hwif);
+
+void hinic_wqs_free(struct hinic_wqs *wqs);
+
+int hinic_wq_allocate(struct hinic_wqs *wqs, struct hinic_wq *wq,
+ u16 wqebb_size, u16 wq_page_size, u16 q_depth,
+ u16 max_wqe_size);
+
+void hinic_wq_free(struct hinic_wqs *wqs, struct hinic_wq *wq);
+
+struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
+ u16 *prod_idx);
+
+void hinic_put_wqe(struct hinic_wq *wq, unsigned int wqe_size);
+
+struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size,
+ u16 *cons_idx);
+
+struct hinic_hw_wqe *hinic_read_wqe_direct(struct hinic_wq *wq, u16 cons_idx);
+
+void hinic_write_wqe(struct hinic_wq *wq, struct hinic_hw_wqe *wqe,
+ unsigned int wqe_size);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
new file mode 100644
index 0000000..bc73485
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
@@ -0,0 +1,368 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_HW_WQE_H
+#define HINIC_HW_WQE_H
+
+#include "hinic_common.h"
+
+#define HINIC_CMDQ_CTRL_PI_SHIFT 0
+#define HINIC_CMDQ_CTRL_CMD_SHIFT 16
+#define HINIC_CMDQ_CTRL_MOD_SHIFT 24
+#define HINIC_CMDQ_CTRL_ACK_TYPE_SHIFT 29
+#define HINIC_CMDQ_CTRL_HW_BUSY_BIT_SHIFT 31
+
+#define HINIC_CMDQ_CTRL_PI_MASK 0xFFFF
+#define HINIC_CMDQ_CTRL_CMD_MASK 0xFF
+#define HINIC_CMDQ_CTRL_MOD_MASK 0x1F
+#define HINIC_CMDQ_CTRL_ACK_TYPE_MASK 0x3
+#define HINIC_CMDQ_CTRL_HW_BUSY_BIT_MASK 0x1
+
+#define HINIC_CMDQ_CTRL_SET(val, member) \
+ (((u32)(val) & HINIC_CMDQ_CTRL_##member##_MASK) \
+ << HINIC_CMDQ_CTRL_##member##_SHIFT)
+
+#define HINIC_CMDQ_CTRL_GET(val, member) \
+ (((val) >> HINIC_CMDQ_CTRL_##member##_SHIFT) \
+ & HINIC_CMDQ_CTRL_##member##_MASK)
+
+#define HINIC_CMDQ_WQE_HEADER_BUFDESC_LEN_SHIFT 0
+#define HINIC_CMDQ_WQE_HEADER_COMPLETE_FMT_SHIFT 15
+#define HINIC_CMDQ_WQE_HEADER_DATA_FMT_SHIFT 22
+#define HINIC_CMDQ_WQE_HEADER_COMPLETE_REQ_SHIFT 23
+#define HINIC_CMDQ_WQE_HEADER_COMPLETE_SECT_LEN_SHIFT 27
+#define HINIC_CMDQ_WQE_HEADER_CTRL_LEN_SHIFT 29
+#define HINIC_CMDQ_WQE_HEADER_TOGGLED_WRAPPED_SHIFT 31
+
+#define HINIC_CMDQ_WQE_HEADER_BUFDESC_LEN_MASK 0xFF
+#define HINIC_CMDQ_WQE_HEADER_COMPLETE_FMT_MASK 0x1
+#define HINIC_CMDQ_WQE_HEADER_DATA_FMT_MASK 0x1
+#define HINIC_CMDQ_WQE_HEADER_COMPLETE_REQ_MASK 0x1
+#define HINIC_CMDQ_WQE_HEADER_COMPLETE_SECT_LEN_MASK 0x3
+#define HINIC_CMDQ_WQE_HEADER_CTRL_LEN_MASK 0x3
+#define HINIC_CMDQ_WQE_HEADER_TOGGLED_WRAPPED_MASK 0x1
+
+#define HINIC_CMDQ_WQE_HEADER_SET(val, member) \
+ (((u32)(val) & HINIC_CMDQ_WQE_HEADER_##member##_MASK) \
+ << HINIC_CMDQ_WQE_HEADER_##member##_SHIFT)
+
+#define HINIC_CMDQ_WQE_HEADER_GET(val, member) \
+ (((val) >> HINIC_CMDQ_WQE_HEADER_##member##_SHIFT) \
+ & HINIC_CMDQ_WQE_HEADER_##member##_MASK)
+
+#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT 0
+#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT 16
+#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT 22
+#define HINIC_SQ_CTRL_LEN_SHIFT 29
+
+#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK 0xFF
+#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK 0x1F
+#define HINIC_SQ_CTRL_DATA_FORMAT_MASK 0x1
+#define HINIC_SQ_CTRL_LEN_MASK 0x3
+
+#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT 13
+
+#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK 0x3FFF
+
+#define HINIC_SQ_CTRL_SET(val, member) \
+ (((u32)(val) & HINIC_SQ_CTRL_##member##_MASK) \
+ << HINIC_SQ_CTRL_##member##_SHIFT)
+
+#define HINIC_SQ_CTRL_GET(val, member) \
+ (((val) >> HINIC_SQ_CTRL_##member##_SHIFT) \
+ & HINIC_SQ_CTRL_##member##_MASK)
+
+#define HINIC_SQ_TASK_INFO0_L2HDR_LEN_SHIFT 0
+#define HINIC_SQ_TASK_INFO0_L4_OFFLOAD_SHIFT 8
+#define HINIC_SQ_TASK_INFO0_INNER_L3TYPE_SHIFT 10
+#define HINIC_SQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT 12
+#define HINIC_SQ_TASK_INFO0_PARSE_FLAG_SHIFT 13
+/* 1 bit reserved */
+#define HINIC_SQ_TASK_INFO0_TSO_FLAG_SHIFT 15
+#define HINIC_SQ_TASK_INFO0_VLAN_TAG_SHIFT 16
+
+#define HINIC_SQ_TASK_INFO0_L2HDR_LEN_MASK 0xFF
+#define HINIC_SQ_TASK_INFO0_L4_OFFLOAD_MASK 0x3
+#define HINIC_SQ_TASK_INFO0_INNER_L3TYPE_MASK 0x3
+#define HINIC_SQ_TASK_INFO0_VLAN_OFFLOAD_MASK 0x1
+#define HINIC_SQ_TASK_INFO0_PARSE_FLAG_MASK 0x1
+/* 1 bit reserved */
+#define HINIC_SQ_TASK_INFO0_TSO_FLAG_MASK 0x1
+#define HINIC_SQ_TASK_INFO0_VLAN_TAG_MASK 0xFFFF
+
+#define HINIC_SQ_TASK_INFO0_SET(val, member) \
+ (((u32)(val) & HINIC_SQ_TASK_INFO0_##member##_MASK) << \
+ HINIC_SQ_TASK_INFO0_##member##_SHIFT)
+
+/* 8 bits reserved */
+#define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_SHIFT 8
+#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_SHIFT 16
+#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_SHIFT 24
+
+/* 8 bits reserved */
+#define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_MASK 0xFF
+#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_MASK 0xFF
+#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_MASK 0xFF
+
+#define HINIC_SQ_TASK_INFO1_SET(val, member) \
+ (((u32)(val) & HINIC_SQ_TASK_INFO1_##member##_MASK) << \
+ HINIC_SQ_TASK_INFO1_##member##_SHIFT)
+
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_SHIFT 0
+#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_SHIFT 12
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 19
+/* 1 bit reserved */
+#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT 22
+/* 8 bits reserved */
+
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_MASK 0xFFF
+#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_MASK 0x7F
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK 0x3
+/* 1 bit reserved */
+#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_MASK 0x3
+/* 8 bits reserved */
+
+#define HINIC_SQ_TASK_INFO2_SET(val, member) \
+ (((u32)(val) & HINIC_SQ_TASK_INFO2_##member##_MASK) << \
+ HINIC_SQ_TASK_INFO2_##member##_SHIFT)
+
+/* 31 bits reserved */
+#define HINIC_SQ_TASK_INFO4_L2TYPE_SHIFT 31
+
+/* 31 bits reserved */
+#define HINIC_SQ_TASK_INFO4_L2TYPE_MASK 0x1
+
+#define HINIC_SQ_TASK_INFO4_SET(val, member) \
+ (((u32)(val) & HINIC_SQ_TASK_INFO4_##member##_MASK) << \
+ HINIC_SQ_TASK_INFO4_##member##_SHIFT)
+
+#define HINIC_RQ_CQE_STATUS_RXDONE_SHIFT 31
+
+#define HINIC_RQ_CQE_STATUS_RXDONE_MASK 0x1
+
+#define HINIC_RQ_CQE_STATUS_GET(val, member) \
+ (((val) >> HINIC_RQ_CQE_STATUS_##member##_SHIFT) & \
+ HINIC_RQ_CQE_STATUS_##member##_MASK)
+
+#define HINIC_RQ_CQE_STATUS_CLEAR(val, member) \
+ ((val) & (~(HINIC_RQ_CQE_STATUS_##member##_MASK << \
+ HINIC_RQ_CQE_STATUS_##member##_SHIFT)))
+
+#define HINIC_RQ_CQE_SGE_LEN_SHIFT 16
+
+#define HINIC_RQ_CQE_SGE_LEN_MASK 0xFFFF
+
+#define HINIC_RQ_CQE_SGE_GET(val, member) \
+ (((val) >> HINIC_RQ_CQE_SGE_##member##_SHIFT) & \
+ HINIC_RQ_CQE_SGE_##member##_MASK)
+
+#define HINIC_RQ_CTRL_BUFDESC_SECT_LEN_SHIFT 0
+#define HINIC_RQ_CTRL_COMPLETE_FORMAT_SHIFT 15
+#define HINIC_RQ_CTRL_COMPLETE_LEN_SHIFT 27
+#define HINIC_RQ_CTRL_LEN_SHIFT 29
+
+#define HINIC_RQ_CTRL_BUFDESC_SECT_LEN_MASK 0xFF
+#define HINIC_RQ_CTRL_COMPLETE_FORMAT_MASK 0x1
+#define HINIC_RQ_CTRL_COMPLETE_LEN_MASK 0x3
+#define HINIC_RQ_CTRL_LEN_MASK 0x3
+
+#define HINIC_RQ_CTRL_SET(val, member) \
+ (((u32)(val) & HINIC_RQ_CTRL_##member##_MASK) << \
+ HINIC_RQ_CTRL_##member##_SHIFT)
+
+#define HINIC_SQ_WQE_SIZE(nr_sges) \
+ (sizeof(struct hinic_sq_ctrl) + \
+ sizeof(struct hinic_sq_task) + \
+ (nr_sges) * sizeof(struct hinic_sq_bufdesc))
+
+#define HINIC_SCMD_DATA_LEN 16
+
+#define HINIC_MAX_SQ_BUFDESCS 17
+
+#define HINIC_SQ_WQE_MAX_SIZE 320
+#define HINIC_RQ_WQE_SIZE 32
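+
+/*
+ * Size sketch: assuming struct hinic_sge is three u32s (see
+ * hinic_common.h), a bufdesc is 16 bytes and HINIC_SQ_WQE_SIZE(n) is
+ * 8 + 24 + 16 * n. The largest SQ wqe, with HINIC_MAX_SQ_BUFDESCS (17)
+ * sges, is then 304 bytes, within HINIC_SQ_WQE_MAX_SIZE (320).
+ */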
+
+enum hinic_l4offload_type {
+ HINIC_L4_OFF_DISABLE = 0,
+ HINIC_TCP_OFFLOAD_ENABLE = 1,
+ HINIC_SCTP_OFFLOAD_ENABLE = 2,
+ HINIC_UDP_OFFLOAD_ENABLE = 3,
+};
+
+enum hinic_vlan_offload {
+ HINIC_VLAN_OFF_DISABLE = 0,
+ HINIC_VLAN_OFF_ENABLE = 1,
+};
+
+enum hinic_pkt_parsed {
+ HINIC_PKT_NOT_PARSED = 0,
+ HINIC_PKT_PARSED = 1,
+};
+
+enum hinic_outer_l3type {
+ HINIC_OUTER_L3TYPE_UNKNOWN = 0,
+ HINIC_OUTER_L3TYPE_IPV6 = 1,
+ HINIC_OUTER_L3TYPE_IPV4_NO_CHKSUM = 2,
+ HINIC_OUTER_L3TYPE_IPV4_CHKSUM = 3,
+};
+
+enum hinic_media_type {
+ HINIC_MEDIA_UNKNOWN = 0,
+};
+
+enum hinic_l2type {
+ HINIC_L2TYPE_ETH = 0,
+};
+
+enum hinc_tunnel_l4type {
+ HINIC_TUNNEL_L4TYPE_UNKNOWN = 0,
+};
+
+struct hinic_cmdq_header {
+ u32 header_info;
+ u32 saved_data;
+};
+
+struct hinic_status {
+ u32 status_info;
+};
+
+struct hinic_ctrl {
+ u32 ctrl_info;
+};
+
+struct hinic_sge_resp {
+ struct hinic_sge sge;
+ u32 rsvd;
+};
+
+struct hinic_cmdq_completion {
+ /* HW Format */
+ union {
+ struct hinic_sge_resp sge_resp;
+ u64 direct_resp;
+ };
+};
+
+struct hinic_scmd_bufdesc {
+ u32 buf_len;
+ u32 rsvd;
+ u8 data[HINIC_SCMD_DATA_LEN];
+};
+
+struct hinic_lcmd_bufdesc {
+ struct hinic_sge sge;
+ u32 rsvd1;
+ u64 rsvd2;
+ u64 rsvd3;
+};
+
+struct hinic_cmdq_wqe_scmd {
+ struct hinic_cmdq_header header;
+ u64 rsvd;
+ struct hinic_status status;
+ struct hinic_ctrl ctrl;
+ struct hinic_cmdq_completion completion;
+ struct hinic_scmd_bufdesc buf_desc;
+};
+
+struct hinic_cmdq_wqe_lcmd {
+ struct hinic_cmdq_header header;
+ struct hinic_status status;
+ struct hinic_ctrl ctrl;
+ struct hinic_cmdq_completion completion;
+ struct hinic_lcmd_bufdesc buf_desc;
+};
+
+struct hinic_cmdq_direct_wqe {
+ struct hinic_cmdq_wqe_scmd wqe_scmd;
+};
+
+struct hinic_cmdq_wqe {
+ /* HW Format */
+ union {
+ struct hinic_cmdq_direct_wqe direct_wqe;
+ struct hinic_cmdq_wqe_lcmd wqe_lcmd;
+ };
+};
+
+struct hinic_sq_ctrl {
+ u32 ctrl_info;
+ u32 queue_info;
+};
+
+struct hinic_sq_task {
+ u32 pkt_info0;
+ u32 pkt_info1;
+ u32 pkt_info2;
+ u32 ufo_v6_identify;
+ u32 pkt_info4;
+ u32 zero_pad;
+};
+
+struct hinic_sq_bufdesc {
+ struct hinic_sge sge;
+ u32 rsvd;
+};
+
+struct hinic_sq_wqe {
+ struct hinic_sq_ctrl ctrl;
+ struct hinic_sq_task task;
+ struct hinic_sq_bufdesc buf_descs[HINIC_MAX_SQ_BUFDESCS];
+};
+
+struct hinic_rq_cqe {
+ u32 status;
+ u32 len;
+
+ u32 rsvd2;
+ u32 rsvd3;
+ u32 rsvd4;
+ u32 rsvd5;
+ u32 rsvd6;
+ u32 rsvd7;
+};
+
+struct hinic_rq_ctrl {
+ u32 ctrl_info;
+};
+
+struct hinic_rq_cqe_sect {
+ struct hinic_sge sge;
+ u32 rsvd;
+};
+
+struct hinic_rq_bufdesc {
+ u32 hi_addr;
+ u32 lo_addr;
+};
+
+struct hinic_rq_wqe {
+ struct hinic_rq_ctrl ctrl;
+ u32 rsvd;
+ struct hinic_rq_cqe_sect cqe_sect;
+ struct hinic_rq_bufdesc buf_desc;
+};
+
+struct hinic_hw_wqe {
+ /* HW Format */
+ union {
+ struct hinic_cmdq_wqe cmdq_wqe;
+ struct hinic_sq_wqe sq_wqe;
+ struct hinic_rq_wqe rq_wqe;
+ };
+};
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
new file mode 100644
index 0000000..eb53bd9
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -0,0 +1,1112 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/if_vlan.h>
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <net/ip.h>
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+
+#include "hinic_hw_qp.h"
+#include "hinic_hw_dev.h"
+#include "hinic_port.h"
+#include "hinic_tx.h"
+#include "hinic_rx.h"
+#include "hinic_dev.h"
+
+MODULE_AUTHOR("Huawei Technologies CO., Ltd");
+MODULE_DESCRIPTION("Huawei Intelligent NIC driver");
+MODULE_LICENSE("GPL");
+
+static unsigned int tx_weight = 64;
+module_param(tx_weight, uint, 0644);
+MODULE_PARM_DESC(tx_weight, "Number of Tx packets for NAPI budget (default=64)");
+
+static unsigned int rx_weight = 64;
+module_param(rx_weight, uint, 0644);
+MODULE_PARM_DESC(rx_weight, "Number of Rx packets for NAPI budget (default=64)");
+
+#define PCI_DEVICE_ID_HI1822_PF 0x1822
+
+#define HINIC_WQ_NAME "hinic_dev"
+
+#define MSG_ENABLE_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | \
+ NETIF_MSG_IFUP | \
+ NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
+
+#define VLAN_BITMAP_SIZE(nic_dev) (ALIGN(VLAN_N_VID, 8) / 8)
+
+#define work_to_rx_mode_work(work) \
+ container_of(work, struct hinic_rx_mode_work, work)
+
+#define rx_mode_work_to_nic_dev(rx_mode_work) \
+ container_of(rx_mode_work, struct hinic_dev, rx_mode_work)
+
+static int change_mac_addr(struct net_device *netdev, const u8 *addr);
+
+static void set_link_speed(struct ethtool_link_ksettings *link_ksettings,
+ enum hinic_speed speed)
+{
+ switch (speed) {
+ case HINIC_SPEED_10MB_LINK:
+ link_ksettings->base.speed = SPEED_10;
+ break;
+
+ case HINIC_SPEED_100MB_LINK:
+ link_ksettings->base.speed = SPEED_100;
+ break;
+
+ case HINIC_SPEED_1000MB_LINK:
+ link_ksettings->base.speed = SPEED_1000;
+ break;
+
+ case HINIC_SPEED_10GB_LINK:
+ link_ksettings->base.speed = SPEED_10000;
+ break;
+
+ case HINIC_SPEED_25GB_LINK:
+ link_ksettings->base.speed = SPEED_25000;
+ break;
+
+ case HINIC_SPEED_40GB_LINK:
+ link_ksettings->base.speed = SPEED_40000;
+ break;
+
+ case HINIC_SPEED_100GB_LINK:
+ link_ksettings->base.speed = SPEED_100000;
+ break;
+
+ default:
+ link_ksettings->base.speed = SPEED_UNKNOWN;
+ break;
+ }
+}
+
+static int hinic_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings
+ *link_ksettings)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ enum hinic_port_link_state link_state;
+ struct hinic_port_cap port_cap;
+ int err;
+
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+ ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
+ Autoneg);
+
+ link_ksettings->base.speed = SPEED_UNKNOWN;
+ link_ksettings->base.autoneg = AUTONEG_DISABLE;
+ link_ksettings->base.duplex = DUPLEX_UNKNOWN;
+
+ err = hinic_port_get_cap(nic_dev, &port_cap);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to get port capabilities\n");
+ return err;
+ }
+
+ err = hinic_port_link_state(nic_dev, &link_state);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to get port link state\n");
+ return err;
+ }
+
+ if (link_state != HINIC_LINK_STATE_UP) {
+ netif_info(nic_dev, drv, netdev, "No link\n");
+ return err;
+ }
+
+ set_link_speed(link_ksettings, port_cap.speed);
+
+ if (!!(port_cap.autoneg_cap & HINIC_AUTONEG_SUPPORTED))
+ ethtool_link_ksettings_add_link_mode(link_ksettings,
+ advertising, Autoneg);
+
+ if (port_cap.autoneg_state == HINIC_AUTONEG_ACTIVE)
+ link_ksettings->base.autoneg = AUTONEG_ENABLE;
+
+ link_ksettings->base.duplex = (port_cap.duplex == HINIC_DUPLEX_FULL) ?
+ DUPLEX_FULL : DUPLEX_HALF;
+ return 0;
+}
+
+static void hinic_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *info)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_hwif *hwif = hwdev->hwif;
+
+ strlcpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver));
+ strlcpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info));
+}
+
+static void hinic_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ring)
+{
+ ring->rx_max_pending = HINIC_RQ_DEPTH;
+ ring->tx_max_pending = HINIC_SQ_DEPTH;
+ ring->rx_pending = HINIC_RQ_DEPTH;
+ ring->tx_pending = HINIC_SQ_DEPTH;
+}
+
+static void hinic_get_channels(struct net_device *netdev,
+ struct ethtool_channels *channels)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+
+ channels->max_rx = hwdev->nic_cap.max_qps;
+ channels->max_tx = hwdev->nic_cap.max_qps;
+ channels->max_other = 0;
+ channels->max_combined = 0;
+ channels->rx_count = hinic_hwdev_num_qps(hwdev);
+ channels->tx_count = hinic_hwdev_num_qps(hwdev);
+ channels->other_count = 0;
+ channels->combined_count = 0;
+}
+
+static const struct ethtool_ops hinic_ethtool_ops = {
+ .get_link_ksettings = hinic_get_link_ksettings,
+ .get_drvinfo = hinic_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_ringparam = hinic_get_ringparam,
+ .get_channels = hinic_get_channels,
+};
+
+static void update_rx_stats(struct hinic_dev *nic_dev, struct hinic_rxq *rxq)
+{
+ struct hinic_rxq_stats *nic_rx_stats = &nic_dev->rx_stats;
+ struct hinic_rxq_stats rx_stats;
+
+ u64_stats_init(&rx_stats.syncp);
+
+ hinic_rxq_get_stats(rxq, &rx_stats);
+
+ u64_stats_update_begin(&nic_rx_stats->syncp);
+ nic_rx_stats->bytes += rx_stats.bytes;
+ nic_rx_stats->pkts += rx_stats.pkts;
+ u64_stats_update_end(&nic_rx_stats->syncp);
+
+ hinic_rxq_clean_stats(rxq);
+}
+
+static void update_tx_stats(struct hinic_dev *nic_dev, struct hinic_txq *txq)
+{
+ struct hinic_txq_stats *nic_tx_stats = &nic_dev->tx_stats;
+ struct hinic_txq_stats tx_stats;
+
+ u64_stats_init(&tx_stats.syncp);
+
+ hinic_txq_get_stats(txq, &tx_stats);
+
+ u64_stats_update_begin(&nic_tx_stats->syncp);
+ nic_tx_stats->bytes += tx_stats.bytes;
+ nic_tx_stats->pkts += tx_stats.pkts;
+ nic_tx_stats->tx_busy += tx_stats.tx_busy;
+ nic_tx_stats->tx_wake += tx_stats.tx_wake;
+ nic_tx_stats->tx_dropped += tx_stats.tx_dropped;
+ u64_stats_update_end(&nic_tx_stats->syncp);
+
+ hinic_txq_clean_stats(txq);
+}
+
+static void update_nic_stats(struct hinic_dev *nic_dev)
+{
+ int i, num_qps = hinic_hwdev_num_qps(nic_dev->hwdev);
+
+ for (i = 0; i < num_qps; i++)
+ update_rx_stats(nic_dev, &nic_dev->rxqs[i]);
+
+ for (i = 0; i < num_qps; i++)
+ update_tx_stats(nic_dev, &nic_dev->txqs[i]);
+}
+
+/**
+ * create_txqs - Create the Logical Tx Queues of the specific NIC device
+ * @nic_dev: the specific NIC device
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int create_txqs(struct hinic_dev *nic_dev)
+{
+ int err, i, j, num_txqs = hinic_hwdev_num_qps(nic_dev->hwdev);
+ struct net_device *netdev = nic_dev->netdev;
+ size_t txq_size;
+
+ if (nic_dev->txqs)
+ return -EINVAL;
+
+ txq_size = num_txqs * sizeof(*nic_dev->txqs);
+ nic_dev->txqs = devm_kzalloc(&netdev->dev, txq_size, GFP_KERNEL);
+ if (!nic_dev->txqs)
+ return -ENOMEM;
+
+ for (i = 0; i < num_txqs; i++) {
+ struct hinic_sq *sq = hinic_hwdev_get_sq(nic_dev->hwdev, i);
+
+ err = hinic_init_txq(&nic_dev->txqs[i], sq, netdev);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to init Txq\n");
+ goto err_init_txq;
+ }
+ }
+
+ return 0;
+
+err_init_txq:
+ for (j = 0; j < i; j++)
+ hinic_clean_txq(&nic_dev->txqs[j]);
+
+ devm_kfree(&netdev->dev, nic_dev->txqs);
+ return err;
+}
+
+/**
+ * free_txqs - Free the Logical Tx Queues of the specific NIC device
+ * @nic_dev: the specific NIC device
+ **/
+static void free_txqs(struct hinic_dev *nic_dev)
+{
+ int i, num_txqs = hinic_hwdev_num_qps(nic_dev->hwdev);
+ struct net_device *netdev = nic_dev->netdev;
+
+ if (!nic_dev->txqs)
+ return;
+
+ for (i = 0; i < num_txqs; i++)
+ hinic_clean_txq(&nic_dev->txqs[i]);
+
+ devm_kfree(&netdev->dev, nic_dev->txqs);
+ nic_dev->txqs = NULL;
+}
+
+/**
+ * create_rxqs - Create the Logical Rx Queues of the specific NIC device
+ * @nic_dev: the specific NIC device
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int create_rxqs(struct hinic_dev *nic_dev)
+{
+ int err, i, j, num_rxqs = hinic_hwdev_num_qps(nic_dev->hwdev);
+ struct net_device *netdev = nic_dev->netdev;
+ size_t rxq_size;
+
+ if (nic_dev->rxqs)
+ return -EINVAL;
+
+ rxq_size = num_rxqs * sizeof(*nic_dev->rxqs);
+ nic_dev->rxqs = devm_kzalloc(&netdev->dev, rxq_size, GFP_KERNEL);
+ if (!nic_dev->rxqs)
+ return -ENOMEM;
+
+ for (i = 0; i < num_rxqs; i++) {
+ struct hinic_rq *rq = hinic_hwdev_get_rq(nic_dev->hwdev, i);
+
+ err = hinic_init_rxq(&nic_dev->rxqs[i], rq, netdev);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to init rxq\n");
+ goto err_init_rxq;
+ }
+ }
+
+ return 0;
+
+err_init_rxq:
+ for (j = 0; j < i; j++)
+ hinic_clean_rxq(&nic_dev->rxqs[j]);
+
+ devm_kfree(&netdev->dev, nic_dev->rxqs);
+ return err;
+}
+
+/**
+ * free_rxqs - Free the Logical Rx Queues of the specific NIC device
+ * @nic_dev: the specific NIC device
+ **/
+static void free_rxqs(struct hinic_dev *nic_dev)
+{
+ int i, num_rxqs = hinic_hwdev_num_qps(nic_dev->hwdev);
+ struct net_device *netdev = nic_dev->netdev;
+
+ if (!nic_dev->rxqs)
+ return;
+
+ for (i = 0; i < num_rxqs; i++)
+ hinic_clean_rxq(&nic_dev->rxqs[i]);
+
+ devm_kfree(&netdev->dev, nic_dev->rxqs);
+ nic_dev->rxqs = NULL;
+}
+
+static int hinic_open(struct net_device *netdev)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ enum hinic_port_link_state link_state;
+ int err, ret, num_qps;
+
+ if (!(nic_dev->flags & HINIC_INTF_UP)) {
+ err = hinic_hwdev_ifup(nic_dev->hwdev);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed - HW interface up\n");
+ return err;
+ }
+ }
+
+ err = create_txqs(nic_dev);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to create Tx queues\n");
+ goto err_create_txqs;
+ }
+
+ err = create_rxqs(nic_dev);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to create Rx queues\n");
+ goto err_create_rxqs;
+ }
+
+ num_qps = hinic_hwdev_num_qps(nic_dev->hwdev);
+ netif_set_real_num_tx_queues(netdev, num_qps);
+ netif_set_real_num_rx_queues(netdev, num_qps);
+
+ err = hinic_port_set_state(nic_dev, HINIC_PORT_ENABLE);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to set port state\n");
+ goto err_port_state;
+ }
+
+ err = hinic_port_set_func_state(nic_dev, HINIC_FUNC_PORT_ENABLE);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to set func port state\n");
+ goto err_func_port_state;
+ }
+
+ /* Wait 3 sec between enabling the port and querying the link state */
+ msleep(3000);
+
+ down(&nic_dev->mgmt_lock);
+
+ err = hinic_port_link_state(nic_dev, &link_state);
+ if (err) {
+ netif_err(nic_dev, drv, netdev, "Failed to get link state\n");
+ goto err_port_link;
+ }
+
+ if (link_state == HINIC_LINK_STATE_UP)
+ nic_dev->flags |= HINIC_LINK_UP;
+
+ nic_dev->flags |= HINIC_INTF_UP;
+
+ if ((nic_dev->flags & (HINIC_LINK_UP | HINIC_INTF_UP)) ==
+ (HINIC_LINK_UP | HINIC_INTF_UP)) {
+ netif_info(nic_dev, drv, netdev, "link + intf UP\n");
+ netif_carrier_on(netdev);
+ netif_tx_wake_all_queues(netdev);
+ }
+
+ up(&nic_dev->mgmt_lock);
+
+ netif_info(nic_dev, drv, netdev, "HINIC_INTF is UP\n");
+ return 0;
+
+err_port_link:
+ up(&nic_dev->mgmt_lock);
+ ret = hinic_port_set_func_state(nic_dev, HINIC_FUNC_PORT_DISABLE);
+ if (ret)
+ netif_warn(nic_dev, drv, netdev,
+ "Failed to revert func port state\n");
+
+err_func_port_state:
+ ret = hinic_port_set_state(nic_dev, HINIC_PORT_DISABLE);
+ if (ret)
+ netif_warn(nic_dev, drv, netdev,
+ "Failed to revert port state\n");
+
+err_port_state:
+ free_rxqs(nic_dev);
+
+err_create_rxqs:
+ free_txqs(nic_dev);
+
+err_create_txqs:
+ if (!(nic_dev->flags & HINIC_INTF_UP))
+ hinic_hwdev_ifdown(nic_dev->hwdev);
+ return err;
+}
+
+static int hinic_close(struct net_device *netdev)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ unsigned int flags;
+ int err;
+
+ down(&nic_dev->mgmt_lock);
+
+ flags = nic_dev->flags;
+ nic_dev->flags &= ~HINIC_INTF_UP;
+
+ netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
+
+ update_nic_stats(nic_dev);
+
+ up(&nic_dev->mgmt_lock);
+
+ err = hinic_port_set_func_state(nic_dev, HINIC_FUNC_PORT_DISABLE);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to set func port state\n");
+ nic_dev->flags |= (flags & HINIC_INTF_UP);
+ return err;
+ }
+
+ err = hinic_port_set_state(nic_dev, HINIC_PORT_DISABLE);
+ if (err) {
+ netif_err(nic_dev, drv, netdev, "Failed to set port state\n");
+ nic_dev->flags |= (flags & HINIC_INTF_UP);
+ return err;
+ }
+
+ free_rxqs(nic_dev);
+ free_txqs(nic_dev);
+
+ if (flags & HINIC_INTF_UP)
+ hinic_hwdev_ifdown(nic_dev->hwdev);
+
+ netif_info(nic_dev, drv, netdev, "HINIC_INTF is DOWN\n");
+ return 0;
+}
+
+static int hinic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ int err;
+
+ netif_info(nic_dev, drv, netdev, "set_mtu = %d\n", new_mtu);
+
+ err = hinic_port_set_mtu(nic_dev, new_mtu);
+ if (err)
+ netif_err(nic_dev, drv, netdev, "Failed to set port mtu\n");
+ else
+ netdev->mtu = new_mtu;
+
+ return err;
+}
+
+/**
+ * change_mac_addr - change the main mac address of network device
+ * @netdev: network device
+ * @addr: mac address to set
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int change_mac_addr(struct net_device *netdev, const u8 *addr)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ u16 vid = 0;
+ int err;
+
+ if (!is_valid_ether_addr(addr))
+ return -EADDRNOTAVAIL;
+
+ netif_info(nic_dev, drv, netdev, "change mac addr = %02x %02x %02x %02x %02x %02x\n",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+
+ down(&nic_dev->mgmt_lock);
+
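+	/* Update the MAC for vlan 0 and for every vlan currently set
+	 * in the device's vlan bitmap.
+	 */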
+ do {
+ err = hinic_port_del_mac(nic_dev, netdev->dev_addr, vid);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to delete mac\n");
+ break;
+ }
+
+ err = hinic_port_add_mac(nic_dev, addr, vid);
+ if (err) {
+ netif_err(nic_dev, drv, netdev, "Failed to add mac\n");
+ break;
+ }
+
+ vid = find_next_bit(nic_dev->vlan_bitmap, VLAN_N_VID, vid + 1);
+ } while (vid != VLAN_N_VID);
+
+ up(&nic_dev->mgmt_lock);
+ return err;
+}
+
+static int hinic_set_mac_addr(struct net_device *netdev, void *addr)
+{
+ unsigned char new_mac[ETH_ALEN];
+ struct sockaddr *saddr = addr;
+ int err;
+
+ memcpy(new_mac, saddr->sa_data, ETH_ALEN);
+
+ err = change_mac_addr(netdev, new_mac);
+ if (!err)
+ memcpy(netdev->dev_addr, new_mac, ETH_ALEN);
+
+ return err;
+}
+
+/**
+ * add_mac_addr - add mac address to network device
+ * @netdev: network device
+ * @addr: mac address to add
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int add_mac_addr(struct net_device *netdev, const u8 *addr)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ u16 vid = 0;
+ int err;
+
+ if (!is_valid_ether_addr(addr))
+ return -EADDRNOTAVAIL;
+
+ netif_info(nic_dev, drv, netdev, "set mac addr = %02x %02x %02x %02x %02x %02x\n",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+
+ down(&nic_dev->mgmt_lock);
+
+ do {
+ err = hinic_port_add_mac(nic_dev, addr, vid);
+ if (err) {
+ netif_err(nic_dev, drv, netdev, "Failed to add mac\n");
+ break;
+ }
+
+ vid = find_next_bit(nic_dev->vlan_bitmap, VLAN_N_VID, vid + 1);
+ } while (vid != VLAN_N_VID);
+
+ up(&nic_dev->mgmt_lock);
+ return err;
+}
+
+/**
+ * remove_mac_addr - remove mac address from network device
+ * @netdev: network device
+ * @addr: mac address to remove
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int remove_mac_addr(struct net_device *netdev, const u8 *addr)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ u16 vid = 0;
+ int err;
+
+ if (!is_valid_ether_addr(addr))
+ return -EADDRNOTAVAIL;
+
+ netif_info(nic_dev, drv, netdev, "remove mac addr = %02x %02x %02x %02x %02x %02x\n",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+
+ down(&nic_dev->mgmt_lock);
+
+ do {
+ err = hinic_port_del_mac(nic_dev, addr, vid);
+ if (err) {
+ netif_err(nic_dev, drv, netdev,
+ "Failed to delete mac\n");
+ break;
+ }
+
+ vid = find_next_bit(nic_dev->vlan_bitmap, VLAN_N_VID, vid + 1);
+ } while (vid != VLAN_N_VID);
+
+ up(&nic_dev->mgmt_lock);
+ return err;
+}
+
+static int hinic_vlan_rx_add_vid(struct net_device *netdev,
+ __always_unused __be16 proto, u16 vid)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ int ret, err;
+
+ netif_info(nic_dev, drv, netdev, "add vid = %d\n", vid);
+
+ down(&nic_dev->mgmt_lock);
+
+ err = hinic_port_add_vlan(nic_dev, vid);
+ if (err) {
+ netif_err(nic_dev, drv, netdev, "Failed to add vlan\n");
+ goto err_vlan_add;
+ }
+
+ err = hinic_port_add_mac(nic_dev, netdev->dev_addr, vid);
+ if (err) {
+ netif_err(nic_dev, drv, netdev, "Failed to set mac\n");
+ goto err_add_mac;
+ }
+
+ bitmap_set(nic_dev->vlan_bitmap, vid, 1);
+
+ up(&nic_dev->mgmt_lock);
+ return 0;
+
+err_add_mac:
+ ret = hinic_port_del_vlan(nic_dev, vid);
+ if (ret)
+ netif_err(nic_dev, drv, netdev,
+ "Failed to revert by removing vlan\n");
+
+err_vlan_add:
+ up(&nic_dev->mgmt_lock);
+ return err;
+}
+
+static int hinic_vlan_rx_kill_vid(struct net_device *netdev,
+ __always_unused __be16 proto, u16 vid)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ int err;
+
+ netif_info(nic_dev, drv, netdev, "remove vid = %d\n", vid);
+
+ down(&nic_dev->mgmt_lock);
+
+ err = hinic_port_del_vlan(nic_dev, vid);
+ if (err) {
+ netif_err(nic_dev, drv, netdev, "Failed to delete vlan\n");
+ goto err_del_vlan;
+ }
+
+ bitmap_clear(nic_dev->vlan_bitmap, vid, 1);
+
+ up(&nic_dev->mgmt_lock);
+ return 0;
+
+err_del_vlan:
+ up(&nic_dev->mgmt_lock);
+ return err;
+}
+
+static void set_rx_mode(struct work_struct *work)
+{
+ struct hinic_rx_mode_work *rx_mode_work = work_to_rx_mode_work(work);
+ struct hinic_dev *nic_dev = rx_mode_work_to_nic_dev(rx_mode_work);
+
+ netif_info(nic_dev, drv, nic_dev->netdev, "set rx mode work\n");
+
+ hinic_port_set_rx_mode(nic_dev, rx_mode_work->rx_mode);
+
+ __dev_uc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr);
+ __dev_mc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr);
+}
+
+static void hinic_set_rx_mode(struct net_device *netdev)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ struct hinic_rx_mode_work *rx_mode_work;
+ u32 rx_mode;
+
+ rx_mode_work = &nic_dev->rx_mode_work;
+
+ rx_mode = HINIC_RX_MODE_UC |
+ HINIC_RX_MODE_MC |
+ HINIC_RX_MODE_BC;
+
+ if (netdev->flags & IFF_PROMISC)
+ rx_mode |= HINIC_RX_MODE_PROMISC;
+ else if (netdev->flags & IFF_ALLMULTI)
+ rx_mode |= HINIC_RX_MODE_MC_ALL;
+
+ rx_mode_work->rx_mode = rx_mode;
+
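+	/* Defer to the workqueue: ndo_set_rx_mode runs in atomic context,
+	 * while the port commands sent from set_rx_mode() may sleep.
+	 */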
+ queue_work(nic_dev->workq, &rx_mode_work->work);
+}
+
+static void hinic_tx_timeout(struct net_device *netdev)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+
+ netif_err(nic_dev, drv, netdev, "Tx timeout\n");
+}
+
+static void hinic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ struct hinic_rxq_stats *nic_rx_stats;
+ struct hinic_txq_stats *nic_tx_stats;
+
+ nic_rx_stats = &nic_dev->rx_stats;
+ nic_tx_stats = &nic_dev->tx_stats;
+
+ down(&nic_dev->mgmt_lock);
+
+ if (nic_dev->flags & HINIC_INTF_UP)
+ update_nic_stats(nic_dev);
+
+ up(&nic_dev->mgmt_lock);
+
+ stats->rx_bytes = nic_rx_stats->bytes;
+ stats->rx_packets = nic_rx_stats->pkts;
+
+ stats->tx_bytes = nic_tx_stats->bytes;
+ stats->tx_packets = nic_tx_stats->pkts;
+ stats->tx_errors = nic_tx_stats->tx_dropped;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void hinic_netpoll(struct net_device *netdev)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ int i, num_qps;
+
+ num_qps = hinic_hwdev_num_qps(nic_dev->hwdev);
+ for (i = 0; i < num_qps; i++) {
+ struct hinic_txq *txq = &nic_dev->txqs[i];
+ struct hinic_rxq *rxq = &nic_dev->rxqs[i];
+
+ napi_schedule(&txq->napi);
+ napi_schedule(&rxq->napi);
+ }
+}
+#endif
+
+static const struct net_device_ops hinic_netdev_ops = {
+ .ndo_open = hinic_open,
+ .ndo_stop = hinic_close,
+ .ndo_change_mtu = hinic_change_mtu,
+ .ndo_set_mac_address = hinic_set_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_vlan_rx_add_vid = hinic_vlan_rx_add_vid,
+ .ndo_vlan_rx_kill_vid = hinic_vlan_rx_kill_vid,
+ .ndo_set_rx_mode = hinic_set_rx_mode,
+ .ndo_start_xmit = hinic_xmit_frame,
+ .ndo_tx_timeout = hinic_tx_timeout,
+ .ndo_get_stats64 = hinic_get_stats64,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = hinic_netpoll,
+#endif
+};
+
+static void netdev_features_init(struct net_device *netdev)
+{
+ netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA;
+
+ netdev->vlan_features = netdev->hw_features;
+
+ netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
+}
+
+/**
+ * link_status_event_handler - link event handler
+ * @handle: nic device for the handler
+ * @buf_in: input buffer
+ * @in_size: input size
+ * @buf_out: output buffer
+ * @out_size: returned output size
+ **/
+static void link_status_event_handler(void *handle, void *buf_in, u16 in_size,
+ void *buf_out, u16 *out_size)
+{
+ struct hinic_port_link_status *link_status, *ret_link_status;
+ struct hinic_dev *nic_dev = handle;
+
+ link_status = buf_in;
+
+ if (link_status->link == HINIC_LINK_STATE_UP) {
+ down(&nic_dev->mgmt_lock);
+
+ nic_dev->flags |= HINIC_LINK_UP;
+
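+		/* Turn the carrier on only when both the physical link
+		 * and the interface are up.
+		 */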
+ if ((nic_dev->flags & (HINIC_LINK_UP | HINIC_INTF_UP)) ==
+ (HINIC_LINK_UP | HINIC_INTF_UP)) {
+ netif_carrier_on(nic_dev->netdev);
+ netif_tx_wake_all_queues(nic_dev->netdev);
+ }
+
+ up(&nic_dev->mgmt_lock);
+
+ netif_info(nic_dev, drv, nic_dev->netdev, "HINIC_Link is UP\n");
+ } else {
+ down(&nic_dev->mgmt_lock);
+
+ nic_dev->flags &= ~HINIC_LINK_UP;
+
+ netif_carrier_off(nic_dev->netdev);
+ netif_tx_disable(nic_dev->netdev);
+
+ up(&nic_dev->mgmt_lock);
+
+ netif_info(nic_dev, drv, nic_dev->netdev, "HINIC_Link is DOWN\n");
+ }
+
+ ret_link_status = buf_out;
+ ret_link_status->status = 0;
+
+ *out_size = sizeof(*ret_link_status);
+}
+
+/**
+ * nic_dev_init - Initialize the NIC device
+ * @pdev: the NIC pci device
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int nic_dev_init(struct pci_dev *pdev)
+{
+ struct hinic_rx_mode_work *rx_mode_work;
+ struct hinic_txq_stats *tx_stats;
+ struct hinic_rxq_stats *rx_stats;
+ struct hinic_dev *nic_dev;
+ struct net_device *netdev;
+ struct hinic_hwdev *hwdev;
+ int err, num_qps;
+
+ hwdev = hinic_init_hwdev(pdev);
+ if (IS_ERR(hwdev)) {
+ dev_err(&pdev->dev, "Failed to initialize HW device\n");
+ return PTR_ERR(hwdev);
+ }
+
+ num_qps = hinic_hwdev_num_qps(hwdev);
+ if (num_qps <= 0) {
+ dev_err(&pdev->dev, "Invalid number of QPS\n");
+ err = -EINVAL;
+ goto err_num_qps;
+ }
+
+ netdev = alloc_etherdev_mq(sizeof(*nic_dev), num_qps);
+ if (!netdev) {
+ dev_err(&pdev->dev, "Failed to allocate Ethernet device\n");
+ err = -ENOMEM;
+ goto err_alloc_etherdev;
+ }
+
+ netdev->netdev_ops = &hinic_netdev_ops;
+ netdev->ethtool_ops = &hinic_ethtool_ops;
+ netdev->max_mtu = ETH_MAX_MTU;
+
+ nic_dev = netdev_priv(netdev);
+ nic_dev->netdev = netdev;
+ nic_dev->hwdev = hwdev;
+ nic_dev->msg_enable = MSG_ENABLE_DEFAULT;
+ nic_dev->flags = 0;
+ nic_dev->txqs = NULL;
+ nic_dev->rxqs = NULL;
+ nic_dev->tx_weight = tx_weight;
+ nic_dev->rx_weight = rx_weight;
+
+ sema_init(&nic_dev->mgmt_lock, 1);
+
+ tx_stats = &nic_dev->tx_stats;
+ rx_stats = &nic_dev->rx_stats;
+
+ u64_stats_init(&tx_stats->syncp);
+ u64_stats_init(&rx_stats->syncp);
+
+ nic_dev->vlan_bitmap = devm_kzalloc(&pdev->dev,
+ VLAN_BITMAP_SIZE(nic_dev),
+ GFP_KERNEL);
+ if (!nic_dev->vlan_bitmap) {
+ err = -ENOMEM;
+ goto err_vlan_bitmap;
+ }
+
+ nic_dev->workq = create_singlethread_workqueue(HINIC_WQ_NAME);
+ if (!nic_dev->workq) {
+ err = -ENOMEM;
+ goto err_workq;
+ }
+
+ pci_set_drvdata(pdev, netdev);
+
+ err = hinic_port_get_mac(nic_dev, netdev->dev_addr);
+ if (err)
+ dev_warn(&pdev->dev, "Failed to get mac address\n");
+
+ err = hinic_port_add_mac(nic_dev, netdev->dev_addr, 0);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to add mac\n");
+ goto err_add_mac;
+ }
+
+ err = hinic_port_set_mtu(nic_dev, netdev->mtu);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set mtu\n");
+ goto err_set_mtu;
+ }
+
+ rx_mode_work = &nic_dev->rx_mode_work;
+ INIT_WORK(&rx_mode_work->work, set_rx_mode);
+
+ netdev_features_init(netdev);
+
+ netif_carrier_off(netdev);
+
+ hinic_hwdev_cb_register(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS,
+ nic_dev, link_status_event_handler);
+
+ err = register_netdev(netdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to register netdev\n");
+ goto err_reg_netdev;
+ }
+
+ return 0;
+
+err_reg_netdev:
+ hinic_hwdev_cb_unregister(nic_dev->hwdev,
+ HINIC_MGMT_MSG_CMD_LINK_STATUS);
+ cancel_work_sync(&rx_mode_work->work);
+
+err_set_mtu:
+err_add_mac:
+ pci_set_drvdata(pdev, NULL);
+ destroy_workqueue(nic_dev->workq);
+
+err_workq:
+err_vlan_bitmap:
+ free_netdev(netdev);
+
+err_alloc_etherdev:
+err_num_qps:
+ hinic_free_hwdev(hwdev);
+ return err;
+}
+
+static int hinic_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+ dev_err(&pdev->dev, "Failed to enable PCI device\n");
+ return err;
+ }
+
+ err = pci_request_regions(pdev, HINIC_DRV_NAME);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to request PCI regions\n");
+ goto err_pci_regions;
+ }
+
+ pci_set_master(pdev);
+
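+	/* Prefer 64-bit DMA, fall back to 32-bit if unsupported */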
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_warn(&pdev->dev, "Couldn't set 64-bit DMA mask\n");
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set DMA mask\n");
+ goto err_dma_mask;
+ }
+ }
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_warn(&pdev->dev,
+ "Couldn't set 64-bit consistent DMA mask\n");
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev,
+ "Failed to set consistent DMA mask\n");
+ goto err_dma_consistent_mask;
+ }
+ }
+
+ err = nic_dev_init(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to initialize NIC device\n");
+ goto err_nic_dev_init;
+ }
+
+ dev_info(&pdev->dev, "HiNIC driver - probed\n");
+ return 0;
+
+err_nic_dev_init:
+err_dma_consistent_mask:
+err_dma_mask:
+ pci_release_regions(pdev);
+
+err_pci_regions:
+ pci_disable_device(pdev);
+ return err;
+}
+
+static void hinic_remove(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ struct hinic_rx_mode_work *rx_mode_work;
+
+ unregister_netdev(netdev);
+
+ hinic_hwdev_cb_unregister(nic_dev->hwdev,
+ HINIC_MGMT_MSG_CMD_LINK_STATUS);
+
+ rx_mode_work = &nic_dev->rx_mode_work;
+ cancel_work_sync(&rx_mode_work->work);
+
+ pci_set_drvdata(pdev, NULL);
+
+ destroy_workqueue(nic_dev->workq);
+
+ hinic_free_hwdev(nic_dev->hwdev);
+
+ free_netdev(netdev);
+
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+
+ dev_info(&pdev->dev, "HiNIC driver - removed\n");
+}
+
+static const struct pci_device_id hinic_pci_table[] = {
+ { PCI_VDEVICE(HUAWEI, PCI_DEVICE_ID_HI1822_PF), 0},
+ { 0, 0}
+};
+MODULE_DEVICE_TABLE(pci, hinic_pci_table);
+
+static struct pci_driver hinic_driver = {
+ .name = HINIC_DRV_NAME,
+ .id_table = hinic_pci_table,
+ .probe = hinic_probe,
+ .remove = hinic_remove,
+};
+
+module_pci_driver(hinic_driver);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
new file mode 100644
index 0000000..4d4e3f0
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -0,0 +1,379 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+
+#include "hinic_hw_if.h"
+#include "hinic_hw_dev.h"
+#include "hinic_port.h"
+#include "hinic_dev.h"
+
+#define HINIC_MIN_MTU_SIZE 256
+#define HINIC_MAX_JUMBO_FRAME_SIZE 15872
+
+enum mac_op {
+ MAC_DEL,
+ MAC_SET,
+};
+
+/**
+ * change_mac - change (add or delete) a mac address
+ * @nic_dev: nic device
+ * @addr: mac address
+ * @vlan_id: vlan number to set with the mac
+ * @op: add or delete the mac
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int change_mac(struct hinic_dev *nic_dev, const u8 *addr,
+ u16 vlan_id, enum mac_op op)
+{
+ struct net_device *netdev = nic_dev->netdev;
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_port_mac_cmd port_mac_cmd;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ enum hinic_port_cmd cmd;
+ u16 out_size;
+ int err;
+
+ if (vlan_id >= VLAN_N_VID) {
+ netif_err(nic_dev, drv, netdev, "Invalid VLAN number\n");
+ return -EINVAL;
+ }
+
+ if (op == MAC_SET)
+ cmd = HINIC_PORT_CMD_SET_MAC;
+ else
+ cmd = HINIC_PORT_CMD_DEL_MAC;
+
+ port_mac_cmd.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+ port_mac_cmd.vlan_id = vlan_id;
+ memcpy(port_mac_cmd.mac, addr, ETH_ALEN);
+
+ err = hinic_port_msg_cmd(hwdev, cmd, &port_mac_cmd,
+ sizeof(port_mac_cmd),
+ &port_mac_cmd, &out_size);
+ if (err || (out_size != sizeof(port_mac_cmd)) || port_mac_cmd.status) {
+ dev_err(&pdev->dev, "Failed to change MAC, ret = %d\n",
+ port_mac_cmd.status);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * hinic_port_add_mac - add mac address
+ * @nic_dev: nic device
+ * @addr: mac address
+ * @vlan_id: vlan number to set with the mac
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_add_mac(struct hinic_dev *nic_dev,
+ const u8 *addr, u16 vlan_id)
+{
+ return change_mac(nic_dev, addr, vlan_id, MAC_SET);
+}
+
+/**
+ * hinic_port_del_mac - remove mac address
+ * @nic_dev: nic device
+ * @addr: mac address
+ * @vlan_id: vlan number that is connected to the mac
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_del_mac(struct hinic_dev *nic_dev, const u8 *addr,
+ u16 vlan_id)
+{
+ return change_mac(nic_dev, addr, vlan_id, MAC_DEL);
+}
+
+/**
+ * hinic_port_get_mac - get the mac address of the nic device
+ * @nic_dev: nic device
+ * @addr: returned mac address
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_get_mac(struct hinic_dev *nic_dev, u8 *addr)
+{
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_port_mac_cmd port_mac_cmd;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ u16 out_size;
+ int err;
+
+ port_mac_cmd.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+
+ err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_MAC,
+ &port_mac_cmd, sizeof(port_mac_cmd),
+ &port_mac_cmd, &out_size);
+ if (err || (out_size != sizeof(port_mac_cmd)) || port_mac_cmd.status) {
+ dev_err(&pdev->dev, "Failed to get mac, ret = %d\n",
+ port_mac_cmd.status);
+ return -EFAULT;
+ }
+
+ memcpy(addr, port_mac_cmd.mac, ETH_ALEN);
+ return 0;
+}
+
+/**
+ * hinic_port_set_mtu - set mtu
+ * @nic_dev: nic device
+ * @new_mtu: new mtu
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_set_mtu(struct hinic_dev *nic_dev, int new_mtu)
+{
+ struct net_device *netdev = nic_dev->netdev;
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_port_mtu_cmd port_mtu_cmd;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int err, max_frame;
+ u16 out_size;
+
+ if (new_mtu < HINIC_MIN_MTU_SIZE) {
+ netif_err(nic_dev, drv, netdev, "mtu < MIN MTU size");
+ return -EINVAL;
+ }
+
+ max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
+ if (max_frame > HINIC_MAX_JUMBO_FRAME_SIZE) {
+ netif_err(nic_dev, drv, netdev, "mtu > MAX MTU size");
+ return -EINVAL;
+ }
+
+ port_mtu_cmd.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+ port_mtu_cmd.mtu = new_mtu;
+
+ err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_CHANGE_MTU,
+ &port_mtu_cmd, sizeof(port_mtu_cmd),
+ &port_mtu_cmd, &out_size);
+ if (err || (out_size != sizeof(port_mtu_cmd)) || port_mtu_cmd.status) {
+ dev_err(&pdev->dev, "Failed to set mtu, ret = %d\n",
+ port_mtu_cmd.status);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * hinic_port_add_vlan - add vlan to the nic device
+ * @nic_dev: nic device
+ * @vlan_id: the vlan number to add
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_add_vlan(struct hinic_dev *nic_dev, u16 vlan_id)
+{
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_port_vlan_cmd port_vlan_cmd;
+
+ port_vlan_cmd.func_idx = HINIC_HWIF_FUNC_IDX(hwdev->hwif);
+ port_vlan_cmd.vlan_id = vlan_id;
+
+ return hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_ADD_VLAN,
+ &port_vlan_cmd, sizeof(port_vlan_cmd),
+ NULL, NULL);
+}
+
+/**
+ * hinic_port_del_vlan - delete vlan from the nic device
+ * @nic_dev: nic device
+ * @vlan_id: the vlan number to delete
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_del_vlan(struct hinic_dev *nic_dev, u16 vlan_id)
+{
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_port_vlan_cmd port_vlan_cmd;
+
+ port_vlan_cmd.func_idx = HINIC_HWIF_FUNC_IDX(hwdev->hwif);
+ port_vlan_cmd.vlan_id = vlan_id;
+
+ return hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_DEL_VLAN,
+ &port_vlan_cmd, sizeof(port_vlan_cmd),
+ NULL, NULL);
+}
+
+/**
+ * hinic_port_set_rx_mode - set rx mode in the nic device
+ * @nic_dev: nic device
+ * @rx_mode: the rx mode to set
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_set_rx_mode(struct hinic_dev *nic_dev, u32 rx_mode)
+{
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_port_rx_mode_cmd rx_mode_cmd;
+
+ rx_mode_cmd.func_idx = HINIC_HWIF_FUNC_IDX(hwdev->hwif);
+ rx_mode_cmd.rx_mode = rx_mode;
+
+ return hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RX_MODE,
+ &rx_mode_cmd, sizeof(rx_mode_cmd),
+ NULL, NULL);
+}
+
+/**
+ * hinic_port_link_state - get the link state
+ * @nic_dev: nic device
+ * @link_state: the returned link state
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_link_state(struct hinic_dev *nic_dev,
+ enum hinic_port_link_state *link_state)
+{
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct hinic_port_link_cmd link_cmd;
+ struct pci_dev *pdev = hwif->pdev;
+ u16 out_size;
+ int err;
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "unsupported PCI Function type\n");
+ return -EINVAL;
+ }
+
+ link_cmd.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+
+ err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_LINK_STATE,
+ &link_cmd, sizeof(link_cmd),
+ &link_cmd, &out_size);
+ if (err || (out_size != sizeof(link_cmd)) || link_cmd.status) {
+ dev_err(&pdev->dev, "Failed to get link state, ret = %d\n",
+ link_cmd.status);
+ return -EINVAL;
+ }
+
+ *link_state = link_cmd.state;
+ return 0;
+}
+
+/**
+ * hinic_port_set_state - set port state
+ * @nic_dev: nic device
+ * @state: the state to set
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_set_state(struct hinic_dev *nic_dev, enum hinic_port_state state)
+{
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_port_state_cmd port_state;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ u16 out_size;
+ int err;
+
+ if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
+ dev_err(&pdev->dev, "unsupported PCI Function type\n");
+ return -EINVAL;
+ }
+
+ port_state.state = state;
+
+ err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_PORT_STATE,
+ &port_state, sizeof(port_state),
+ &port_state, &out_size);
+ if (err || (out_size != sizeof(port_state)) || port_state.status) {
+ dev_err(&pdev->dev, "Failed to set port state, ret = %d\n",
+ port_state.status);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * hinic_port_set_func_state - set the function port state
+ * @nic_dev: nic device
+ * @state: the state to set
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_set_func_state(struct hinic_dev *nic_dev,
+ enum hinic_func_port_state state)
+{
+ struct hinic_port_func_state_cmd func_state;
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ u16 out_size;
+ int err;
+
+ func_state.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+ func_state.state = state;
+
+ err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_FUNC_STATE,
+ &func_state, sizeof(func_state),
+ &func_state, &out_size);
+ if (err || (out_size != sizeof(func_state)) || func_state.status) {
+ dev_err(&pdev->dev, "Failed to set port func state, ret = %d\n",
+ func_state.status);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * hinic_port_get_cap - get port capabilities
+ * @nic_dev: nic device
+ * @port_cap: returned port capabilities
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_get_cap(struct hinic_dev *nic_dev,
+ struct hinic_port_cap *port_cap)
+{
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ u16 out_size;
+ int err;
+
+ port_cap->func_idx = HINIC_HWIF_FUNC_IDX(hwif);
+
+ err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_CAP,
+ port_cap, sizeof(*port_cap),
+ port_cap, &out_size);
+ if (err || (out_size != sizeof(*port_cap)) || port_cap->status) {
+ dev_err(&pdev->dev,
+ "Failed to get port capabilities, ret = %d\n",
+ port_cap->status);
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
new file mode 100644
index 0000000..9404365
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
@@ -0,0 +1,198 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_PORT_H
+#define HINIC_PORT_H
+
+#include <linux/types.h>
+#include <linux/etherdevice.h>
+#include <linux/bitops.h>
+
+#include "hinic_dev.h"
+
+enum hinic_rx_mode {
+ HINIC_RX_MODE_UC = BIT(0),
+ HINIC_RX_MODE_MC = BIT(1),
+ HINIC_RX_MODE_BC = BIT(2),
+ HINIC_RX_MODE_MC_ALL = BIT(3),
+ HINIC_RX_MODE_PROMISC = BIT(4),
+};
+
+enum hinic_port_link_state {
+ HINIC_LINK_STATE_DOWN,
+ HINIC_LINK_STATE_UP,
+};
+
+enum hinic_port_state {
+ HINIC_PORT_DISABLE = 0,
+ HINIC_PORT_ENABLE = 3,
+};
+
+enum hinic_func_port_state {
+ HINIC_FUNC_PORT_DISABLE = 0,
+ HINIC_FUNC_PORT_ENABLE = 2,
+};
+
+enum hinic_autoneg_cap {
+ HINIC_AUTONEG_UNSUPPORTED,
+ HINIC_AUTONEG_SUPPORTED,
+};
+
+enum hinic_autoneg_state {
+ HINIC_AUTONEG_DISABLED,
+ HINIC_AUTONEG_ACTIVE,
+};
+
+enum hinic_duplex {
+ HINIC_DUPLEX_HALF,
+ HINIC_DUPLEX_FULL,
+};
+
+enum hinic_speed {
+ HINIC_SPEED_10MB_LINK = 0,
+ HINIC_SPEED_100MB_LINK,
+ HINIC_SPEED_1000MB_LINK,
+ HINIC_SPEED_10GB_LINK,
+ HINIC_SPEED_25GB_LINK,
+ HINIC_SPEED_40GB_LINK,
+ HINIC_SPEED_100GB_LINK,
+
+ HINIC_SPEED_UNKNOWN = 0xFF,
+};
+
+struct hinic_port_mac_cmd {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u16 vlan_id;
+ u16 rsvd1;
+ unsigned char mac[ETH_ALEN];
+};
+
+struct hinic_port_mtu_cmd {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u16 rsvd1;
+ u32 mtu;
+};
+
+struct hinic_port_vlan_cmd {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u16 vlan_id;
+};
+
+struct hinic_port_rx_mode_cmd {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u16 rsvd;
+ u32 rx_mode;
+};
+
+struct hinic_port_link_cmd {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u8 state;
+ u8 rsvd1;
+};
+
+struct hinic_port_state_cmd {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u8 state;
+ u8 rsvd1[3];
+};
+
+struct hinic_port_link_status {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 rsvd1;
+ u8 link;
+ u8 rsvd2;
+};
+
+struct hinic_port_func_state_cmd {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u16 rsvd1;
+ u8 state;
+ u8 rsvd2[3];
+};
+
+struct hinic_port_cap {
+ u8 status;
+ u8 version;
+ u8 rsvd0[6];
+
+ u16 func_idx;
+ u16 rsvd1;
+ u8 port_type;
+ u8 autoneg_cap;
+ u8 autoneg_state;
+ u8 duplex;
+ u8 speed;
+ u8 rsvd2[3];
+};
+
+int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr,
+ u16 vlan_id);
+
+int hinic_port_del_mac(struct hinic_dev *nic_dev, const u8 *addr,
+ u16 vlan_id);
+
+int hinic_port_get_mac(struct hinic_dev *nic_dev, u8 *addr);
+
+int hinic_port_set_mtu(struct hinic_dev *nic_dev, int new_mtu);
+
+int hinic_port_add_vlan(struct hinic_dev *nic_dev, u16 vlan_id);
+
+int hinic_port_del_vlan(struct hinic_dev *nic_dev, u16 vlan_id);
+
+int hinic_port_set_rx_mode(struct hinic_dev *nic_dev, u32 rx_mode);
+
+int hinic_port_link_state(struct hinic_dev *nic_dev,
+ enum hinic_port_link_state *link_state);
+
+int hinic_port_set_state(struct hinic_dev *nic_dev,
+ enum hinic_port_state state);
+
+int hinic_port_set_func_state(struct hinic_dev *nic_dev,
+ enum hinic_func_port_state state);
+
+int hinic_port_get_cap(struct hinic_dev *nic_dev,
+ struct hinic_port_cap *port_cap);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
new file mode 100644
index 0000000..1d4f712
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -0,0 +1,509 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/skbuff.h>
+#include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
+#include <asm/barrier.h>
+
+#include "hinic_common.h"
+#include "hinic_hw_if.h"
+#include "hinic_hw_wqe.h"
+#include "hinic_hw_wq.h"
+#include "hinic_hw_qp.h"
+#include "hinic_hw_dev.h"
+#include "hinic_rx.h"
+#include "hinic_dev.h"
+
+#define RX_IRQ_NO_PENDING 0
+#define RX_IRQ_NO_COALESC 0
+#define RX_IRQ_NO_LLI_TIMER 0
+#define RX_IRQ_NO_CREDIT 0
+#define RX_IRQ_NO_RESEND_TIMER 0
+
+/**
+ * hinic_rxq_clean_stats - Clean the statistics of a specific queue
+ * @rxq: Logical Rx Queue
+ **/
+void hinic_rxq_clean_stats(struct hinic_rxq *rxq)
+{
+ struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;
+
+ u64_stats_update_begin(&rxq_stats->syncp);
+ rxq_stats->pkts = 0;
+ rxq_stats->bytes = 0;
+ u64_stats_update_end(&rxq_stats->syncp);
+}
+
+/**
+ * hinic_rxq_get_stats - get statistics of Rx Queue
+ * @rxq: Logical Rx Queue
+ * @stats: return updated stats here
+ **/
+void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats)
+{
+ struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;
+ unsigned int start;
+
+ u64_stats_update_begin(&stats->syncp);
+ do {
+ start = u64_stats_fetch_begin(&rxq_stats->syncp);
+ stats->pkts = rxq_stats->pkts;
+ stats->bytes = rxq_stats->bytes;
+ } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
+ u64_stats_update_end(&stats->syncp);
+}
+
+/**
+ * rxq_stats_init - Initialize the statistics of a specific queue
+ * @rxq: Logical Rx Queue
+ **/
+static void rxq_stats_init(struct hinic_rxq *rxq)
+{
+ struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats;
+
+ u64_stats_init(&rxq_stats->syncp);
+ hinic_rxq_clean_stats(rxq);
+}
+
+/**
+ * rx_alloc_skb - allocate skb and map it to dma address
+ * @rxq: rx queue
+ * @dma_addr: returned dma address for the skb
+ *
+ * Return skb
+ **/
+static struct sk_buff *rx_alloc_skb(struct hinic_rxq *rxq,
+ dma_addr_t *dma_addr)
+{
+ struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct sk_buff *skb;
+ dma_addr_t addr;
+ int err;
+
+ skb = netdev_alloc_skb_ip_align(rxq->netdev, rxq->rq->buf_sz);
+ if (!skb) {
+ netdev_err(rxq->netdev, "Failed to allocate Rx SKB\n");
+ return NULL;
+ }
+
+ addr = dma_map_single(&pdev->dev, skb->data, rxq->rq->buf_sz,
+ DMA_FROM_DEVICE);
+ err = dma_mapping_error(&pdev->dev, addr);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to map Rx DMA, err = %d\n", err);
+ goto err_rx_map;
+ }
+
+ *dma_addr = addr;
+ return skb;
+
+err_rx_map:
+ dev_kfree_skb_any(skb);
+ return NULL;
+}
+
+/**
+ * rx_unmap_skb - unmap the dma address of the skb
+ * @rxq: rx queue
+ * @dma_addr: dma address of the skb
+ **/
+static void rx_unmap_skb(struct hinic_rxq *rxq, dma_addr_t dma_addr)
+{
+ struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+
+ dma_unmap_single(&pdev->dev, dma_addr, rxq->rq->buf_sz,
+ DMA_FROM_DEVICE);
+}
+
+/**
+ * rx_free_skb - unmap and free skb
+ * @rxq: rx queue
+ * @skb: skb to free
+ * @dma_addr: dma address of the skb
+ **/
+static void rx_free_skb(struct hinic_rxq *rxq, struct sk_buff *skb,
+ dma_addr_t dma_addr)
+{
+ rx_unmap_skb(rxq, dma_addr);
+ dev_kfree_skb_any(skb);
+}
+
+/**
+ * rx_alloc_pkts - allocate pkts in rx queue
+ * @rxq: rx queue
+ *
+ * Return number of skbs allocated
+ **/
+static int rx_alloc_pkts(struct hinic_rxq *rxq)
+{
+ struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
+ struct hinic_rq_wqe *rq_wqe;
+ unsigned int free_wqebbs;
+ struct hinic_sge sge;
+ dma_addr_t dma_addr;
+ struct sk_buff *skb;
+ int i, alloc_more;
+ u16 prod_idx;
+
+ free_wqebbs = hinic_get_rq_free_wqebbs(rxq->rq);
+ alloc_more = 0;
+
+ /* Limit the allocation chunks */
+ if (free_wqebbs > nic_dev->rx_weight)
+ free_wqebbs = nic_dev->rx_weight;
+
+ for (i = 0; i < free_wqebbs; i++) {
+ skb = rx_alloc_skb(rxq, &dma_addr);
+ if (!skb) {
+ netdev_err(rxq->netdev, "Failed to alloc Rx skb\n");
+ alloc_more = 1;
+ goto skb_out;
+ }
+
+ hinic_set_sge(&sge, dma_addr, skb->len);
+
+ rq_wqe = hinic_rq_get_wqe(rxq->rq, HINIC_RQ_WQE_SIZE,
+ &prod_idx);
+ if (!rq_wqe) {
+ rx_free_skb(rxq, skb, dma_addr);
+ alloc_more = 1;
+ goto skb_out;
+ }
+
+ hinic_rq_prepare_wqe(rxq->rq, prod_idx, rq_wqe, &sge);
+
+ hinic_rq_write_wqe(rxq->rq, prod_idx, rq_wqe, skb);
+ }
+
+skb_out:
+ if (i) {
+		wmb();	/* write all the wqes before updating the producer index */
+
+ hinic_rq_update(rxq->rq, prod_idx);
+ }
+
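+	/* An allocation failed - reschedule the tasklet to retry the refill */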
+ if (alloc_more)
+ tasklet_schedule(&rxq->rx_task);
+
+ return i;
+}
+
+/**
+ * free_all_rx_skbs - free all skbs in rx queue
+ * @rxq: rx queue
+ **/
+static void free_all_rx_skbs(struct hinic_rxq *rxq)
+{
+ struct hinic_rq *rq = rxq->rq;
+ struct hinic_hw_wqe *hw_wqe;
+ struct hinic_sge sge;
+ u16 ci;
+
+ while ((hw_wqe = hinic_read_wqe(rq->wq, HINIC_RQ_WQE_SIZE, &ci))) {
+ if (IS_ERR(hw_wqe))
+ break;
+
+ hinic_rq_get_sge(rq, &hw_wqe->rq_wqe, ci, &sge);
+
+ hinic_put_wqe(rq->wq, HINIC_RQ_WQE_SIZE);
+
+ rx_free_skb(rxq, rq->saved_skb[ci], hinic_sge_to_dma(&sge));
+ }
+}
+
+/**
+ * rx_alloc_task - tasklet for refilling the rx queue with skbs
+ * @data: rx queue
+ **/
+static void rx_alloc_task(unsigned long data)
+{
+ struct hinic_rxq *rxq = (struct hinic_rxq *)data;
+
+ (void)rx_alloc_pkts(rxq);
+}
+
+/**
+ * rx_recv_jumbo_pkt - Rx handler for jumbo pkt
+ * @rxq: rx queue
+ * @head_skb: the first skb in the list
+ * @left_pkt_len: remaining packet length, excluding the head skb
+ * @ci: consumer index
+ *
+ * Return number of wqes used for the remainder of the packet
+ **/
+static int rx_recv_jumbo_pkt(struct hinic_rxq *rxq, struct sk_buff *head_skb,
+ unsigned int left_pkt_len, u16 ci)
+{
+ struct sk_buff *skb, *curr_skb = head_skb;
+ struct hinic_rq_wqe *rq_wqe;
+ unsigned int curr_len;
+ struct hinic_sge sge;
+ int num_wqes = 0;
+
+ while (left_pkt_len > 0) {
+ rq_wqe = hinic_rq_read_next_wqe(rxq->rq, HINIC_RQ_WQE_SIZE,
+ &skb, &ci);
+
+ num_wqes++;
+
+ hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge);
+
+ rx_unmap_skb(rxq, hinic_sge_to_dma(&sge));
+
+ prefetch(skb->data);
+
+ curr_len = (left_pkt_len > HINIC_RX_BUF_SZ) ? HINIC_RX_BUF_SZ :
+ left_pkt_len;
+
+ left_pkt_len -= curr_len;
+
+ __skb_put(skb, curr_len);
+
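+		/* The first extra buffer hangs off the head skb's frag_list;
+		 * later buffers are chained through skb->next.
+		 */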
+ if (curr_skb == head_skb)
+ skb_shinfo(head_skb)->frag_list = skb;
+ else
+ curr_skb->next = skb;
+
+ head_skb->len += skb->len;
+ head_skb->data_len += skb->len;
+ head_skb->truesize += skb->truesize;
+
+ curr_skb = skb;
+ }
+
+ return num_wqes;
+}
+
+/**
+ * rxq_recv - Rx handler
+ * @rxq: rx queue
+ * @budget: maximum pkts to process
+ *
+ * Return number of pkts received
+ **/
+static int rxq_recv(struct hinic_rxq *rxq, int budget)
+{
+ struct hinic_qp *qp = container_of(rxq->rq, struct hinic_qp, rq);
+ u64 pkt_len = 0, rx_bytes = 0;
+ struct hinic_rq_wqe *rq_wqe;
+ int num_wqes, pkts = 0;
+ struct hinic_sge sge;
+ struct sk_buff *skb;
+ u16 ci;
+
+ while (pkts < budget) {
+ num_wqes = 0;
+
+ rq_wqe = hinic_rq_read_wqe(rxq->rq, HINIC_RQ_WQE_SIZE, &skb,
+ &ci);
+ if (!rq_wqe)
+ break;
+
+ hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge);
+
+ rx_unmap_skb(rxq, hinic_sge_to_dma(&sge));
+
+ prefetch(skb->data);
+
+ pkt_len = sge.len;
+
+ if (pkt_len <= HINIC_RX_BUF_SZ) {
+ __skb_put(skb, pkt_len);
+ } else {
+ __skb_put(skb, HINIC_RX_BUF_SZ);
+ num_wqes = rx_recv_jumbo_pkt(rxq, skb, pkt_len -
+ HINIC_RX_BUF_SZ, ci);
+ }
+
+ hinic_rq_put_wqe(rxq->rq, ci,
+ (num_wqes + 1) * HINIC_RQ_WQE_SIZE);
+
+ skb_record_rx_queue(skb, qp->q_id);
+ skb->protocol = eth_type_trans(skb, rxq->netdev);
+
+ napi_gro_receive(&rxq->napi, skb);
+
+ pkts++;
+ rx_bytes += pkt_len;
+ }
+
+ if (pkts)
+		tasklet_schedule(&rxq->rx_task);	/* refill via rx_alloc_pkts */
+
+ u64_stats_update_begin(&rxq->rxq_stats.syncp);
+ rxq->rxq_stats.pkts += pkts;
+ rxq->rxq_stats.bytes += rx_bytes;
+ u64_stats_update_end(&rxq->rxq_stats.syncp);
+
+ return pkts;
+}
+
+static int rx_poll(struct napi_struct *napi, int budget)
+{
+ struct hinic_rxq *rxq = container_of(napi, struct hinic_rxq, napi);
+ struct hinic_rq *rq = rxq->rq;
+ int pkts;
+
+ pkts = rxq_recv(rxq, budget);
+ if (pkts >= budget)
+ return budget;
+
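+	/* Budget not exhausted: finish the poll and re-enable the Rx irq */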
+ napi_complete(napi);
+ enable_irq(rq->irq);
+ return pkts;
+}
+
+static void rx_add_napi(struct hinic_rxq *rxq)
+{
+ struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
+
+ netif_napi_add(rxq->netdev, &rxq->napi, rx_poll, nic_dev->rx_weight);
+ napi_enable(&rxq->napi);
+}
+
+static void rx_del_napi(struct hinic_rxq *rxq)
+{
+ napi_disable(&rxq->napi);
+ netif_napi_del(&rxq->napi);
+}
+
+static irqreturn_t rx_irq(int irq, void *data)
+{
+ struct hinic_rxq *rxq = (struct hinic_rxq *)data;
+ struct hinic_rq *rq = rxq->rq;
+ struct hinic_dev *nic_dev;
+
+	/* Disable the interrupt until the napi poll completes */
+ disable_irq_nosync(rq->irq);
+
+ nic_dev = netdev_priv(rxq->netdev);
+ hinic_hwdev_msix_cnt_set(nic_dev->hwdev, rq->msix_entry);
+
+ napi_schedule(&rxq->napi);
+ return IRQ_HANDLED;
+}
+
+static int rx_request_irq(struct hinic_rxq *rxq)
+{
+ struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_rq *rq = rxq->rq;
+ int err;
+
+ rx_add_napi(rxq);
+
+ hinic_hwdev_msix_set(hwdev, rq->msix_entry,
+ RX_IRQ_NO_PENDING, RX_IRQ_NO_COALESC,
+ RX_IRQ_NO_LLI_TIMER, RX_IRQ_NO_CREDIT,
+ RX_IRQ_NO_RESEND_TIMER);
+
+ err = request_irq(rq->irq, rx_irq, 0, rxq->irq_name, rxq);
+ if (err) {
+ rx_del_napi(rxq);
+ return err;
+ }
+
+ return 0;
+}
+
+static void rx_free_irq(struct hinic_rxq *rxq)
+{
+ struct hinic_rq *rq = rxq->rq;
+
+ free_irq(rq->irq, rxq);
+ rx_del_napi(rxq);
+}
+
+/**
+ * hinic_init_rxq - Initialize the Rx Queue
+ * @rxq: Logical Rx Queue
+ * @rq: Hardware Rx Queue to connect the Logical queue with
+ * @netdev: network device to connect the Logical queue with
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_init_rxq(struct hinic_rxq *rxq, struct hinic_rq *rq,
+ struct net_device *netdev)
+{
+ struct hinic_qp *qp = container_of(rq, struct hinic_qp, rq);
+ int err, pkts, irqname_len;
+
+ rxq->netdev = netdev;
+ rxq->rq = rq;
+
+ rxq_stats_init(rxq);
+
+ irqname_len = snprintf(NULL, 0, "hinic_rxq%d", qp->q_id) + 1;
+ rxq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL);
+ if (!rxq->irq_name)
+ return -ENOMEM;
+
+ sprintf(rxq->irq_name, "hinic_rxq%d", qp->q_id);
+
+ tasklet_init(&rxq->rx_task, rx_alloc_task, (unsigned long)rxq);
+
+ pkts = rx_alloc_pkts(rxq);
+ if (!pkts) {
+ err = -ENOMEM;
+ goto err_rx_pkts;
+ }
+
+ err = rx_request_irq(rxq);
+ if (err) {
+ netdev_err(netdev, "Failed to request Rx irq\n");
+ goto err_req_rx_irq;
+ }
+
+ return 0;
+
+err_req_rx_irq:
+err_rx_pkts:
+ tasklet_kill(&rxq->rx_task);
+ free_all_rx_skbs(rxq);
+ devm_kfree(&netdev->dev, rxq->irq_name);
+ return err;
+}
+
+/**
+ * hinic_clean_rxq - Clean the Rx Queue
+ * @rxq: Logical Rx Queue
+ **/
+void hinic_clean_rxq(struct hinic_rxq *rxq)
+{
+ struct net_device *netdev = rxq->netdev;
+
+ rx_free_irq(rxq);
+
+ tasklet_kill(&rxq->rx_task);
+ free_all_rx_skbs(rxq);
+ devm_kfree(&netdev->dev, rxq->irq_name);
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.h b/drivers/net/ethernet/huawei/hinic/hinic_rx.h
new file mode 100644
index 0000000..27c9af4
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.h
@@ -0,0 +1,55 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_RX_H
+#define HINIC_RX_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/interrupt.h>
+
+#include "hinic_hw_qp.h"
+
+struct hinic_rxq_stats {
+ u64 pkts;
+ u64 bytes;
+
+ struct u64_stats_sync syncp;
+};
+
+struct hinic_rxq {
+ struct net_device *netdev;
+ struct hinic_rq *rq;
+
+ struct hinic_rxq_stats rxq_stats;
+
+ char *irq_name;
+
+ struct tasklet_struct rx_task;
+
+ struct napi_struct napi;
+};
+
+void hinic_rxq_clean_stats(struct hinic_rxq *rxq);
+
+void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats);
+
+int hinic_init_rxq(struct hinic_rxq *rxq, struct hinic_rq *rq,
+ struct net_device *netdev);
+
+void hinic_clean_rxq(struct hinic_rxq *rxq);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
new file mode 100644
index 0000000..abe3e38
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -0,0 +1,504 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/skbuff.h>
+#include <linux/smp.h>
+#include <asm/byteorder.h>
+
+#include "hinic_common.h"
+#include "hinic_hw_if.h"
+#include "hinic_hw_wqe.h"
+#include "hinic_hw_wq.h"
+#include "hinic_hw_qp.h"
+#include "hinic_hw_dev.h"
+#include "hinic_dev.h"
+#include "hinic_tx.h"
+
+#define TX_IRQ_NO_PENDING 0
+#define TX_IRQ_NO_COALESC 0
+#define TX_IRQ_NO_LLI_TIMER 0
+#define TX_IRQ_NO_CREDIT 0
+#define TX_IRQ_NO_RESEND_TIMER 0
+
+#define CI_UPDATE_NO_PENDING 0
+#define CI_UPDATE_NO_COALESC 0
+
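+/* The hardware writes the SQ consumer index to hw_ci_addr as a
+ * big-endian 16-bit value.
+ */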
+#define HW_CONS_IDX(sq) be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
+
+#define MIN_SKB_LEN 64
+
+/**
+ * hinic_txq_clean_stats - Clean the statistics of a specific queue
+ * @txq: Logical Tx Queue
+ **/
+void hinic_txq_clean_stats(struct hinic_txq *txq)
+{
+ struct hinic_txq_stats *txq_stats = &txq->txq_stats;
+
+ u64_stats_update_begin(&txq_stats->syncp);
+ txq_stats->pkts = 0;
+ txq_stats->bytes = 0;
+ txq_stats->tx_busy = 0;
+ txq_stats->tx_wake = 0;
+ txq_stats->tx_dropped = 0;
+ u64_stats_update_end(&txq_stats->syncp);
+}
+
+/**
+ * hinic_txq_get_stats - get statistics of Tx Queue
+ * @txq: Logical Tx Queue
+ * @stats: return updated stats here
+ **/
+void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
+{
+ struct hinic_txq_stats *txq_stats = &txq->txq_stats;
+ unsigned int start;
+
+ u64_stats_update_begin(&stats->syncp);
+ do {
+ start = u64_stats_fetch_begin(&txq_stats->syncp);
+ stats->pkts = txq_stats->pkts;
+ stats->bytes = txq_stats->bytes;
+ stats->tx_busy = txq_stats->tx_busy;
+ stats->tx_wake = txq_stats->tx_wake;
+ stats->tx_dropped = txq_stats->tx_dropped;
+ } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
+ u64_stats_update_end(&stats->syncp);
+}
+
+/**
+ * txq_stats_init - Initialize the statistics of a specific queue
+ * @txq: Logical Tx Queue
+ **/
+static void txq_stats_init(struct hinic_txq *txq)
+{
+ struct hinic_txq_stats *txq_stats = &txq->txq_stats;
+
+ u64_stats_init(&txq_stats->syncp);
+ hinic_txq_clean_stats(txq);
+}
+
+/**
+ * tx_map_skb - dma mapping for skb and return sges
+ * @nic_dev: nic device
+ * @skb: the skb
+ * @sges: returned sges
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+static int tx_map_skb(struct hinic_dev *nic_dev, struct sk_buff *skb,
+ struct hinic_sge *sges)
+{
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct skb_frag_struct *frag;
+ dma_addr_t dma_addr;
+ int i, j;
+
+ dma_addr = dma_map_single(&pdev->dev, skb->data, skb_headlen(skb),
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&pdev->dev, dma_addr)) {
+ dev_err(&pdev->dev, "Failed to map Tx skb data\n");
+ return -EFAULT;
+ }
+
+ hinic_set_sge(&sges[0], dma_addr, skb_headlen(skb));
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ frag = &skb_shinfo(skb)->frags[i];
+
+ dma_addr = skb_frag_dma_map(&pdev->dev, frag, 0,
+ skb_frag_size(frag),
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&pdev->dev, dma_addr)) {
+ dev_err(&pdev->dev, "Failed to map Tx skb frag\n");
+ goto err_tx_map;
+ }
+
+ hinic_set_sge(&sges[i + 1], dma_addr, skb_frag_size(frag));
+ }
+
+ return 0;
+
+err_tx_map:
+ for (j = 0; j < i; j++)
+ dma_unmap_page(&pdev->dev, hinic_sge_to_dma(&sges[j + 1]),
+ sges[j + 1].len, DMA_TO_DEVICE);
+
+ dma_unmap_single(&pdev->dev, hinic_sge_to_dma(&sges[0]), sges[0].len,
+ DMA_TO_DEVICE);
+ return -EFAULT;
+}
+
+/**
+ * tx_unmap_skb - unmap the dma address of the skb
+ * @nic_dev: nic device
+ * @skb: the skb
+ * @sges: the sges that are connected to the skb
+ **/
+static void tx_unmap_skb(struct hinic_dev *nic_dev, struct sk_buff *skb,
+ struct hinic_sge *sges)
+{
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ int i;
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ dma_unmap_page(&pdev->dev, hinic_sge_to_dma(&sges[i + 1]),
+ sges[i + 1].len, DMA_TO_DEVICE);
+
+ dma_unmap_single(&pdev->dev, hinic_sge_to_dma(&sges[0]), sges[0].len,
+ DMA_TO_DEVICE);
+}
+
+netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ struct netdev_queue *netdev_txq;
+ int nr_sges, err = NETDEV_TX_OK;
+ struct hinic_sq_wqe *sq_wqe;
+ unsigned int wqe_size;
+ struct hinic_txq *txq;
+ struct hinic_qp *qp;
+ u16 prod_idx;
+
+ txq = &nic_dev->txqs[skb->queue_mapping];
+ qp = container_of(txq->sq, struct hinic_qp, sq);
+
+ if (skb->len < MIN_SKB_LEN) {
+ if (skb_pad(skb, MIN_SKB_LEN - skb->len)) {
+ netdev_err(netdev, "Failed to pad skb\n");
+ goto update_error_stats;
+ }
+
+ skb->len = MIN_SKB_LEN;
+ }
+
+ nr_sges = skb_shinfo(skb)->nr_frags + 1;
+ if (nr_sges > txq->max_sges) {
+ netdev_err(netdev, "Too many Tx sges\n");
+ goto skb_error;
+ }
+
+ err = tx_map_skb(nic_dev, skb, txq->sges);
+ if (err)
+ goto skb_error;
+
+ wqe_size = HINIC_SQ_WQE_SIZE(nr_sges);
+
+ sq_wqe = hinic_sq_get_wqe(txq->sq, wqe_size, &prod_idx);
+ if (!sq_wqe) {
+ tx_unmap_skb(nic_dev, skb, txq->sges);
+
+ netif_stop_subqueue(netdev, qp->q_id);
+
+ u64_stats_update_begin(&txq->txq_stats.syncp);
+ txq->txq_stats.tx_busy++;
+ u64_stats_update_end(&txq->txq_stats.syncp);
+ err = NETDEV_TX_BUSY;
+ goto flush_skbs;
+ }
+
+ hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges);
+
+ hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size);
+
+flush_skbs:
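+	/* Batch doorbell writes: ring only when the stack has no more
+	 * frames pending or the queue has been stopped.
+	 */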
+ netdev_txq = netdev_get_tx_queue(netdev, skb->queue_mapping);
+ if ((!skb->xmit_more) || (netif_xmit_stopped(netdev_txq)))
+ hinic_sq_write_db(txq->sq, prod_idx, wqe_size, 0);
+
+ return err;
+
+skb_error:
+ dev_kfree_skb_any(skb);
+
+update_error_stats:
+ u64_stats_update_begin(&txq->txq_stats.syncp);
+ txq->txq_stats.tx_dropped++;
+ u64_stats_update_end(&txq->txq_stats.syncp);
+ return err;
+}
+
+/**
+ * tx_free_skb - unmap and free skb
+ * @nic_dev: nic device
+ * @skb: the skb
+ * @sges: the sges that are connected to the skb
+ **/
+static void tx_free_skb(struct hinic_dev *nic_dev, struct sk_buff *skb,
+ struct hinic_sge *sges)
+{
+ tx_unmap_skb(nic_dev, skb, sges);
+
+ dev_kfree_skb_any(skb);
+}
+
+/**
+ * free_all_tx_skbs - free all skbs in tx queue
+ * @txq: tx queue
+ **/
+static void free_all_tx_skbs(struct hinic_txq *txq)
+{
+ struct hinic_dev *nic_dev = netdev_priv(txq->netdev);
+ struct hinic_sq *sq = txq->sq;
+ struct hinic_sq_wqe *sq_wqe;
+ unsigned int wqe_size;
+ struct sk_buff *skb;
+ int nr_sges;
+ u16 ci;
+
+ while ((sq_wqe = hinic_sq_read_wqe(sq, &skb, &wqe_size, &ci))) {
+ nr_sges = skb_shinfo(skb)->nr_frags + 1;
+
+ hinic_sq_get_sges(sq_wqe, txq->free_sges, nr_sges);
+
+ hinic_sq_put_wqe(sq, wqe_size);
+
+ tx_free_skb(nic_dev, skb, txq->free_sges);
+ }
+}
+
+/**
+ * free_tx_poll - free completed tx skbs in the tx queue connected to the napi
+ * @napi: napi
+ * @budget: maximum number of skbs to free
+ *
+ * Return number of skbs freed
+ **/
+static int free_tx_poll(struct napi_struct *napi, int budget)
+{
+ struct hinic_txq *txq = container_of(napi, struct hinic_txq, napi);
+ struct hinic_qp *qp = container_of(txq->sq, struct hinic_qp, sq);
+ struct hinic_dev *nic_dev = netdev_priv(txq->netdev);
+ struct netdev_queue *netdev_txq;
+ struct hinic_sq *sq = txq->sq;
+ struct hinic_wq *wq = sq->wq;
+ struct hinic_sq_wqe *sq_wqe;
+ unsigned int wqe_size;
+ int nr_sges, pkts = 0;
+ struct sk_buff *skb;
+ u64 tx_bytes = 0;
+ u16 hw_ci, sw_ci;
+
+ do {
+ hw_ci = HW_CONS_IDX(sq) & wq->mask;
+
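+		/* Stop if the queue is empty or the hardware consumer
+		 * index has not yet moved past the whole WQE.
+		 */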
+ sq_wqe = hinic_sq_read_wqe(sq, &skb, &wqe_size, &sw_ci);
+ if ((!sq_wqe) ||
+ (((hw_ci - sw_ci) & wq->mask) * wq->wqebb_size < wqe_size))
+ break;
+
+ tx_bytes += skb->len;
+ pkts++;
+
+ nr_sges = skb_shinfo(skb)->nr_frags + 1;
+
+ hinic_sq_get_sges(sq_wqe, txq->free_sges, nr_sges);
+
+ hinic_sq_put_wqe(sq, wqe_size);
+
+ tx_free_skb(nic_dev, skb, txq->free_sges);
+ } while (pkts < budget);
+
+ if (__netif_subqueue_stopped(nic_dev->netdev, qp->q_id) &&
+ hinic_get_sq_free_wqebbs(sq) >= HINIC_MIN_TX_NUM_WQEBBS(sq)) {
+ netdev_txq = netdev_get_tx_queue(txq->netdev, qp->q_id);
+
+ __netif_tx_lock(netdev_txq, smp_processor_id());
+
+ netif_wake_subqueue(nic_dev->netdev, qp->q_id);
+
+ __netif_tx_unlock(netdev_txq);
+
+ u64_stats_update_begin(&txq->txq_stats.syncp);
+ txq->txq_stats.tx_wake++;
+ u64_stats_update_end(&txq->txq_stats.syncp);
+ }
+
+ u64_stats_update_begin(&txq->txq_stats.syncp);
+ txq->txq_stats.bytes += tx_bytes;
+ txq->txq_stats.pkts += pkts;
+ u64_stats_update_end(&txq->txq_stats.syncp);
+
+ if (pkts < budget) {
+ napi_complete(napi);
+ enable_irq(sq->irq);
+ return pkts;
+ }
+
+ return budget;
+}
+
+static void tx_napi_add(struct hinic_txq *txq, int weight)
+{
+ netif_napi_add(txq->netdev, &txq->napi, free_tx_poll, weight);
+ napi_enable(&txq->napi);
+}
+
+static void tx_napi_del(struct hinic_txq *txq)
+{
+ napi_disable(&txq->napi);
+ netif_napi_del(&txq->napi);
+}
+
+static irqreturn_t tx_irq(int irq, void *data)
+{
+ struct hinic_txq *txq = data;
+ struct hinic_dev *nic_dev;
+
+ nic_dev = netdev_priv(txq->netdev);
+
+	/* Disable the interrupt until the napi poll completes */
+ disable_irq_nosync(txq->sq->irq);
+
+ hinic_hwdev_msix_cnt_set(nic_dev->hwdev, txq->sq->msix_entry);
+
+ napi_schedule(&txq->napi);
+ return IRQ_HANDLED;
+}
+
+static int tx_request_irq(struct hinic_txq *txq)
+{
+ struct hinic_dev *nic_dev = netdev_priv(txq->netdev);
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ struct hinic_hwif *hwif = hwdev->hwif;
+ struct pci_dev *pdev = hwif->pdev;
+ struct hinic_sq *sq = txq->sq;
+ int err;
+
+ tx_napi_add(txq, nic_dev->tx_weight);
+
+ hinic_hwdev_msix_set(nic_dev->hwdev, sq->msix_entry,
+ TX_IRQ_NO_PENDING, TX_IRQ_NO_COALESC,
+ TX_IRQ_NO_LLI_TIMER, TX_IRQ_NO_CREDIT,
+ TX_IRQ_NO_RESEND_TIMER);
+
+ err = request_irq(sq->irq, tx_irq, 0, txq->irq_name, txq);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to request Tx irq\n");
+ tx_napi_del(txq);
+ return err;
+ }
+
+ return 0;
+}
+
+static void tx_free_irq(struct hinic_txq *txq)
+{
+ struct hinic_sq *sq = txq->sq;
+
+ free_irq(sq->irq, txq);
+ tx_napi_del(txq);
+}
+
+/**
+ * hinic_init_txq - Initialize the Tx Queue
+ * @txq: Logical Tx Queue
+ * @sq: Hardware Tx Queue to connect the Logical queue with
+ * @netdev: network device to connect the Logical queue with
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq,
+ struct net_device *netdev)
+{
+ struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
+ struct hinic_dev *nic_dev = netdev_priv(netdev);
+ struct hinic_hwdev *hwdev = nic_dev->hwdev;
+ int err, irqname_len;
+ size_t sges_size;
+
+ txq->netdev = netdev;
+ txq->sq = sq;
+
+ txq_stats_init(txq);
+
+ txq->max_sges = HINIC_MAX_SQ_BUFDESCS;
+
+ sges_size = txq->max_sges * sizeof(*txq->sges);
+ txq->sges = devm_kzalloc(&netdev->dev, sges_size, GFP_KERNEL);
+ if (!txq->sges)
+ return -ENOMEM;
+
+ sges_size = txq->max_sges * sizeof(*txq->free_sges);
+ txq->free_sges = devm_kzalloc(&netdev->dev, sges_size, GFP_KERNEL);
+ if (!txq->free_sges) {
+ err = -ENOMEM;
+ goto err_alloc_free_sges;
+ }
+
+ irqname_len = snprintf(NULL, 0, "hinic_txq%d", qp->q_id) + 1;
+ txq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL);
+ if (!txq->irq_name) {
+ err = -ENOMEM;
+ goto err_alloc_irqname;
+ }
+
+ sprintf(txq->irq_name, "hinic_txq%d", qp->q_id);
+
+ err = hinic_hwdev_hw_ci_addr_set(hwdev, sq, CI_UPDATE_NO_PENDING,
+ CI_UPDATE_NO_COALESC);
+ if (err)
+ goto err_hw_ci;
+
+ err = tx_request_irq(txq);
+ if (err) {
+ netdev_err(netdev, "Failed to request Tx irq\n");
+ goto err_req_tx_irq;
+ }
+
+ return 0;
+
+err_req_tx_irq:
+err_hw_ci:
+ devm_kfree(&netdev->dev, txq->irq_name);
+
+err_alloc_irqname:
+ devm_kfree(&netdev->dev, txq->free_sges);
+
+err_alloc_free_sges:
+ devm_kfree(&netdev->dev, txq->sges);
+ return err;
+}
+
+/**
+ * hinic_clean_txq - Clean the Tx Queue
+ * @txq: Logical Tx Queue
+ **/
+void hinic_clean_txq(struct hinic_txq *txq)
+{
+ struct net_device *netdev = txq->netdev;
+
+ tx_free_irq(txq);
+
+ free_all_tx_skbs(txq);
+
+ devm_kfree(&netdev->dev, txq->irq_name);
+ devm_kfree(&netdev->dev, txq->free_sges);
+ devm_kfree(&netdev->dev, txq->sges);
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.h b/drivers/net/ethernet/huawei/hinic/hinic_tx.h
new file mode 100644
index 0000000..1fa55dc
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.h
@@ -0,0 +1,62 @@
+/*
+ * Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ */
+
+#ifndef HINIC_TX_H
+#define HINIC_TX_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+
+#include "hinic_common.h"
+#include "hinic_hw_qp.h"
+
+struct hinic_txq_stats {
+ u64 pkts;
+ u64 bytes;
+ u64 tx_busy;
+ u64 tx_wake;
+ u64 tx_dropped;
+
+ struct u64_stats_sync syncp;
+};
+
+struct hinic_txq {
+ struct net_device *netdev;
+ struct hinic_sq *sq;
+
+ struct hinic_txq_stats txq_stats;
+
+ int max_sges;
+ struct hinic_sge *sges;
+ struct hinic_sge *free_sges;
+
+ char *irq_name;
+ struct napi_struct napi;
+};
+
+void hinic_txq_clean_stats(struct hinic_txq *txq);
+
+void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats);
+
+netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+
+int hinic_init_txq(struct hinic_txq *txq, struct hinic_sq *sq,
+ struct net_device *netdev);
+
+void hinic_clean_txq(struct hinic_txq *txq);
+
+#endif
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index b9d310f..4878b71 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -3102,8 +3102,7 @@ static int ehea_setup_ports(struct ehea_adapter *adapter)
dn_log_port_id = of_get_property(eth_dn, "ibm,hea-port-no",
NULL);
if (!dn_log_port_id) {
- pr_err("bad device node: eth_dn name=%s\n",
- eth_dn->full_name);
+ pr_err("bad device node: eth_dn name=%pOF\n", eth_dn);
continue;
}
@@ -3425,7 +3424,7 @@ static int ehea_probe_adapter(struct platform_device *dev)
if (!adapter->handle) {
dev_err(&dev->dev, "failed getting handle for adapter"
- " '%s'\n", dev->dev.of_node->full_name);
+ " '%pOF'\n", dev->dev.of_node);
ret = -ENODEV;
goto out_free_ad;
}
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 259e69a..7feff24 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -133,8 +133,7 @@ static inline void emac_report_timeout_error(struct emac_instance *dev,
EMAC_FTR_440EP_PHY_CLK_FIX))
DBG(dev, "%s" NL, error);
else if (net_ratelimit())
- printk(KERN_ERR "%s: %s\n", dev->ofdev->dev.of_node->full_name,
- error);
+ printk(KERN_ERR "%pOF: %s\n", dev->ofdev->dev.of_node, error);
}
/* EMAC PHY clock workaround:
@@ -2258,8 +2257,8 @@ static void emac_ethtool_get_drvinfo(struct net_device *ndev,
strlcpy(info->driver, "ibm_emac", sizeof(info->driver));
strlcpy(info->version, DRV_VERSION, sizeof(info->version));
- snprintf(info->bus_info, sizeof(info->bus_info), "PPC 4xx EMAC-%d %s",
- dev->cell_index, dev->ofdev->dev.of_node->full_name);
+ snprintf(info->bus_info, sizeof(info->bus_info), "PPC 4xx EMAC-%d %pOF",
+ dev->cell_index, dev->ofdev->dev.of_node);
}
static const struct ethtool_ops emac_ethtool_ops = {
@@ -2431,8 +2430,8 @@ static int emac_read_uint_prop(struct device_node *np, const char *name,
const u32 *prop = of_get_property(np, name, &len);
if (prop == NULL || len < sizeof(u32)) {
if (fatal)
- printk(KERN_ERR "%s: missing %s property\n",
- np->full_name, name);
+ printk(KERN_ERR "%pOF: missing %s property\n",
+ np, name);
return -ENODEV;
}
*val = *prop;
@@ -2768,7 +2767,7 @@ static int emac_init_phy(struct emac_instance *dev)
#endif
mutex_unlock(&emac_phy_map_lock);
if (i == 0x20) {
- printk(KERN_WARNING "%s: can't find PHY!\n", np->full_name);
+ printk(KERN_WARNING "%pOF: can't find PHY!\n", np);
return -ENXIO;
}
@@ -2894,8 +2893,8 @@ static int emac_init_config(struct emac_instance *dev)
#ifdef CONFIG_IBM_EMAC_NO_FLOW_CTRL
dev->features |= EMAC_FTR_NO_FLOW_CONTROL_40x;
#else
- printk(KERN_ERR "%s: Flow control not disabled!\n",
- np->full_name);
+ printk(KERN_ERR "%pOF: Flow control not disabled!\n",
+ np);
return -ENXIO;
#endif
}
@@ -2918,8 +2917,7 @@ static int emac_init_config(struct emac_instance *dev)
#ifdef CONFIG_IBM_EMAC_TAH
dev->features |= EMAC_FTR_HAS_TAH;
#else
- printk(KERN_ERR "%s: TAH support not enabled !\n",
- np->full_name);
+ printk(KERN_ERR "%pOF: TAH support not enabled !\n", np);
return -ENXIO;
#endif
}
@@ -2928,8 +2926,7 @@ static int emac_init_config(struct emac_instance *dev)
#ifdef CONFIG_IBM_EMAC_ZMII
dev->features |= EMAC_FTR_HAS_ZMII;
#else
- printk(KERN_ERR "%s: ZMII support not enabled !\n",
- np->full_name);
+ printk(KERN_ERR "%pOF: ZMII support not enabled !\n", np);
return -ENXIO;
#endif
}
@@ -2938,8 +2935,7 @@ static int emac_init_config(struct emac_instance *dev)
#ifdef CONFIG_IBM_EMAC_RGMII
dev->features |= EMAC_FTR_HAS_RGMII;
#else
- printk(KERN_ERR "%s: RGMII support not enabled !\n",
- np->full_name);
+ printk(KERN_ERR "%pOF: RGMII support not enabled !\n", np);
return -ENXIO;
#endif
}
@@ -2947,8 +2943,8 @@ static int emac_init_config(struct emac_instance *dev)
/* Read MAC-address */
p = of_get_property(np, "local-mac-address", NULL);
if (p == NULL) {
- printk(KERN_ERR "%s: Can't find local-mac-address property\n",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't find local-mac-address property\n",
+ np);
return -ENXIO;
}
memcpy(dev->ndev->dev_addr, p, ETH_ALEN);
@@ -3036,30 +3032,24 @@ static int emac_probe(struct platform_device *ofdev)
/* Init various config data based on device-tree */
err = emac_init_config(dev);
- if (err != 0)
+ if (err)
goto err_free;
/* Get interrupts. EMAC irq is mandatory, WOL irq is optional */
dev->emac_irq = irq_of_parse_and_map(np, 0);
dev->wol_irq = irq_of_parse_and_map(np, 1);
if (!dev->emac_irq) {
- printk(KERN_ERR "%s: Can't map main interrupt\n", np->full_name);
+ printk(KERN_ERR "%pOF: Can't map main interrupt\n", np);
+ err = -ENODEV;
goto err_free;
}
ndev->irq = dev->emac_irq;
/* Map EMAC regs */
- if (of_address_to_resource(np, 0, &dev->rsrc_regs)) {
- printk(KERN_ERR "%s: Can't get registers address\n",
- np->full_name);
- goto err_irq_unmap;
- }
- // TODO : request_mem_region
- dev->emacp = ioremap(dev->rsrc_regs.start,
- resource_size(&dev->rsrc_regs));
+ // TODO : platform_get_resource() and devm_ioremap_resource()
+ dev->emacp = of_iomap(np, 0);
if (dev->emacp == NULL) {
- printk(KERN_ERR "%s: Can't map device registers!\n",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't map device registers!\n", np);
err = -ENOMEM;
goto err_irq_unmap;
}
@@ -3068,8 +3058,7 @@ static int emac_probe(struct platform_device *ofdev)
err = emac_wait_deps(dev);
if (err) {
printk(KERN_ERR
- "%s: Timeout waiting for dependent devices\n",
- np->full_name);
+ "%pOF: Timeout waiting for dependent devices\n", np);
/* display more info about what's missing? */
goto err_reg_unmap;
}
@@ -3084,8 +3073,8 @@ static int emac_probe(struct platform_device *ofdev)
dev->commac.rx_chan_mask = MAL_CHAN_MASK(dev->mal_rx_chan);
err = mal_register_commac(dev->mal, &dev->commac);
if (err) {
- printk(KERN_ERR "%s: failed to register with mal %s!\n",
- np->full_name, dev->mal_dev->dev.of_node->full_name);
+ printk(KERN_ERR "%pOF: failed to register with mal %pOF!\n",
+ np, dev->mal_dev->dev.of_node);
goto err_rel_deps;
}
dev->rx_skb_size = emac_rx_skb_size(ndev->mtu);
@@ -3161,8 +3150,8 @@ static int emac_probe(struct platform_device *ofdev)
err = register_netdev(ndev);
if (err) {
- printk(KERN_ERR "%s: failed to register net device (%d)!\n",
- np->full_name, err);
+ printk(KERN_ERR "%pOF: failed to register net device (%d)!\n",
+ np, err);
goto err_detach_tah;
}
@@ -3176,8 +3165,8 @@ static int emac_probe(struct platform_device *ofdev)
wake_up_all(&emac_probe_wait);
- printk(KERN_INFO "%s: EMAC-%d %s, MAC %pM\n",
- ndev->name, dev->cell_index, np->full_name, ndev->dev_addr);
+ printk(KERN_INFO "%s: EMAC-%d %pOF, MAC %pM\n",
+ ndev->name, dev->cell_index, np, ndev->dev_addr);
if (dev->phy_mode == PHY_MODE_SGMII)
printk(KERN_NOTICE "%s: in SGMII mode\n", ndev->name);
diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h
index f10e156..369de2c 100644
--- a/drivers/net/ethernet/ibm/emac/core.h
+++ b/drivers/net/ethernet/ibm/emac/core.h
@@ -167,7 +167,6 @@ struct emac_error_stats {
struct emac_instance {
struct net_device *ndev;
- struct resource rsrc_regs;
struct emac_regs __iomem *emacp;
struct platform_device *ofdev;
struct device_node **blist; /* bootlist entry */
diff --git a/drivers/net/ethernet/ibm/emac/debug.h b/drivers/net/ethernet/ibm/emac/debug.h
index 5bdfc17..9d06d3b 100644
--- a/drivers/net/ethernet/ibm/emac/debug.h
+++ b/drivers/net/ethernet/ibm/emac/debug.h
@@ -31,7 +31,7 @@
#endif
#define EMAC_DBG(d, name, fmt, arg...) \
- printk(KERN_DEBUG #name "%s: " fmt, d->ofdev->dev.of_node->full_name, ## arg)
+ printk(KERN_DEBUG #name "%pOF: " fmt, d->ofdev->dev.of_node, ## arg)
#if DBG_LEVEL > 0
# define DBG(d,f,x...) EMAC_DBG(d, emac, f, ##x)
diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index 91b1a55..2c74baa 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -579,8 +579,8 @@ static int mal_probe(struct platform_device *ofdev)
mal->features |= (MAL_FTR_CLEAR_ICINTSTAT |
MAL_FTR_COMMON_ERR_INT);
#else
- printk(KERN_ERR "%s: Support for 405EZ not enabled!\n",
- ofdev->dev.of_node->full_name);
+ printk(KERN_ERR "%pOF: Support for 405EZ not enabled!\n",
+ ofdev->dev.of_node);
err = -ENODEV;
goto fail;
#endif
@@ -687,8 +687,8 @@ static int mal_probe(struct platform_device *ofdev)
mal_enable_eob_irq(mal);
printk(KERN_INFO
- "MAL v%d %s, %d TX channels, %d RX channels\n",
- mal->version, ofdev->dev.of_node->full_name,
+ "MAL v%d %pOF, %d TX channels, %d RX channels\n",
+ mal->version, ofdev->dev.of_node,
mal->num_tx_chans, mal->num_rx_chans);
/* Advertise this instance to the rest of the world */
diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c
index 206ccbb..c4a1ac3 100644
--- a/drivers/net/ethernet/ibm/emac/rgmii.c
+++ b/drivers/net/ethernet/ibm/emac/rgmii.c
@@ -104,8 +104,8 @@ int rgmii_attach(struct platform_device *ofdev, int input, int mode)
/* Check if we need to attach to a RGMII */
if (input < 0 || !rgmii_valid_mode(mode)) {
- printk(KERN_ERR "%s: unsupported settings !\n",
- ofdev->dev.of_node->full_name);
+ printk(KERN_ERR "%pOF: unsupported settings !\n",
+ ofdev->dev.of_node);
return -ENODEV;
}
@@ -114,8 +114,8 @@ int rgmii_attach(struct platform_device *ofdev, int input, int mode)
/* Enable this input */
out_be32(&p->fer, in_be32(&p->fer) | rgmii_mode_mask(mode, input));
- printk(KERN_NOTICE "%s: input %d in %s mode\n",
- ofdev->dev.of_node->full_name, input, rgmii_mode_name(mode));
+ printk(KERN_NOTICE "%pOF: input %d in %s mode\n",
+ ofdev->dev.of_node, input, rgmii_mode_name(mode));
++dev->users;
@@ -249,8 +249,7 @@ static int rgmii_probe(struct platform_device *ofdev)
rc = -ENXIO;
if (of_address_to_resource(np, 0, &regs)) {
- printk(KERN_ERR "%s: Can't get registers address\n",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get registers address\n", np);
goto err_free;
}
@@ -258,8 +257,7 @@ static int rgmii_probe(struct platform_device *ofdev)
dev->base = (struct rgmii_regs __iomem *)ioremap(regs.start,
sizeof(struct rgmii_regs));
if (dev->base == NULL) {
- printk(KERN_ERR "%s: Can't map device registers!\n",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't map device registers!\n", np);
goto err_free;
}
@@ -278,8 +276,8 @@ static int rgmii_probe(struct platform_device *ofdev)
out_be32(&dev->base->fer, 0);
printk(KERN_INFO
- "RGMII %s initialized with%s MDIO support\n",
- ofdev->dev.of_node->full_name,
+ "RGMII %pOF initialized with%s MDIO support\n",
+ ofdev->dev.of_node,
(dev->flags & EMAC_RGMII_FLAG_HAS_MDIO) ? "" : "out");
wmb();
diff --git a/drivers/net/ethernet/ibm/emac/tah.c b/drivers/net/ethernet/ibm/emac/tah.c
index 32cb6c9..9912456 100644
--- a/drivers/net/ethernet/ibm/emac/tah.c
+++ b/drivers/net/ethernet/ibm/emac/tah.c
@@ -58,8 +58,7 @@ void tah_reset(struct platform_device *ofdev)
--n;
if (unlikely(!n))
- printk(KERN_ERR "%s: reset timeout\n",
- ofdev->dev.of_node->full_name);
+ printk(KERN_ERR "%pOF: reset timeout\n", ofdev->dev.of_node);
/* 10KB TAH TX FIFO accommodates the max MTU of 9000 */
out_be32(&p->mr,
@@ -105,8 +104,7 @@ static int tah_probe(struct platform_device *ofdev)
rc = -ENXIO;
if (of_address_to_resource(np, 0, &regs)) {
- printk(KERN_ERR "%s: Can't get registers address\n",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get registers address\n", np);
goto err_free;
}
@@ -114,8 +112,7 @@ static int tah_probe(struct platform_device *ofdev)
dev->base = (struct tah_regs __iomem *)ioremap(regs.start,
sizeof(struct tah_regs));
if (dev->base == NULL) {
- printk(KERN_ERR "%s: Can't map device registers!\n",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't map device registers!\n", np);
goto err_free;
}
@@ -124,8 +121,7 @@ static int tah_probe(struct platform_device *ofdev)
/* Initialize TAH and enable IPv4 checksum verification, no TSO yet */
tah_reset(ofdev);
- printk(KERN_INFO
- "TAH %s initialized\n", ofdev->dev.of_node->full_name);
+ printk(KERN_INFO "TAH %pOF initialized\n", ofdev->dev.of_node);
wmb();
return 0;
diff --git a/drivers/net/ethernet/ibm/emac/zmii.c b/drivers/net/ethernet/ibm/emac/zmii.c
index 8727b86..89c42d3 100644
--- a/drivers/net/ethernet/ibm/emac/zmii.c
+++ b/drivers/net/ethernet/ibm/emac/zmii.c
@@ -121,15 +121,15 @@ int zmii_attach(struct platform_device *ofdev, int input, int *mode)
} else
dev->mode = *mode;
- printk(KERN_NOTICE "%s: bridge in %s mode\n",
- ofdev->dev.of_node->full_name,
+ printk(KERN_NOTICE "%pOF: bridge in %s mode\n",
+ ofdev->dev.of_node,
zmii_mode_name(dev->mode));
} else {
/* All inputs must use the same mode */
if (*mode != PHY_MODE_NA && *mode != dev->mode) {
printk(KERN_ERR
- "%s: invalid mode %d specified for input %d\n",
- ofdev->dev.of_node->full_name, *mode, input);
+ "%pOF: invalid mode %d specified for input %d\n",
+ ofdev->dev.of_node, *mode, input);
mutex_unlock(&dev->lock);
return -EINVAL;
}
@@ -250,8 +250,7 @@ static int zmii_probe(struct platform_device *ofdev)
rc = -ENXIO;
if (of_address_to_resource(np, 0, &regs)) {
- printk(KERN_ERR "%s: Can't get registers address\n",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get registers address\n", np);
goto err_free;
}
@@ -259,8 +258,7 @@ static int zmii_probe(struct platform_device *ofdev)
dev->base = (struct zmii_regs __iomem *)ioremap(regs.start,
sizeof(struct zmii_regs));
if (dev->base == NULL) {
- printk(KERN_ERR "%s: Can't map device registers!\n",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't map device registers!\n", np);
goto err_free;
}
@@ -270,8 +268,7 @@ static int zmii_probe(struct platform_device *ofdev)
/* Disable all inputs by default */
out_be32(&dev->base->fer, 0);
- printk(KERN_INFO
- "ZMII %s initialized\n", ofdev->dev.of_node->full_name);
+ printk(KERN_INFO "ZMII %pOF initialized\n", ofdev->dev.of_node);
wmb();
platform_set_drvdata(ofdev, dev);
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index d17c2b0..f210398 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1897,7 +1897,7 @@ static int ibmveth_resume(struct device *dev)
return 0;
}
-static struct vio_device_id ibmveth_device_table[] = {
+static const struct vio_device_id ibmveth_device_table[] = {
{ "network", "IBM,l-lan"},
{ "", "" }
};
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index c45e8e3..cb8182f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -347,6 +347,31 @@ static void replenish_pools(struct ibmvnic_adapter *adapter)
}
}
+static void release_stats_buffers(struct ibmvnic_adapter *adapter)
+{
+ kfree(adapter->tx_stats_buffers);
+ kfree(adapter->rx_stats_buffers);
+}
+
+static int init_stats_buffers(struct ibmvnic_adapter *adapter)
+{
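+ /* One stats buffer per requested queue; both allocations are released
+ * together in release_stats_buffers()
+ */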
+ adapter->tx_stats_buffers =
+ kcalloc(adapter->req_tx_queues,
+ sizeof(struct ibmvnic_tx_queue_stats),
+ GFP_KERNEL);
+ if (!adapter->tx_stats_buffers)
+ return -ENOMEM;
+
+ adapter->rx_stats_buffers =
+ kcalloc(adapter->req_rx_queues,
+ sizeof(struct ibmvnic_rx_queue_stats),
+ GFP_KERNEL);
+ if (!adapter->rx_stats_buffers)
+ return -ENOMEM;
+
+ return 0;
+}
+
static void release_stats_token(struct ibmvnic_adapter *adapter)
{
struct device *dev = &adapter->vdev->dev;
@@ -374,6 +399,7 @@ static int init_stats_token(struct ibmvnic_adapter *adapter)
}
adapter->stats_token = stok;
+ netdev_dbg(adapter->netdev, "Stats token initialized (%llx)\n", stok);
return 0;
}
@@ -387,6 +413,8 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
for (i = 0; i < rx_scrqs; i++) {
rx_pool = &adapter->rx_pool[i];
+ netdev_dbg(adapter->netdev, "Re-setting rx_pool[%d]\n", i);
+
rc = reset_long_term_buff(adapter, &rx_pool->long_term_buff);
if (rc)
return rc;
@@ -419,6 +447,8 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
for (i = 0; i < rx_scrqs; i++) {
rx_pool = &adapter->rx_pool[i];
+ netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i);
+
kfree(rx_pool->free_map);
free_long_term_buff(adapter, &rx_pool->long_term_buff);
@@ -465,7 +495,7 @@ static int init_rx_pools(struct net_device *netdev)
rx_pool = &adapter->rx_pool[i];
netdev_dbg(adapter->netdev,
- "Initializing rx_pool %d, %lld buffs, %lld bytes each\n",
+ "Initializing rx_pool[%d], %lld buffs, %lld bytes each\n",
i, adapter->req_rx_add_entries_per_subcrq,
be64_to_cpu(size_array[i]));
@@ -515,6 +545,8 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter)
tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
for (i = 0; i < tx_scrqs; i++) {
+ netdev_dbg(adapter->netdev, "Re-setting tx_pool[%d]\n", i);
+
tx_pool = &adapter->tx_pool[i];
rc = reset_long_term_buff(adapter, &tx_pool->long_term_buff);
@@ -545,6 +577,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
for (i = 0; i < tx_scrqs; i++) {
+ netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i);
tx_pool = &adapter->tx_pool[i];
kfree(tx_pool->tx_buff);
free_long_term_buff(adapter, &tx_pool->long_term_buff);
@@ -571,6 +604,11 @@ static int init_tx_pools(struct net_device *netdev)
for (i = 0; i < tx_subcrqs; i++) {
tx_pool = &adapter->tx_pool[i];
+
+ netdev_dbg(adapter->netdev,
+ "Initializing tx_pool[%d], %lld buffs\n",
+ i, adapter->req_tx_entries_per_subcrq);
+
tx_pool->tx_buff = kcalloc(adapter->req_tx_entries_per_subcrq,
sizeof(struct ibmvnic_tx_buff),
GFP_KERNEL);
@@ -641,8 +679,10 @@ static void ibmvnic_napi_disable(struct ibmvnic_adapter *adapter)
if (!adapter->napi_enabled)
return;
- for (i = 0; i < adapter->req_rx_queues; i++)
+ for (i = 0; i < adapter->req_rx_queues; i++) {
+ netdev_dbg(adapter->netdev, "Disabling napi[%d]\n", i);
napi_disable(&adapter->napi[i]);
+ }
adapter->napi_enabled = false;
}
@@ -700,12 +740,16 @@ static void release_resources(struct ibmvnic_adapter *adapter)
release_rx_pools(adapter);
release_stats_token(adapter);
+ release_stats_buffers(adapter);
release_error_buffers(adapter);
if (adapter->napi) {
for (i = 0; i < adapter->req_rx_queues; i++) {
- if (&adapter->napi[i])
+ if (&adapter->napi[i]) {
+ netdev_dbg(adapter->netdev,
+ "Releasing napi[%d]\n", i);
netif_napi_del(&adapter->napi[i]);
+ }
}
}
}
@@ -718,7 +762,8 @@ static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
bool resend;
int rc;
- netdev_err(netdev, "setting link state %d\n", link_state);
+ netdev_dbg(netdev, "setting link state %d\n", link_state);
+
memset(&crq, 0, sizeof(crq));
crq.logical_link_state.first = IBMVNIC_CRQ_CMD;
crq.logical_link_state.cmd = LOGICAL_LINK_STATE;
@@ -755,6 +800,9 @@ static int set_real_num_queues(struct net_device *netdev)
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
int rc;
+ netdev_dbg(netdev, "Setting real tx/rx queues (%llx/%llx)\n",
+ adapter->req_tx_queues, adapter->req_rx_queues);
+
rc = netif_set_real_num_tx_queues(netdev, adapter->req_tx_queues);
if (rc) {
netdev_err(netdev, "failed to set the number of tx queues\n");
@@ -777,6 +825,10 @@ static int init_resources(struct ibmvnic_adapter *adapter)
if (rc)
return rc;
+ rc = init_stats_buffers(adapter);
+ if (rc)
+ return rc;
+
rc = init_stats_token(adapter);
if (rc)
return rc;
@@ -788,6 +840,7 @@ static int init_resources(struct ibmvnic_adapter *adapter)
return -ENOMEM;
for (i = 0; i < adapter->req_rx_queues; i++) {
+ netdev_dbg(netdev, "Adding napi[%d]\n", i);
netif_napi_add(netdev, &adapter->napi[i], ibmvnic_poll,
NAPI_POLL_WEIGHT);
}
@@ -816,6 +869,7 @@ static int __ibmvnic_open(struct net_device *netdev)
* set the logical link state to up
*/
for (i = 0; i < adapter->req_rx_queues; i++) {
+ netdev_dbg(netdev, "Enabling rx_scrq[%d] irq\n", i);
if (prev_state == VNIC_CLOSED)
enable_irq(adapter->rx_scrq[i]->irq);
else
@@ -823,6 +877,7 @@ static int __ibmvnic_open(struct net_device *netdev)
}
for (i = 0; i < adapter->req_tx_queues; i++) {
+ netdev_dbg(netdev, "Enabling tx_scrq[%d] irq\n", i);
if (prev_state == VNIC_CLOSED)
enable_irq(adapter->tx_scrq[i]->irq);
else
@@ -896,6 +951,7 @@ static void clean_tx_pools(struct ibmvnic_adapter *adapter)
if (!tx_pool)
continue;
+ netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i);
for (j = 0; j < tx_entries; j++) {
if (tx_pool->tx_buff[j].skb) {
dev_kfree_skb_any(tx_pool->tx_buff[j].skb);
@@ -923,8 +979,11 @@ static int __ibmvnic_close(struct net_device *netdev)
if (adapter->tx_scrq) {
for (i = 0; i < adapter->req_tx_queues; i++)
- if (adapter->tx_scrq[i]->irq)
+ if (adapter->tx_scrq[i]->irq) {
+ netdev_dbg(adapter->netdev,
+ "Disabling tx_scrq[%d] irq\n", i);
disable_irq(adapter->tx_scrq[i]->irq);
+ }
}
rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
@@ -943,8 +1002,11 @@ static int __ibmvnic_close(struct net_device *netdev)
break;
}
- if (adapter->rx_scrq[i]->irq)
+ if (adapter->rx_scrq[i]->irq) {
+ netdev_dbg(adapter->netdev,
+ "Disabling rx_scrq[%d] irq\n", i);
disable_irq(adapter->rx_scrq[i]->irq);
+ }
}
}
@@ -1259,6 +1321,9 @@ out:
netdev->stats.tx_packets += tx_packets;
adapter->tx_send_failed += tx_send_failed;
adapter->tx_map_failed += tx_map_failed;
+ adapter->tx_stats_buffers[queue_num].packets += tx_packets;
+ adapter->tx_stats_buffers[queue_num].bytes += tx_bytes;
+ adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped;
return ret;
}
@@ -1334,6 +1399,9 @@ static int do_reset(struct ibmvnic_adapter *adapter,
struct net_device *netdev = adapter->netdev;
int i, rc;
+ netdev_dbg(adapter->netdev, "Re-setting driver (%d)\n",
+ rwi->reset_reason);
+
netif_carrier_off(netdev);
adapter->reset_reason = rwi->reset_reason;
@@ -1458,6 +1526,7 @@ static void __ibmvnic_reset(struct work_struct *work)
}
if (rc) {
+ netdev_dbg(adapter->netdev, "Reset failed\n");
free_all_rwi(adapter);
mutex_unlock(&adapter->reset_lock);
return;
@@ -1491,7 +1560,7 @@ static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
list_for_each(entry, &adapter->rwi_list) {
tmp = list_entry(entry, struct ibmvnic_rwi, list);
if (tmp->reset_reason == reason) {
- netdev_err(netdev, "Matching reset found, skipping\n");
+ netdev_dbg(netdev, "Skipping matching reset\n");
mutex_unlock(&adapter->rwi_lock);
return;
}
@@ -1507,6 +1576,8 @@ static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
rwi->reset_reason = reason;
list_add_tail(&rwi->list, &adapter->rwi_list);
mutex_unlock(&adapter->rwi_lock);
+
+ netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
schedule_work(&adapter->ibmvnic_reset);
}
@@ -1560,7 +1631,8 @@ restart_poll:
rx_comp.correlator);
/* do error checking */
if (next->rx_comp.rc) {
- netdev_err(netdev, "rx error %x\n", next->rx_comp.rc);
+ netdev_dbg(netdev, "rx buffer returned with rc %x\n",
+ be16_to_cpu(next->rx_comp.rc));
/* free the entry */
next->rx_comp.first = 0;
remove_buff_from_pool(adapter, rx_buff);
@@ -1599,6 +1671,8 @@ restart_poll:
napi_gro_receive(napi, skb); /* send it up */
netdev->stats.rx_packets++;
netdev->stats.rx_bytes += length;
+ adapter->rx_stats_buffers[scrq_num].packets++;
+ adapter->rx_stats_buffers[scrq_num].bytes += length;
frames_processed++;
}
@@ -1708,18 +1782,36 @@ static u32 ibmvnic_get_link(struct net_device *netdev)
static void ibmvnic_get_ringparam(struct net_device *netdev,
struct ethtool_ringparam *ring)
{
- ring->rx_max_pending = 0;
- ring->tx_max_pending = 0;
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+ ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
+ ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
ring->rx_mini_max_pending = 0;
ring->rx_jumbo_max_pending = 0;
- ring->rx_pending = 0;
- ring->tx_pending = 0;
+ ring->rx_pending = adapter->req_rx_add_entries_per_subcrq;
+ ring->tx_pending = adapter->req_tx_entries_per_subcrq;
ring->rx_mini_pending = 0;
ring->rx_jumbo_pending = 0;
}
+static void ibmvnic_get_channels(struct net_device *netdev,
+ struct ethtool_channels *channels)
+{
+ struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+ channels->max_rx = adapter->max_rx_queues;
+ channels->max_tx = adapter->max_tx_queues;
+ channels->max_other = 0;
+ channels->max_combined = 0;
+ channels->rx_count = adapter->req_rx_queues;
+ channels->tx_count = adapter->req_tx_queues;
+ channels->other_count = 0;
+ channels->combined_count = 0;
+}
+
static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
+ struct ibmvnic_adapter *adapter = netdev_priv(dev);
int i;
if (stringset != ETH_SS_STATS)
@@ -1727,13 +1819,39 @@ static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN)
memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
+
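+ /* Per-queue stat names must match the order in which
+ * ibmvnic_get_ethtool_stats() fills the data array
+ */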
+ for (i = 0; i < adapter->req_tx_queues; i++) {
+ snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
+ data += ETH_GSTRING_LEN;
+
+ snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
+ data += ETH_GSTRING_LEN;
+
+ snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i);
+ data += ETH_GSTRING_LEN;
+ }
+
+ for (i = 0; i < adapter->req_rx_queues; i++) {
+ snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
+ data += ETH_GSTRING_LEN;
+
+ snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
+ data += ETH_GSTRING_LEN;
+
+ snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
+ data += ETH_GSTRING_LEN;
+ }
}
static int ibmvnic_get_sset_count(struct net_device *dev, int sset)
{
+ struct ibmvnic_adapter *adapter = netdev_priv(dev);
+
switch (sset) {
case ETH_SS_STATS:
- return ARRAY_SIZE(ibmvnic_stats);
+ return ARRAY_SIZE(ibmvnic_stats) +
+ adapter->req_tx_queues * NUM_TX_STATS +
+ adapter->req_rx_queues * NUM_RX_STATS;
default:
return -EOPNOTSUPP;
}
@@ -1744,7 +1862,7 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
{
struct ibmvnic_adapter *adapter = netdev_priv(dev);
union ibmvnic_crq crq;
- int i;
+ int i, j;
memset(&crq, 0, sizeof(crq));
crq.request_statistics.first = IBMVNIC_CRQ_CMD;
@@ -1759,7 +1877,26 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev,
wait_for_completion(&adapter->stats_done);
for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
- data[i] = IBMVNIC_GET_STAT(adapter, ibmvnic_stats[i].offset);
+ data[i] = be64_to_cpu(IBMVNIC_GET_STAT(adapter,
+ ibmvnic_stats[i].offset));
+
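+ /* Append the per-queue counters in the same order as the strings
+ * reported by ibmvnic_get_strings()
+ */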
+ for (j = 0; j < adapter->req_tx_queues; j++) {
+ data[i] = adapter->tx_stats_buffers[j].packets;
+ i++;
+ data[i] = adapter->tx_stats_buffers[j].bytes;
+ i++;
+ data[i] = adapter->tx_stats_buffers[j].dropped_packets;
+ i++;
+ }
+
+ for (j = 0; j < adapter->req_rx_queues; j++) {
+ data[i] = adapter->rx_stats_buffers[j].packets;
+ i++;
+ data[i] = adapter->rx_stats_buffers[j].bytes;
+ i++;
+ data[i] = adapter->rx_stats_buffers[j].interrupts;
+ i++;
+ }
}
static const struct ethtool_ops ibmvnic_ethtool_ops = {
@@ -1768,6 +1905,7 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = {
.set_msglevel = ibmvnic_set_msglevel,
.get_link = ibmvnic_get_link,
.get_ringparam = ibmvnic_get_ringparam,
+ .get_channels = ibmvnic_get_channels,
.get_strings = ibmvnic_get_strings,
.get_sset_count = ibmvnic_get_sset_count,
.get_ethtool_stats = ibmvnic_get_ethtool_stats,
@@ -1800,12 +1938,14 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
int i, rc;
for (i = 0; i < adapter->req_tx_queues; i++) {
+ netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i);
rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]);
if (rc)
return rc;
}
for (i = 0; i < adapter->req_rx_queues; i++) {
+ netdev_dbg(adapter->netdev, "Re-setting rx_scrq[%d]\n", i);
rc = reset_one_sub_crq_queue(adapter, adapter->rx_scrq[i]);
if (rc)
return rc;
@@ -1909,6 +2049,8 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
if (!adapter->tx_scrq[i])
continue;
+ netdev_dbg(adapter->netdev, "Releasing tx_scrq[%d]\n",
+ i);
if (adapter->tx_scrq[i]->irq) {
free_irq(adapter->tx_scrq[i]->irq,
adapter->tx_scrq[i]);
@@ -1928,6 +2070,8 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter)
if (!adapter->rx_scrq[i])
continue;
+ netdev_dbg(adapter->netdev, "Releasing rx_scrq[%d]\n",
+ i);
if (adapter->rx_scrq[i]->irq) {
free_irq(adapter->rx_scrq[i]->irq,
adapter->rx_scrq[i]);
@@ -2064,6 +2208,8 @@ static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance)
struct ibmvnic_sub_crq_queue *scrq = instance;
struct ibmvnic_adapter *adapter = scrq->adapter;
+ adapter->rx_stats_buffers[scrq->scrq_num].interrupts++;
+
if (napi_schedule_prep(&adapter->napi[scrq->scrq_num])) {
disable_scrq_irq(adapter, scrq);
__napi_schedule(&adapter->napi[scrq->scrq_num]);
@@ -2080,6 +2226,8 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
int rc = 0;
for (i = 0; i < adapter->req_tx_queues; i++) {
+ netdev_dbg(adapter->netdev, "Initializing tx_scrq[%d] irq\n",
+ i);
scrq = adapter->tx_scrq[i];
scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
@@ -2101,6 +2249,8 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
}
for (i = 0; i < adapter->req_rx_queues; i++) {
+ netdev_dbg(adapter->netdev, "Initializing rx_scrq[%d] irq\n",
+ i);
scrq = adapter->rx_scrq[i];
scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
if (!scrq->irq) {
@@ -3739,31 +3889,35 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
do {
rc = ibmvnic_init(adapter);
- if (rc && rc != EAGAIN) {
- free_netdev(netdev);
- return rc;
- }
+ if (rc && rc != EAGAIN)
+ goto ibmvnic_init_fail;
} while (rc == EAGAIN);
netdev->mtu = adapter->req_mtu - ETH_HLEN;
rc = device_create_file(&dev->dev, &dev_attr_failover);
- if (rc) {
- free_netdev(netdev);
- return rc;
- }
+ if (rc)
+ goto ibmvnic_init_fail;
rc = register_netdev(netdev);
if (rc) {
dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
- device_remove_file(&dev->dev, &dev_attr_failover);
- free_netdev(netdev);
- return rc;
+ goto ibmvnic_register_fail;
}
dev_info(&dev->dev, "ibmvnic registered\n");
adapter->state = VNIC_PROBED;
return 0;
+
+ibmvnic_register_fail:
+ device_remove_file(&dev->dev, &dev_attr_failover);
+
+ibmvnic_init_fail:
+ release_sub_crqs(adapter);
+ release_crq_queue(adapter);
+ free_netdev(netdev);
+
+ return rc;
}
static int ibmvnic_remove(struct vio_dev *dev)
@@ -3859,20 +4013,16 @@ static int ibmvnic_resume(struct device *dev)
{
struct net_device *netdev = dev_get_drvdata(dev);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- int i;
if (adapter->state != VNIC_OPEN)
return 0;
- /* kick the interrupt handlers just in case we lost an interrupt */
- for (i = 0; i < adapter->req_rx_queues; i++)
- ibmvnic_interrupt_rx(adapter->rx_scrq[i]->irq,
- adapter->rx_scrq[i]);
+ tasklet_schedule(&adapter->tasklet);
return 0;
}
-static struct vio_device_id ibmvnic_device_table[] = {
+static const struct vio_device_id ibmvnic_device_table[] = {
{"network", "IBM,vnic"},
{"", "" }
};
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 8eff6e1..d02257c 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -166,6 +166,20 @@ struct ibmvnic_statistics {
u8 reserved[72];
} __packed __aligned(8);
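+/* NUM_TX_STATS and NUM_RX_STATS must match the number of u64 counters in
+ * the per-queue structures below; ibmvnic_get_sset_count() depends on them
+ */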
+#define NUM_TX_STATS 3
+struct ibmvnic_tx_queue_stats {
+ u64 packets;
+ u64 bytes;
+ u64 dropped_packets;
+};
+
+#define NUM_RX_STATS 3
+struct ibmvnic_rx_queue_stats {
+ u64 packets;
+ u64 bytes;
+ u64 interrupts;
+};
+
struct ibmvnic_acl_buffer {
__be32 len;
__be32 version;
@@ -956,6 +970,9 @@ struct ibmvnic_adapter {
int tx_send_failed;
int tx_map_failed;
+ struct ibmvnic_tx_queue_stats *tx_stats_buffers;
+ struct ibmvnic_rx_queue_stats *rx_stats_buffers;
+
int phys_link_state;
int logical_link_state;
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index 66bd506..d803b1a 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -100,6 +100,10 @@ struct e1000_hw;
#define E1000_DEV_ID_PCH_CNP_I219_V6 0x15BE
#define E1000_DEV_ID_PCH_CNP_I219_LM7 0x15BB
#define E1000_DEV_ID_PCH_CNP_I219_V7 0x15BC
+#define E1000_DEV_ID_PCH_ICP_I219_LM8 0x15DF
+#define E1000_DEV_ID_PCH_ICP_I219_V8 0x15E0
+#define E1000_DEV_ID_PCH_ICP_I219_LM9 0x15E1
+#define E1000_DEV_ID_PCH_ICP_I219_V9 0x15E2
#define E1000_REVISION_4 4
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 68ea8b4..d6d4ed7 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -2437,6 +2437,8 @@ static s32 e1000_hv_phy_workarounds_ich8lan(struct e1000_hw *hw)
if (hw->phy.revision < 2) {
e1000e_phy_sw_reset(hw);
ret_val = e1e_wphy(hw, MII_BMCR, 0x3140);
+ if (ret_val)
+ return ret_val;
}
}
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 2dcb546..327dfe5b 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -7544,6 +7544,10 @@ static const struct pci_device_id e1000_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CNP_I219_V6), board_pch_cnp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CNP_I219_LM7), board_pch_cnp },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CNP_I219_V7), board_pch_cnp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_LM8), board_pch_cnp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_V8), board_pch_cnp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_LM9), board_pch_cnp },
+ { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_V9), board_pch_cnp },
{ 0, 0, 0, 0, 0, 0, 0 } /* terminate list */
};
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 5e37387..e69d49d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1265,15 +1265,17 @@ err_queueing_scheme:
return err;
}
-static int __fm10k_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *tc)
+static int __fm10k_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
{
- if (tc->type != TC_SETUP_MQPRIO)
- return -EINVAL;
+ struct tc_mqprio_qopt *mqprio = type_data;
+
+ if (type != TC_SETUP_MQPRIO)
+ return -EOPNOTSUPP;
- tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
- return fm10k_setup_tc(dev, tc->mqprio->num_tc);
+ return fm10k_setup_tc(dev, mqprio->num_tc);
}
static void fm10k_assign_l2_accel(struct fm10k_intfc *interface,
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index d616f69..d0c1bf5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -75,11 +75,11 @@
#define I40E_MIN_VSI_ALLOC 83 /* LAN, ATR, FCOE, 64 VF */
/* max 16 qps */
#define i40e_default_queues_per_vmdq(pf) \
- (((pf)->flags & I40E_FLAG_RSS_AQ_CAPABLE) ? 4 : 1)
+ (((pf)->hw_features & I40E_HW_RSS_AQ_CAPABLE) ? 4 : 1)
#define I40E_DEFAULT_QUEUES_PER_VF 4
#define I40E_DEFAULT_QUEUES_PER_TC 1 /* should be a power of 2 */
#define i40e_pf_get_max_q_per_tc(pf) \
- (((pf)->flags & I40E_FLAG_128_QP_RSS_CAPABLE) ? 128 : 64)
+ (((pf)->hw_features & I40E_HW_128_QP_RSS_CAPABLE) ? 128 : 64)
#define I40E_FDIR_RING 0
#define I40E_FDIR_RING_COUNT 32
#define I40E_MAX_AQ_BUF_SIZE 4096
@@ -401,6 +401,27 @@ struct i40e_pf {
struct timer_list service_timer;
struct work_struct service_task;
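+ /* Hardware capability bits that never change at runtime, split out of
+ * the mutable pf->flags below
+ */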
+ u64 hw_features;
+#define I40E_HW_RSS_AQ_CAPABLE BIT_ULL(0)
+#define I40E_HW_128_QP_RSS_CAPABLE BIT_ULL(1)
+#define I40E_HW_ATR_EVICT_CAPABLE BIT_ULL(2)
+#define I40E_HW_WB_ON_ITR_CAPABLE BIT_ULL(3)
+#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT_ULL(4)
+#define I40E_HW_NO_PCI_LINK_CHECK BIT_ULL(5)
+#define I40E_HW_100M_SGMII_CAPABLE BIT_ULL(6)
+#define I40E_HW_NO_DCB_SUPPORT BIT_ULL(7)
+#define I40E_HW_USE_SET_LLDP_MIB BIT_ULL(8)
+#define I40E_HW_GENEVE_OFFLOAD_CAPABLE BIT_ULL(9)
+#define I40E_HW_PTP_L4_CAPABLE BIT_ULL(10)
+#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE BIT_ULL(11)
+#define I40E_HW_MPLS_HDR_OFFLOAD_CAPABLE BIT_ULL(12)
+#define I40E_HW_HAVE_CRT_RETIMER BIT_ULL(13)
+#define I40E_HW_OUTER_UDP_CSUM_CAPABLE BIT_ULL(14)
+#define I40E_HW_PHY_CONTROLS_LEDS BIT_ULL(15)
+#define I40E_HW_STOP_FW_LLDP BIT_ULL(16)
+#define I40E_HW_PORT_ID_VALID BIT_ULL(17)
+#define I40E_HW_RESTART_AUTONEG BIT_ULL(18)
+
u64 flags;
#define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(1)
#define I40E_FLAG_MSI_ENABLED BIT_ULL(2)
@@ -420,33 +441,14 @@ struct i40e_pf {
#define I40E_FLAG_PTP BIT_ULL(25)
#define I40E_FLAG_MFP_ENABLED BIT_ULL(26)
#define I40E_FLAG_UDP_FILTER_SYNC BIT_ULL(27)
-#define I40E_FLAG_PORT_ID_VALID BIT_ULL(28)
#define I40E_FLAG_DCB_CAPABLE BIT_ULL(29)
-#define I40E_FLAG_RSS_AQ_CAPABLE BIT_ULL(31)
-#define I40E_FLAG_HW_ATR_EVICT_CAPABLE BIT_ULL(32)
-#define I40E_FLAG_OUTER_UDP_CSUM_CAPABLE BIT_ULL(33)
-#define I40E_FLAG_128_QP_RSS_CAPABLE BIT_ULL(34)
-#define I40E_FLAG_WB_ON_ITR_CAPABLE BIT_ULL(35)
#define I40E_FLAG_VEB_STATS_ENABLED BIT_ULL(37)
-#define I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT_ULL(38)
#define I40E_FLAG_LINK_POLLING_ENABLED BIT_ULL(39)
#define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40)
-#define I40E_FLAG_GENEVE_OFFLOAD_CAPABLE BIT_ULL(41)
-#define I40E_FLAG_NO_PCI_LINK_CHECK BIT_ULL(42)
-#define I40E_FLAG_100M_SGMII_CAPABLE BIT_ULL(43)
-#define I40E_FLAG_RESTART_AUTONEG BIT_ULL(44)
-#define I40E_FLAG_NO_DCB_SUPPORT BIT_ULL(45)
-#define I40E_FLAG_USE_SET_LLDP_MIB BIT_ULL(46)
-#define I40E_FLAG_STOP_FW_LLDP BIT_ULL(47)
-#define I40E_FLAG_PHY_CONTROLS_LEDS BIT_ULL(48)
-#define I40E_FLAG_PF_MAC BIT_ULL(50)
#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT_ULL(51)
-#define I40E_FLAG_HAVE_CRT_RETIMER BIT_ULL(52)
-#define I40E_FLAG_PTP_L4_CAPABLE BIT_ULL(53)
#define I40E_FLAG_CLIENT_RESET BIT_ULL(54)
#define I40E_FLAG_TEMP_LINK_POLLING BIT_ULL(55)
#define I40E_FLAG_CLIENT_L2_CHANGE BIT_ULL(56)
-#define I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE BIT_ULL(57)
#define I40E_FLAG_LEGACY_RX BIT_ULL(58)
struct i40e_client_instance *cinst;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 8e082a9..111426b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -328,9 +328,9 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc,
len = buf_len;
/* write the full 16-byte chunks */
if (hw->debug_mask & mask) {
- char prefix[20];
+ char prefix[27];
- snprintf(prefix, 20,
+ snprintf(prefix, sizeof(prefix),
"i40e %02x:%02x.%x: \t0x",
hw->bus.bus_id,
hw->bus.device,
@@ -2529,6 +2529,10 @@ i40e_status i40e_update_link_info(struct i40e_hw *hw)
if (status)
return status;
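+ /* Record which FEC modes were requested (KR and/or RS) so that
+ * i40e_print_link_message() can report them
+ */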
+ hw->phy.link_info.req_fec_info =
+ abilities.fec_cfg_curr_mod_ext_info &
+ (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS);
+
memcpy(hw->phy.link_info.module_type, &abilities.module_type,
sizeof(hw->phy.link_info.module_type));
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 9692a52..05e8986 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -271,7 +271,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
*advertising |= ADVERTISED_Autoneg;
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
*advertising |= ADVERTISED_1000baseT_Full;
- if (pf->flags & I40E_FLAG_100M_SGMII_CAPABLE) {
+ if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
*supported |= SUPPORTED_100baseT_Full;
*advertising |= ADVERTISED_100baseT_Full;
}
@@ -340,12 +340,12 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
*advertising |= ADVERTISED_20000baseKR2_Full;
}
if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR) {
- if (!(pf->flags & I40E_FLAG_HAVE_CRT_RETIMER))
+ if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
*supported |= SUPPORTED_10000baseKR_Full |
SUPPORTED_Autoneg;
*advertising |= ADVERTISED_Autoneg;
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- if (!(pf->flags & I40E_FLAG_HAVE_CRT_RETIMER))
+ if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
*advertising |= ADVERTISED_10000baseKR_Full;
}
if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
@@ -356,12 +356,12 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
*advertising |= ADVERTISED_10000baseKX4_Full;
}
if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX) {
- if (!(pf->flags & I40E_FLAG_HAVE_CRT_RETIMER))
+ if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
*supported |= SUPPORTED_1000baseKX_Full |
SUPPORTED_Autoneg;
*advertising |= ADVERTISED_Autoneg;
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- if (!(pf->flags & I40E_FLAG_HAVE_CRT_RETIMER))
+ if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
*advertising |= ADVERTISED_1000baseKX_Full;
}
if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
@@ -474,7 +474,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
SUPPORTED_1000baseT_Full;
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
advertising |= ADVERTISED_1000baseT_Full;
- if (pf->flags & I40E_FLAG_100M_SGMII_CAPABLE) {
+ if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
supported |= SUPPORTED_100baseT_Full;
if (hw_link_info->requested_speeds &
I40E_LINK_SPEED_100MB)
@@ -1091,7 +1091,7 @@ static void i40e_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
struct i40e_pf *pf = np->vsi->back;
struct i40e_hw *hw = &pf->hw;
u32 *reg_buf = p;
- int i, j, ri;
+ unsigned int i, j, ri;
u32 reg;
/* Tell ethtool which driver-version-specific regs output we have.
@@ -1550,9 +1550,9 @@ static void i40e_get_ethtool_stats(struct net_device *netdev,
struct i40e_ring *tx_ring, *rx_ring;
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
+ unsigned int j;
int i = 0;
char *p;
- int j;
struct rtnl_link_stats64 *net_stats = i40e_get_vsi_stats_struct(vsi);
unsigned int start;
@@ -1637,7 +1637,7 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
char *p = (char *)data;
- int i;
+ unsigned int i;
switch (stringset) {
case ETH_SS_TEST:
@@ -1765,7 +1765,7 @@ static int i40e_get_ts_info(struct net_device *dev,
BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ);
- if (pf->flags & I40E_FLAG_PTP_L4_CAPABLE)
+ if (pf->hw_features & I40E_HW_PTP_L4_CAPABLE)
info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) |
@@ -2005,7 +2005,7 @@ static int i40e_set_phys_id(struct net_device *netdev,
switch (state) {
case ETHTOOL_ID_ACTIVE:
- if (!(pf->flags & I40E_FLAG_PHY_CONTROLS_LEDS)) {
+ if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) {
pf->led_status = i40e_led_get(hw);
} else {
i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, NULL);
@@ -2015,19 +2015,19 @@ static int i40e_set_phys_id(struct net_device *netdev,
}
return blink_freq;
case ETHTOOL_ID_ON:
- if (!(pf->flags & I40E_FLAG_PHY_CONTROLS_LEDS))
+ if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS))
i40e_led_set(hw, 0xf, false);
else
ret = i40e_led_set_phy(hw, true, pf->led_status, 0);
break;
case ETHTOOL_ID_OFF:
- if (!(pf->flags & I40E_FLAG_PHY_CONTROLS_LEDS))
+ if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS))
i40e_led_set(hw, 0x0, false);
else
ret = i40e_led_set_phy(hw, false, pf->led_status, 0);
break;
case ETHTOOL_ID_INACTIVE:
- if (!(pf->flags & I40E_FLAG_PHY_CONTROLS_LEDS)) {
+ if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) {
i40e_led_set(hw, pf->led_status, false);
} else {
ret = i40e_led_set_phy(hw, false, pf->led_status,
@@ -2194,14 +2194,29 @@ static int __i40e_set_coalesce(struct net_device *netdev,
int queue)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
+ u16 intrl_reg, cur_rx_itr, cur_tx_itr;
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
- u16 intrl_reg;
int i;
if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
vsi->work_limit = ec->tx_max_coalesced_frames_irq;
+ if (queue < 0) {
+ cur_rx_itr = vsi->rx_rings[0]->rx_itr_setting;
+ cur_tx_itr = vsi->tx_rings[0]->tx_itr_setting;
+ } else if (queue < vsi->num_queue_pairs) {
+ cur_rx_itr = vsi->rx_rings[queue]->rx_itr_setting;
+ cur_tx_itr = vsi->tx_rings[queue]->tx_itr_setting;
+ } else {
+ netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
+ vsi->num_queue_pairs - 1);
+ return -EINVAL;
+ }
+
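+ /* Mask off the adaptive-ITR bit so the checks below compare only the
+ * programmed interval
+ */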
+ cur_tx_itr &= ~I40E_ITR_DYNAMIC;
+ cur_rx_itr &= ~I40E_ITR_DYNAMIC;
+
/* tx_coalesce_usecs_high is ignored, use rx-usecs-high instead */
if (ec->tx_coalesce_usecs_high != vsi->int_rate_limit) {
netif_info(pf, drv, netdev, "tx-usecs-high is not used, please program rx-usecs-high\n");
@@ -2214,15 +2229,34 @@ static int __i40e_set_coalesce(struct net_device *netdev,
return -EINVAL;
}
- if (ec->rx_coalesce_usecs == 0) {
- if (ec->use_adaptive_rx_coalesce)
- netif_info(pf, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
- } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
- (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
- netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
- return -EINVAL;
+ if (ec->rx_coalesce_usecs != cur_rx_itr &&
+ ec->use_adaptive_rx_coalesce) {
+ netif_info(pf, drv, netdev, "RX interrupt moderation cannot be changed if adaptive-rx is enabled.\n");
+ return -EINVAL;
}
+ if (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+ netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
+ return -EINVAL;
+ }
+
+ if (ec->tx_coalesce_usecs != cur_tx_itr &&
+ ec->use_adaptive_tx_coalesce) {
+ netif_info(pf, drv, netdev, "TX interrupt moderation cannot be changed if adaptive-tx is enabled.\n");
+ return -EINVAL;
+ }
+
+ if (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)) {
+ netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
+ return -EINVAL;
+ }
+
+ if (ec->use_adaptive_rx_coalesce && !cur_rx_itr)
+ ec->rx_coalesce_usecs = I40E_MIN_ITR << 1;
+
+ if (ec->use_adaptive_tx_coalesce && !cur_tx_itr)
+ ec->tx_coalesce_usecs = I40E_MIN_ITR << 1;
+
intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high);
vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg);
if (vsi->int_rate_limit != ec->rx_coalesce_usecs_high) {
@@ -2230,27 +2264,14 @@ static int __i40e_set_coalesce(struct net_device *netdev,
vsi->int_rate_limit);
}
- if (ec->tx_coalesce_usecs == 0) {
- if (ec->use_adaptive_tx_coalesce)
- netif_info(pf, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
- } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
- (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
- netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
- return -EINVAL;
- }
-
/* rx and tx usecs have per-queue values. If the user doesn't specify
* a queue, apply to all queues.
*/
if (queue < 0) {
for (i = 0; i < vsi->num_queue_pairs; i++)
i40e_set_itr_per_queue(vsi, ec, i);
- } else if (queue < vsi->num_queue_pairs) {
- i40e_set_itr_per_queue(vsi, ec, queue);
} else {
- netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
- vsi->num_queue_pairs - 1);
- return -EINVAL;
+ i40e_set_itr_per_queue(vsi, ec, queue);
}
return 0;
@@ -2727,22 +2748,22 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
switch (nfc->flow_type) {
case TCP_V4_FLOW:
flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
- if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+ if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
hena |=
BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
break;
case TCP_V6_FLOW:
flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
- if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+ if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
hena |=
BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
- if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+ if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
hena |=
BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
break;
case UDP_V4_FLOW:
flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
- if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+ if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
hena |=
BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
@@ -2751,7 +2772,7 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
break;
case UDP_V6_FLOW:
flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
- if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+ if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
hena |=
BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
@@ -4069,23 +4090,26 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
- u64 changed_flags;
+ u64 orig_flags, new_flags, changed_flags;
u32 i, j;
- changed_flags = pf->flags;
+ orig_flags = READ_ONCE(pf->flags);
+ new_flags = orig_flags;
for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
const struct i40e_priv_flags *priv_flags;
priv_flags = &i40e_gstrings_priv_flags[i];
- if (priv_flags->read_only)
- continue;
-
if (flags & BIT(i))
- pf->flags |= priv_flags->flag;
+ new_flags |= priv_flags->flag;
else
- pf->flags &= ~(priv_flags->flag);
+ new_flags &= ~(priv_flags->flag);
+
+ /* If this is a read-only flag, it can't be changed */
+ if (priv_flags->read_only &&
+ ((orig_flags ^ new_flags) & ~BIT(i)))
+ return -EOPNOTSUPP;
}
if (pf->hw.pf_id != 0)
@@ -4096,18 +4120,40 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
priv_flags = &i40e_gl_gstrings_priv_flags[j];
- if (priv_flags->read_only)
- continue;
-
if (flags & BIT(i + j))
- pf->flags |= priv_flags->flag;
+ new_flags |= priv_flags->flag;
else
- pf->flags &= ~(priv_flags->flag);
+ new_flags &= ~(priv_flags->flag);
+
+ /* If this is a read-only flag, it can't be changed */
+ if (priv_flags->read_only &&
+ ((orig_flags ^ new_flags) & ~BIT(i)))
+ return -EOPNOTSUPP;
}
flags_complete:
- /* check for flags that changed */
- changed_flags ^= pf->flags;
+ /* Before we finalize any flag changes, we need to perform some
+ * checks to ensure that the changes are supported and safe.
+ */
+
+ /* ATR eviction is not supported on all devices */
+ if ((new_flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) &&
+ !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE))
+ return -EOPNOTSUPP;
+
+ /* Compare and exchange the new flags into place. If cmpxchg64
+ * returns anything but the old value, another thread has modified
+ * the flags since we copied them, so punt with an error and log a
+ * message.
+ */
+ if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
+ dev_warn(&pf->pdev->dev,
+ "Unable to update pf->flags as it was modified by another thread...\n");
+ return -EAGAIN;
+ }
+
+ changed_flags = orig_flags ^ new_flags;
/* Process any additional changes needed as a result of flag changes.
* The changed_flags value reflects the list of bits that were
@@ -4121,10 +4167,6 @@ flags_complete:
set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
}
- /* Only allow ATR evict on hardware that is capable of handling it */
- if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
- pf->flags &= ~I40E_FLAG_HW_ATR_EVICT_ENABLED;
-
if (changed_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT) {
u16 sw_flags = 0, valid_flags = 0;
int ret;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2db93d3..6498da8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2595,9 +2595,20 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid)
{
int err;
- if (!vid || vsi->info.pvid)
+ if (vsi->info.pvid)
return -EINVAL;
+ /* The network stack will attempt to add VID=0, with the intention to
+ * receive priority tagged packets with a VLAN of 0. Our HW receives
+ * these packets by default when configured to receive untagged
+ * packets, so we don't need to add a filter for this case.
+ * Additionally, HW interprets adding a VID=0 filter as meaning to
+ * receive *only* tagged traffic and stops receiving untagged traffic.
+ * Thus, we do not want to actually add a filter for VID=0
+ */
+ if (!vid)
+ return 0;
+
/* Locked once because all functions invoked below iterate the list */
spin_lock_bh(&vsi->mac_filter_hash_lock);
err = i40e_add_vlan_all_mac(vsi, vid);
@@ -2674,15 +2685,7 @@ static int i40e_vlan_rx_add_vid(struct net_device *netdev,
if (vid >= VLAN_N_VID)
return -EINVAL;
- /* If the network stack called us with vid = 0 then
- * it is asking to receive priority tagged packets with
- * vlan id 0. Our HW receives them by default when configured
- * to receive untagged packets so there is no need to add an
- * extra filter for vlan 0 tagged packets.
- */
- if (vid)
- ret = i40e_vsi_add_vlan(vsi, vid);
-
+ ret = i40e_vsi_add_vlan(vsi, vid);
if (!ret)
set_bit(vid, vsi->active_vlans);
@@ -2714,44 +2717,6 @@ static int i40e_vlan_rx_kill_vid(struct net_device *netdev,
}
/**
- * i40e_macaddr_init - explicitly write the mac address filters
- *
- * @vsi: pointer to the vsi
- * @macaddr: the MAC address
- *
- * This is needed when the macaddr has been obtained by other
- * means than the default, e.g., from Open Firmware or IDPROM.
- * Returns 0 on success, negative on failure
- **/
-static int i40e_macaddr_init(struct i40e_vsi *vsi, u8 *macaddr)
-{
- int ret;
- struct i40e_aqc_add_macvlan_element_data element;
-
- ret = i40e_aq_mac_address_write(&vsi->back->hw,
- I40E_AQC_WRITE_TYPE_LAA_WOL,
- macaddr, NULL);
- if (ret) {
- dev_info(&vsi->back->pdev->dev,
- "Addr change for VSI failed: %d\n", ret);
- return -EADDRNOTAVAIL;
- }
-
- memset(&element, 0, sizeof(element));
- ether_addr_copy(element.mac_addr, macaddr);
- element.flags = cpu_to_le16(I40E_AQC_MACVLAN_ADD_PERFECT_MATCH);
- ret = i40e_aq_add_macvlan(&vsi->back->hw, vsi->seid, &element, 1, NULL);
- if (ret) {
- dev_info(&vsi->back->pdev->dev,
- "add filter failed err %s aq_err %s\n",
- i40e_stat_str(&vsi->back->hw, ret),
- i40e_aq_str(&vsi->back->hw,
- vsi->back->hw.aq.asq_last_status));
- }
- return ret;
-}
-
-/**
* i40e_restore_vlan - Reinstate vlans when vsi/netdev comes back up
* @vsi: the vsi being brought back up
**/
@@ -2909,22 +2874,15 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi)
static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
{
struct i40e_vsi *vsi = ring->vsi;
- cpumask_var_t mask;
if (!ring->q_vector || !ring->netdev)
return;
- /* Single TC mode enable XPS */
- if (vsi->tc_config.numtc <= 1) {
- if (!test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state))
- netif_set_xps_queue(ring->netdev,
- &ring->q_vector->affinity_mask,
- ring->queue_index);
- } else if (alloc_cpumask_var(&mask, GFP_KERNEL)) {
- /* Disable XPS to allow selection based on TC */
- bitmap_zero(cpumask_bits(mask), nr_cpumask_bits);
- netif_set_xps_queue(ring->netdev, mask, ring->queue_index);
- free_cpumask_var(mask);
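+ /* With a single TC, steer each queue's transmits to the CPU of its
+ * vector; get_cpu_mask() returns a static mask, so nothing needs to
+ * be allocated or freed
+ */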
+ if ((vsi->tc_config.numtc <= 1) &&
+ !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) {
+ netif_set_xps_queue(ring->netdev,
+ get_cpu_mask(ring->q_vector->v_idx),
+ ring->queue_index);
}
/* schedule our worker thread which will take care of
@@ -3203,19 +3161,8 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi)
**/
static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi)
{
- struct i40e_pf *pf = vsi->back;
- int err;
-
if (vsi->netdev)
i40e_set_rx_mode(vsi->netdev);
-
- if (!!(pf->flags & I40E_FLAG_PF_MAC)) {
- err = i40e_macaddr_init(vsi, pf->hw.mac.addr);
- if (err) {
- dev_warn(&pf->pdev->dev,
- "could not set up macaddr; err %d\n", err);
- }
- }
}
/**
@@ -3495,7 +3442,7 @@ static void i40e_irq_affinity_notify(struct irq_affinity_notify *notify,
struct i40e_q_vector *q_vector =
container_of(notify, struct i40e_q_vector, affinity_notify);
- q_vector->affinity_mask = *mask;
+ cpumask_copy(&q_vector->affinity_mask, mask);
}
/**
@@ -3559,8 +3506,10 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
q_vector->affinity_notify.release = i40e_irq_affinity_release;
irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
- /* assign the mask for this irq */
- irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
+ /* get_cpu_mask returns a static constant mask with
+ * a permanent lifetime so it's ok to use here.
+ */
+ irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
}
vsi->irqs_ready = true;
@@ -4342,7 +4291,7 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
/* clear the affinity notifier in the IRQ descriptor */
irq_set_affinity_notifier(irq_num, NULL);
- /* clear the affinity_mask in the IRQ descriptor */
+ /* remove our suggested affinity mask for this IRQ */
irq_set_affinity_hint(irq_num, NULL);
synchronize_irq(irq_num);
free_irq(irq_num, vsi->q_vectors[i]);
@@ -4773,7 +4722,7 @@ static void i40e_detect_recover_hung(struct i40e_pf *pf)
{
struct net_device *netdev;
struct i40e_vsi *vsi;
- int i;
+ unsigned int i;
/* Only for LAN VSI */
vsi = pf->vsi[pf->lan_vsi];
@@ -5350,7 +5299,7 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
int err = 0;
/* Do not enable DCB for SW1 and SW2 images even if the FW is capable */
- if (pf->flags & I40E_FLAG_NO_DCB_SUPPORT)
+ if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT)
goto out;
/* Get the initial DCB configuration */
@@ -5400,6 +5349,7 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
char *speed = "Unknown";
char *fc = "Unknown";
char *fec = "";
+ char *req_fec = "";
char *an = "";
new_speed = vsi->back->hw.phy.link_info.link_speed;
@@ -5461,6 +5411,7 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
}
if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
+ req_fec = ", Requested FEC: None";
fec = ", FEC: None";
an = ", Autoneg: False";
@@ -5473,10 +5424,22 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
else if (vsi->back->hw.phy.link_info.fec_info &
I40E_AQ_CONFIG_FEC_RS_ENA)
fec = ", FEC: CL108 RS-FEC";
+
+ /* 'CL108 RS-FEC' should be displayed when RS is requested, or
+ * both RS and FC are requested
+ */
+ if (vsi->back->hw.phy.link_info.req_fec_info &
+ (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
+ if (vsi->back->hw.phy.link_info.req_fec_info &
+ I40E_AQ_REQUEST_FEC_RS)
+ req_fec = ", Requested FEC: CL108 RS-FEC";
+ else
+ req_fec = ", Requested FEC: CL74 FC-FEC/BASE-R";
+ }
}
- netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s, Flow Control: %s\n",
- speed, fec, an, fc);
+ netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s%s, Flow Control: %s\n",
+ speed, req_fec, fec, an, fc);
}
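/* With the extra req_fec argument, a 25G link-up line reads, for example
 * (values hypothetical):
 *
 *   NIC Link is Up, 25 Gbps Full Duplex, Requested FEC: CL74 FC-FEC/BASE-R,
 *   FEC: None, Autoneg: False, Flow Control: None
 */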
/**
@@ -5656,16 +5619,17 @@ exit:
return ret;
}
-static int __i40e_setup_tc(struct net_device *netdev, u32 handle,
- u32 chain_index, __be16 proto,
- struct tc_to_netdev *tc)
+static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+ void *type_data)
{
- if (tc->type != TC_SETUP_MQPRIO)
- return -EINVAL;
+ struct tc_mqprio_qopt *mqprio = type_data;
- tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ if (type != TC_SETUP_MQPRIO)
+ return -EOPNOTSUPP;
- return i40e_setup_tc(netdev, tc->mqprio->num_tc);
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+ return i40e_setup_tc(netdev, mqprio->num_tc);
}
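/* Returning -EOPNOTSUPP (rather than -EINVAL) for an unhandled
 * tc_setup_type lets callers distinguish "this driver cannot offload the
 * request" from a malformed request.
 */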
/**
@@ -7331,7 +7295,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
wr32(hw, I40E_REG_MSS, val);
}
- if (pf->flags & I40E_FLAG_RESTART_AUTONEG) {
+ if (pf->hw_features & I40E_HW_RESTART_AUTONEG) {
msleep(75);
ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
if (ret)
@@ -7520,6 +7484,18 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
i40e_flush(hw);
}
+static const char *i40e_tunnel_name(struct i40e_udp_port_config *port)
+{
+ switch (port->type) {
+ case UDP_TUNNEL_TYPE_VXLAN:
+ return "vxlan";
+ case UDP_TUNNEL_TYPE_GENEVE:
+ return "geneve";
+ default:
+ return "unknown";
+ }
+}
+
/**
* i40e_sync_udp_filters - Trigger a sync event for existing UDP filters
* @pf: board private structure
@@ -7565,14 +7541,14 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
ret = i40e_aq_del_udp_tunnel(hw, i, NULL);
if (ret) {
- dev_dbg(&pf->pdev->dev,
- "%s %s port %d, index %d failed, err %s aq_err %s\n",
- pf->udp_ports[i].type ? "vxlan" : "geneve",
- port ? "add" : "delete",
- port, i,
- i40e_stat_str(&pf->hw, ret),
- i40e_aq_str(&pf->hw,
- pf->hw.aq.asq_last_status));
+ dev_info(&pf->pdev->dev,
+ "%s %s port %d, index %d failed, err %s aq_err %s\n",
+ i40e_tunnel_name(&pf->udp_ports[i]),
+ port ? "add" : "delete",
+ port, i,
+ i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
pf->udp_ports[i].port = 0;
}
}
@@ -7957,7 +7933,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
ring->count = vsi->num_desc;
ring->size = 0;
ring->dcb_tc = 0;
- if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE)
+ if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
ring->tx_itr_setting = pf->tx_itr_default;
vsi->tx_rings[i] = ring++;
@@ -7974,7 +7950,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
ring->count = vsi->num_desc;
ring->size = 0;
ring->dcb_tc = 0;
- if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE)
+ if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
set_ring_xdp(ring);
ring->tx_itr_setting = pf->tx_itr_default;
@@ -8261,7 +8237,7 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
q_vector->vsi = vsi;
q_vector->v_idx = v_idx;
- cpumask_set_cpu(cpu, &q_vector->affinity_mask);
+ cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
if (vsi->netdev)
netif_napi_add(vsi->netdev, &q_vector->napi,
@@ -8510,7 +8486,7 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
u8 *lut;
int ret;
- if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE))
+ if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
return 0;
if (!vsi->rss_size)
@@ -8640,7 +8616,7 @@ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
{
struct i40e_pf *pf = vsi->back;
- if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)
+ if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)
return i40e_config_rss_aq(vsi, seed, lut, lut_size);
else
return i40e_config_rss_reg(vsi, seed, lut, lut_size);
@@ -8659,7 +8635,7 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
{
struct i40e_pf *pf = vsi->back;
- if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)
+ if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)
return i40e_get_rss_aq(vsi, seed, lut, lut_size);
else
return i40e_get_rss_reg(vsi, seed, lut, lut_size);
@@ -8987,25 +8963,56 @@ static int i40e_sw_init(struct i40e_pf *pf)
pf->hw.func_caps.fd_filters_best_effort;
}
+ if (pf->hw.mac.type == I40E_MAC_X722) {
+ pf->hw_features |= (I40E_HW_RSS_AQ_CAPABLE |
+ I40E_HW_128_QP_RSS_CAPABLE |
+ I40E_HW_ATR_EVICT_CAPABLE |
+ I40E_HW_WB_ON_ITR_CAPABLE |
+ I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE |
+ I40E_HW_NO_PCI_LINK_CHECK |
+ I40E_HW_USE_SET_LLDP_MIB |
+ I40E_HW_GENEVE_OFFLOAD_CAPABLE |
+ I40E_HW_PTP_L4_CAPABLE |
+ I40E_HW_WOL_MC_MAGIC_PKT_WAKE |
+ I40E_HW_OUTER_UDP_CSUM_CAPABLE);
+
+#define I40E_FDEVICT_PCTYPE_DEFAULT 0xc03
+ if (rd32(&pf->hw, I40E_GLQF_FDEVICTENA(1)) !=
+ I40E_FDEVICT_PCTYPE_DEFAULT) {
+ dev_warn(&pf->pdev->dev,
+ "FD EVICT PCTYPES are not right, disable FD HW EVICT\n");
+ pf->hw_features &= ~I40E_HW_ATR_EVICT_CAPABLE;
+ }
+ } else if ((pf->hw.aq.api_maj_ver > 1) ||
+ ((pf->hw.aq.api_maj_ver == 1) &&
+ (pf->hw.aq.api_min_ver > 4))) {
+ /* Supported in FW API version higher than 1.4 */
+ pf->hw_features |= I40E_HW_GENEVE_OFFLOAD_CAPABLE;
+ }
+
+ /* Enable HW ATR eviction if possible */
+ if (pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE)
+ pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED;
+
if ((pf->hw.mac.type == I40E_MAC_XL710) &&
(((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) ||
(pf->hw.aq.fw_maj_ver < 4))) {
- pf->flags |= I40E_FLAG_RESTART_AUTONEG;
+ pf->hw_features |= I40E_HW_RESTART_AUTONEG;
/* No DCB support for FW < v4.33 */
- pf->flags |= I40E_FLAG_NO_DCB_SUPPORT;
+ pf->hw_features |= I40E_HW_NO_DCB_SUPPORT;
}
/* Disable FW LLDP if FW < v4.3 */
if ((pf->hw.mac.type == I40E_MAC_XL710) &&
(((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) ||
(pf->hw.aq.fw_maj_ver < 4)))
- pf->flags |= I40E_FLAG_STOP_FW_LLDP;
+ pf->hw_features |= I40E_HW_STOP_FW_LLDP;
/* Use the FW Set LLDP MIB API if FW > v4.40 */
if ((pf->hw.mac.type == I40E_MAC_XL710) &&
(((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) ||
(pf->hw.aq.fw_maj_ver >= 5)))
- pf->flags |= I40E_FLAG_USE_SET_LLDP_MIB;
+ pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB;
if (pf->hw.func_caps.vmdq) {
pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
@@ -9028,29 +9035,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
I40E_MAX_VF_COUNT);
}
#endif /* CONFIG_PCI_IOV */
- if (pf->hw.mac.type == I40E_MAC_X722) {
- pf->flags |= I40E_FLAG_RSS_AQ_CAPABLE
- | I40E_FLAG_128_QP_RSS_CAPABLE
- | I40E_FLAG_HW_ATR_EVICT_CAPABLE
- | I40E_FLAG_OUTER_UDP_CSUM_CAPABLE
- | I40E_FLAG_WB_ON_ITR_CAPABLE
- | I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE
- | I40E_FLAG_NO_PCI_LINK_CHECK
- | I40E_FLAG_USE_SET_LLDP_MIB
- | I40E_FLAG_GENEVE_OFFLOAD_CAPABLE
- | I40E_FLAG_PTP_L4_CAPABLE
- | I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE;
- } else if ((pf->hw.aq.api_maj_ver > 1) ||
- ((pf->hw.aq.api_maj_ver == 1) &&
- (pf->hw.aq.api_min_ver > 4))) {
- /* Supported in FW API version higher than 1.4 */
- pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
- }
-
- /* Enable HW ATR eviction if possible */
- if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
- pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED;
-
pf->eeprom_version = 0xDEAD;
pf->lan_veb = I40E_NO_VEB;
pf->lan_vsi = I40E_NO_VSI;
@@ -9231,7 +9215,7 @@ static void i40e_udp_tunnel_add(struct net_device *netdev,
pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN;
break;
case UDP_TUNNEL_TYPE_GENEVE:
- if (!(pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE))
+ if (!(pf->hw_features & I40E_HW_GENEVE_OFFLOAD_CAPABLE))
return;
pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_NGE;
break;
@@ -9298,7 +9282,7 @@ static int i40e_get_phys_port_id(struct net_device *netdev,
struct i40e_pf *pf = np->vsi->back;
struct i40e_hw *hw = &pf->hw;
- if (!(pf->flags & I40E_FLAG_PORT_ID_VALID))
+ if (!(pf->hw_features & I40E_HW_PORT_ID_VALID))
return -EOPNOTSUPP;
ppid->id_len = min_t(int, sizeof(hw->mac.port_addr), sizeof(ppid->id));
@@ -9589,6 +9573,7 @@ static int i40e_xdp(struct net_device *dev,
return i40e_xdp_setup(vsi, xdp->prog);
case XDP_QUERY_PROG:
xdp->prog_attached = i40e_enabled_xdp_vsi(vsi);
+ xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
return 0;
default:
return -EINVAL;
@@ -9675,7 +9660,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
NETIF_F_RXCSUM |
0;
- if (!(pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE))
+ if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE))
netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
@@ -9714,8 +9699,13 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
i40e_add_mac_filter(vsi, mac_addr);
spin_unlock_bh(&vsi->mac_filter_hash_lock);
} else {
- /* relate the VSI_VMDQ name to the VSI_MAIN name */
- snprintf(netdev->name, IFNAMSIZ, "%sv%%d",
+ /* Relate the VSI_VMDQ name to the VSI_MAIN name. Note that we
+ * are still limited by IFNAMSIZ, but we're adding 'v%d\0' to
+ * the end, which is 4 bytes long, so force truncation of the
+ * original name by IFNAMSIZ - 4
+ */
+ snprintf(netdev->name, IFNAMSIZ, "%.*sv%%d",
+ IFNAMSIZ - 4,
pf->vsi[pf->lan_vsi]->netdev->name);
random_ether_addr(mac_addr);
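/* A minimal userspace sketch of the "%.*s" truncation above, assuming
 * IFNAMSIZ == 16 as in <linux/if.h>; the base name value is hypothetical.
 */
#include <stdio.h>
#define IFNAMSIZ 16

int main(void)
{
	char name[IFNAMSIZ];
	const char *base = "averylongifname";	/* 15 chars + NUL */

	/* keep at most IFNAMSIZ - 4 bytes of the base so the literal
	 * "v%d" plus the terminating NUL always fit
	 */
	snprintf(name, IFNAMSIZ, "%.*sv%%d", IFNAMSIZ - 4, base);
	printf("%s\n", name);	/* prints "averylongifnv%d" */
	return 0;
}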
@@ -9756,8 +9746,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
/* MTU range: 68 - 9706 */
netdev->min_mtu = ETH_MIN_MTU;
- netdev->max_mtu = I40E_MAX_RXBUFFER -
- (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
+ netdev->max_mtu = I40E_MAX_RXBUFFER - I40E_PACKET_HDR_PAD;
return 0;
}
@@ -9890,13 +9879,15 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
*/
ret = i40e_vsi_config_tc(vsi, enabled_tc);
if (ret) {
+ /* Single TC condition is not fatal,
+ * log a message and continue
+ */
dev_info(&pf->pdev->dev,
"failed to configure TCs for main VSI tc_map 0x%08x, err %s aq_err %s\n",
enabled_tc,
i40e_stat_str(&pf->hw, ret),
i40e_aq_str(&pf->hw,
pf->hw.aq.asq_last_status));
- ret = -ENOENT;
}
}
break;
@@ -10387,17 +10378,6 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
switch (vsi->type) {
/* setup the netdev if needed */
case I40E_VSI_MAIN:
- /* Apply relevant filters if a platform-specific mac
- * address was selected.
- */
- if (!!(pf->flags & I40E_FLAG_PF_MAC)) {
- ret = i40e_macaddr_init(vsi, pf->hw.mac.addr);
- if (ret) {
- dev_warn(&pf->pdev->dev,
- "could not set up macaddr; err %d\n",
- ret);
- }
- }
case I40E_VSI_VMDQ2:
ret = i40e_config_netdev(vsi);
if (ret)
@@ -10434,7 +10414,7 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
break;
}
- if ((pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) &&
+ if ((pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) &&
(vsi->type == I40E_VSI_VMDQ2)) {
ret = i40e_vsi_config_rss(vsi);
}
@@ -11443,7 +11423,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* Ignore error return codes because if it was already disabled via
* hardware settings this will fail
*/
- if (pf->flags & I40E_FLAG_STOP_FW_LLDP) {
+ if (pf->hw_features & I40E_HW_STOP_FW_LLDP) {
dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n");
i40e_aq_stop_lldp(hw, true, NULL);
}
@@ -11460,7 +11440,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
ether_addr_copy(hw->mac.perm_addr, hw->mac.addr);
i40e_get_port_mac_addr(hw, hw->mac.port_addr);
if (is_valid_ether_addr(hw->mac.port_addr))
- pf->flags |= I40E_FLAG_PORT_ID_VALID;
+ pf->hw_features |= I40E_HW_PORT_ID_VALID;
pci_set_drvdata(pdev, pf);
pci_save_state(pdev);
@@ -11576,7 +11556,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
wr32(hw, I40E_REG_MSS, val);
}
- if (pf->flags & I40E_FLAG_RESTART_AUTONEG) {
+ if (pf->hw_features & I40E_HW_RESTART_AUTONEG) {
msleep(75);
err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
if (err)
@@ -11663,7 +11643,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* and will report PCI Gen 1 x 1 by default so don't bother
* checking them.
*/
- if (!(pf->flags & I40E_FLAG_NO_PCI_LINK_CHECK)) {
+ if (!(pf->hw_features & I40E_HW_NO_PCI_LINK_CHECK)) {
char speed[PCI_SPEED_SIZE] = "Unknown";
char width[PCI_WIDTH_SIZE] = "Unknown";
@@ -11734,9 +11714,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) ||
(pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
- pf->flags |= I40E_FLAG_PHY_CONTROLS_LEDS;
+ pf->hw_features |= I40E_HW_PHY_CONTROLS_LEDS;
if (pf->hw.device_id == I40E_DEV_ID_SFP_I_X722)
- pf->flags |= I40E_FLAG_HAVE_CRT_RETIMER;
+ pf->hw_features |= I40E_HW_HAVE_CRT_RETIMER;
/* print a string summarizing features */
i40e_print_features(pf);
@@ -12048,7 +12028,7 @@ static void i40e_shutdown(struct pci_dev *pdev)
*/
i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
- if (pf->wol_en && (pf->flags & I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE))
+ if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
i40e_enable_mc_magic_wake(pf);
i40e_prep_for_reset(pf, false);
@@ -12080,7 +12060,7 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
set_bit(__I40E_SUSPENDED, pf->state);
set_bit(__I40E_DOWN, pf->state);
- if (pf->wol_en && (pf->flags & I40E_FLAG_WOL_MC_MAGIC_PKT_WAKE))
+ if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
i40e_enable_mc_magic_wake(pf);
i40e_prep_for_reset(pf, false);
@@ -12089,7 +12069,10 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
i40e_stop_misc_vector(pf);
-
+ if (pf->msix_entries) {
+ synchronize_irq(pf->msix_entries[0].vector);
+ free_irq(pf->msix_entries[0].vector, pf);
+ }
retval = pci_save_state(pdev);
if (retval)
return retval;
@@ -12129,6 +12112,15 @@ static int i40e_resume(struct pci_dev *pdev)
/* handling the reset will rebuild the device state */
if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) {
clear_bit(__I40E_DOWN, pf->state);
+ if (pf->msix_entries) {
+ err = request_irq(pf->msix_entries[0].vector,
+ i40e_intr, 0, pf->int_name, pf);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "request_irq for %s failed: %d\n",
+ pf->int_name, err);
+ }
+ }
i40e_reset_and_rebuild(pf, false, false);
}
@@ -12168,12 +12160,14 @@ static int __init i40e_init_module(void)
i40e_driver_string, i40e_driver_version_str);
pr_info("%s: %s\n", i40e_driver_name, i40e_copyright);
- /* we will see if single thread per module is enough for now,
- * it can't be any worse than using the system workqueue which
- * was already single threaded
+ /* There is no need to throttle the number of active tasks because
+ * each device limits its own task using a state bit for scheduling
+ * the service task, and the device tasks do not interfere with each
+ * other, so we don't set a max task limit. We must set WQ_MEM_RECLAIM
+ * since we need to be able to guarantee forward progress even under
+ * memory pressure.
*/
- i40e_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1,
- i40e_driver_name);
+ i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name);
if (!i40e_wq) {
pr_err("%s: Failed to create workqueue\n", i40e_driver_name);
return -ENOMEM;
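/* For reference: in alloc_workqueue() a max_active of 0 selects the
 * default concurrency limit (WQ_DFL_ACTIVE), and dropping WQ_UNBOUND lets
 * work items run on the per-CPU worker pools; WQ_MEM_RECLAIM reserves a
 * rescuer thread so the queue keeps making progress under memory pressure.
 */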
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 800bd55..57505b1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -134,8 +134,25 @@ i40e_i40e_acquire_nvm_exit:
**/
void i40e_release_nvm(struct i40e_hw *hw)
{
- if (!hw->nvm.blank_nvm_mode)
- i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL);
+ i40e_status ret_code = I40E_SUCCESS;
+ u32 total_delay = 0;
+
+ if (hw->nvm.blank_nvm_mode)
+ return;
+
+ ret_code = i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL);
+
+ /* there are some rare cases where trying to release the resource
+ * results in an admin Q timeout, so handle them correctly
+ */
+ while ((ret_code == I40E_ERR_ADMIN_QUEUE_TIMEOUT) &&
+ (total_delay < hw->aq.asq_cmd_timeout)) {
+ usleep_range(1000, 2000);
+ ret_code = i40e_aq_release_resource(hw,
+ I40E_NVM_RESOURCE_ID,
+ 0, NULL);
+ total_delay++;
+ }
}
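/* Each retry above sleeps roughly 1-2ms and bumps total_delay once, so the
 * release is reattempted at most hw->aq.asq_cmd_timeout times before the
 * timeout is treated as final.
 */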
/**
@@ -230,6 +247,7 @@ static i40e_status i40e_read_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
struct i40e_asq_cmd_details cmd_details;
memset(&cmd_details, 0, sizeof(cmd_details));
+ cmd_details.wb_desc = &hw->nvm_wb_desc;
/* Here we are checking the SR limit only for the flat memory model.
* We cannot do it for the module-based model, as we did not acquire
@@ -266,7 +284,7 @@ static i40e_status i40e_read_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
* @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
* @data: word read from the Shadow RAM
*
- * Reads one 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
+ * Reads one 16 bit word from the Shadow RAM using the AdminQ.
**/
static i40e_status i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
u16 *data)
@@ -280,27 +298,49 @@ static i40e_status i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
}
/**
- * i40e_read_nvm_word - Reads Shadow RAM
+ * __i40e_read_nvm_word - Reads nvm word, assumes caller does the locking
* @hw: pointer to the HW structure
* @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
* @data: word read from the Shadow RAM
*
- * Reads one 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
+ * Reads one 16 bit word from the Shadow RAM.
+ *
+ * Do not use this function except in cases where the nvm lock is already
+ * taken via i40e_acquire_nvm().
+ **/
+static i40e_status __i40e_read_nvm_word(struct i40e_hw *hw,
+ u16 offset, u16 *data)
+{
+ i40e_status ret_code = 0;
+
+ if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
+ ret_code = i40e_read_nvm_word_aq(hw, offset, data);
+ else
+ ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
+ return ret_code;
+}
+
+/**
+ * i40e_read_nvm_word - Reads nvm word and acquire lock if necessary
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM.
**/
i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
u16 *data)
{
- enum i40e_status_code ret_code = 0;
+ i40e_status ret_code = 0;
ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
- if (!ret_code) {
- if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
- ret_code = i40e_read_nvm_word_aq(hw, offset, data);
- } else {
- ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
- }
- i40e_release_nvm(hw);
- }
+ if (ret_code)
+ return ret_code;
+
+ ret_code = __i40e_read_nvm_word(hw, offset, data);
+
+ i40e_release_nvm(hw);
+
return ret_code;
}
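/* A sketch of the intended split, with error handling elided:
 *
 *	i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
 *	__i40e_read_nvm_word(hw, off1, &w1);	// lock already held
 *	__i40e_read_nvm_word(hw, off2, &w2);	// no release/re-acquire
 *	i40e_release_nvm(hw);
 *
 * whereas i40e_read_nvm_word() wraps a single read in its own
 * acquire/release pair.
 */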
@@ -393,31 +433,25 @@ read_nvm_buffer_aq_exit:
}
/**
- * i40e_read_nvm_buffer - Reads Shadow RAM buffer
+ * __i40e_read_nvm_buffer - Reads nvm buffer, caller must acquire lock
* @hw: pointer to the HW structure
* @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
* @words: (in) number of words to read; (out) number of words actually read
* @data: words read from the Shadow RAM
*
* Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_srrd()
- * method. The buffer read is preceded by the NVM ownership take
- * and followed by the release.
+ * method.
**/
-i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
- u16 *words, u16 *data)
+static i40e_status __i40e_read_nvm_buffer(struct i40e_hw *hw,
+ u16 offset, u16 *words,
+ u16 *data)
{
- enum i40e_status_code ret_code = 0;
+ i40e_status ret_code = 0;
- if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
- ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
- if (!ret_code) {
- ret_code = i40e_read_nvm_buffer_aq(hw, offset, words,
- data);
- i40e_release_nvm(hw);
- }
- } else {
+ if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
+ ret_code = i40e_read_nvm_buffer_aq(hw, offset, words, data);
+ else
ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data);
- }
return ret_code;
}
@@ -499,15 +533,15 @@ static i40e_status i40e_calc_nvm_checksum(struct i40e_hw *hw,
data = (u16 *)vmem.va;
/* read pointer to VPD area */
- ret_code = i40e_read_nvm_word(hw, I40E_SR_VPD_PTR, &vpd_module);
+ ret_code = __i40e_read_nvm_word(hw, I40E_SR_VPD_PTR, &vpd_module);
if (ret_code) {
ret_code = I40E_ERR_NVM_CHECKSUM;
goto i40e_calc_nvm_checksum_exit;
}
/* read pointer to PCIe Alt Auto-load module */
- ret_code = i40e_read_nvm_word(hw, I40E_SR_PCIE_ALT_AUTO_LOAD_PTR,
- &pcie_alt_module);
+ ret_code = __i40e_read_nvm_word(hw, I40E_SR_PCIE_ALT_AUTO_LOAD_PTR,
+ &pcie_alt_module);
if (ret_code) {
ret_code = I40E_ERR_NVM_CHECKSUM;
goto i40e_calc_nvm_checksum_exit;
@@ -521,7 +555,7 @@ static i40e_status i40e_calc_nvm_checksum(struct i40e_hw *hw,
if ((i % I40E_SR_SECTOR_SIZE_IN_WORDS) == 0) {
u16 words = I40E_SR_SECTOR_SIZE_IN_WORDS;
- ret_code = i40e_read_nvm_buffer(hw, i, &words, data);
+ ret_code = __i40e_read_nvm_buffer(hw, i, &words, data);
if (ret_code) {
ret_code = I40E_ERR_NVM_CHECKSUM;
goto i40e_calc_nvm_checksum_exit;
@@ -593,14 +627,19 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
u16 checksum_sr = 0;
u16 checksum_local = 0;
+ /* We must acquire the NVM lock in order to correctly synchronize the
+ * NVM accesses across multiple PFs. Without doing so it is possible
+ * for one of the PFs to read invalid data potentially indicating that
+ * the checksum is invalid.
+ */
+ ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+ if (ret_code)
+ return ret_code;
ret_code = i40e_calc_nvm_checksum(hw, &checksum_local);
+ __i40e_read_nvm_word(hw, I40E_SR_SW_CHECKSUM_WORD, &checksum_sr);
+ i40e_release_nvm(hw);
if (ret_code)
- goto i40e_validate_nvm_checksum_exit;
-
- /* Do not use i40e_read_nvm_word() because we do not want to take
- * the synchronization semaphores twice here.
- */
- i40e_read_nvm_word(hw, I40E_SR_SW_CHECKSUM_WORD, &checksum_sr);
+ return ret_code;
/* Verify read checksum from EEPROM is the same as
* calculated checksum
@@ -612,7 +651,6 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
if (checksum)
*checksum = checksum_local;
-i40e_validate_nvm_checksum_exit:
return ret_code;
}
@@ -736,6 +774,15 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
}
+ /* Acquire lock to prevent race condition where adminq_task
+ * can execute after i40e_nvmupd_nvm_read/write but before state
+ * variables (nvm_wait_opcode, nvm_release_on_done) are updated.
+ *
+ * During NVMUpdate, it is observed that lock could be held for
+ * ~5ms for most commands. However lock is held for ~60ms for
+ * NVMUPD_CSUM_LCB command.
+ */
+ mutex_lock(&hw->aq.arq_mutex);
switch (hw->nvmupd_state) {
case I40E_NVMUPD_STATE_INIT:
status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno);
@@ -756,7 +803,8 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
*/
if (cmd->offset == 0xffff) {
i40e_nvmupd_check_wait_event(hw, hw->nvm_wait_opcode);
- return 0;
+ status = 0;
+ goto exit;
}
status = I40E_ERR_NOT_READY;
@@ -771,6 +819,8 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
*perrno = -ESRCH;
break;
}
+exit:
+ mutex_unlock(&hw->aq.arq_mutex);
return status;
}
@@ -997,6 +1047,7 @@ retry:
break;
case I40E_NVMUPD_CSUM_CON:
+ /* Assumes the caller has acquired the nvm */
status = i40e_update_nvm_checksum(hw);
if (status) {
*perrno = hw->aq.asq_last_status ?
@@ -1011,6 +1062,7 @@ retry:
break;
case I40E_NVMUPD_CSUM_LCB:
+ /* Assumes the caller has acquired the nvm */
status = i40e_update_nvm_checksum(hw);
if (status) {
*perrno = hw->aq.asq_last_status ?
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index df613ea..a39b131 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -311,8 +311,6 @@ i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
void i40e_release_nvm(struct i40e_hw *hw);
i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
u16 *data);
-i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
- u16 *words, u16 *data);
i40e_status i40e_update_nvm_checksum(struct i40e_hw *hw);
i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
u16 *checksum);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 1a0be83..d8456c3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -158,13 +158,12 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
- struct timespec64 now, then;
+ struct timespec64 now;
- then = ns_to_timespec64(delta);
mutex_lock(&pf->tmreg_lock);
i40e_ptp_read(pf, &now);
- now = timespec64_add(now, then);
+ timespec64_add_ns(&now, delta);
i40e_ptp_write(pf, (const struct timespec64 *)&now);
mutex_unlock(&pf->tmreg_lock);
@@ -570,7 +569,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
- if (!(pf->flags & I40E_FLAG_PTP_L4_CAPABLE))
+ if (!(pf->hw_features & I40E_HW_PTP_L4_CAPABLE))
return -ERANGE;
pf->ptp_rx = true;
tsyntype = I40E_PRTTSYN_CTL1_V1MESSTYPE0_MASK |
@@ -584,7 +583,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
- if (!(pf->flags & I40E_FLAG_PTP_L4_CAPABLE))
+ if (!(pf->hw_features & I40E_HW_PTP_L4_CAPABLE))
return -ERANGE;
/* fall through */
case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
@@ -593,7 +592,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
pf->ptp_rx = true;
tsyntype = I40E_PRTTSYN_CTL1_V2MESSTYPE0_MASK |
I40E_PRTTSYN_CTL1_TSYNTYPE_V2;
- if (pf->flags & I40E_FLAG_PTP_L4_CAPABLE) {
+ if (pf->hw_features & I40E_HW_PTP_L4_CAPABLE) {
tsyntype |= I40E_PRTTSYN_CTL1_UDP_ENA_MASK;
config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
} else {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 2194960..1519dfb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -860,7 +860,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
netdev_tx_completed_queue(txring_txq(tx_ring),
total_packets, total_bytes);
-#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
(I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
/* Make sure that anybody stopping the queue after this
@@ -959,19 +959,31 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
enum i40e_latency_range new_latency_range = rc->latency_range;
- struct i40e_q_vector *qv = rc->ring->q_vector;
u32 new_itr = rc->itr;
int bytes_per_int;
- int usecs;
+ unsigned int usecs, estimated_usecs;
if (rc->total_packets == 0 || !rc->itr)
return false;
+ usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
+ bytes_per_int = rc->total_bytes / usecs;
+
+ /* The calculations in this algorithm depend on interrupts actually
+ * firing at the ITR rate. This may not happen if the packet rate is
+ * really low, or if we've been napi polling. Check to make sure
+ * that's not the case before we continue.
+ */
+ estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
+ if (estimated_usecs > usecs) {
+ new_latency_range = I40E_LOW_LATENCY;
+ goto reset_latency;
+ }
+
/* simple throttlerate management
* 0-10MB/s lowest (50000 ints/s)
* 10-20MB/s low (20000 ints/s)
* 20-1249MB/s bulk (18000 ints/s)
- * > 40000 Rx packets per second (8000 ints/s)
*
* The math works out because the divisor is in 10^(-6) which
* turns the bytes/us input value into MB/s values, but
@@ -979,9 +991,6 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
* are in 2 usec increments in the ITR registers, and make sure
* to use the smoothed values that the countdown timer gives us.
*/
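/* A worked instance of the check added above, with hypothetical numbers:
 * rc->itr == 25 (ITR registers count 2 usec units, so 50 usec between
 * interrupts, ~20000 ints/s) and ITR_COUNTDOWN_START == 100 give
 *
 *	usecs         = (25 << 1) * 100 = 5000
 *	bytes_per_int = 75000 / 5000    = 15	(bytes/usec == MB/s)
 *
 * 15 MB/s falls in the 10-20MB/s bucket, so the range stays
 * I40E_LOW_LATENCY; had the last update been more than 5000 usec ago, the
 * estimated_usecs test would have reset to I40E_LOW_LATENCY instead.
 */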
- usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
- bytes_per_int = rc->total_bytes / usecs;
-
switch (new_latency_range) {
case I40E_LOWEST_LATENCY:
if (bytes_per_int > 10)
@@ -994,24 +1003,13 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
new_latency_range = I40E_LOWEST_LATENCY;
break;
case I40E_BULK_LATENCY:
- case I40E_ULTRA_LATENCY:
default:
if (bytes_per_int <= 20)
new_latency_range = I40E_LOW_LATENCY;
break;
}
- /* this is to adjust RX more aggressively when streaming small
- * packets. The value of 40000 was picked as it is just beyond
- * what the hardware can receive per second if in low latency
- * mode.
- */
-#define RX_ULTRA_PACKET_RATE 40000
-
- if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
- (&qv->rx == rc))
- new_latency_range = I40E_ULTRA_LATENCY;
-
+reset_latency:
rc->latency_range = new_latency_range;
switch (new_latency_range) {
@@ -1024,21 +1022,18 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
case I40E_BULK_LATENCY:
new_itr = I40E_ITR_18K;
break;
- case I40E_ULTRA_LATENCY:
- new_itr = I40E_ITR_8K;
- break;
default:
break;
}
rc->total_bytes = 0;
rc->total_packets = 0;
+ rc->last_itr_update = jiffies;
if (new_itr != rc->itr) {
rc->itr = new_itr;
return true;
}
-
return false;
}
@@ -2065,7 +2060,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
bool failure = false, xdp_xmit = false;
- while (likely(total_rx_packets < budget)) {
+ while (likely(total_rx_packets < (unsigned int)budget)) {
struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc;
struct xdp_buff xdp;
@@ -2198,7 +2193,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
/* guarantee a trip back through this routine if there was a failure */
- return failure ? budget : total_rx_packets;
+ return failure ? budget : (int)total_rx_packets;
}
static u32 i40e_buildreg_itr(const int type, const u16 itr)
@@ -2243,6 +2238,12 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
int idx = q_vector->v_idx;
int rx_itr_setting, tx_itr_setting;
+ /* If we don't have MSIX, then we only need to re-enable icr0 */
+ if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
+ i40e_irq_dynamic_enable_icr0(vsi->back, false);
+ return;
+ }
+
vector = (q_vector->v_idx + vsi->base_vector);
/* avoid dynamic calculation if in countdown mode OR if
@@ -2363,7 +2364,6 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
/* If work not completed, return budget and polling will return */
if (!clean_complete) {
- const cpumask_t *aff_mask = &q_vector->affinity_mask;
int cpu_id = smp_processor_id();
/* It is possible that the interrupt affinity has changed but,
@@ -2373,15 +2373,22 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
* continue to poll, otherwise we must stop polling so the
* interrupt can move to the correct cpu.
*/
- if (likely(cpumask_test_cpu(cpu_id, aff_mask) ||
- !(vsi->back->flags & I40E_FLAG_MSIX_ENABLED))) {
+ if (!cpumask_test_cpu(cpu_id, &q_vector->affinity_mask)) {
+ /* Tell napi that we are done polling */
+ napi_complete_done(napi, work_done);
+
+ /* Force an interrupt */
+ i40e_force_wb(vsi, q_vector);
+
+ /* Return budget-1 so that polling stops */
+ return budget - 1;
+ }
tx_only:
- if (arm_wb) {
- q_vector->tx.ring[0].tx_stats.tx_force_wb++;
- i40e_enable_wb_on_itr(vsi, q_vector);
- }
- return budget;
+ if (arm_wb) {
+ q_vector->tx.ring[0].tx_stats.tx_force_wb++;
+ i40e_enable_wb_on_itr(vsi, q_vector);
}
+ return budget;
}
if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
@@ -2390,16 +2397,7 @@ tx_only:
/* Work is done so exit the polling mode and re-enable the interrupt */
napi_complete_done(napi, work_done);
- /* If we're prematurely stopping polling to fix the interrupt
- * affinity we want to make sure polling starts back up so we
- * issue a call to i40e_force_wb which triggers a SW interrupt.
- */
- if (!clean_complete)
- i40e_force_wb(vsi, q_vector);
- else if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED))
- i40e_irq_dynamic_enable_icr0(vsi->back, false);
- else
- i40e_update_enable_itr(vsi, q_vector);
+ i40e_update_enable_itr(vsi, q_vector);
return min(work_done, budget - 1);
}
@@ -2453,9 +2451,15 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
hlen = (hdr.network[0] & 0x0F) << 2;
l4_proto = hdr.ipv4->protocol;
} else {
- hlen = hdr.network - skb->data;
- l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
- hlen -= hdr.network - skb->data;
+ /* find the start of the innermost ipv6 header */
+ unsigned int inner_hlen = hdr.network - skb->data;
+ unsigned int h_offset = inner_hlen;
+
+ /* ipv6_find_hdr() updates h_offset to the start of the l4 (TCP) header */
+ l4_proto =
+ ipv6_find_hdr(skb, &h_offset, IPPROTO_TCP, NULL, NULL);
+ /* hlen is our best estimate of the network (IPv6) header length */
+ hlen = h_offset - inner_hlen;
}
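/* Worked example, assuming a plain TCP-in-IPv6 frame with no extension
 * headers: ipv6_find_hdr() advances h_offset past the 40 byte fixed IPv6
 * header to the start of the TCP header, so hlen = h_offset - inner_hlen
 * = 40, analogous to the ihl-derived value in the IPv4 branch.
 */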
if (l4_proto != IPPROTO_TCP)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index b288d58..2f848bc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -112,7 +112,7 @@ enum i40e_dyn_idx_t {
BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP))
#define i40e_pf_get_default_rss_hena(pf) \
- (((pf)->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) ? \
+ (((pf)->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) ? \
I40E_DEFAULT_RSS_HENA_EXPANDED : I40E_DEFAULT_RSS_HENA)
/* Supported Rx Buffer Sizes (a multiple of 128) */
@@ -130,6 +130,7 @@ enum i40e_dyn_idx_t {
* i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab)
*/
#define I40E_RX_HDR_SIZE I40E_RXBUFFER_256
+#define I40E_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
#define i40e_rx_desc i40e_32byte_rx_desc
#define I40E_RX_DMA_ATTR \
@@ -453,7 +454,6 @@ enum i40e_latency_range {
I40E_LOWEST_LATENCY = 0,
I40E_LOW_LATENCY = 1,
I40E_BULK_LATENCY = 2,
- I40E_ULTRA_LATENCY = 3,
};
struct i40e_ring_container {
@@ -461,6 +461,7 @@ struct i40e_ring_container {
struct i40e_ring *ring;
unsigned int total_bytes; /* total bytes processed this int */
unsigned int total_packets; /* total packets processed this int */
+ unsigned long last_itr_update; /* jiffies of last ITR update */
u16 count;
enum i40e_latency_range latency_range;
u16 itr;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 3a18ed1..fd4bbdd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -185,6 +185,7 @@ struct i40e_link_status {
enum i40e_aq_link_speed link_speed;
u8 link_info;
u8 an_info;
+ u8 req_fec_info;
u8 fec_info;
u8 ext_info;
u8 loopback;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index ecbe40e..4d1e670 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1528,54 +1528,54 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
VIRTCHNL_VF_OFFLOAD_RSS_REG |
VIRTCHNL_VF_OFFLOAD_VLAN;
- vfres->vf_offload_flags = VIRTCHNL_VF_OFFLOAD_L2;
+ vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2;
vsi = pf->vsi[vf->lan_vsi_idx];
if (!vsi->info.pvid)
- vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_VLAN;
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN;
if (i40e_vf_client_capable(pf, vf->vf_id) &&
(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_IWARP)) {
- vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_IWARP;
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_IWARP;
set_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states);
}
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
- vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF;
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF;
} else {
- if ((pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) &&
+ if ((pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) &&
(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ))
- vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ;
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ;
else
- vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;
}
- if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
+ if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
- vfres->vf_offload_flags |=
+ vfres->vf_cap_flags |=
VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
}
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP)
- vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP;
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP;
- if ((pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) &&
+ if ((pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE) &&
(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM))
- vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_POLLING) {
if (pf->flags & I40E_FLAG_MFP_ENABLED) {
dev_err(&pf->pdev->dev,
"VF %d requested polling mode: this feature is supported only when the device is running in single function per port (SFP) mode\n",
vf->vf_id);
- ret = I40E_ERR_PARAM;
+ aq_ret = I40E_ERR_PARAM;
goto err;
}
- vfres->vf_offload_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING;
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING;
}
- if (pf->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) {
+ if (pf->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) {
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
- vfres->vf_offload_flags |=
+ vfres->vf_cap_flags |=
VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
}
@@ -1741,16 +1741,14 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
NULL);
} else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
- aq_ret = 0;
- if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) {
- aq_ret =
- i40e_aq_set_vsi_uc_promisc_on_vlan(hw,
- vsi->seid,
- alluni,
- f->vlan,
- NULL);
- aq_err = pf->hw.aq.asq_last_status;
- }
+ if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID)
+ continue;
+ aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw,
+ vsi->seid,
+ alluni,
+ f->vlan,
+ NULL);
+ aq_err = pf->hw.aq.asq_last_status;
if (aq_ret)
dev_err(&pf->pdev->dev,
"Could not add VLAN %d to Unicast promiscuous domain err %s aq_err %s\n",
@@ -1760,7 +1758,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
}
} else {
aq_ret = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid,
- allmulti, NULL,
+ alluni, NULL,
true);
aq_err = pf->hw.aq.asq_last_status;
if (aq_ret) {
@@ -2532,6 +2530,60 @@ err:
}
/**
+ * i40e_vc_enable_vlan_stripping
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * Enable vlan header stripping for the VF
+ **/
+static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg,
+ u16 msglen)
+{
+ struct i40e_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+ i40e_status aq_ret = 0;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ i40e_vlan_stripping_enable(vsi);
+
+ /* send the response to the VF */
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING,
+ aq_ret);
+}
+
+/**
+ * i40e_vc_disable_vlan_stripping
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * Disable vlan header stripping for the VF
+ **/
+static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg,
+ u16 msglen)
+{
+ struct i40e_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+ i40e_status aq_ret = 0;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+ aq_ret = I40E_ERR_PARAM;
+ goto err;
+ }
+
+ i40e_vlan_stripping_disable(vsi);
+
+ /* send the response to the VF */
+err:
+ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+ aq_ret);
+}
+
+/**
* i40e_vc_process_vf_msg
* @pf: pointer to the PF structure
* @vf_id: source VF id
@@ -2650,6 +2702,12 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
case VIRTCHNL_OP_SET_RSS_HENA:
ret = i40e_vc_set_rss_hena(vf, msg, msglen);
break;
+ case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+ ret = i40e_vc_enable_vlan_stripping(vf, msg, msglen);
+ break;
+ case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+ ret = i40e_vc_disable_vlan_stripping(vf, msg, msglen);
+ break;
case VIRTCHNL_OP_UNKNOWN:
default:
@@ -2764,7 +2822,6 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
spin_unlock_bh(&vsi->mac_filter_hash_lock);
- dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id);
/* program mac filter */
if (i40e_sync_vsi_filters(vsi)) {
dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
@@ -2772,7 +2829,16 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
goto error_param;
}
ether_addr_copy(vf->default_lan_addr.addr, mac);
- vf->pf_set_mac = true;
+
+ if (is_zero_ether_addr(mac)) {
+ vf->pf_set_mac = false;
+ dev_info(&pf->pdev->dev, "Removing MAC on VF %d\n", vf_id);
+ } else {
+ vf->pf_set_mac = true;
+ dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n",
+ mac, vf_id);
+ }
+
/* Force the VF driver stop so it has to reload with new MAC address */
i40e_vc_disable_vf(pf, vf);
dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n");
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 1dd1938..8d3a2bf 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -333,9 +333,9 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc,
len = buf_len;
/* write the full 16-byte chunks */
if (hw->debug_mask & mask) {
- char prefix[20];
+ char prefix[27];
- snprintf(prefix, 20,
+ snprintf(prefix, sizeof(prefix),
"i40evf %02x:%02x.%x: \t0x",
hw->bus.bus_id,
hw->bus.device,
@@ -1104,7 +1104,7 @@ void i40e_vf_parse_hw_config(struct i40e_hw *hw,
hw->dev_caps.num_rx_qp = msg->num_queue_pairs;
hw->dev_caps.num_tx_qp = msg->num_queue_pairs;
hw->dev_caps.num_msix_vectors_vf = msg->max_vectors;
- hw->dev_caps.dcb = msg->vf_offload_flags &
+ hw->dev_caps.dcb = msg->vf_cap_flags &
VIRTCHNL_VF_OFFLOAD_L2;
hw->dev_caps.fcoe = 0;
for (i = 0; i < msg->num_vsis; i++) {
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
index 5e314fd..a907377 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_osdep.h
@@ -54,7 +54,7 @@ struct i40e_dma_mem {
void *va;
dma_addr_t pa;
u32 size;
-} __packed;
+};
#define i40e_allocate_dma_mem(h, m, unused, s, a) \
i40evf_allocate_dma_mem_d(h, m, s, a)
@@ -63,7 +63,7 @@ struct i40e_dma_mem {
struct i40e_virt_mem {
void *va;
u32 size;
-} __packed;
+};
#define i40e_allocate_virt_mem(h, m, s) i40evf_allocate_virt_mem_d(h, m, s)
#define i40e_free_virt_mem(h, m) i40evf_free_virt_mem_d(h, m)
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 12b02e5..c32c624 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -275,7 +275,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
netdev_tx_completed_queue(txring_txq(tx_ring),
total_packets, total_bytes);
-#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
(I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
/* Make sure that anybody stopping the queue after this
@@ -357,19 +357,31 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
enum i40e_latency_range new_latency_range = rc->latency_range;
- struct i40e_q_vector *qv = rc->ring->q_vector;
u32 new_itr = rc->itr;
int bytes_per_int;
- int usecs;
+ unsigned int usecs, estimated_usecs;
if (rc->total_packets == 0 || !rc->itr)
return false;
+ usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
+ bytes_per_int = rc->total_bytes / usecs;
+
+ /* The calculations in this algorithm depend on interrupts actually
+ * firing at the ITR rate. This may not happen if the packet rate is
+ * really low, or if we've been napi polling. Check to make sure
+ * that's not the case before we continue.
+ */
+ estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update);
+ if (estimated_usecs > usecs) {
+ new_latency_range = I40E_LOW_LATENCY;
+ goto reset_latency;
+ }
+
/* simple throttlerate management
* 0-10MB/s lowest (50000 ints/s)
* 10-20MB/s low (20000 ints/s)
* 20-1249MB/s bulk (18000 ints/s)
- * > 40000 Rx packets per second (8000 ints/s)
*
* The math works out because the divisor is in 10^(-6) which
* turns the bytes/us input value into MB/s values, but
@@ -377,9 +389,6 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
* are in 2 usec increments in the ITR registers, and make sure
* to use the smoothed values that the countdown timer gives us.
*/
- usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
- bytes_per_int = rc->total_bytes / usecs;
-
switch (new_latency_range) {
case I40E_LOWEST_LATENCY:
if (bytes_per_int > 10)
@@ -392,24 +401,13 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
new_latency_range = I40E_LOWEST_LATENCY;
break;
case I40E_BULK_LATENCY:
- case I40E_ULTRA_LATENCY:
default:
if (bytes_per_int <= 20)
new_latency_range = I40E_LOW_LATENCY;
break;
}
- /* this is to adjust RX more aggressively when streaming small
- * packets. The value of 40000 was picked as it is just beyond
- * what the hardware can receive per second if in low latency
- * mode.
- */
-#define RX_ULTRA_PACKET_RATE 40000
-
- if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
- (&qv->rx == rc))
- new_latency_range = I40E_ULTRA_LATENCY;
-
+reset_latency:
rc->latency_range = new_latency_range;
switch (new_latency_range) {
@@ -422,21 +420,18 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
case I40E_BULK_LATENCY:
new_itr = I40E_ITR_18K;
break;
- case I40E_ULTRA_LATENCY:
- new_itr = I40E_ITR_8K;
- break;
default:
break;
}
rc->total_bytes = 0;
rc->total_packets = 0;
+ rc->last_itr_update = jiffies;
if (new_itr != rc->itr) {
rc->itr = new_itr;
return true;
}
-
return false;
}
@@ -1299,7 +1294,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
bool failure = false;
- while (likely(total_rx_packets < budget)) {
+ while (likely(total_rx_packets < (unsigned int)budget)) {
struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc;
unsigned int size;
@@ -1406,7 +1401,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
/* guarantee a trip back through this routine if there was a failure */
- return failure ? budget : total_rx_packets;
+ return failure ? budget : (int)total_rx_packets;
}
static u32 i40e_buildreg_itr(const int type, const u16 itr)
@@ -1575,7 +1570,6 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget)
/* If work not completed, return budget and polling will return */
if (!clean_complete) {
- const cpumask_t *aff_mask = &q_vector->affinity_mask;
int cpu_id = smp_processor_id();
/* It is possible that the interrupt affinity has changed but,
@@ -1585,14 +1579,22 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget)
* continue to poll, otherwise we must stop polling so the
* interrupt can move to the correct cpu.
*/
- if (likely(cpumask_test_cpu(cpu_id, aff_mask))) {
+ if (!cpumask_test_cpu(cpu_id, &q_vector->affinity_mask)) {
+ /* Tell napi that we are done polling */
+ napi_complete_done(napi, work_done);
+
+ /* Force an interrupt */
+ i40evf_force_wb(vsi, q_vector);
+
+ /* Return budget-1 so that polling stops */
+ return budget - 1;
+ }
tx_only:
- if (arm_wb) {
- q_vector->tx.ring[0].tx_stats.tx_force_wb++;
- i40e_enable_wb_on_itr(vsi, q_vector);
- }
- return budget;
+ if (arm_wb) {
+ q_vector->tx.ring[0].tx_stats.tx_force_wb++;
+ i40e_enable_wb_on_itr(vsi, q_vector);
}
+ return budget;
}
if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
@@ -1601,14 +1603,7 @@ tx_only:
/* Work is done so exit the polling mode and re-enable the interrupt */
napi_complete_done(napi, work_done);
- /* If we're prematurely stopping polling to fix the interrupt
- * affinity we want to make sure polling starts back up so we
- * issue a call to i40evf_force_wb which triggers a SW interrupt.
- */
- if (!clean_complete)
- i40evf_force_wb(vsi, q_vector);
- else
- i40e_update_enable_itr(vsi, q_vector);
+ i40e_update_enable_itr(vsi, q_vector);
return min(work_done, budget - 1);
}
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 901282c..0d9f98b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -98,10 +98,6 @@ enum i40e_dyn_idx_t {
BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \
BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP))
-#define i40e_pf_get_default_rss_hena(pf) \
- (((pf)->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) ? \
- I40E_DEFAULT_RSS_HENA_EXPANDED : I40E_DEFAULT_RSS_HENA)
-
/* Supported Rx Buffer Sizes (a multiple of 128) */
#define I40E_RXBUFFER_256 256
#define I40E_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */
@@ -117,6 +113,7 @@ enum i40e_dyn_idx_t {
* i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab)
*/
#define I40E_RX_HDR_SIZE I40E_RXBUFFER_256
+#define I40E_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
#define i40e_rx_desc i40e_32byte_rx_desc
#define I40E_RX_DMA_ATTR \
@@ -428,7 +425,6 @@ enum i40e_latency_range {
I40E_LOWEST_LATENCY = 0,
I40E_LOW_LATENCY = 1,
I40E_BULK_LATENCY = 2,
- I40E_ULTRA_LATENCY = 3,
};
struct i40e_ring_container {
@@ -436,6 +432,7 @@ struct i40e_ring_container {
struct i40e_ring *ring;
unsigned int total_bytes; /* total bytes processed this int */
unsigned int total_packets; /* total packets processed this int */
+ unsigned long last_itr_update; /* jiffies of last ITR update */
u16 count;
enum i40e_latency_range latency_range;
u16 itr;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index bde7f24..2ea919d 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -159,6 +159,7 @@ struct i40e_link_status {
enum i40e_aq_link_speed link_speed;
u8 link_info;
u8 an_info;
+ u8 req_fec_info;
u8 fec_info;
u8 ext_info;
u8 loopback;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 6cc9208..82f6903 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -39,6 +39,18 @@
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/ipv6.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/gfp.h>
+#include <linux/skbuff.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/socket.h>
+#include <linux/jiffies.h>
#include <net/ip6_checksum.h>
#include <net/udp.h>
@@ -109,7 +121,7 @@ struct i40e_q_vector {
#define ITR_COUNTDOWN_START 100
u8 itr_countdown; /* when 0 or 1 update ITR */
int v_idx; /* vector index in list */
- char name[IFNAMSIZ + 9];
+ char name[IFNAMSIZ + 15];
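+ /* the extra 15 bytes leave room for the "i40evf-" prefix, a
+ * "-TxRx-"/"-rx-"/"-tx-" tag and a queue index appended to the
+ * IFNAMSIZ basename by i40evf_request_traffic_irqs()
+ */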
bool arm_wb_state;
cpumask_t affinity_mask;
struct irq_affinity_notify affinity_notify;
@@ -183,6 +195,7 @@ struct i40evf_adapter {
struct work_struct adminq_task;
struct delayed_work client_task;
struct delayed_work init_task;
+ wait_queue_head_t down_waitqueue;
struct i40e_q_vector *q_vectors;
struct list_head vlan_filter_list;
char misc_vector_name[IFNAMSIZ + 9];
@@ -225,8 +238,6 @@ struct i40evf_adapter {
/* duplicates for common code */
#define I40E_FLAG_DCB_ENABLED 0
#define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED
-#define I40E_FLAG_WB_ON_ITR_CAPABLE I40EVF_FLAG_WB_ON_ITR_CAPABLE
-#define I40E_FLAG_OUTER_UDP_CSUM_CAPABLE I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE
#define I40E_FLAG_LEGACY_RX I40EVF_FLAG_LEGACY_RX
/* flags for admin queue service task */
u32 aq_required;
@@ -250,6 +261,8 @@ struct i40evf_adapter {
#define I40EVF_FLAG_AQ_RELEASE_PROMISC BIT(16)
#define I40EVF_FLAG_AQ_REQUEST_ALLMULTI BIT(17)
#define I40EVF_FLAG_AQ_RELEASE_ALLMULTI BIT(18)
+#define I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT(19)
+#define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT(20)
/* OS defined structs */
struct net_device *netdev;
@@ -266,19 +279,19 @@ struct i40evf_adapter {
enum virtchnl_link_speed link_speed;
enum virtchnl_ops current_op;
#define CLIENT_ALLOWED(_a) ((_a)->vf_res ? \
- (_a)->vf_res->vf_offload_flags & \
+ (_a)->vf_res->vf_cap_flags & \
VIRTCHNL_VF_OFFLOAD_IWARP : \
0)
#define CLIENT_ENABLED(_a) ((_a)->cinst)
/* RSS by the PF should be preferred over RSS via other methods. */
-#define RSS_PF(_a) ((_a)->vf_res->vf_offload_flags & \
+#define RSS_PF(_a) ((_a)->vf_res->vf_cap_flags & \
VIRTCHNL_VF_OFFLOAD_RSS_PF)
-#define RSS_AQ(_a) ((_a)->vf_res->vf_offload_flags & \
+#define RSS_AQ(_a) ((_a)->vf_res->vf_cap_flags & \
VIRTCHNL_VF_OFFLOAD_RSS_AQ)
-#define RSS_REG(_a) (!((_a)->vf_res->vf_offload_flags & \
+#define RSS_REG(_a) (!((_a)->vf_res->vf_cap_flags & \
(VIRTCHNL_VF_OFFLOAD_RSS_AQ | \
VIRTCHNL_VF_OFFLOAD_RSS_PF)))
-#define VLAN_ALLOWED(_a) ((_a)->vf_res->vf_offload_flags & \
+#define VLAN_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \
VIRTCHNL_VF_OFFLOAD_VLAN)
struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */
struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */
@@ -347,6 +360,8 @@ void i40evf_get_hena(struct i40evf_adapter *adapter);
void i40evf_set_hena(struct i40evf_adapter *adapter);
void i40evf_set_rss_key(struct i40evf_adapter *adapter);
void i40evf_set_rss_lut(struct i40evf_adapter *adapter);
+void i40evf_enable_vlan_stripping(struct i40evf_adapter *adapter);
+void i40evf_disable_vlan_stripping(struct i40evf_adapter *adapter);
void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
enum virtchnl_ops v_opcode,
i40e_status v_retval, u8 *msg, u16 msglen);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index 9bb2cc7..65874d6 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -165,7 +165,7 @@ static void i40evf_get_ethtool_stats(struct net_device *netdev,
struct ethtool_stats *stats, u64 *data)
{
struct i40evf_adapter *adapter = netdev_priv(netdev);
- int i, j;
+ unsigned int i, j;
char *p;
for (i = 0; i < I40EVF_GLOBAL_STATS_LEN; i++) {
@@ -197,7 +197,7 @@ static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
int i;
if (sset == ETH_SS_STATS) {
- for (i = 0; i < I40EVF_GLOBAL_STATS_LEN; i++) {
+ for (i = 0; i < (int)I40EVF_GLOBAL_STATS_LEN; i++) {
memcpy(p, i40evf_gstrings_stats[i].stat_string,
ETH_GSTRING_LEN);
p += ETH_GSTRING_LEN;
@@ -258,29 +258,50 @@ static u32 i40evf_get_priv_flags(struct net_device *netdev)
static int i40evf_set_priv_flags(struct net_device *netdev, u32 flags)
{
struct i40evf_adapter *adapter = netdev_priv(netdev);
- u64 changed_flags;
+ u32 orig_flags, new_flags, changed_flags;
u32 i;
- changed_flags = adapter->flags;
+ orig_flags = READ_ONCE(adapter->flags);
+ new_flags = orig_flags;
for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) {
const struct i40evf_priv_flags *priv_flags;
priv_flags = &i40evf_gstrings_priv_flags[i];
- if (priv_flags->read_only)
- continue;
-
if (flags & BIT(i))
- adapter->flags |= priv_flags->flag;
+ new_flags |= priv_flags->flag;
else
- adapter->flags &= ~(priv_flags->flag);
+ new_flags &= ~(priv_flags->flag);
+
+ if (priv_flags->read_only &&
+ ((orig_flags ^ new_flags) & ~BIT(i)))
+ return -EOPNOTSUPP;
+ }
+
+	/* Before we finalize any flag changes, any checks needed to
+	 * determine whether the new flags will be supported should go
+	 * here...
+	 */
+
+	/* Compare and exchange the new flags into place. If cmpxchg
+	 * returns anything other than the old value, something else has
+	 * modified the flags variable since we copied it. We'll just
+	 * punt with an error and log a message in the message buffer.
+ */
+ if (cmpxchg(&adapter->flags, orig_flags, new_flags) != orig_flags) {
+ dev_warn(&adapter->pdev->dev,
+ "Unable to update adapter->flags as it was modified by another thread...\n");
+ return -EAGAIN;
}
- /* check for flags that changed */
- changed_flags ^= adapter->flags;
+ changed_flags = orig_flags ^ new_flags;
- /* Process any additional changes needed as a result of flag changes. */
+ /* Process any additional changes needed as a result of flag changes.
+ * The changed_flags value reflects the list of bits that were changed
+ * in the code above.
+ */
/* issue a reset to force legacy-rx change to take effect */
if (changed_flags & I40EVF_FLAG_LEGACY_RX) {
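The hunk above replaces a plain read-modify-write of adapter->flags with a cmpxchg() so that a concurrent updater is detected instead of silently overwritten. Below is a minimal userspace sketch of the same pattern, assuming GCC's __atomic builtins as stand-ins for the kernel's READ_ONCE()/cmpxchg(); FLAG_LEGACY_RX and the helper are illustrative names, not driver code.

#include <stdint.h>
#include <stdio.h>

#define FLAG_LEGACY_RX (1u << 0) /* illustrative, not the driver flag */

static uint32_t adapter_flags;

static int set_priv_flags(int legacy_rx_on)
{
	uint32_t orig = __atomic_load_n(&adapter_flags, __ATOMIC_RELAXED);
	uint32_t new_flags = legacy_rx_on ? (orig | FLAG_LEGACY_RX)
					  : (orig & ~FLAG_LEGACY_RX);

	/* Publish the new value only if nobody changed the variable since
	 * we copied it; a failed exchange maps to the driver's -EAGAIN.
	 */
	if (!__atomic_compare_exchange_n(&adapter_flags, &orig, new_flags,
					 0, __ATOMIC_SEQ_CST,
					 __ATOMIC_SEQ_CST))
		return -1;
	return 0;
}

int main(void)
{
	printf("update %s\n", set_priv_flags(1) ? "raced" : "ok");
	return 0;
}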
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 7c213a3..1825d95 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -520,7 +520,7 @@ static void i40evf_irq_affinity_notify(struct irq_affinity_notify *notify,
struct i40e_q_vector *q_vector =
container_of(notify, struct i40e_q_vector, affinity_notify);
- q_vector->affinity_mask = *mask;
+ cpumask_copy(&q_vector->affinity_mask, mask);
}
/**
@@ -543,9 +543,9 @@ static void i40evf_irq_affinity_release(struct kref *ref) {}
static int
i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
{
- int vector, err, q_vectors;
- int rx_int_idx = 0, tx_int_idx = 0;
- int irq_num;
+ unsigned int vector, q_vectors;
+ unsigned int rx_int_idx = 0, tx_int_idx = 0;
+ int irq_num, err;
i40evf_irq_disable(adapter);
/* Decrement for Other and TCP Timer vectors */
@@ -556,18 +556,15 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
if (q_vector->tx.ring && q_vector->rx.ring) {
- snprintf(q_vector->name, sizeof(q_vector->name) - 1,
- "i40evf-%s-%s-%d", basename,
- "TxRx", rx_int_idx++);
+ snprintf(q_vector->name, sizeof(q_vector->name),
+ "i40evf-%s-TxRx-%d", basename, rx_int_idx++);
tx_int_idx++;
} else if (q_vector->rx.ring) {
- snprintf(q_vector->name, sizeof(q_vector->name) - 1,
- "i40evf-%s-%s-%d", basename,
- "rx", rx_int_idx++);
+ snprintf(q_vector->name, sizeof(q_vector->name),
+ "i40evf-%s-rx-%d", basename, rx_int_idx++);
} else if (q_vector->tx.ring) {
- snprintf(q_vector->name, sizeof(q_vector->name) - 1,
- "i40evf-%s-%s-%d", basename,
- "tx", tx_int_idx++);
+ snprintf(q_vector->name, sizeof(q_vector->name),
+ "i40evf-%s-tx-%d", basename, tx_int_idx++);
} else {
/* skip this unused q_vector */
continue;
@@ -587,8 +584,10 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
q_vector->affinity_notify.release =
i40evf_irq_affinity_release;
irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
- /* assign the mask for this irq */
- irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
+			/* get_cpu_mask returns a static constant mask with a
+			 * permanent lifetime, so it's OK to use it here.
+ */
+ irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
}
return 0;
@@ -1143,6 +1142,7 @@ void i40evf_down(struct i40evf_adapter *adapter)
}
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
}
/**
@@ -1241,7 +1241,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
tx_ring->dev = &adapter->pdev->dev;
tx_ring->count = adapter->tx_desc_count;
tx_ring->tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF);
- if (adapter->flags & I40E_FLAG_WB_ON_ITR_CAPABLE)
+ if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE)
tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
rx_ring = &adapter->rx_rings[i];
@@ -1417,7 +1417,7 @@ static int i40evf_init_rss(struct i40evf_adapter *adapter)
if (!RSS_PF(adapter)) {
/* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */
- if (adapter->vf_res->vf_offload_flags &
+ if (adapter->vf_res->vf_cap_flags &
VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
adapter->hena = I40E_DEFAULT_RSS_HENA_EXPANDED;
else
@@ -1458,6 +1458,7 @@ static int i40evf_alloc_q_vectors(struct i40evf_adapter *adapter)
q_vector->adapter = adapter;
q_vector->vsi = &adapter->vsi;
q_vector->v_idx = q_idx;
+ cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
netif_napi_add(adapter->netdev, &q_vector->napi,
i40evf_napi_poll, NAPI_POLL_WEIGHT);
}
@@ -1678,6 +1679,16 @@ static void i40evf_watchdog_task(struct work_struct *work)
goto watchdog_done;
}
+ if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING) {
+ i40evf_enable_vlan_stripping(adapter);
+ goto watchdog_done;
+ }
+
+ if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING) {
+ i40evf_disable_vlan_stripping(adapter);
+ goto watchdog_done;
+ }
+
if (adapter->aq_required & I40EVF_FLAG_AQ_CONFIGURE_QUEUES) {
i40evf_configure_queues(adapter);
goto watchdog_done;
@@ -1794,6 +1805,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter)
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
adapter->state = __I40EVF_DOWN;
+ wake_up(&adapter->down_waitqueue);
dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n");
}
@@ -1877,7 +1889,7 @@ static void i40evf_reset_task(struct work_struct *work)
}
continue_reset:
- if (netif_running(adapter->netdev)) {
+ if (netif_running(netdev)) {
netif_carrier_off(netdev);
netif_tx_stop_all_queues(netdev);
adapter->link_up = false;
@@ -1939,12 +1951,13 @@ continue_reset:
i40evf_irq_enable(adapter, true);
} else {
adapter->state = __I40EVF_DOWN;
+ wake_up(&adapter->down_waitqueue);
}
return;
reset_err:
dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
- i40evf_close(adapter->netdev);
+ i40evf_close(netdev);
}
/**
@@ -1957,8 +1970,8 @@ static void i40evf_adminq_task(struct work_struct *work)
container_of(work, struct i40evf_adapter, adminq_task);
struct i40e_hw *hw = &adapter->hw;
struct i40e_arq_event_info event;
- struct virtchnl_msg *v_msg;
- i40e_status ret;
+ enum virtchnl_ops v_op;
+ i40e_status ret, v_ret;
u32 val, oldval;
u16 pending;
@@ -1970,15 +1983,15 @@ static void i40evf_adminq_task(struct work_struct *work)
if (!event.msg_buf)
goto out;
- v_msg = (struct virtchnl_msg *)&event.desc;
do {
ret = i40evf_clean_arq_element(hw, &event, &pending);
- if (ret || !v_msg->v_opcode)
+ v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
+ v_ret = (i40e_status)le32_to_cpu(event.desc.cookie_low);
+
+ if (ret || !v_op)
break; /* No event to process or error cleaning ARQ */
- i40evf_virtchnl_completion(adapter, v_msg->v_opcode,
- (i40e_status)v_msg->v_retval,
- event.msg_buf,
+ i40evf_virtchnl_completion(adapter, v_op, v_ret, event.msg_buf,
event.msg_len);
if (pending != 0)
memset(event.msg_buf, 0, I40EVF_MAX_AQ_BUF_SIZE);
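The hunk above stops aliasing the AQ descriptor as a virtchnl_msg and instead decodes the opcode and status from the descriptor cookies with explicit endian conversion. A hedged userspace sketch of that decoding follows, with le32toh() standing in for le32_to_cpu() and an illustrative (not the real) descriptor layout.

#include <endian.h> /* le32toh()/htole32(), userspace stand-ins */
#include <stdint.h>
#include <stdio.h>

struct aq_desc { /* illustrative layout, not the real AQ descriptor */
	uint32_t cookie_high; /* little-endian opcode */
	uint32_t cookie_low;  /* little-endian status */
};

int main(void)
{
	struct aq_desc desc = { htole32(12), htole32(0) };
	uint32_t v_op = le32toh(desc.cookie_high);
	int32_t v_ret = (int32_t)le32toh(desc.cookie_low);

	if (!v_op) /* no event to process */
		return 0;
	printf("opcode %u status %d\n", v_op, (int)v_ret);
	return 0;
}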
@@ -2238,6 +2251,7 @@ err_setup_tx:
static int i40evf_close(struct net_device *netdev)
{
struct i40evf_adapter *adapter = netdev_priv(netdev);
+ int status;
if (adapter->state <= __I40EVF_DOWN_PENDING)
return 0;
@@ -2255,7 +2269,18 @@ static int i40evf_close(struct net_device *netdev)
* still active and can DMA into memory. Resources are cleared in
* i40evf_virtchnl_completion() after we get confirmation from the PF
* driver that the rings have been stopped.
+ *
+	 * Also, we wait for the state to transition to __I40EVF_DOWN before
+	 * returning. The state change occurs in i40evf_virtchnl_completion()
+	 * after the VF resources are released (which happens after the PF
+	 * driver processes and responds to the admin queue commands).
*/
+
+ status = wait_event_timeout(adapter->down_waitqueue,
+ adapter->state == __I40EVF_DOWN,
+ msecs_to_jiffies(200));
+ if (!status)
+ netdev_warn(netdev, "Device resources not yet released\n");
return 0;
}
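The close path above now blocks on a wait queue until the PF confirms the queues are down. A small userspace sketch of that handshake, with a pthread condition variable standing in for the kernel wait queue and for wake_up()/wait_event_timeout(); all names here are illustrative.

#include <pthread.h>
#include <stdio.h>
#include <time.h>

enum state { STATE_DOWN_PENDING, STATE_DOWN };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t down_waitqueue = PTHREAD_COND_INITIALIZER;
static enum state adapter_state = STATE_DOWN_PENDING;

/* Stands in for the completion context, i.e. the PF confirming
 * VIRTCHNL_OP_DISABLE_QUEUES in the driver.
 */
static void *completion(void *unused)
{
	(void)unused;
	pthread_mutex_lock(&lock);
	adapter_state = STATE_DOWN;
	pthread_cond_broadcast(&down_waitqueue); /* the wake_up() */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	struct timespec deadline;
	pthread_t t;

	pthread_create(&t, NULL, completion, NULL);

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 1; /* plays the driver's 200 ms timeout */

	pthread_mutex_lock(&lock);
	while (adapter_state != STATE_DOWN) {
		/* the wait_event_timeout(): sleep until woken or timed out */
		if (pthread_cond_timedwait(&down_waitqueue, &lock, &deadline)) {
			fprintf(stderr, "resources not yet released\n");
			break;
		}
	}
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	return 0;
}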
@@ -2282,6 +2307,28 @@ static int i40evf_change_mtu(struct net_device *netdev, int new_mtu)
}
/**
+ * i40evf_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ * Note: expects to be called while under rtnl_lock()
+ **/
+static int i40evf_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct i40evf_adapter *adapter = netdev_priv(netdev);
+
+ if (!VLAN_ALLOWED(adapter))
+ return -EINVAL;
+
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+ adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
+ else
+ adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
+
+ return 0;
+}
+
+/**
* i40evf_features_check - Validate encapsulated packet conforms to limits
* @skb: skb buff
* @netdev: This physical port's netdev
@@ -2356,7 +2403,7 @@ static netdev_features_t i40evf_fix_features(struct net_device *netdev,
struct i40evf_adapter *adapter = netdev_priv(netdev);
features &= ~I40EVF_VLAN_FEATURES;
- if (adapter->vf_res->vf_offload_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+ if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
features |= I40EVF_VLAN_FEATURES;
return features;
}
@@ -2374,6 +2421,7 @@ static const struct net_device_ops i40evf_netdev_ops = {
.ndo_vlan_rx_kill_vid = i40evf_vlan_rx_kill_vid,
.ndo_features_check = i40evf_features_check,
.ndo_fix_features = i40evf_fix_features,
+ .ndo_set_features = i40evf_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = i40evf_netpoll,
#endif
@@ -2443,7 +2491,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
/* advertise to stack only if offloads for encapsulated packets is
* supported
*/
- if (vfres->vf_offload_flags & VIRTCHNL_VF_OFFLOAD_ENCAP) {
+ if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ENCAP) {
hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
NETIF_F_GSO_GRE |
NETIF_F_GSO_GRE_CSUM |
@@ -2453,7 +2501,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
NETIF_F_GSO_PARTIAL |
0;
- if (!(vfres->vf_offload_flags &
+ if (!(vfres->vf_cap_flags &
VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM))
netdev->gso_partial_features |=
NETIF_F_GSO_UDP_TUNNEL_CSUM;
@@ -2481,7 +2529,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK;
vsi->netdev = adapter->netdev;
vsi->qs_handle = adapter->vsi_res->qset_handle;
- if (vfres->vf_offload_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+ if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
adapter->rss_key_size = vfres->rss_key_size;
adapter->rss_lut_size = vfres->rss_lut_size;
} else {
@@ -2625,7 +2673,7 @@ static void i40evf_init_task(struct work_struct *work)
/* MTU range: 68 - 9710 */
netdev->min_mtu = ETH_MIN_MTU;
- netdev->max_mtu = I40E_MAX_RXBUFFER - (ETH_HLEN + ETH_FCS_LEN);
+ netdev->max_mtu = I40E_MAX_RXBUFFER - I40E_PACKET_HDR_PAD;
if (!is_valid_ether_addr(adapter->hw.mac.addr)) {
dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n",
@@ -2649,7 +2697,7 @@ static void i40evf_init_task(struct work_struct *work)
if (err)
goto err_sw_init;
i40evf_map_rings_to_vectors(adapter);
- if (adapter->vf_res->vf_offload_flags &
+ if (adapter->vf_res->vf_cap_flags &
VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
adapter->flags |= I40EVF_FLAG_WB_ON_ITR_CAPABLE;
@@ -2683,6 +2731,7 @@ static void i40evf_init_task(struct work_struct *work)
adapter->state = __I40EVF_DOWN;
set_bit(__I40E_VSI_DOWN, adapter->vsi.state);
i40evf_misc_irq_enable(adapter);
+ wake_up(&adapter->down_waitqueue);
adapter->rss_key = kzalloc(adapter->rss_key_size, GFP_KERNEL);
adapter->rss_lut = kzalloc(adapter->rss_lut_size, GFP_KERNEL);
@@ -2844,6 +2893,9 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
schedule_delayed_work(&adapter->init_task,
msecs_to_jiffies(5 * (pdev->devfn & 0x07)));
+ /* Setup the wait queue for indicating transition to down status */
+ init_waitqueue_head(&adapter->down_waitqueue);
+
return 0;
err_ioremap:
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index d2bb250..85876f4 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -821,6 +821,46 @@ void i40evf_set_rss_lut(struct i40evf_adapter *adapter)
}
/**
+ * i40evf_enable_vlan_stripping
+ * @adapter: adapter structure
+ *
+ * Request VLAN header stripping to be enabled
+ **/
+void i40evf_enable_vlan_stripping(struct i40evf_adapter *adapter)
+{
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot enable stripping, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+ adapter->current_op = VIRTCHNL_OP_ENABLE_VLAN_STRIPPING;
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
+ i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING,
+ NULL, 0);
+}
+
+/**
+ * i40evf_disable_vlan_stripping
+ * @adapter: adapter structure
+ *
+ * Request VLAN header stripping to be disabled
+ **/
+void i40evf_disable_vlan_stripping(struct i40evf_adapter *adapter)
+{
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot disable stripping, command %d pending\n",
+ adapter->current_op);
+ return;
+ }
+ adapter->current_op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING;
+ adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
+ i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+ NULL, 0);
+}
+
+/**
* i40evf_print_link_message - print link up or down
* @adapter: adapter structure
*
@@ -991,8 +1031,10 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
case VIRTCHNL_OP_DISABLE_QUEUES:
i40evf_free_all_tx_resources(adapter);
i40evf_free_all_rx_resources(adapter);
- if (adapter->state == __I40EVF_DOWN_PENDING)
+ if (adapter->state == __I40EVF_DOWN_PENDING) {
adapter->state = __I40EVF_DOWN;
+ wake_up(&adapter->down_waitqueue);
+ }
break;
case VIRTCHNL_OP_VERSION:
case VIRTCHNL_OP_CONFIG_IRQ_MAP:
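Tying the i40evf pieces together: .ndo_set_features only records a request bit in aq_required, and the watchdog later issues the matching virtchnl command, one outstanding command at a time. A compact sketch of that deferred-command pattern follows; the flag names mirror the driver's, everything else is illustrative.

#include <stdint.h>
#include <stdio.h>

#define AQ_ENABLE_VLAN_STRIPPING  (1u << 0)
#define AQ_DISABLE_VLAN_STRIPPING (1u << 1)

static uint32_t aq_required;

static void set_features(int vlan_rx_on) /* the .ndo_set_features side */
{
	aq_required |= vlan_rx_on ? AQ_ENABLE_VLAN_STRIPPING
				  : AQ_DISABLE_VLAN_STRIPPING;
}

static void watchdog_task(void) /* issues one PF command per pass */
{
	if (aq_required & AQ_ENABLE_VLAN_STRIPPING) {
		aq_required &= ~AQ_ENABLE_VLAN_STRIPPING;
		puts("send VIRTCHNL_OP_ENABLE_VLAN_STRIPPING");
		return;
	}
	if (aq_required & AQ_DISABLE_VLAN_STRIPPING) {
		aq_required &= ~AQ_DISABLE_VLAN_STRIPPING;
		puts("send VIRTCHNL_OP_DISABLE_VLAN_STRIPPING");
	}
}

int main(void)
{
	set_features(1);
	watchdog_task();
	return 0;
}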
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index 4a50870..c37cc8b 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -340,6 +340,9 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw)
phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state_82580;
phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_m88;
break;
+ case BCM54616_E_PHY_ID:
+ phy->type = e1000_phy_bcm54616;
+ break;
default:
ret_val = -E1000_ERR_PHY;
goto out;
@@ -1659,6 +1662,9 @@ static s32 igb_setup_copper_link_82575(struct e1000_hw *hw)
case e1000_phy_82580:
ret_val = igb_copper_link_setup_82580(hw);
break;
+ case e1000_phy_bcm54616:
+ ret_val = 0;
+ break;
default:
ret_val = -E1000_ERR_PHY;
break;
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index d851777..1de82f2 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -889,6 +889,7 @@
#define I210_I_PHY_ID 0x01410C00
#define M88E1543_E_PHY_ID 0x01410EA0
#define M88E1512_E_PHY_ID 0x01410DD0
+#define BCM54616_E_PHY_ID 0x03625D10
/* M88E1000 Specific Registers */
#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Register */
diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h
index 2fb2213..6c9485a 100644
--- a/drivers/net/ethernet/intel/igb/e1000_hw.h
+++ b/drivers/net/ethernet/intel/igb/e1000_hw.h
@@ -128,6 +128,7 @@ enum e1000_phy_type {
e1000_phy_ife,
e1000_phy_82580,
e1000_phy_i210,
+ e1000_phy_bcm54616,
};
enum e1000_bus_type {
@@ -491,13 +492,16 @@ struct e1000_fc_info {
struct e1000_mbx_operations {
s32 (*init_params)(struct e1000_hw *hw);
- s32 (*read)(struct e1000_hw *, u32 *, u16, u16);
- s32 (*write)(struct e1000_hw *, u32 *, u16, u16);
- s32 (*read_posted)(struct e1000_hw *, u32 *, u16, u16);
- s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16);
- s32 (*check_for_msg)(struct e1000_hw *, u16);
- s32 (*check_for_ack)(struct e1000_hw *, u16);
- s32 (*check_for_rst)(struct e1000_hw *, u16);
+ s32 (*read)(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id,
+ bool unlock);
+ s32 (*write)(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id);
+ s32 (*read_posted)(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id);
+ s32 (*write_posted)(struct e1000_hw *hw, u32 *msg, u16 size,
+ u16 mbx_id);
+ s32 (*check_for_msg)(struct e1000_hw *hw, u16 mbx_id);
+ s32 (*check_for_ack)(struct e1000_hw *hw, u16 mbx_id);
+ s32 (*check_for_rst)(struct e1000_hw *hw, u16 mbx_id);
+ s32 (*unlock)(struct e1000_hw *hw, u16 mbx_id);
};
struct e1000_mbx_stats {
diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c
index 00e263f..bffd58f 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mbx.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c
@@ -32,7 +32,8 @@
*
* returns SUCCESS if it successfully read message from buffer
**/
-s32 igb_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+s32 igb_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id,
+ bool unlock)
{
struct e1000_mbx_info *mbx = &hw->mbx;
s32 ret_val = -E1000_ERR_MBX;
@@ -42,7 +43,7 @@ s32 igb_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
size = mbx->size;
if (mbx->ops.read)
- ret_val = mbx->ops.read(hw, msg, size, mbx_id);
+ ret_val = mbx->ops.read(hw, msg, size, mbx_id, unlock);
return ret_val;
}
@@ -125,6 +126,24 @@ s32 igb_check_for_rst(struct e1000_hw *hw, u16 mbx_id)
}
/**
+ * igb_unlock_mbx - unlock the mailbox
+ * @hw: pointer to the HW structure
+ * @mbx_id: id of mailbox to check
+ *
+ * returns SUCCESS if the mailbox was unlocked or else ERR_MBX
+ **/
+s32 igb_unlock_mbx(struct e1000_hw *hw, u16 mbx_id)
+{
+ struct e1000_mbx_info *mbx = &hw->mbx;
+ s32 ret_val = -E1000_ERR_MBX;
+
+ if (mbx->ops.unlock)
+ ret_val = mbx->ops.unlock(hw, mbx_id);
+
+ return ret_val;
+}
+
+/**
* igb_poll_for_msg - Wait for message notification
* @hw: pointer to the HW structure
* @mbx_id: id of mailbox to write
@@ -204,7 +223,7 @@ static s32 igb_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size,
ret_val = igb_poll_for_msg(hw, mbx_id);
if (!ret_val)
- ret_val = mbx->ops.read(hw, msg, size, mbx_id);
+ ret_val = mbx->ops.read(hw, msg, size, mbx_id, true);
out:
return ret_val;
}
@@ -341,6 +360,26 @@ static s32 igb_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number)
}
/**
+ * igb_release_mbx_lock_pf - release mailbox lock
+ * @hw: pointer to the HW structure
+ * @vf_number: the VF index
+ *
+ * return SUCCESS if we released the mailbox lock
+ **/
+static s32 igb_release_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number)
+{
+ u32 p2v_mailbox;
+
+ /* drop PF lock of mailbox, if set */
+ p2v_mailbox = rd32(E1000_P2VMAILBOX(vf_number));
+ if (p2v_mailbox & E1000_P2VMAILBOX_PFU)
+ wr32(E1000_P2VMAILBOX(vf_number),
+ p2v_mailbox & ~E1000_P2VMAILBOX_PFU);
+
+ return 0;
+}
+
+/**
* igb_write_mbx_pf - Places a message in the mailbox
* @hw: pointer to the HW structure
* @msg: The message buffer
@@ -385,13 +424,14 @@ out_no_write:
* @msg: The message buffer
* @size: Length of buffer
* @vf_number: the VF index
+ * @unlock: whether to unlock the mailbox when done
*
* This function copies a message from the mailbox buffer to the caller's
* memory buffer. The presumption is that the caller knows that there was
* a message due to a VF request so no polling for message is needed.
**/
static s32 igb_read_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size,
- u16 vf_number)
+ u16 vf_number, bool unlock)
{
s32 ret_val;
u16 i;
@@ -405,8 +445,12 @@ static s32 igb_read_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size,
for (i = 0; i < size; i++)
msg[i] = array_rd32(E1000_VMBMEM(vf_number), i);
- /* Acknowledge the message and release buffer */
- wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_ACK);
+ /* Acknowledge the message and release mailbox lock (or not) */
+ if (unlock)
+ wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_ACK);
+ else
+ wr32(E1000_P2VMAILBOX(vf_number),
+ E1000_P2VMAILBOX_ACK | E1000_P2VMAILBOX_PFU);
/* update stats */
hw->mbx.stats.msgs_rx++;
@@ -437,6 +481,7 @@ s32 igb_init_mbx_params_pf(struct e1000_hw *hw)
mbx->ops.check_for_msg = igb_check_for_msg_pf;
mbx->ops.check_for_ack = igb_check_for_ack_pf;
mbx->ops.check_for_rst = igb_check_for_rst_pf;
+ mbx->ops.unlock = igb_release_mbx_lock_pf;
mbx->stats.msgs_tx = 0;
mbx->stats.msgs_rx = 0;
diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.h b/drivers/net/ethernet/intel/igb/e1000_mbx.h
index 3e7fed7..a62b08e 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mbx.h
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.h
@@ -67,11 +67,13 @@
#define E1000_PF_CONTROL_MSG 0x0100 /* PF control message */
-s32 igb_read_mbx(struct e1000_hw *, u32 *, u16, u16);
-s32 igb_write_mbx(struct e1000_hw *, u32 *, u16, u16);
-s32 igb_check_for_msg(struct e1000_hw *, u16);
-s32 igb_check_for_ack(struct e1000_hw *, u16);
-s32 igb_check_for_rst(struct e1000_hw *, u16);
-s32 igb_init_mbx_params_pf(struct e1000_hw *);
+s32 igb_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id,
+ bool unlock);
+s32 igb_write_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id);
+s32 igb_check_for_msg(struct e1000_hw *hw, u16 mbx_id);
+s32 igb_check_for_ack(struct e1000_hw *hw, u16 mbx_id);
+s32 igb_check_for_rst(struct e1000_hw *hw, u16 mbx_id);
+s32 igb_unlock_mbx(struct e1000_hw *hw, u16 mbx_id);
+s32 igb_init_mbx_params_pf(struct e1000_hw *hw);
#endif /* _E1000_MBX_H_ */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index ec62410..fd4a46b 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1791,6 +1791,8 @@ void igb_down(struct igb_adapter *adapter)
wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
/* flush and sleep below */
+ igb_nfc_filter_exit(adapter);
+
netif_carrier_off(netdev);
netif_tx_stop_all_queues(netdev);
@@ -3317,8 +3319,6 @@ static int __igb_close(struct net_device *netdev, bool suspending)
igb_down(adapter);
igb_free_irq(adapter);
- igb_nfc_filter_exit(adapter);
-
igb_free_all_tx_resources(adapter);
igb_free_all_rx_resources(adapter);
@@ -5380,7 +5380,8 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
- if (!test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS,
+ if (adapter->tstamp_config.tx_type & HWTSTAMP_TX_ON &&
+ !test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS,
&adapter->state)) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
tx_flags |= IGB_TX_FLAGS_TSTAMP;
@@ -5745,8 +5746,6 @@ static void igb_tsync_interrupt(struct igb_adapter *adapter)
event.type = PTP_CLOCK_PPS;
if (adapter->ptp_caps.pps)
ptp_clock_event(adapter->ptp_clock, &event);
- else
- dev_err(&adapter->pdev->dev, "unexpected SYS WRAP");
ack |= TSINTR_SYS_WRAP;
}
@@ -6676,32 +6675,33 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
struct vf_data_storage *vf_data = &adapter->vf_data[vf];
s32 retval;
- retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
+ retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf, false);
if (retval) {
/* if receive failed revoke VF CTS stats and restart init */
dev_err(&pdev->dev, "Error receiving message from VF\n");
vf_data->flags &= ~IGB_VF_FLAG_CTS;
if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
- return;
+ goto unlock;
goto out;
}
/* this is a message we already processed, do nothing */
if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
- return;
+ goto unlock;
/* until the vf completes a reset it should not be
* allowed to start any configuration.
*/
if (msgbuf[0] == E1000_VF_RESET) {
+ /* unlocks mailbox */
igb_vf_reset_msg(adapter, vf);
return;
}
if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
- return;
+ goto unlock;
retval = -1;
goto out;
}
@@ -6742,7 +6742,12 @@ out:
else
msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
+ /* unlocks mailbox */
igb_write_mbx(hw, msgbuf, 1, vf);
+ return;
+
+unlock:
+ igb_unlock_mbx(hw, vf);
}
static void igb_msg_task(struct igb_adapter *adapter)
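The igb changes above make mailbox lock ownership explicit: the PF now reads a VF message without releasing the PFU lock, and either the reply write or the new igb_unlock_mbx() drops it on the early-return paths that previously leaked it. A toy model of that handoff, with all names illustrative:

#include <stdio.h>

static int pfu_locked; /* the P2VMAILBOX PFU bit, modelled as a flag */

static void read_mbx(int unlock)
{
	pfu_locked = !unlock; /* ack the message, optionally keep the lock */
}

static void write_mbx(void)
{
	puts("reply sent");
	pfu_locked = 0; /* the reply write releases the lock */
}

static void unlock_mbx(void)
{
	pfu_locked = 0; /* explicit release for the early-return paths */
}

int main(void)
{
	int already_processed = 1;

	read_mbx(0); /* keep the lock held across message handling */
	if (already_processed)
		unlock_mbx(); /* previously a bare return leaked the lock */
	else
		write_mbx();
	printf("pfu_locked=%d\n", pfu_locked);
	return 0;
}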
diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c
index 34faa11..a127688 100644
--- a/drivers/net/ethernet/intel/igbvf/ethtool.c
+++ b/drivers/net/ethernet/intel/igbvf/ethtool.c
@@ -296,8 +296,12 @@ static int igbvf_link_test(struct igbvf_adapter *adapter, u64 *data)
struct e1000_hw *hw = &adapter->hw;
*data = 0;
+ spin_lock_bh(&hw->mbx_lock);
+
hw->mac.ops.check_for_link(hw);
+ spin_unlock_bh(&hw->mbx_lock);
+
if (!(er32(STATUS) & E1000_STATUS_LU))
*data = 1;
diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c
index 01752f4..c9a4416 100644
--- a/drivers/net/ethernet/intel/igbvf/mbx.c
+++ b/drivers/net/ethernet/intel/igbvf/mbx.c
@@ -264,6 +264,8 @@ static s32 e1000_write_mbx_vf(struct e1000_hw *hw, u32 *msg, u16 size)
s32 err;
u16 i;
+ WARN_ON_ONCE(!spin_is_locked(&hw->mbx_lock));
+
/* lock the mailbox to prevent pf/vf race condition */
err = e1000_obtain_mbx_lock_vf(hw);
if (err)
@@ -300,6 +302,8 @@ static s32 e1000_read_mbx_vf(struct e1000_hw *hw, u32 *msg, u16 size)
s32 err;
u16 i;
+ WARN_ON_ONCE(!spin_is_locked(&hw->mbx_lock));
+
/* lock the mailbox to prevent pf/vf race condition */
err = e1000_obtain_mbx_lock_vf(hw);
if (err)
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 1b9cbbe..1ed5569 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -1235,7 +1235,12 @@ static void igbvf_set_rlpml(struct igbvf_adapter *adapter)
struct e1000_hw *hw = &adapter->hw;
max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
+
+ spin_lock_bh(&hw->mbx_lock);
+
e1000_rlpml_set_vf(hw, max_frame_size);
+
+ spin_unlock_bh(&hw->mbx_lock);
}
static int igbvf_vlan_rx_add_vid(struct net_device *netdev,
@@ -1244,10 +1249,16 @@ static int igbvf_vlan_rx_add_vid(struct net_device *netdev,
struct igbvf_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
+ spin_lock_bh(&hw->mbx_lock);
+
if (hw->mac.ops.set_vfta(hw, vid, true)) {
dev_err(&adapter->pdev->dev, "Failed to add vlan id %d\n", vid);
+ spin_unlock_bh(&hw->mbx_lock);
return -EINVAL;
}
+
+ spin_unlock_bh(&hw->mbx_lock);
+
set_bit(vid, adapter->active_vlans);
return 0;
}
@@ -1258,11 +1269,17 @@ static int igbvf_vlan_rx_kill_vid(struct net_device *netdev,
struct igbvf_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
+ spin_lock_bh(&hw->mbx_lock);
+
if (hw->mac.ops.set_vfta(hw, vid, false)) {
dev_err(&adapter->pdev->dev,
"Failed to remove vlan id %d\n", vid);
+ spin_unlock_bh(&hw->mbx_lock);
return -EINVAL;
}
+
+ spin_unlock_bh(&hw->mbx_lock);
+
clear_bit(vid, adapter->active_vlans);
return 0;
}
@@ -1428,7 +1445,11 @@ static void igbvf_set_multi(struct net_device *netdev)
netdev_for_each_mc_addr(ha, netdev)
memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
+ spin_lock_bh(&hw->mbx_lock);
+
hw->mac.ops.update_mc_addr_list(hw, mta_list, i, 0, 0);
+
+ spin_unlock_bh(&hw->mbx_lock);
kfree(mta_list);
}
@@ -1449,16 +1470,24 @@ static int igbvf_set_uni(struct net_device *netdev)
return -ENOSPC;
}
+ spin_lock_bh(&hw->mbx_lock);
+
/* Clear all unicast MAC filters */
hw->mac.ops.set_uc_addr(hw, E1000_VF_MAC_FILTER_CLR, NULL);
+ spin_unlock_bh(&hw->mbx_lock);
+
if (!netdev_uc_empty(netdev)) {
struct netdev_hw_addr *ha;
/* Add MAC filters one by one */
netdev_for_each_uc_addr(ha, netdev) {
+ spin_lock_bh(&hw->mbx_lock);
+
hw->mac.ops.set_uc_addr(hw, E1000_VF_MAC_FILTER_ADD,
ha->addr);
+
+ spin_unlock_bh(&hw->mbx_lock);
udelay(200);
}
}
@@ -1503,12 +1532,16 @@ static void igbvf_reset(struct igbvf_adapter *adapter)
struct net_device *netdev = adapter->netdev;
struct e1000_hw *hw = &adapter->hw;
+ spin_lock_bh(&hw->mbx_lock);
+
/* Allow time for pending master requests to run */
if (mac->ops.reset_hw(hw))
dev_err(&adapter->pdev->dev, "PF still resetting\n");
mac->ops.init_hw(hw);
+ spin_unlock_bh(&hw->mbx_lock);
+
if (is_valid_ether_addr(adapter->hw.mac.addr)) {
memcpy(netdev->dev_addr, adapter->hw.mac.addr,
netdev->addr_len);
@@ -1643,6 +1676,7 @@ static int igbvf_sw_init(struct igbvf_adapter *adapter)
igbvf_irq_disable(adapter);
spin_lock_init(&adapter->stats_lock);
+ spin_lock_init(&adapter->hw.mbx_lock);
set_bit(__IGBVF_DOWN, &adapter->state);
return 0;
@@ -1786,8 +1820,12 @@ static int igbvf_set_mac(struct net_device *netdev, void *p)
memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+ spin_lock_bh(&hw->mbx_lock);
+
hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
+ spin_unlock_bh(&hw->mbx_lock);
+
if (!ether_addr_equal(addr->sa_data, hw->mac.addr))
return -EADDRNOTAVAIL;
@@ -1858,7 +1896,12 @@ static bool igbvf_has_link(struct igbvf_adapter *adapter)
if (test_bit(__IGBVF_DOWN, &adapter->state))
return false;
+ spin_lock_bh(&hw->mbx_lock);
+
ret_val = hw->mac.ops.check_for_link(hw);
+
+ spin_unlock_bh(&hw->mbx_lock);
+
link_active = !hw->mac.get_link_status;
/* if check for link returns error we will need to reset */
@@ -2808,6 +2851,8 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->min_mtu = ETH_MIN_MTU;
netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
+ spin_lock_bh(&hw->mbx_lock);
+
/* reset the controller to put the device in a known good state */
err = hw->mac.ops.reset_hw(hw);
if (err) {
@@ -2824,6 +2869,8 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->addr_len);
}
+ spin_unlock_bh(&hw->mbx_lock);
+
if (!is_valid_ether_addr(netdev->dev_addr)) {
dev_info(&pdev->dev, "Assigning random MAC address.\n");
eth_hw_addr_random(netdev);
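Every igbvf mailbox operation above is now bracketed by the new hw->mbx_lock, so the VF never has two PF conversations in flight. A userspace sketch of the recurring pattern, with a pthread mutex standing in for spin_lock_bh(); names are illustrative.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mbx_lock = PTHREAD_MUTEX_INITIALIZER;

static int set_vfta(int vid, int add) /* stands in for a mailbox op */
{
	printf("%s vlan %d via mailbox\n", add ? "add" : "kill", vid);
	return 0;
}

static int vlan_rx_add_vid(int vid)
{
	int err;

	pthread_mutex_lock(&mbx_lock);   /* spin_lock_bh(&hw->mbx_lock) */
	err = set_vfta(vid, 1);
	pthread_mutex_unlock(&mbx_lock); /* spin_unlock_bh(&hw->mbx_lock) */

	return err ? -1 : 0;
}

int main(void)
{
	return vlan_rx_add_vid(100);
}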
diff --git a/drivers/net/ethernet/intel/igbvf/vf.c b/drivers/net/ethernet/intel/igbvf/vf.c
index 528be11..9577ccf 100644
--- a/drivers/net/ethernet/intel/igbvf/vf.c
+++ b/drivers/net/ethernet/intel/igbvf/vf.c
@@ -149,7 +149,7 @@ static s32 e1000_reset_hw_vf(struct e1000_hw *hw)
msgbuf[0] = E1000_VF_RESET;
mbx->ops.write_posted(hw, msgbuf, 1);
- msleep(10);
+ mdelay(10);
/* set our "perm_addr" based on info provided by PF */
ret_val = mbx->ops.read_posted(hw, msgbuf, 3);
@@ -230,6 +230,7 @@ static void e1000_update_mc_addr_list_vf(struct e1000_hw *hw,
u16 *hash_list = (u16 *)&msgbuf[1];
u32 hash_value;
u32 cnt, i;
+ s32 ret_val;
/* Each entry in the list uses 1 16 bit word. We have 30
* 16 bit words available in our HW msg buffer (minus 1 for the
@@ -250,7 +251,9 @@ static void e1000_update_mc_addr_list_vf(struct e1000_hw *hw,
mc_addr_list += ETH_ALEN;
}
- mbx->ops.write_posted(hw, msgbuf, E1000_VFMAILBOX_SIZE);
+ ret_val = mbx->ops.write_posted(hw, msgbuf, E1000_VFMAILBOX_SIZE);
+ if (!ret_val)
+ mbx->ops.read_posted(hw, msgbuf, 1);
}
/**
@@ -293,11 +296,14 @@ void e1000_rlpml_set_vf(struct e1000_hw *hw, u16 max_size)
{
struct e1000_mbx_info *mbx = &hw->mbx;
u32 msgbuf[2];
+ s32 ret_val;
msgbuf[0] = E1000_VF_SET_LPE;
msgbuf[1] = max_size;
- mbx->ops.write_posted(hw, msgbuf, 2);
+ ret_val = mbx->ops.write_posted(hw, msgbuf, 2);
+ if (!ret_val)
+ mbx->ops.read_posted(hw, msgbuf, 1);
}
/**
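The vf.c hunks above fix posted writes that never consumed the PF's ACK, which left a stale message for the next mailbox reader. A stub sketch of the corrected sequence (write, then drain the ACK only on success); the helpers are illustrative.

#include <stdio.h>

static int write_posted(const char *msg) /* stub for mbx->ops.write_posted */
{
	printf("VF -> PF: %s\n", msg);
	return 0; /* 0 = success */
}

static int read_posted(void) /* stub for mbx->ops.read_posted */
{
	puts("VF <- PF: ACK consumed");
	return 0;
}

int main(void)
{
	/* Drain the PF's ACK only if the write itself went through,
	 * mirroring the ret_val checks added in the hunks above.
	 */
	if (!write_posted("E1000_VF_SET_LPE"))
		read_posted();
	return 0;
}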
diff --git a/drivers/net/ethernet/intel/igbvf/vf.h b/drivers/net/ethernet/intel/igbvf/vf.h
index 4cf78b0..d213eef 100644
--- a/drivers/net/ethernet/intel/igbvf/vf.h
+++ b/drivers/net/ethernet/intel/igbvf/vf.h
@@ -245,6 +245,7 @@ struct e1000_hw {
struct e1000_mac_info mac;
struct e1000_mbx_info mbx;
+ spinlock_t mbx_lock; /* serializes mailbox ops */
union {
struct e1000_dev_spec_vf vf;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 4e35e70..2c19070 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -79,16 +79,28 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
switch (hw->phy.media_type) {
case ixgbe_media_type_fiber:
- hw->mac.ops.check_link(hw, &speed, &link_up, false);
- /* if link is down, assume supported */
- if (link_up)
- supported = speed == IXGBE_LINK_SPEED_1GB_FULL ?
+ /* flow control autoneg black list */
+ switch (hw->device_id) {
+ case IXGBE_DEV_ID_X550EM_A_SFP:
+ case IXGBE_DEV_ID_X550EM_A_SFP_N:
+ supported = false;
+ break;
+ default:
+ hw->mac.ops.check_link(hw, &speed, &link_up, false);
+ /* if link is down, assume supported */
+ if (link_up)
+ supported = speed == IXGBE_LINK_SPEED_1GB_FULL ?
true : false;
- else
- supported = true;
+ else
+ supported = true;
+ }
+
break;
case ixgbe_media_type_backplane:
- supported = true;
+ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_XFI)
+ supported = false;
+ else
+ supported = true;
break;
case ixgbe_media_type_copper:
/* only some copper devices support flow control autoneg */
@@ -111,6 +123,10 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
break;
}
+ if (!supported)
+ hw_dbg(hw, "Device %x does not support flow control autoneg\n",
+ hw->device_id);
+
return supported;
}
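The rework above turns the fiber flow-control check into an explicit device-ID blacklist, with the link-speed heuristic as the fallback and a single debug message for every unsupported device. Its shape, in a hedged userspace sketch with illustrative IDs:

#include <stdbool.h>
#include <stdio.h>

enum { DEV_ID_A_SFP = 1, DEV_ID_A_SFP_N, DEV_ID_OTHER };

static bool supports_autoneg_fc(int device_id, bool link_up, bool is_1g)
{
	bool supported;

	switch (device_id) {
	case DEV_ID_A_SFP:
	case DEV_ID_A_SFP_N:
		supported = false; /* blacklisted outright */
		break;
	default:
		/* if link is down, assume supported */
		supported = link_up ? is_1g : true;
	}

	if (!supported) /* the new single debug message */
		printf("device %d does not support fc autoneg\n", device_id);
	return supported;
}

int main(void)
{
	supports_autoneg_fc(DEV_ID_A_SFP, true, true);
	return 0;
}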
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index b45fdc9..f1bfae0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -1018,8 +1018,12 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx)
struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx];
struct ixgbe_ring *ring;
- ixgbe_for_each_ring(ring, q_vector->tx)
- adapter->tx_ring[ring->queue_index] = NULL;
+ ixgbe_for_each_ring(ring, q_vector->tx) {
+ if (ring_is_xdp(ring))
+ adapter->xdp_ring[ring->queue_index] = NULL;
+ else
+ adapter->tx_ring[ring->queue_index] = NULL;
+ }
ixgbe_for_each_ring(ring, q_vector->rx)
adapter->rx_ring[ring->queue_index] = NULL;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index f1dbdf2..d962368 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -386,7 +386,7 @@ u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
if (ixgbe_removed(reg_addr))
return IXGBE_FAILED_READ_REG;
if (unlikely(hw->phy.nw_mng_if_sel &
- IXGBE_NW_MNG_IF_SEL_ENABLE_10_100M)) {
+ IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE)) {
struct ixgbe_adapter *adapter;
int i;
@@ -2214,7 +2214,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
struct ixgbe_ring *rx_ring,
struct xdp_buff *xdp)
{
- int result = IXGBE_XDP_PASS;
+ int err, result = IXGBE_XDP_PASS;
struct bpf_prog *xdp_prog;
u32 act;
@@ -2231,6 +2231,13 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
case XDP_TX:
result = ixgbe_xmit_xdp_ring(adapter, xdp);
break;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
+ if (!err)
+ result = IXGBE_XDP_TX;
+ else
+ result = IXGBE_XDP_CONSUMED;
+ break;
default:
bpf_warn_invalid_xdp_action(act);
/* fallthrough */
@@ -2408,6 +2415,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
*/
wmb();
writel(ring->next_to_use, ring->tail);
+
+ xdp_do_flush_map();
}
u64_stats_update_begin(&rx_ring->syncp);
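The ixgbe hunks above add XDP_REDIRECT support: each per-packet verdict may stage work via xdp_do_redirect(), and a single xdp_do_flush_map() at the end of the poll pushes the whole batch out. A condensed userspace model of that verdict-plus-flush flow; the enum values and helpers are illustrative stand-ins for the kernel APIs.

#include <stdio.h>

enum { XDP_PASS, XDP_TX, XDP_REDIRECT };

static int pending; /* packets staged for redirect this poll */

static void run_xdp(int act)
{
	switch (act) {
	case XDP_REDIRECT:
		pending++; /* a successful xdp_do_redirect() */
		break;
	default:
		break; /* PASS/TX handled elsewhere */
	}
}

static void clean_rx_irq(const int *acts, int n)
{
	for (int i = 0; i < n; i++)
		run_xdp(acts[i]);
	if (pending) { /* the xdp_do_flush_map() at the end of the poll */
		printf("flushed %d redirected packets\n", pending);
		pending = 0;
	}
}

int main(void)
{
	int acts[] = { XDP_REDIRECT, XDP_PASS, XDP_REDIRECT };

	clean_rx_irq(acts, 3);
	return 0;
}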
@@ -5810,6 +5819,9 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
usleep_range(10000, 20000);
+ /* synchronize_sched() needed for pending XDP buffers to drain */
+ if (adapter->xdp_ring[0])
+ synchronize_sched();
netif_tx_stop_all_queues(netdev);
/* call carrier off first to avoid false dev_watchdog timeouts */
@@ -8839,7 +8851,6 @@ static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter,
}
static int ixgbe_configure_clsu32_add_hnode(struct ixgbe_adapter *adapter,
- __be16 protocol,
struct tc_cls_u32_offload *cls)
{
u32 uhtid = TC_U32_USERHTID(cls->hnode.handle);
@@ -8941,7 +8952,7 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter,
LIST_HEAD(actions);
int err;
- if (tc_no_actions(exts))
+ if (!tcf_exts_has_actions(exts))
return -EINVAL;
tcf_exts_to_list(exts, &actions);
@@ -9025,9 +9036,9 @@ static int ixgbe_clsu32_build_input(struct ixgbe_fdir_filter *input,
}
static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
- __be16 protocol,
struct tc_cls_u32_offload *cls)
{
+ __be16 protocol = cls->common.protocol;
u32 loc = cls->knode.handle & 0xfffff;
struct ixgbe_hw *hw = &adapter->hw;
struct ixgbe_mat_field *field_ptr;
@@ -9214,41 +9225,49 @@ free_jump:
return err;
}
-static int __ixgbe_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *tc)
+static int ixgbe_setup_tc_cls_u32(struct net_device *dev,
+ struct tc_cls_u32_offload *cls_u32)
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
- if (chain_index)
+ if (!is_classid_clsact_ingress(cls_u32->common.classid) ||
+ cls_u32->common.chain_index)
return -EOPNOTSUPP;
- if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
- tc->type == TC_SETUP_CLSU32) {
- switch (tc->cls_u32->command) {
- case TC_CLSU32_NEW_KNODE:
- case TC_CLSU32_REPLACE_KNODE:
- return ixgbe_configure_clsu32(adapter,
- proto, tc->cls_u32);
- case TC_CLSU32_DELETE_KNODE:
- return ixgbe_delete_clsu32(adapter, tc->cls_u32);
- case TC_CLSU32_NEW_HNODE:
- case TC_CLSU32_REPLACE_HNODE:
- return ixgbe_configure_clsu32_add_hnode(adapter, proto,
- tc->cls_u32);
- case TC_CLSU32_DELETE_HNODE:
- return ixgbe_configure_clsu32_del_hnode(adapter,
- tc->cls_u32);
- default:
- return -EINVAL;
- }
+ switch (cls_u32->command) {
+ case TC_CLSU32_NEW_KNODE:
+ case TC_CLSU32_REPLACE_KNODE:
+ return ixgbe_configure_clsu32(adapter, cls_u32);
+ case TC_CLSU32_DELETE_KNODE:
+ return ixgbe_delete_clsu32(adapter, cls_u32);
+ case TC_CLSU32_NEW_HNODE:
+ case TC_CLSU32_REPLACE_HNODE:
+ return ixgbe_configure_clsu32_add_hnode(adapter, cls_u32);
+ case TC_CLSU32_DELETE_HNODE:
+ return ixgbe_configure_clsu32_del_hnode(adapter, cls_u32);
+ default:
+ return -EOPNOTSUPP;
}
+}
- if (tc->type != TC_SETUP_MQPRIO)
- return -EINVAL;
-
- tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+static int ixgbe_setup_tc_mqprio(struct net_device *dev,
+ struct tc_mqprio_qopt *mqprio)
+{
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ return ixgbe_setup_tc(dev, mqprio->num_tc);
+}
- return ixgbe_setup_tc(dev, tc->mqprio->num_tc);
+static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_CLSU32:
+ return ixgbe_setup_tc_cls_u32(dev, type_data);
+ case TC_SETUP_MQPRIO:
+ return ixgbe_setup_tc_mqprio(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
}
#ifdef CONFIG_PCI_IOV
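The setup_tc rework above collapses the handle/protocol plumbing into a single switch on the setup type, with the type-specific payload passed as void *. A minimal sketch of that dispatch shape; the enum values and handlers below are illustrative.

#include <stdio.h>

enum tc_setup_type { TC_SETUP_CLSU32, TC_SETUP_MQPRIO };

static int setup_tc_cls_u32(void *data)
{
	(void)data; /* would be a u32 classifier offload descriptor */
	puts("u32 offload");
	return 0;
}

static int setup_tc_mqprio(void *data)
{
	(void)data; /* would be an mqprio qopt descriptor */
	puts("mqprio");
	return 0;
}

static int setup_tc(int type, void *type_data)
{
	switch (type) {
	case TC_SETUP_CLSU32:
		return setup_tc_cls_u32(type_data);
	case TC_SETUP_MQPRIO:
		return setup_tc_mqprio(type_data);
	default:
		return -95; /* -EOPNOTSUPP */
	}
}

int main(void)
{
	return setup_tc(TC_SETUP_MQPRIO, NULL);
}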
@@ -9823,6 +9842,53 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
}
}
+static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_ring *ring;
+ int err;
+
+ if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
+ return -ENETDOWN;
+
+	/* During program transitions it's possible adapter->xdp_prog is
+	 * assigned but the ring has not been configured yet. In this case
+	 * simply abort xmit.
+ */
+ ring = adapter->xdp_prog ? adapter->xdp_ring[smp_processor_id()] : NULL;
+ if (unlikely(!ring))
+ return -ENXIO;
+
+ err = ixgbe_xmit_xdp_ring(adapter, xdp);
+ if (err != IXGBE_XDP_TX)
+ return -ENOSPC;
+
+ return 0;
+}
+
+static void ixgbe_xdp_flush(struct net_device *dev)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_ring *ring;
+
+	/* It's possible the device went down between the xdp xmit and the
+	 * flush, so we need to ensure the device is still up.
+ */
+ if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
+ return;
+
+ ring = adapter->xdp_prog ? adapter->xdp_ring[smp_processor_id()] : NULL;
+ if (unlikely(!ring))
+ return;
+
+ /* Force memory writes to complete before letting h/w know there
+ * are new descriptors to fetch.
+ */
+ wmb();
+ writel(ring->next_to_use, ring->tail);
+}
+
static const struct net_device_ops ixgbe_netdev_ops = {
.ndo_open = ixgbe_open,
.ndo_stop = ixgbe_close,
@@ -9869,6 +9935,8 @@ static const struct net_device_ops ixgbe_netdev_ops = {
.ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port,
.ndo_features_check = ixgbe_features_check,
.ndo_xdp = ixgbe_xdp,
+ .ndo_xdp_xmit = ixgbe_xdp_xmit,
+ .ndo_xdp_flush = ixgbe_xdp_flush,
};
/**
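The new ndo_xdp_xmit/ndo_xdp_flush pair splits queuing from the doorbell: xmit may run many times per poll, and a single flush performs the wmb() and tail write. A hedged outline of that contract, with plain integers standing in for rings and MMIO:

#include <stdio.h>

static int next_to_use, tail;

static int xdp_xmit(void)
{
	next_to_use++; /* queue one descriptor, no doorbell yet */
	return 0;
}

static void xdp_flush(void)
{
	/* the driver issues wmb() here, then writes the tail register */
	tail = next_to_use;
	printf("doorbell: tail=%d\n", tail);
}

int main(void)
{
	xdp_xmit();
	xdp_xmit();
	xdp_flush(); /* one MMIO write covers both queued frames */
	return 0;
}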
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 0760bd7..112d24c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -679,8 +679,9 @@ update_vlvfb:
static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter,
int vf, int index, unsigned char *mac_addr)
{
- struct list_head *pos;
struct vf_macvlans *entry;
+ struct list_head *pos;
+ int retval = 0;
if (index <= 1) {
list_for_each(pos, &adapter->vf_mvs.l) {
@@ -721,13 +722,15 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter,
if (!entry || !entry->free)
return -ENOSPC;
+ retval = ixgbe_add_mac_filter(adapter, mac_addr, vf);
+ if (retval < 0)
+ return retval;
+
entry->free = false;
entry->is_macvlan = true;
entry->vf = vf;
memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
- ixgbe_add_mac_filter(adapter, mac_addr, vf);
-
return 0;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 9c2460c..ffa0ee5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -3778,8 +3778,8 @@ struct ixgbe_info {
#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_1G BIT(19)
#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_2_5G BIT(20)
#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_10G BIT(21)
-#define IXGBE_NW_MNG_IF_SEL_ENABLE_10_100M BIT(23)
-#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE BIT(24)
+#define IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE BIT(25)
+#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE BIT(24) /* X552 only */
#define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT 3
#define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD \
(0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 72d84a0..19fbb2f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1555,9 +1555,14 @@ static s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw)
**/
static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
{
+ struct ixgbe_mac_info *mac = &hw->mac;
s32 status;
u32 reg_val;
+ /* iXFI is only supported with X552 */
+ if (mac->type != ixgbe_mac_X550EM_x)
+ return IXGBE_ERR_LINK_SETUP;
+
/* Disable AN and force speed to 10G Serial. */
status = ixgbe_read_iosf_sb_reg_x550(hw,
IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
@@ -1874,8 +1879,10 @@ static s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw,
else
force_speed = IXGBE_LINK_SPEED_1GB_FULL;
- /* If internal link mode is XFI, then setup XFI internal link. */
- if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
+ /* If X552 and internal link mode is XFI, then setup XFI internal link.
+ */
+ if (hw->mac.type == ixgbe_mac_X550EM_x &&
+ !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
status = ixgbe_setup_ixfi_x550em(hw, &force_speed);
if (status)
@@ -2404,17 +2411,30 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc);
/* Enable link status change alarm */
- status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
- MDIO_MMD_AN, &reg);
- if (status)
- return status;
- reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN;
+	/* Enable the LASI interrupts on X552 devices to receive notifications
+	 * of the link configuration of the external PHY and correspondingly
+	 * support the configuration of the internal iXFI link, since iXFI does
+	 * not support auto-negotiation. This is not required for X553 devices,
+	 * whose KR interface performs auto-negotiation and is used as the
+	 * internal link to the external PHY. Hence the check here avoids
+	 * enabling LASI interrupts for X553 devices.
+ */
+ if (hw->mac.type != ixgbe_mac_x550em_a) {
+ status = hw->phy.ops.read_reg(hw,
+ IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
+ MDIO_MMD_AN, &reg);
+ if (status)
+ return status;
- status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
- MDIO_MMD_AN, reg);
- if (status)
- return status;
+ reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN;
+
+ status = hw->phy.ops.write_reg(hw,
+ IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
+ MDIO_MMD_AN, reg);
+ if (status)
+ return status;
+ }
/* Enable high temperature failure and global fault alarms */
status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
@@ -2615,7 +2635,8 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw)
if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
return IXGBE_ERR_CONFIG;
- if (hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) {
+ if (!(hw->mac.type == ixgbe_mac_X550EM_x &&
+ !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE))) {
speed = IXGBE_LINK_SPEED_10GB_FULL |
IXGBE_LINK_SPEED_1GB_FULL;
return ixgbe_setup_kr_speed_x550em(hw, speed);
@@ -2822,7 +2843,7 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
{
bool pause, asm_dir;
u32 reg_val;
- s32 rc;
+ s32 rc = 0;
/* Validate the requested mode */
if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
@@ -2865,32 +2886,37 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
return IXGBE_ERR_CONFIG;
}
- if (hw->device_id != IXGBE_DEV_ID_X550EM_X_KR &&
- hw->device_id != IXGBE_DEV_ID_X550EM_A_KR &&
- hw->device_id != IXGBE_DEV_ID_X550EM_A_KR_L)
- return 0;
-
- rc = hw->mac.ops.read_iosf_sb_reg(hw,
- IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
- IXGBE_SB_IOSF_TARGET_KR_PHY,
- &reg_val);
- if (rc)
- return rc;
-
- reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
- IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
- if (pause)
- reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
- if (asm_dir)
- reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
- rc = hw->mac.ops.write_iosf_sb_reg(hw,
- IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
- IXGBE_SB_IOSF_TARGET_KR_PHY,
- reg_val);
-
- /* This device does not fully support AN. */
- hw->fc.disable_fc_autoneg = true;
+ switch (hw->device_id) {
+ case IXGBE_DEV_ID_X550EM_X_KR:
+ case IXGBE_DEV_ID_X550EM_A_KR:
+ case IXGBE_DEV_ID_X550EM_A_KR_L:
+ rc = hw->mac.ops.read_iosf_sb_reg(hw,
+ IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY,
+ &reg_val);
+ if (rc)
+ return rc;
+ reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
+ IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
+ if (pause)
+ reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
+ if (asm_dir)
+ reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
+ rc = hw->mac.ops.write_iosf_sb_reg(hw,
+ IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY,
+ reg_val);
+
+ /* This device does not fully support AN. */
+ hw->fc.disable_fc_autoneg = true;
+ break;
+ case IXGBE_DEV_ID_X550EM_X_XFI:
+ hw->fc.disable_fc_autoneg = true;
+ break;
+ default:
+ break;
+ }
return rc;
}
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 9c94ea9..81c1fac 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -183,8 +183,6 @@ static char mv643xx_eth_driver_version[] = "1.4";
#define DEFAULT_TX_QUEUE_SIZE 512
#define SKB_DMA_REALIGN ((PAGE_SIZE - NET_SKB_PAD) % SMP_CACHE_BYTES)
-#define TSO_HEADER_SIZE 128
-
/* Max number of allowed TCP segments for software TSO */
#define MV643XX_MAX_TSO_SEGS 100
#define MV643XX_MAX_SKB_DESCS (MV643XX_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
@@ -1123,7 +1121,7 @@ static int txq_reclaim(struct tx_queue *txq, int budget, int force)
struct sk_buff *skb = __skb_dequeue(&txq->tx_skb);
if (!WARN_ON(!skb))
- dev_kfree_skb(skb);
+ dev_consume_skb_any(skb);
}
if (cmd_sts & ERROR_SUMMARY) {
@@ -2026,7 +2024,7 @@ static void rxq_deinit(struct rx_queue *rxq)
for (i = 0; i < rxq->rx_ring_size; i++) {
if (rxq->rx_skb[i]) {
- dev_kfree_skb(rxq->rx_skb[i]);
+ dev_consume_skb_any(rxq->rx_skb[i]);
rxq->rx_desc_count--;
}
}
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 0aab74c..64a0497 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -281,9 +281,6 @@
*/
#define MVNETA_RSS_LU_TABLE_SIZE 1
-/* TSO header size */
-#define TSO_HEADER_SIZE 128
-
/* Max number of Rx descriptors */
#define MVNETA_MAX_RXD 128
@@ -4332,7 +4329,7 @@ static int mvneta_probe(struct platform_device *pdev)
}
}
- dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+ dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO;
dev->hw_features |= dev->features;
dev->vlan_features |= dev->features;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 4d598ca..dd0ee26 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -18,6 +18,7 @@
#include <linux/inetdevice.h>
#include <linux/mbus.h>
#include <linux/module.h>
+#include <linux/mfd/syscon.h>
#include <linux/interrupt.h>
#include <linux/cpumask.h>
#include <linux/of.h>
@@ -27,12 +28,15 @@
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/phy.h>
+#include <linux/phy/phy.h>
#include <linux/clk.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>
+#include <linux/regmap.h>
#include <uapi/linux/ppp_defs.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/tso.h>
/* RX Fifo Registers */
#define MVPP2_RX_DATA_FIFO_SIZE_REG(port) (0x00 + 4 * (port))
@@ -120,6 +124,9 @@
#define MVPP2_TXQ_DESC_ADDR_REG 0x2084
#define MVPP2_TXQ_DESC_SIZE_REG 0x2088
#define MVPP2_TXQ_DESC_SIZE_MASK 0x3ff0
+#define MVPP2_TXQ_THRESH_REG 0x2094
+#define MVPP2_TXQ_THRESH_OFFSET 16
+#define MVPP2_TXQ_THRESH_MASK 0x3fff
#define MVPP2_AGGR_TXQ_UPDATE_REG 0x2090
#define MVPP2_TXQ_INDEX_REG 0x2098
#define MVPP2_TXQ_PREF_BUF_REG 0x209c
@@ -183,22 +190,25 @@
#define MVPP22_AXI_CODE_DOMAIN_SYSTEM 3
/* Interrupt Cause and Mask registers */
+#define MVPP2_ISR_TX_THRESHOLD_REG(port) (0x5140 + 4 * (port))
+#define MVPP2_MAX_ISR_TX_THRESHOLD 0xfffff0
+
#define MVPP2_ISR_RX_THRESHOLD_REG(rxq) (0x5200 + 4 * (rxq))
#define MVPP2_MAX_ISR_RX_THRESHOLD 0xfffff0
-#define MVPP21_ISR_RXQ_GROUP_REG(rxq) (0x5400 + 4 * (rxq))
+#define MVPP21_ISR_RXQ_GROUP_REG(port) (0x5400 + 4 * (port))
-#define MVPP22_ISR_RXQ_GROUP_INDEX_REG 0x5400
+#define MVPP22_ISR_RXQ_GROUP_INDEX_REG 0x5400
#define MVPP22_ISR_RXQ_GROUP_INDEX_SUBGROUP_MASK 0xf
-#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_MASK 0x380
-#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET 7
+#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_MASK 0x380
+#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET 7
#define MVPP22_ISR_RXQ_GROUP_INDEX_SUBGROUP_MASK 0xf
-#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_MASK 0x380
+#define MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_MASK 0x380
-#define MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG 0x5404
-#define MVPP22_ISR_RXQ_SUB_GROUP_STARTQ_MASK 0x1f
-#define MVPP22_ISR_RXQ_SUB_GROUP_SIZE_MASK 0xf00
-#define MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET 8
+#define MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG 0x5404
+#define MVPP22_ISR_RXQ_SUB_GROUP_STARTQ_MASK 0x1f
+#define MVPP22_ISR_RXQ_SUB_GROUP_SIZE_MASK 0xf00
+#define MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET 8
#define MVPP2_ISR_ENABLE_REG(port) (0x5420 + 4 * (port))
#define MVPP2_ISR_ENABLE_INTERRUPT(mask) ((mask) & 0xffff)
@@ -206,6 +216,7 @@
#define MVPP2_ISR_RX_TX_CAUSE_REG(port) (0x5480 + 4 * (port))
#define MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK 0xffff
#define MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK 0xff0000
+#define MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET 16
#define MVPP2_CAUSE_RX_FIFO_OVERRUN_MASK BIT(24)
#define MVPP2_CAUSE_FCS_ERR_MASK BIT(25)
#define MVPP2_CAUSE_TX_FIFO_UNDERRUN_MASK BIT(26)
@@ -265,7 +276,7 @@
#define MVPP2_BM_VIRT_RLS_REG 0x64c0
#define MVPP22_BM_ADDR_HIGH_RLS_REG 0x64c4
#define MVPP22_BM_ADDR_HIGH_PHYS_RLS_MASK 0xff
-#define MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK 0xff00
+#define MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK 0xff00
#define MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT 8
/* TX Scheduler registers */
@@ -307,57 +318,87 @@
/* Per-port registers */
#define MVPP2_GMAC_CTRL_0_REG 0x0
-#define MVPP2_GMAC_PORT_EN_MASK BIT(0)
-#define MVPP2_GMAC_MAX_RX_SIZE_OFFS 2
-#define MVPP2_GMAC_MAX_RX_SIZE_MASK 0x7ffc
-#define MVPP2_GMAC_MIB_CNTR_EN_MASK BIT(15)
+#define MVPP2_GMAC_PORT_EN_MASK BIT(0)
+#define MVPP2_GMAC_PORT_TYPE_MASK BIT(1)
+#define MVPP2_GMAC_MAX_RX_SIZE_OFFS 2
+#define MVPP2_GMAC_MAX_RX_SIZE_MASK 0x7ffc
+#define MVPP2_GMAC_MIB_CNTR_EN_MASK BIT(15)
#define MVPP2_GMAC_CTRL_1_REG 0x4
-#define MVPP2_GMAC_PERIODIC_XON_EN_MASK BIT(1)
-#define MVPP2_GMAC_GMII_LB_EN_MASK BIT(5)
-#define MVPP2_GMAC_PCS_LB_EN_BIT 6
-#define MVPP2_GMAC_PCS_LB_EN_MASK BIT(6)
-#define MVPP2_GMAC_SA_LOW_OFFS 7
+#define MVPP2_GMAC_PERIODIC_XON_EN_MASK BIT(1)
+#define MVPP2_GMAC_GMII_LB_EN_MASK BIT(5)
+#define MVPP2_GMAC_PCS_LB_EN_BIT 6
+#define MVPP2_GMAC_PCS_LB_EN_MASK BIT(6)
+#define MVPP2_GMAC_SA_LOW_OFFS 7
#define MVPP2_GMAC_CTRL_2_REG 0x8
-#define MVPP2_GMAC_INBAND_AN_MASK BIT(0)
-#define MVPP2_GMAC_PCS_ENABLE_MASK BIT(3)
-#define MVPP2_GMAC_PORT_RGMII_MASK BIT(4)
-#define MVPP2_GMAC_PORT_RESET_MASK BIT(6)
+#define MVPP2_GMAC_INBAND_AN_MASK BIT(0)
+#define MVPP2_GMAC_FLOW_CTRL_MASK GENMASK(2, 1)
+#define MVPP2_GMAC_PCS_ENABLE_MASK BIT(3)
+#define MVPP2_GMAC_PORT_RGMII_MASK BIT(4)
+#define MVPP2_GMAC_DISABLE_PADDING BIT(5)
+#define MVPP2_GMAC_PORT_RESET_MASK BIT(6)
#define MVPP2_GMAC_AUTONEG_CONFIG 0xc
-#define MVPP2_GMAC_FORCE_LINK_DOWN BIT(0)
-#define MVPP2_GMAC_FORCE_LINK_PASS BIT(1)
-#define MVPP2_GMAC_CONFIG_MII_SPEED BIT(5)
-#define MVPP2_GMAC_CONFIG_GMII_SPEED BIT(6)
-#define MVPP2_GMAC_AN_SPEED_EN BIT(7)
-#define MVPP2_GMAC_FC_ADV_EN BIT(9)
-#define MVPP2_GMAC_CONFIG_FULL_DUPLEX BIT(12)
-#define MVPP2_GMAC_AN_DUPLEX_EN BIT(13)
+#define MVPP2_GMAC_FORCE_LINK_DOWN BIT(0)
+#define MVPP2_GMAC_FORCE_LINK_PASS BIT(1)
+#define MVPP2_GMAC_IN_BAND_AUTONEG BIT(2)
+#define MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS BIT(3)
+#define MVPP2_GMAC_CONFIG_MII_SPEED BIT(5)
+#define MVPP2_GMAC_CONFIG_GMII_SPEED BIT(6)
+#define MVPP2_GMAC_AN_SPEED_EN BIT(7)
+#define MVPP2_GMAC_FC_ADV_EN BIT(9)
+#define MVPP2_GMAC_FLOW_CTRL_AUTONEG BIT(11)
+#define MVPP2_GMAC_CONFIG_FULL_DUPLEX BIT(12)
+#define MVPP2_GMAC_AN_DUPLEX_EN BIT(13)
+#define MVPP2_GMAC_STATUS0 0x10
+#define MVPP2_GMAC_STATUS0_LINK_UP BIT(0)
#define MVPP2_GMAC_PORT_FIFO_CFG_1_REG 0x1c
-#define MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS 6
-#define MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK 0x1fc0
-#define MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(v) (((v) << 6) & \
+#define MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS 6
+#define MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK 0x1fc0
+#define MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(v) (((v) << 6) & \
MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK)
+#define MVPP22_GMAC_INT_STAT 0x20
+#define MVPP22_GMAC_INT_STAT_LINK BIT(1)
+#define MVPP22_GMAC_INT_MASK 0x24
+#define MVPP22_GMAC_INT_MASK_LINK_STAT BIT(1)
#define MVPP22_GMAC_CTRL_4_REG 0x90
-#define MVPP22_CTRL4_EXT_PIN_GMII_SEL BIT(0)
-#define MVPP22_CTRL4_DP_CLK_SEL BIT(5)
-#define MVPP22_CTRL4_SYNC_BYPASS BIT(6)
-#define MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE BIT(7)
+#define MVPP22_CTRL4_EXT_PIN_GMII_SEL BIT(0)
+#define MVPP22_CTRL4_DP_CLK_SEL BIT(5)
+#define MVPP22_CTRL4_SYNC_BYPASS_DIS BIT(6)
+#define MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE BIT(7)
+#define MVPP22_GMAC_INT_SUM_MASK 0xa4
+#define MVPP22_GMAC_INT_SUM_MASK_LINK_STAT BIT(1)
/* Per-port XGMAC registers. PPv2.2 only, only for GOP port 0,
* relative to port->base.
*/
#define MVPP22_XLG_CTRL0_REG 0x100
-#define MVPP22_XLG_CTRL0_PORT_EN BIT(0)
-#define MVPP22_XLG_CTRL0_MAC_RESET_DIS BIT(1)
-#define MVPP22_XLG_CTRL0_MIB_CNT_DIS BIT(14)
-
+#define MVPP22_XLG_CTRL0_PORT_EN BIT(0)
+#define MVPP22_XLG_CTRL0_MAC_RESET_DIS BIT(1)
+#define MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN BIT(7)
+#define MVPP22_XLG_CTRL0_MIB_CNT_DIS BIT(14)
+#define MVPP22_XLG_CTRL1_REG 0x104
+#define MVPP22_XLG_CTRL1_FRAMESIZELIMIT_OFFS 0
+#define MVPP22_XLG_CTRL1_FRAMESIZELIMIT_MASK 0x1fff
+#define MVPP22_XLG_STATUS 0x10c
+#define MVPP22_XLG_STATUS_LINK_UP BIT(0)
+#define MVPP22_XLG_INT_STAT 0x114
+#define MVPP22_XLG_INT_STAT_LINK BIT(1)
+#define MVPP22_XLG_INT_MASK 0x118
+#define MVPP22_XLG_INT_MASK_LINK BIT(1)
#define MVPP22_XLG_CTRL3_REG 0x11c
-#define MVPP22_XLG_CTRL3_MACMODESELECT_MASK (7 << 13)
-#define MVPP22_XLG_CTRL3_MACMODESELECT_GMAC (0 << 13)
-#define MVPP22_XLG_CTRL3_MACMODESELECT_10G (1 << 13)
+#define MVPP22_XLG_CTRL3_MACMODESELECT_MASK (7 << 13)
+#define MVPP22_XLG_CTRL3_MACMODESELECT_GMAC (0 << 13)
+#define MVPP22_XLG_CTRL3_MACMODESELECT_10G (1 << 13)
+#define MVPP22_XLG_EXT_INT_MASK 0x15c
+#define MVPP22_XLG_EXT_INT_MASK_XLG BIT(1)
+#define MVPP22_XLG_EXT_INT_MASK_GIG BIT(2)
+#define MVPP22_XLG_CTRL4_REG 0x184
+#define MVPP22_XLG_CTRL4_FWD_FC BIT(5)
+#define MVPP22_XLG_CTRL4_FWD_PFC BIT(6)
+#define MVPP22_XLG_CTRL4_MACMODSELECT_GMAC BIT(12)
/* SMI registers. PPv2.2 only, relative to priv->iface_base. */
#define MVPP22_SMI_MISC_CFG_REG 0x1204
-#define MVPP22_SMI_POLLING_EN BIT(10)
+#define MVPP22_SMI_POLLING_EN BIT(10)
#define MVPP22_GMAC_BASE(port) (0x7000 + (port) * 0x1000 + 0xe00)
@@ -367,11 +408,44 @@
#define MVPP2_QUEUE_NEXT_DESC(q, index) \
(((index) < (q)->last_desc) ? ((index) + 1) : 0)
+/* MPCS registers. PPv2.2 only */
+#define MVPP22_MPCS_BASE(port) (0x7000 + (port) * 0x1000)
+#define MVPP22_MPCS_CTRL 0x14
+#define MVPP22_MPCS_CTRL_FWD_ERR_CONN BIT(10)
+#define MVPP22_MPCS_CLK_RESET 0x14c
+#define MAC_CLK_RESET_SD_TX BIT(0)
+#define MAC_CLK_RESET_SD_RX BIT(1)
+#define MAC_CLK_RESET_MAC BIT(2)
+#define MVPP22_MPCS_CLK_RESET_DIV_RATIO(n) ((n) << 4)
+#define MVPP22_MPCS_CLK_RESET_DIV_SET BIT(11)
+
+/* XPCS registers. PPv2.2 only */
+#define MVPP22_XPCS_BASE(port) (0x7400 + (port) * 0x1000)
+#define MVPP22_XPCS_CFG0 0x0
+#define MVPP22_XPCS_CFG0_PCS_MODE(n) ((n) << 3)
+#define MVPP22_XPCS_CFG0_ACTIVE_LANE(n) ((n) << 5)
+
+/* System controller registers. Accessed through a regmap. */
+#define GENCONF_SOFT_RESET1 0x1108
+#define GENCONF_SOFT_RESET1_GOP BIT(6)
+#define GENCONF_PORT_CTRL0 0x1110
+#define GENCONF_PORT_CTRL0_BUS_WIDTH_SELECT BIT(1)
+#define GENCONF_PORT_CTRL0_RX_DATA_SAMPLE BIT(29)
+#define GENCONF_PORT_CTRL0_CLK_DIV_PHASE_CLR BIT(31)
+#define GENCONF_PORT_CTRL1 0x1114
+#define GENCONF_PORT_CTRL1_EN(p) BIT(p)
+#define GENCONF_PORT_CTRL1_RESET(p) (BIT(p) << 28)
+#define GENCONF_CTRL0 0x1120
+#define GENCONF_CTRL0_PORT0_RGMII BIT(0)
+#define GENCONF_CTRL0_PORT1_RGMII_MII BIT(1)
+#define GENCONF_CTRL0_PORT1_RGMII BIT(2)
+
/* Various constants */
/* Coalescing */
#define MVPP2_TXDONE_COAL_PKTS_THRESH 15
#define MVPP2_TXDONE_HRTIMER_PERIOD_NS 1000000UL
+#define MVPP2_TXDONE_COAL_USEC 1000
#define MVPP2_RX_COAL_PKTS 32
#define MVPP2_RX_COAL_USEC 100
@@ -685,7 +759,8 @@ enum mvpp2_prs_l3_cast {
#define MVPP21_ADDR_SPACE_SZ 0
#define MVPP22_ADDR_SPACE_SZ SZ_64K
-#define MVPP2_MAX_CPUS 4
+#define MVPP2_MAX_THREADS 8
+#define MVPP2_MAX_QVECS MVPP2_MAX_THREADS
enum mvpp2_bm_type {
MVPP2_BM_FREE,
@@ -701,11 +776,17 @@ struct mvpp2 {
void __iomem *lms_base;
void __iomem *iface_base;
- /* On PPv2.2, each CPU can access the base register through a
- * separate address space, each 64 KB apart from each
- * other.
+ /* On PPv2.2, each "software thread" can access the base
+ * register through a separate address space, each 64 KB apart
+ * from each other. Typically, such address spaces will be
+ * used per CPU.
+ */
+ void __iomem *swth_base[MVPP2_MAX_THREADS];
+
+ /* On PPv2.2, some port control registers are located in the system
+ * controller space. These registers are accessible through a regmap.
*/
- void __iomem *cpu_base[MVPP2_MAX_CPUS];
+ struct regmap *sysctrl_base;
/* Common clocks */
struct clk *pp_clk;
@@ -752,6 +833,18 @@ struct mvpp2_port_pcpu {
struct tasklet_struct tx_done_tasklet;
};
+struct mvpp2_queue_vector {
+ int irq;
+ struct napi_struct napi;
+ enum { MVPP2_QUEUE_VECTOR_SHARED, MVPP2_QUEUE_VECTOR_PRIVATE } type;
+ int sw_thread_id;
+ u16 sw_thread_mask;
+ int first_rxq;
+ int nrxqs;
+ u32 pending_cause_rx;
+ struct mvpp2_port *port;
+};
+
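+/* Editorial note: a PRIVATE vector handles the queues owned by a single
+ * software thread (typically one CPU), while a SHARED vector handles the
+ * RX queues that are not bound to any particular CPU. See
+ * mvpp2_multi_queue_vectors_init() below for how both kinds are set up.
+ */
+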
struct mvpp2_port {
u8 id;
@@ -760,7 +853,7 @@ struct mvpp2_port {
*/
int gop_id;
- int irq;
+ int link_irq;
struct mvpp2 *priv;
@@ -768,14 +861,13 @@ struct mvpp2_port {
void __iomem *base;
struct mvpp2_rx_queue **rxqs;
+ unsigned int nrxqs;
struct mvpp2_tx_queue **txqs;
+ unsigned int ntxqs;
struct net_device *dev;
int pkt_size;
- u32 pending_cause_rx;
- struct napi_struct napi;
-
/* Per-CPU port control */
struct mvpp2_port_pcpu __percpu *pcpu;
@@ -788,6 +880,7 @@ struct mvpp2_port {
phy_interface_t phy_interface;
struct device_node *phy_node;
+ struct phy *comphy;
unsigned int link;
unsigned int duplex;
unsigned int speed;
@@ -797,6 +890,12 @@ struct mvpp2_port {
/* Index of first port's physical RXQ */
u8 first_rxq;
+
+ struct mvpp2_queue_vector qvecs[MVPP2_MAX_QVECS];
+ unsigned int nqvecs;
+ bool has_tx_irqs;
+
+ u32 tx_time_coal;
};
/* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the
@@ -932,6 +1031,10 @@ struct mvpp2_txq_pcpu {
/* Index of the TX DMA descriptor to be cleaned up */
int txq_get_index;
+
+ /* DMA buffer for TSO headers */
+ char *tso_headers;
+ dma_addr_t tso_headers_dma;
};
struct mvpp2_tx_queue {
@@ -1062,12 +1165,14 @@ struct mvpp2_bm_pool {
u32 port_map;
};
-/* Static declaractions */
+/* Queue modes */
+#define MVPP2_QDIST_SINGLE_MODE 0
+#define MVPP2_QDIST_MULTI_MODE 1
-/* Number of RXQs used by single port */
-static int rxq_number = MVPP2_DEFAULT_RXQ;
-/* Number of TXQs used by single port */
-static int txq_number = MVPP2_MAX_TXQ;
+static int queue_mode = MVPP2_QDIST_SINGLE_MODE;
+
+module_param(queue_mode, int, 0444);
+MODULE_PARM_DESC(queue_mode, "Set queue_mode (single=0, multi=1)");
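+
+/* Illustrative usage (assumed, not part of the driver itself): the queue
+ * distribution mode can be selected at module load time, e.g.
+ *
+ *   modprobe mvpp2 queue_mode=1
+ *
+ * for multi mode. The 0444 permission exposes the value read-only under
+ * /sys/module/mvpp2/parameters/queue_mode.
+ */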
#define MVPP2_DRIVER_NAME "mvpp2"
#define MVPP2_DRIVER_VERSION "1.0"
@@ -1076,12 +1181,12 @@ static int txq_number = MVPP2_MAX_TXQ;
static void mvpp2_write(struct mvpp2 *priv, u32 offset, u32 data)
{
- writel(data, priv->cpu_base[0] + offset);
+ writel(data, priv->swth_base[0] + offset);
}
static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
{
- return readl(priv->cpu_base[0] + offset);
+ return readl(priv->swth_base[0] + offset);
}
/* These accessors should be used to access:
@@ -1123,13 +1228,13 @@ static u32 mvpp2_read(struct mvpp2 *priv, u32 offset)
static void mvpp2_percpu_write(struct mvpp2 *priv, int cpu,
u32 offset, u32 data)
{
- writel(data, priv->cpu_base[cpu] + offset);
+ writel(data, priv->swth_base[cpu] + offset);
}
static u32 mvpp2_percpu_read(struct mvpp2 *priv, int cpu,
u32 offset)
{
- return readl(priv->cpu_base[cpu] + offset);
+ return readl(priv->swth_base[cpu] + offset);
}
static dma_addr_t mvpp2_txdesc_dma_addr_get(struct mvpp2_port *port,
@@ -4070,7 +4175,7 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
port->pool_long->port_map |= (1 << port->id);
- for (rxq = 0; rxq < rxq_number; rxq++)
+ for (rxq = 0; rxq < port->nrxqs; rxq++)
mvpp2_rxq_long_pool_set(port, rxq, port->pool_long->id);
}
@@ -4084,7 +4189,7 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
port->pool_short->port_map |= (1 << port->id);
- for (rxq = 0; rxq < rxq_number; rxq++)
+ for (rxq = 0; rxq < port->nrxqs; rxq++)
mvpp2_rxq_short_pool_set(port, rxq,
port->pool_short->id);
}
@@ -4125,22 +4230,40 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
static inline void mvpp2_interrupts_enable(struct mvpp2_port *port)
{
- int cpu, cpu_mask = 0;
+ int i, sw_thread_mask = 0;
+
+ for (i = 0; i < port->nqvecs; i++)
+ sw_thread_mask |= port->qvecs[i].sw_thread_mask;
- for_each_present_cpu(cpu)
- cpu_mask |= 1 << cpu;
mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id),
- MVPP2_ISR_ENABLE_INTERRUPT(cpu_mask));
+ MVPP2_ISR_ENABLE_INTERRUPT(sw_thread_mask));
}
static inline void mvpp2_interrupts_disable(struct mvpp2_port *port)
{
- int cpu, cpu_mask = 0;
+ int i, sw_thread_mask = 0;
+
+ for (i = 0; i < port->nqvecs; i++)
+ sw_thread_mask |= port->qvecs[i].sw_thread_mask;
- for_each_present_cpu(cpu)
- cpu_mask |= 1 << cpu;
mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id),
- MVPP2_ISR_DISABLE_INTERRUPT(cpu_mask));
+ MVPP2_ISR_DISABLE_INTERRUPT(sw_thread_mask));
+}
+
+static inline void mvpp2_qvec_interrupt_enable(struct mvpp2_queue_vector *qvec)
+{
+ struct mvpp2_port *port = qvec->port;
+
+ mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id),
+ MVPP2_ISR_ENABLE_INTERRUPT(qvec->sw_thread_mask));
+}
+
+static inline void mvpp2_qvec_interrupt_disable(struct mvpp2_queue_vector *qvec)
+{
+ struct mvpp2_port *port = qvec->port;
+
+ mvpp2_write(port->priv, MVPP2_ISR_ENABLE_REG(port->id),
+ MVPP2_ISR_DISABLE_INTERRUPT(qvec->sw_thread_mask));
}
/* Mask the current CPU's Rx/Tx interrupts
@@ -4162,15 +4285,346 @@ static void mvpp2_interrupts_mask(void *arg)
static void mvpp2_interrupts_unmask(void *arg)
{
struct mvpp2_port *port = arg;
+ u32 val;
+
+ val = MVPP2_CAUSE_MISC_SUM_MASK |
+ MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
+ if (port->has_tx_irqs)
+ val |= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
mvpp2_percpu_write(port->priv, smp_processor_id(),
- MVPP2_ISR_RX_TX_MASK_REG(port->id),
- (MVPP2_CAUSE_MISC_SUM_MASK |
- MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK));
+ MVPP2_ISR_RX_TX_MASK_REG(port->id), val);
+}
+
+static void
+mvpp2_shared_interrupt_mask_unmask(struct mvpp2_port *port, bool mask)
+{
+ u32 val;
+ int i;
+
+ if (port->priv->hw_version != MVPP22)
+ return;
+
+ if (mask)
+ val = 0;
+ else
+ val = MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
+
+ for (i = 0; i < port->nqvecs; i++) {
+ struct mvpp2_queue_vector *v = port->qvecs + i;
+
+ if (v->type != MVPP2_QUEUE_VECTOR_SHARED)
+ continue;
+
+ mvpp2_percpu_write(port->priv, v->sw_thread_id,
+ MVPP2_ISR_RX_TX_MASK_REG(port->id), val);
+ }
}
/* Port configuration routines */
+static void mvpp22_gop_init_rgmii(struct mvpp2_port *port)
+{
+ struct mvpp2 *priv = port->priv;
+ u32 val;
+
+ regmap_read(priv->sysctrl_base, GENCONF_PORT_CTRL0, &val);
+ val |= GENCONF_PORT_CTRL0_BUS_WIDTH_SELECT;
+ regmap_write(priv->sysctrl_base, GENCONF_PORT_CTRL0, val);
+
+ regmap_read(priv->sysctrl_base, GENCONF_CTRL0, &val);
+ if (port->gop_id == 2)
+ val |= GENCONF_CTRL0_PORT0_RGMII | GENCONF_CTRL0_PORT1_RGMII;
+ else if (port->gop_id == 3)
+ val |= GENCONF_CTRL0_PORT1_RGMII_MII;
+ regmap_write(priv->sysctrl_base, GENCONF_CTRL0, val);
+}
+
+static void mvpp22_gop_init_sgmii(struct mvpp2_port *port)
+{
+ struct mvpp2 *priv = port->priv;
+ u32 val;
+
+ regmap_read(priv->sysctrl_base, GENCONF_PORT_CTRL0, &val);
+ val |= GENCONF_PORT_CTRL0_BUS_WIDTH_SELECT |
+ GENCONF_PORT_CTRL0_RX_DATA_SAMPLE;
+ regmap_write(priv->sysctrl_base, GENCONF_PORT_CTRL0, val);
+
+ if (port->gop_id > 1) {
+ regmap_read(priv->sysctrl_base, GENCONF_CTRL0, &val);
+ if (port->gop_id == 2)
+ val &= ~GENCONF_CTRL0_PORT0_RGMII;
+ else if (port->gop_id == 3)
+ val &= ~GENCONF_CTRL0_PORT1_RGMII_MII;
+ regmap_write(priv->sysctrl_base, GENCONF_CTRL0, val);
+ }
+}
+
+static void mvpp22_gop_init_10gkr(struct mvpp2_port *port)
+{
+ struct mvpp2 *priv = port->priv;
+ void __iomem *mpcs = priv->iface_base + MVPP22_MPCS_BASE(port->gop_id);
+ void __iomem *xpcs = priv->iface_base + MVPP22_XPCS_BASE(port->gop_id);
+ u32 val;
+
+ /* XPCS */
+ val = readl(xpcs + MVPP22_XPCS_CFG0);
+ val &= ~(MVPP22_XPCS_CFG0_PCS_MODE(0x3) |
+ MVPP22_XPCS_CFG0_ACTIVE_LANE(0x3));
+ val |= MVPP22_XPCS_CFG0_ACTIVE_LANE(2);
+ writel(val, xpcs + MVPP22_XPCS_CFG0);
+
+ /* MPCS */
+ val = readl(mpcs + MVPP22_MPCS_CTRL);
+ val &= ~MVPP22_MPCS_CTRL_FWD_ERR_CONN;
+ writel(val, mpcs + MVPP22_MPCS_CTRL);
+
+ val = readl(mpcs + MVPP22_MPCS_CLK_RESET);
+ val &= ~(MVPP22_MPCS_CLK_RESET_DIV_RATIO(0x7) | MAC_CLK_RESET_MAC |
+ MAC_CLK_RESET_SD_RX | MAC_CLK_RESET_SD_TX);
+ val |= MVPP22_MPCS_CLK_RESET_DIV_RATIO(1);
+ writel(val, mpcs + MVPP22_MPCS_CLK_RESET);
+
+ val &= ~MVPP22_MPCS_CLK_RESET_DIV_SET;
+ val |= MAC_CLK_RESET_MAC | MAC_CLK_RESET_SD_RX | MAC_CLK_RESET_SD_TX;
+ writel(val, mpcs + MVPP22_MPCS_CLK_RESET);
+}
+
+static int mvpp22_gop_init(struct mvpp2_port *port)
+{
+ struct mvpp2 *priv = port->priv;
+ u32 val;
+
+ if (!priv->sysctrl_base)
+ return 0;
+
+ switch (port->phy_interface) {
+ case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+ if (port->gop_id == 0)
+ goto invalid_conf;
+ mvpp22_gop_init_rgmii(port);
+ break;
+ case PHY_INTERFACE_MODE_SGMII:
+ mvpp22_gop_init_sgmii(port);
+ break;
+ case PHY_INTERFACE_MODE_10GKR:
+ if (port->gop_id != 0)
+ goto invalid_conf;
+ mvpp22_gop_init_10gkr(port);
+ break;
+ default:
+ goto unsupported_conf;
+ }
+
+ regmap_read(priv->sysctrl_base, GENCONF_PORT_CTRL1, &val);
+ val |= GENCONF_PORT_CTRL1_RESET(port->gop_id) |
+ GENCONF_PORT_CTRL1_EN(port->gop_id);
+ regmap_write(priv->sysctrl_base, GENCONF_PORT_CTRL1, val);
+
+ regmap_read(priv->sysctrl_base, GENCONF_PORT_CTRL0, &val);
+ val |= GENCONF_PORT_CTRL0_CLK_DIV_PHASE_CLR;
+ regmap_write(priv->sysctrl_base, GENCONF_PORT_CTRL0, val);
+
+ regmap_read(priv->sysctrl_base, GENCONF_SOFT_RESET1, &val);
+ val |= GENCONF_SOFT_RESET1_GOP;
+ regmap_write(priv->sysctrl_base, GENCONF_SOFT_RESET1, val);
+
+unsupported_conf:
+ return 0;
+
+invalid_conf:
+ netdev_err(port->dev, "Invalid port configuration\n");
+ return -EINVAL;
+}
+
+static void mvpp22_gop_unmask_irq(struct mvpp2_port *port)
+{
+ u32 val;
+
+ if (phy_interface_mode_is_rgmii(port->phy_interface) ||
+ port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+ /* Enable the GMAC link status irq for this port */
+ val = readl(port->base + MVPP22_GMAC_INT_SUM_MASK);
+ val |= MVPP22_GMAC_INT_SUM_MASK_LINK_STAT;
+ writel(val, port->base + MVPP22_GMAC_INT_SUM_MASK);
+ }
+
+ if (port->gop_id == 0) {
+ /* Enable the XLG/GIG irqs for this port */
+ val = readl(port->base + MVPP22_XLG_EXT_INT_MASK);
+ if (port->phy_interface == PHY_INTERFACE_MODE_10GKR)
+ val |= MVPP22_XLG_EXT_INT_MASK_XLG;
+ else
+ val |= MVPP22_XLG_EXT_INT_MASK_GIG;
+ writel(val, port->base + MVPP22_XLG_EXT_INT_MASK);
+ }
+}
+
+static void mvpp22_gop_mask_irq(struct mvpp2_port *port)
+{
+ u32 val;
+
+ if (port->gop_id == 0) {
+ val = readl(port->base + MVPP22_XLG_EXT_INT_MASK);
+ val &= ~(MVPP22_XLG_EXT_INT_MASK_XLG |
+ MVPP22_XLG_EXT_INT_MASK_GIG);
+ writel(val, port->base + MVPP22_XLG_EXT_INT_MASK);
+ }
+
+ if (phy_interface_mode_is_rgmii(port->phy_interface) ||
+ port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+ val = readl(port->base + MVPP22_GMAC_INT_SUM_MASK);
+ val &= ~MVPP22_GMAC_INT_SUM_MASK_LINK_STAT;
+ writel(val, port->base + MVPP22_GMAC_INT_SUM_MASK);
+ }
+}
+
+static void mvpp22_gop_setup_irq(struct mvpp2_port *port)
+{
+ u32 val;
+
+ if (phy_interface_mode_is_rgmii(port->phy_interface) ||
+ port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+ val = readl(port->base + MVPP22_GMAC_INT_MASK);
+ val |= MVPP22_GMAC_INT_MASK_LINK_STAT;
+ writel(val, port->base + MVPP22_GMAC_INT_MASK);
+ }
+
+ if (port->gop_id == 0) {
+ val = readl(port->base + MVPP22_XLG_INT_MASK);
+ val |= MVPP22_XLG_INT_MASK_LINK;
+ writel(val, port->base + MVPP22_XLG_INT_MASK);
+ }
+
+ mvpp22_gop_unmask_irq(port);
+}
+
+static int mvpp22_comphy_init(struct mvpp2_port *port)
+{
+ enum phy_mode mode;
+ int ret;
+
+ if (!port->comphy)
+ return 0;
+
+ switch (port->phy_interface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ mode = PHY_MODE_SGMII;
+ break;
+ case PHY_INTERFACE_MODE_10GKR:
+ mode = PHY_MODE_10GKR;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = phy_set_mode(port->comphy, mode);
+ if (ret)
+ return ret;
+
+ return phy_power_on(port->comphy);
+}
+
+static void mvpp2_port_mii_gmac_configure_mode(struct mvpp2_port *port)
+{
+ u32 val;
+
+ if (port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+ val = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
+ val |= MVPP22_CTRL4_SYNC_BYPASS_DIS | MVPP22_CTRL4_DP_CLK_SEL |
+ MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
+ val &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL;
+ writel(val, port->base + MVPP22_GMAC_CTRL_4_REG);
+
+ val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
+ val |= MVPP2_GMAC_DISABLE_PADDING;
+ val &= ~MVPP2_GMAC_FLOW_CTRL_MASK;
+ writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
+ } else if (phy_interface_mode_is_rgmii(port->phy_interface)) {
+ val = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
+ val |= MVPP22_CTRL4_EXT_PIN_GMII_SEL |
+ MVPP22_CTRL4_SYNC_BYPASS_DIS |
+ MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
+ val &= ~MVPP22_CTRL4_DP_CLK_SEL;
+ writel(val, port->base + MVPP22_GMAC_CTRL_4_REG);
+
+ val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
+ val &= ~MVPP2_GMAC_DISABLE_PADDING;
+ writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
+ }
+
+ /* The port is connected to a copper PHY */
+ val = readl(port->base + MVPP2_GMAC_CTRL_0_REG);
+ val &= ~MVPP2_GMAC_PORT_TYPE_MASK;
+ writel(val, port->base + MVPP2_GMAC_CTRL_0_REG);
+
+ val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ val |= MVPP2_GMAC_IN_BAND_AUTONEG_BYPASS |
+ MVPP2_GMAC_AN_SPEED_EN | MVPP2_GMAC_FLOW_CTRL_AUTONEG |
+ MVPP2_GMAC_AN_DUPLEX_EN;
+ if (port->phy_interface == PHY_INTERFACE_MODE_SGMII)
+ val |= MVPP2_GMAC_IN_BAND_AUTONEG;
+ writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+}
+
+static void mvpp2_port_mii_gmac_configure(struct mvpp2_port *port)
+{
+ u32 val;
+
+ /* Force link down */
+ val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ val &= ~MVPP2_GMAC_FORCE_LINK_PASS;
+ val |= MVPP2_GMAC_FORCE_LINK_DOWN;
+ writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+
+ /* Set the GMAC in a reset state */
+ val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
+ val |= MVPP2_GMAC_PORT_RESET_MASK;
+ writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
+
+ /* Configure the PCS and in-band AN */
+ val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
+ if (port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+ val |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK;
+ } else if (phy_interface_mode_is_rgmii(port->phy_interface)) {
+ val &= ~MVPP2_GMAC_PCS_ENABLE_MASK;
+ val |= MVPP2_GMAC_PORT_RGMII_MASK;
+ }
+ writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
+
+ mvpp2_port_mii_gmac_configure_mode(port);
+
+ /* Unset the GMAC reset state */
+ val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
+ val &= ~MVPP2_GMAC_PORT_RESET_MASK;
+ writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
+
+ /* Stop forcing link down */
+ val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ val &= ~MVPP2_GMAC_FORCE_LINK_DOWN;
+ writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+}
+
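+/* Editorial summary of the sequence above:
+ *
+ *   force link down -> hold GMAC in reset -> program PCS/in-band AN
+ *   -> release reset -> stop forcing link down
+ *
+ * keeping the MAC quiescent while its mode bits change.
+ */
+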
+static void mvpp2_port_mii_xlg_configure(struct mvpp2_port *port)
+{
+ u32 val;
+
+ if (port->gop_id != 0)
+ return;
+
+ val = readl(port->base + MVPP22_XLG_CTRL0_REG);
+ val |= MVPP22_XLG_CTRL0_RX_FLOW_CTRL_EN;
+ writel(val, port->base + MVPP22_XLG_CTRL0_REG);
+
+ val = readl(port->base + MVPP22_XLG_CTRL4_REG);
+ val &= ~MVPP22_XLG_CTRL4_MACMODSELECT_GMAC;
+ val |= MVPP22_XLG_CTRL4_FWD_FC | MVPP22_XLG_CTRL4_FWD_PFC;
+ writel(val, port->base + MVPP22_XLG_CTRL4_REG);
+}
+
static void mvpp22_port_mii_set(struct mvpp2_port *port)
{
u32 val;
@@ -4188,38 +4642,18 @@ static void mvpp22_port_mii_set(struct mvpp2_port *port)
writel(val, port->base + MVPP22_XLG_CTRL3_REG);
}
-
- val = readl(port->base + MVPP22_GMAC_CTRL_4_REG);
- if (port->phy_interface == PHY_INTERFACE_MODE_RGMII)
- val |= MVPP22_CTRL4_EXT_PIN_GMII_SEL;
- else
- val &= ~MVPP22_CTRL4_EXT_PIN_GMII_SEL;
- val &= ~MVPP22_CTRL4_DP_CLK_SEL;
- val |= MVPP22_CTRL4_SYNC_BYPASS;
- val |= MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE;
- writel(val, port->base + MVPP22_GMAC_CTRL_4_REG);
}
static void mvpp2_port_mii_set(struct mvpp2_port *port)
{
- u32 val;
-
if (port->priv->hw_version == MVPP22)
mvpp22_port_mii_set(port);
- val = readl(port->base + MVPP2_GMAC_CTRL_2_REG);
-
- switch (port->phy_interface) {
- case PHY_INTERFACE_MODE_SGMII:
- val |= MVPP2_GMAC_INBAND_AN_MASK;
- break;
- case PHY_INTERFACE_MODE_RGMII:
- val |= MVPP2_GMAC_PORT_RGMII_MASK;
- default:
- val &= ~MVPP2_GMAC_PCS_ENABLE_MASK;
- }
-
- writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
+ if (phy_interface_mode_is_rgmii(port->phy_interface) ||
+ port->phy_interface == PHY_INTERFACE_MODE_SGMII)
+ mvpp2_port_mii_gmac_configure(port);
+ else if (port->phy_interface == PHY_INTERFACE_MODE_10GKR)
+ mvpp2_port_mii_xlg_configure(port);
}
static void mvpp2_port_fc_adv_enable(struct mvpp2_port *port)
@@ -4326,6 +4760,18 @@ static inline void mvpp2_gmac_max_rx_size_set(struct mvpp2_port *port)
writel(val, port->base + MVPP2_GMAC_CTRL_0_REG);
}
+/* Change maximum receive size of the port */
+static inline void mvpp2_xlg_max_rx_size_set(struct mvpp2_port *port)
+{
+ u32 val;
+
+ val = readl(port->base + MVPP22_XLG_CTRL1_REG);
+ val &= ~MVPP22_XLG_CTRL1_FRAMESIZELIMIT_MASK;
+ val |= ((port->pkt_size - MVPP2_MH_SIZE) / 2) <<
+ MVPP22_XLG_CTRL1_FRAMESIZELIMIT_OFFS;
+ writel(val, port->base + MVPP22_XLG_CTRL1_REG);
+}
+
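+/* Editorial assumption: the division by two above suggests the XLG
+ * FRAMESIZELIMIT field counts 16-bit units rather than bytes, which fits
+ * the 0x1fff mask still covering jumbo-sized frames.
+ */
+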
/* Set defaults to the MVPP2 port */
static void mvpp2_defaults_set(struct mvpp2_port *port)
{
@@ -4376,7 +4822,7 @@ static void mvpp2_defaults_set(struct mvpp2_port *port)
MVPP2_RX_LOW_LATENCY_PKT_SIZE(256));
/* Enable Rx cache snoop */
- for (lrxq = 0; lrxq < rxq_number; lrxq++) {
+ for (lrxq = 0; lrxq < port->nrxqs; lrxq++) {
queue = port->rxqs[lrxq]->id;
val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(queue));
val |= MVPP2_SNOOP_PKT_SIZE_MASK |
@@ -4394,7 +4840,7 @@ static void mvpp2_ingress_enable(struct mvpp2_port *port)
u32 val;
int lrxq, queue;
- for (lrxq = 0; lrxq < rxq_number; lrxq++) {
+ for (lrxq = 0; lrxq < port->nrxqs; lrxq++) {
queue = port->rxqs[lrxq]->id;
val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(queue));
val &= ~MVPP2_RXQ_DISABLE_MASK;
@@ -4407,7 +4853,7 @@ static void mvpp2_ingress_disable(struct mvpp2_port *port)
u32 val;
int lrxq, queue;
- for (lrxq = 0; lrxq < rxq_number; lrxq++) {
+ for (lrxq = 0; lrxq < port->nrxqs; lrxq++) {
queue = port->rxqs[lrxq]->id;
val = mvpp2_read(port->priv, MVPP2_RXQ_CONFIG_REG(queue));
val |= MVPP2_RXQ_DISABLE_MASK;
@@ -4426,7 +4872,7 @@ static void mvpp2_egress_enable(struct mvpp2_port *port)
/* Enable all initialized TXs. */
qmap = 0;
- for (queue = 0; queue < txq_number; queue++) {
+ for (queue = 0; queue < port->ntxqs; queue++) {
struct mvpp2_tx_queue *txq = port->txqs[queue];
if (txq->descs)
@@ -4712,7 +5158,7 @@ static void mvpp2_txq_sent_counter_clear(void *arg)
struct mvpp2_port *port = arg;
int queue;
- for (queue = 0; queue < txq_number; queue++) {
+ for (queue = 0; queue < port->ntxqs; queue++) {
int id = port->txqs[queue]->id;
mvpp2_percpu_read(port->priv, smp_processor_id(),
@@ -4753,7 +5199,7 @@ static void mvpp2_txp_max_tx_size_set(struct mvpp2_port *port)
mvpp2_write(port->priv, MVPP2_TXP_SCHED_TOKEN_SIZE_REG, val);
}
- for (txq = 0; txq < txq_number; txq++) {
+ for (txq = 0; txq < port->ntxqs; txq++) {
val = mvpp2_read(port->priv,
MVPP2_TXQ_SCHED_TOKEN_SIZE_REG(txq));
size = val & MVPP2_TXQ_TOKEN_SIZE_MAX;
@@ -4787,6 +5233,23 @@ static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port,
put_cpu();
}
+/* For some reason the LSP does this on each CPU; it is not clear why. */
+static void mvpp2_tx_pkts_coal_set(struct mvpp2_port *port,
+ struct mvpp2_tx_queue *txq)
+{
+ int cpu = get_cpu();
+ u32 val;
+
+ if (txq->done_pkts_coal > MVPP2_TXQ_THRESH_MASK)
+ txq->done_pkts_coal = MVPP2_TXQ_THRESH_MASK;
+
+ val = (txq->done_pkts_coal << MVPP2_TXQ_THRESH_OFFSET);
+ mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_NUM_REG, txq->id);
+ mvpp2_percpu_write(port->priv, cpu, MVPP2_TXQ_THRESH_REG, val);
+
+ put_cpu();
+}
+
static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz)
{
u64 tmp = (u64)clk_hz * usec;
@@ -4823,6 +5286,22 @@ static void mvpp2_rx_time_coal_set(struct mvpp2_port *port,
mvpp2_write(port->priv, MVPP2_ISR_RX_THRESHOLD_REG(rxq->id), val);
}
+static void mvpp2_tx_time_coal_set(struct mvpp2_port *port)
+{
+ unsigned long freq = port->priv->tclk;
+ u32 val = mvpp2_usec_to_cycles(port->tx_time_coal, freq);
+
+ if (val > MVPP2_MAX_ISR_TX_THRESHOLD) {
+ port->tx_time_coal =
+ mvpp2_cycles_to_usec(MVPP2_MAX_ISR_TX_THRESHOLD, freq);
+
+ /* re-evaluate to get actual register value */
+ val = mvpp2_usec_to_cycles(port->tx_time_coal, freq);
+ }
+
+ mvpp2_write(port->priv, MVPP2_ISR_TX_THRESHOLD_REG(port->id), val);
+}
+
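+/* Worked example (assuming a 250 MHz tclk for illustration): the default
+ * MVPP2_TXDONE_COAL_USEC of 1000 usec converts to
+ *
+ *   250000000 * 1000 / 1000000 = 250000 cycles
+ *
+ * which is then clamped to MVPP2_MAX_ISR_TX_THRESHOLD if it exceeds the
+ * field's range.
+ */
+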
/* Free Tx queue skbuffs */
static void mvpp2_txq_bufs_free(struct mvpp2_port *port,
struct mvpp2_tx_queue *txq,
@@ -4881,7 +5360,8 @@ static void mvpp2_txq_done(struct mvpp2_port *port, struct mvpp2_tx_queue *txq,
netif_tx_wake_queue(nq);
}
-static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause)
+static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause,
+ int cpu)
{
struct mvpp2_tx_queue *txq;
struct mvpp2_txq_pcpu *txq_pcpu;
@@ -4892,7 +5372,7 @@ static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause)
if (!txq)
break;
- txq_pcpu = this_cpu_ptr(txq->pcpu);
+ txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
if (txq_pcpu->count) {
mvpp2_txq_done(port, txq, txq_pcpu);
@@ -4908,15 +5388,14 @@ static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause)
/* Allocate and initialize descriptors for aggr TXQ */
static int mvpp2_aggr_txq_init(struct platform_device *pdev,
- struct mvpp2_tx_queue *aggr_txq,
- int desc_num, int cpu,
+ struct mvpp2_tx_queue *aggr_txq, int cpu,
struct mvpp2 *priv)
{
u32 txq_dma;
/* Allocate memory for TX descriptors */
aggr_txq->descs = dma_alloc_coherent(&pdev->dev,
- desc_num * MVPP2_DESC_ALIGNED_SIZE,
+ MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
&aggr_txq->descs_dma, GFP_KERNEL);
if (!aggr_txq->descs)
return -ENOMEM;
@@ -4937,7 +5416,8 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev,
MVPP22_AGGR_TXQ_DESC_ADDR_OFFS;
mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_ADDR_REG(cpu), txq_dma);
- mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu), desc_num);
+ mvpp2_write(priv, MVPP2_AGGR_TXQ_DESC_SIZE_REG(cpu),
+ MVPP2_AGGR_TXQ_SIZE);
return 0;
}
@@ -5118,6 +5598,14 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
txq_pcpu->reserved_num = 0;
txq_pcpu->txq_put_index = 0;
txq_pcpu->txq_get_index = 0;
+
+ txq_pcpu->tso_headers =
+ dma_alloc_coherent(port->dev->dev.parent,
+ MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE,
+ &txq_pcpu->tso_headers_dma,
+ GFP_KERNEL);
+ if (!txq_pcpu->tso_headers)
+ goto cleanup;
}
return 0;
@@ -5125,6 +5613,11 @@ cleanup:
for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
kfree(txq_pcpu->buffs);
+
+ dma_free_coherent(port->dev->dev.parent,
+ MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE,
+ txq_pcpu->tso_headers,
+ txq_pcpu->tso_headers_dma);
}
dma_free_coherent(port->dev->dev.parent,
@@ -5144,6 +5637,11 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
kfree(txq_pcpu->buffs);
+
+ dma_free_coherent(port->dev->dev.parent,
+ MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE,
+ txq_pcpu->tso_headers,
+ txq_pcpu->tso_headers_dma);
}
if (txq->descs)
@@ -5229,7 +5727,7 @@ static void mvpp2_cleanup_txqs(struct mvpp2_port *port)
val |= MVPP2_TX_PORT_FLUSH_MASK(port->id);
mvpp2_write(port->priv, MVPP2_TX_PORT_FLUSH_REG, val);
- for (queue = 0; queue < txq_number; queue++) {
+ for (queue = 0; queue < port->ntxqs; queue++) {
txq = port->txqs[queue];
mvpp2_txq_clean(port, txq);
mvpp2_txq_deinit(port, txq);
@@ -5246,7 +5744,7 @@ static void mvpp2_cleanup_rxqs(struct mvpp2_port *port)
{
int queue;
- for (queue = 0; queue < rxq_number; queue++)
+ for (queue = 0; queue < port->nrxqs; queue++)
mvpp2_rxq_deinit(port, port->rxqs[queue]);
}
@@ -5255,7 +5753,7 @@ static int mvpp2_setup_rxqs(struct mvpp2_port *port)
{
int queue, err;
- for (queue = 0; queue < rxq_number; queue++) {
+ for (queue = 0; queue < port->nrxqs; queue++) {
err = mvpp2_rxq_init(port, port->rxqs[queue]);
if (err)
goto err_cleanup;
@@ -5273,13 +5771,21 @@ static int mvpp2_setup_txqs(struct mvpp2_port *port)
struct mvpp2_tx_queue *txq;
int queue, err;
- for (queue = 0; queue < txq_number; queue++) {
+ for (queue = 0; queue < port->ntxqs; queue++) {
txq = port->txqs[queue];
err = mvpp2_txq_init(port, txq);
if (err)
goto err_cleanup;
}
+ if (port->has_tx_irqs) {
+ mvpp2_tx_time_coal_set(port);
+ for (queue = 0; queue < port->ntxqs; queue++) {
+ txq = port->txqs[queue];
+ mvpp2_tx_pkts_coal_set(port, txq);
+ }
+ }
+
on_each_cpu(mvpp2_txq_sent_counter_clear, port, 1);
return 0;
@@ -5291,72 +5797,170 @@ err_cleanup:
/* The callback for per-port interrupt */
static irqreturn_t mvpp2_isr(int irq, void *dev_id)
{
+ struct mvpp2_queue_vector *qv = dev_id;
+
+ mvpp2_qvec_interrupt_disable(qv);
+
+ napi_schedule(&qv->napi);
+
+ return IRQ_HANDLED;
+}
+
+/* Per-port interrupt for link status changes */
+static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id)
+{
struct mvpp2_port *port = (struct mvpp2_port *)dev_id;
+ struct net_device *dev = port->dev;
+ bool event = false, link = false;
+ u32 val;
- mvpp2_interrupts_disable(port);
+ mvpp22_gop_mask_irq(port);
- napi_schedule(&port->napi);
+ if (port->gop_id == 0 &&
+ port->phy_interface == PHY_INTERFACE_MODE_10GKR) {
+ val = readl(port->base + MVPP22_XLG_INT_STAT);
+ if (val & MVPP22_XLG_INT_STAT_LINK) {
+ event = true;
+ val = readl(port->base + MVPP22_XLG_STATUS);
+ if (val & MVPP22_XLG_STATUS_LINK_UP)
+ link = true;
+ }
+ } else if (phy_interface_mode_is_rgmii(port->phy_interface) ||
+ port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+ val = readl(port->base + MVPP22_GMAC_INT_STAT);
+ if (val & MVPP22_GMAC_INT_STAT_LINK) {
+ event = true;
+ val = readl(port->base + MVPP2_GMAC_STATUS0);
+ if (val & MVPP2_GMAC_STATUS0_LINK_UP)
+ link = true;
+ }
+ }
+
+ if (!netif_running(dev) || !event)
+ goto handled;
+
+ if (link) {
+ mvpp2_interrupts_enable(port);
+ mvpp2_egress_enable(port);
+ mvpp2_ingress_enable(port);
+ netif_carrier_on(dev);
+ netif_tx_wake_all_queues(dev);
+ } else {
+ netif_tx_stop_all_queues(dev);
+ netif_carrier_off(dev);
+ mvpp2_ingress_disable(port);
+ mvpp2_egress_disable(port);
+
+ mvpp2_interrupts_disable(port);
+ }
+
+handled:
+ mvpp22_gop_unmask_irq(port);
return IRQ_HANDLED;
}
+static void mvpp2_gmac_set_autoneg(struct mvpp2_port *port,
+ struct phy_device *phydev)
+{
+ u32 val;
+
+ if (port->phy_interface != PHY_INTERFACE_MODE_RGMII &&
+ port->phy_interface != PHY_INTERFACE_MODE_RGMII_ID &&
+ port->phy_interface != PHY_INTERFACE_MODE_RGMII_RXID &&
+ port->phy_interface != PHY_INTERFACE_MODE_RGMII_TXID &&
+ port->phy_interface != PHY_INTERFACE_MODE_SGMII)
+ return;
+
+ val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED |
+ MVPP2_GMAC_CONFIG_GMII_SPEED |
+ MVPP2_GMAC_CONFIG_FULL_DUPLEX |
+ MVPP2_GMAC_AN_SPEED_EN |
+ MVPP2_GMAC_AN_DUPLEX_EN);
+
+ if (phydev->duplex)
+ val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
+
+ if (phydev->speed == SPEED_1000)
+ val |= MVPP2_GMAC_CONFIG_GMII_SPEED;
+ else if (phydev->speed == SPEED_100)
+ val |= MVPP2_GMAC_CONFIG_MII_SPEED;
+
+ writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+}
+
/* Adjust link */
static void mvpp2_link_event(struct net_device *dev)
{
struct mvpp2_port *port = netdev_priv(dev);
struct phy_device *phydev = dev->phydev;
- int status_change = 0;
+ bool link_reconfigured = false;
u32 val;
if (phydev->link) {
+ if (port->phy_interface != phydev->interface && port->comphy) {
+ /* disable current port for reconfiguration */
+ mvpp2_interrupts_disable(port);
+ netif_carrier_off(port->dev);
+ mvpp2_port_disable(port);
+ phy_power_off(port->comphy);
+
+ /* comphy reconfiguration */
+ port->phy_interface = phydev->interface;
+ mvpp22_comphy_init(port);
+
+ /* gop/mac reconfiguration */
+ mvpp22_gop_init(port);
+ mvpp2_port_mii_set(port);
+
+ link_reconfigured = true;
+ }
+
if ((port->speed != phydev->speed) ||
(port->duplex != phydev->duplex)) {
- u32 val;
-
- val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
- val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED |
- MVPP2_GMAC_CONFIG_GMII_SPEED |
- MVPP2_GMAC_CONFIG_FULL_DUPLEX |
- MVPP2_GMAC_AN_SPEED_EN |
- MVPP2_GMAC_AN_DUPLEX_EN);
-
- if (phydev->duplex)
- val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX;
-
- if (phydev->speed == SPEED_1000)
- val |= MVPP2_GMAC_CONFIG_GMII_SPEED;
- else if (phydev->speed == SPEED_100)
- val |= MVPP2_GMAC_CONFIG_MII_SPEED;
-
- writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ mvpp2_gmac_set_autoneg(port, phydev);
port->duplex = phydev->duplex;
port->speed = phydev->speed;
}
}
- if (phydev->link != port->link) {
- if (!phydev->link) {
- port->duplex = -1;
- port->speed = 0;
- }
-
+ if (phydev->link != port->link || link_reconfigured) {
port->link = phydev->link;
- status_change = 1;
- }
- if (status_change) {
if (phydev->link) {
- val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
- val |= (MVPP2_GMAC_FORCE_LINK_PASS |
- MVPP2_GMAC_FORCE_LINK_DOWN);
- writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ if (port->phy_interface == PHY_INTERFACE_MODE_RGMII ||
+ port->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ port->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID ||
+ port->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID ||
+ port->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+ val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ val |= (MVPP2_GMAC_FORCE_LINK_PASS |
+ MVPP2_GMAC_FORCE_LINK_DOWN);
+ writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG);
+ }
+
+ mvpp2_interrupts_enable(port);
+ mvpp2_port_enable(port);
+
mvpp2_egress_enable(port);
mvpp2_ingress_enable(port);
+ netif_carrier_on(dev);
+ netif_tx_wake_all_queues(dev);
} else {
+ port->duplex = -1;
+ port->speed = 0;
+
+ netif_tx_stop_all_queues(dev);
+ netif_carrier_off(dev);
mvpp2_ingress_disable(port);
mvpp2_egress_disable(port);
+
+ mvpp2_port_disable(port);
+ mvpp2_interrupts_disable(port);
}
+
phy_print_status(phydev);
}
}
@@ -5385,8 +5989,8 @@ static void mvpp2_tx_proc_cb(unsigned long data)
port_pcpu->timer_scheduled = false;
/* Process all the Tx queues */
- cause = (1 << txq_number) - 1;
- tx_todo = mvpp2_tx_done(port, cause);
+ cause = (1 << port->ntxqs) - 1;
+ tx_todo = mvpp2_tx_done(port, cause, smp_processor_id());
/* Set the timer in case not all the packets were processed */
if (tx_todo)
@@ -5498,8 +6102,8 @@ static u32 mvpp2_skb_tx_csum(struct mvpp2_port *port, struct sk_buff *skb)
}
/* Main rx processing */
-static int mvpp2_rx(struct mvpp2_port *port, int rx_todo,
- struct mvpp2_rx_queue *rxq)
+static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
+ int rx_todo, struct mvpp2_rx_queue *rxq)
{
struct net_device *dev = port->dev;
int rx_received;
@@ -5577,7 +6181,7 @@ err_drop_frame:
skb->protocol = eth_type_trans(skb, dev);
mvpp2_rx_csum(port, rx_status, skb);
- napi_gro_receive(&port->napi, skb);
+ napi_gro_receive(napi, skb);
}
if (rcvd_pkts) {
@@ -5665,6 +6269,123 @@ cleanup:
return -ENOMEM;
}
+static inline void mvpp2_tso_put_hdr(struct sk_buff *skb,
+ struct net_device *dev,
+ struct mvpp2_tx_queue *txq,
+ struct mvpp2_tx_queue *aggr_txq,
+ struct mvpp2_txq_pcpu *txq_pcpu,
+ int hdr_sz)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
+ dma_addr_t addr;
+
+ mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
+ mvpp2_txdesc_size_set(port, tx_desc, hdr_sz);
+
+ addr = txq_pcpu->tso_headers_dma +
+ txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
+ mvpp2_txdesc_offset_set(port, tx_desc, addr & MVPP2_TX_DESC_ALIGN);
+ mvpp2_txdesc_dma_addr_set(port, tx_desc, addr & ~MVPP2_TX_DESC_ALIGN);
+
+ mvpp2_txdesc_cmd_set(port, tx_desc, mvpp2_skb_tx_csum(port, skb) |
+ MVPP2_TXD_F_DESC |
+ MVPP2_TXD_PADDING_DISABLE);
+ mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
+}
+
+static inline int mvpp2_tso_put_data(struct sk_buff *skb,
+ struct net_device *dev, struct tso_t *tso,
+ struct mvpp2_tx_queue *txq,
+ struct mvpp2_tx_queue *aggr_txq,
+ struct mvpp2_txq_pcpu *txq_pcpu,
+ int sz, bool left, bool last)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
+ dma_addr_t buf_dma_addr;
+
+ mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
+ mvpp2_txdesc_size_set(port, tx_desc, sz);
+
+ buf_dma_addr = dma_map_single(dev->dev.parent, tso->data, sz,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(dev->dev.parent, buf_dma_addr))) {
+ mvpp2_txq_desc_put(txq);
+ return -ENOMEM;
+ }
+
+ mvpp2_txdesc_offset_set(port, tx_desc,
+ buf_dma_addr & MVPP2_TX_DESC_ALIGN);
+ mvpp2_txdesc_dma_addr_set(port, tx_desc,
+ buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);
+
+ if (!left) {
+ mvpp2_txdesc_cmd_set(port, tx_desc, MVPP2_TXD_L_DESC);
+ if (last) {
+ mvpp2_txq_inc_put(port, txq_pcpu, skb, tx_desc);
+ return 0;
+ }
+ } else {
+ mvpp2_txdesc_cmd_set(port, tx_desc, 0);
+ }
+
+ mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
+ return 0;
+}
+
+static int mvpp2_tx_tso(struct sk_buff *skb, struct net_device *dev,
+ struct mvpp2_tx_queue *txq,
+ struct mvpp2_tx_queue *aggr_txq,
+ struct mvpp2_txq_pcpu *txq_pcpu)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ struct tso_t tso;
+ int hdr_sz = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ int i, len, descs = 0;
+
+ /* Check number of available descriptors */
+ if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq,
+ tso_count_descs(skb)) ||
+ mvpp2_txq_reserved_desc_num_proc(port->priv, txq, txq_pcpu,
+ tso_count_descs(skb)))
+ return 0;
+
+ tso_start(skb, &tso);
+ len = skb->len - hdr_sz;
+ while (len > 0) {
+ int left = min_t(int, skb_shinfo(skb)->gso_size, len);
+ char *hdr = txq_pcpu->tso_headers +
+ txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
+
+ len -= left;
+ descs++;
+
+ tso_build_hdr(skb, hdr, &tso, left, len == 0);
+ mvpp2_tso_put_hdr(skb, dev, txq, aggr_txq, txq_pcpu, hdr_sz);
+
+ while (left > 0) {
+ int sz = min_t(int, tso.size, left);
+ left -= sz;
+ descs++;
+
+ if (mvpp2_tso_put_data(skb, dev, &tso, txq, aggr_txq,
+ txq_pcpu, sz, left, len == 0))
+ goto release;
+ tso_build_data(skb, &tso, sz);
+ }
+ }
+
+ return descs;
+
+release:
+ for (i = descs - 1; i >= 0; i--) {
+ struct mvpp2_tx_desc *tx_desc = txq->descs + i;
+
+ tx_desc_unmap_put(port, txq, tx_desc);
+ }
+ return 0;
+}
+
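+/* Descriptor layout sketch (illustrative): for a GSO skb split into two
+ * MSS-sized segments, the loops above emit
+ *
+ *   [F hdr0][data ...][L]  [F hdr1][data ...][L]
+ *
+ * where each first (F) descriptor points into the per-CPU tso_headers
+ * DMA area, and only the very last (L) descriptor of the skb carries the
+ * skb pointer so that mvpp2_txq_done() frees it exactly once.
+ */
+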
/* Main tx processing */
static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
{
@@ -5682,6 +6403,10 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
txq_pcpu = this_cpu_ptr(txq->pcpu);
aggr_txq = &port->priv->aggr_txqs[smp_processor_id()];
+ if (skb_is_gso(skb)) {
+ frags = mvpp2_tx_tso(skb, dev, txq, aggr_txq, txq_pcpu);
+ goto out;
+ }
frags = skb_shinfo(skb)->nr_frags + 1;
/* Check number of available descriptors */
@@ -5731,22 +6456,21 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
}
}
- txq_pcpu->reserved_num -= frags;
- txq_pcpu->count += frags;
- aggr_txq->count += frags;
-
- /* Enable transmit */
- wmb();
- mvpp2_aggr_txq_pend_desc_add(port, frags);
-
- if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1) {
- struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
-
- netif_tx_stop_queue(nq);
- }
out:
if (frags > 0) {
struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats);
+ struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
+
+ txq_pcpu->reserved_num -= frags;
+ txq_pcpu->count += frags;
+ aggr_txq->count += frags;
+
+ /* Enable transmit */
+ wmb();
+ mvpp2_aggr_txq_pend_desc_add(port, frags);
+
+ if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1)
+ netif_tx_stop_queue(nq);
u64_stats_update_begin(&stats->syncp);
stats->tx_packets++;
@@ -5762,7 +6486,8 @@ out:
mvpp2_txq_done(port, txq, txq_pcpu);
/* Set the timer in case not all frags were processed */
- if (txq_pcpu->count <= frags && txq_pcpu->count > 0) {
+ if (!port->has_tx_irqs && txq_pcpu->count <= frags &&
+ txq_pcpu->count > 0) {
struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu);
mvpp2_timer_set(port_pcpu);
@@ -5783,11 +6508,14 @@ static inline void mvpp2_cause_error(struct net_device *dev, int cause)
static int mvpp2_poll(struct napi_struct *napi, int budget)
{
- u32 cause_rx_tx, cause_rx, cause_misc;
+ u32 cause_rx_tx, cause_rx, cause_tx, cause_misc;
int rx_done = 0;
struct mvpp2_port *port = netdev_priv(napi->dev);
+ struct mvpp2_queue_vector *qv;
int cpu = smp_processor_id();
+ qv = container_of(napi, struct mvpp2_queue_vector, napi);
+
/* Rx/Tx cause register
*
* Bits 0-15: each bit indicates received packets on the Rx queue
@@ -5798,11 +6526,10 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
*
* Each CPU has its own Rx/Tx cause register
*/
- cause_rx_tx = mvpp2_percpu_read(port->priv, cpu,
+ cause_rx_tx = mvpp2_percpu_read(port->priv, qv->sw_thread_id,
MVPP2_ISR_RX_TX_CAUSE_REG(port->id));
- cause_rx_tx &= ~MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
- cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;
+ cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK;
if (cause_misc) {
mvpp2_cause_error(port->dev, cause_misc);
@@ -5813,10 +6540,16 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK);
}
- cause_rx = cause_rx_tx & MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
+ cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK;
+ if (cause_tx) {
+ cause_tx >>= MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_OFFSET;
+ mvpp2_tx_done(port, cause_tx, qv->sw_thread_id);
+ }
/* Process RX packets */
- cause_rx |= port->pending_cause_rx;
+ cause_rx = cause_rx_tx & MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK;
+ cause_rx <<= qv->first_rxq;
+ cause_rx |= qv->pending_cause_rx;
while (cause_rx && budget > 0) {
int count;
struct mvpp2_rx_queue *rxq;
@@ -5825,7 +6558,7 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
if (!rxq)
break;
- count = mvpp2_rx(port, budget, rxq);
+ count = mvpp2_rx(port, napi, budget, rxq);
rx_done += count;
budget -= count;
if (budget > 0) {
@@ -5841,9 +6574,9 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
cause_rx = 0;
napi_complete_done(napi, rx_done);
- mvpp2_interrupts_enable(port);
+ mvpp2_qvec_interrupt_enable(qv);
}
- port->pending_cause_rx = cause_rx;
+ qv->pending_cause_rx = cause_rx;
return rx_done;
}
@@ -5851,17 +6584,32 @@ static int mvpp2_poll(struct napi_struct *napi, int budget)
static void mvpp2_start_dev(struct mvpp2_port *port)
{
struct net_device *ndev = port->dev;
+ int i;
+
+ if (port->gop_id == 0 &&
+ (port->phy_interface == PHY_INTERFACE_MODE_XAUI ||
+ port->phy_interface == PHY_INTERFACE_MODE_10GKR))
+ mvpp2_xlg_max_rx_size_set(port);
+ else
+ mvpp2_gmac_max_rx_size_set(port);
- mvpp2_gmac_max_rx_size_set(port);
mvpp2_txp_max_tx_size_set(port);
- napi_enable(&port->napi);
+ for (i = 0; i < port->nqvecs; i++)
+ napi_enable(&port->qvecs[i].napi);
/* Enable interrupts on all CPUs */
mvpp2_interrupts_enable(port);
+ if (port->priv->hw_version == MVPP22) {
+ mvpp22_comphy_init(port);
+ mvpp22_gop_init(port);
+ }
+
+ mvpp2_port_mii_set(port);
mvpp2_port_enable(port);
- phy_start(ndev->phydev);
+ if (ndev->phydev)
+ phy_start(ndev->phydev);
netif_tx_start_all_queues(port->dev);
}
@@ -5869,6 +6617,7 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
static void mvpp2_stop_dev(struct mvpp2_port *port)
{
struct net_device *ndev = port->dev;
+ int i;
/* Stop new packets from arriving to RXQs */
mvpp2_ingress_disable(port);
@@ -5878,14 +6627,17 @@ static void mvpp2_stop_dev(struct mvpp2_port *port)
/* Disable interrupts on all CPUs */
mvpp2_interrupts_disable(port);
- napi_disable(&port->napi);
+ for (i = 0; i < port->nqvecs; i++)
+ napi_disable(&port->qvecs[i].napi);
netif_carrier_off(port->dev);
netif_tx_stop_all_queues(port->dev);
mvpp2_egress_disable(port);
mvpp2_port_disable(port);
- phy_stop(ndev->phydev);
+ if (ndev->phydev)
+ phy_stop(ndev->phydev);
+ phy_power_off(port->comphy);
}
static int mvpp2_check_ringparam_valid(struct net_device *dev,
@@ -5941,6 +6693,10 @@ static int mvpp2_phy_connect(struct mvpp2_port *port)
{
struct phy_device *phy_dev;
+ /* No PHY is attached */
+ if (!port->phy_node)
+ return 0;
+
phy_dev = of_phy_connect(port->dev, port->phy_node, mvpp2_link_event, 0,
port->phy_interface);
if (!phy_dev) {
@@ -5961,12 +6717,56 @@ static void mvpp2_phy_disconnect(struct mvpp2_port *port)
{
struct net_device *ndev = port->dev;
+ if (!ndev->phydev)
+ return;
+
phy_disconnect(ndev->phydev);
}
+static int mvpp2_irqs_init(struct mvpp2_port *port)
+{
+ int err, i;
+
+ for (i = 0; i < port->nqvecs; i++) {
+ struct mvpp2_queue_vector *qv = port->qvecs + i;
+
+ err = request_irq(qv->irq, mvpp2_isr, 0, port->dev->name, qv);
+ if (err)
+ goto err;
+
+ if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE)
+ irq_set_affinity_hint(qv->irq,
+ cpumask_of(qv->sw_thread_id));
+ }
+
+ return 0;
+err:
+ for (i = 0; i < port->nqvecs; i++) {
+ struct mvpp2_queue_vector *qv = port->qvecs + i;
+
+ irq_set_affinity_hint(qv->irq, NULL);
+ free_irq(qv->irq, qv);
+ }
+
+ return err;
+}
+
+static void mvpp2_irqs_deinit(struct mvpp2_port *port)
+{
+ int i;
+
+ for (i = 0; i < port->nqvecs; i++) {
+ struct mvpp2_queue_vector *qv = port->qvecs + i;
+
+ irq_set_affinity_hint(qv->irq, NULL);
+ free_irq(qv->irq, qv);
+ }
+}
+
static int mvpp2_open(struct net_device *dev)
{
struct mvpp2_port *port = netdev_priv(dev);
+ struct mvpp2 *priv = port->priv;
unsigned char mac_bcast[ETH_ALEN] = {
0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
int err;
@@ -6006,28 +6806,44 @@ static int mvpp2_open(struct net_device *dev)
goto err_cleanup_rxqs;
}
- err = request_irq(port->irq, mvpp2_isr, 0, dev->name, port);
+ err = mvpp2_irqs_init(port);
if (err) {
- netdev_err(port->dev, "cannot request IRQ %d\n", port->irq);
+ netdev_err(port->dev, "cannot init IRQs\n");
goto err_cleanup_txqs;
}
+ if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq) {
+ err = request_irq(port->link_irq, mvpp2_link_status_isr, 0,
+ dev->name, port);
+ if (err) {
+ netdev_err(port->dev, "cannot request link IRQ %d\n",
+ port->link_irq);
+ goto err_free_irq;
+ }
+
+ mvpp22_gop_setup_irq(port);
+ }
+
/* In default link is down */
netif_carrier_off(port->dev);
err = mvpp2_phy_connect(port);
if (err < 0)
- goto err_free_irq;
+ goto err_free_link_irq;
/* Unmask interrupts on all CPUs */
on_each_cpu(mvpp2_interrupts_unmask, port, 1);
+ mvpp2_shared_interrupt_mask_unmask(port, false);
mvpp2_start_dev(port);
return 0;
+err_free_link_irq:
+ if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq)
+ free_irq(port->link_irq, port);
err_free_irq:
- free_irq(port->irq, port);
+ mvpp2_irqs_deinit(port);
err_cleanup_txqs:
mvpp2_cleanup_txqs(port);
err_cleanup_rxqs:
@@ -6039,6 +6855,7 @@ static int mvpp2_stop(struct net_device *dev)
{
struct mvpp2_port *port = netdev_priv(dev);
struct mvpp2_port_pcpu *port_pcpu;
+ struct mvpp2 *priv = port->priv;
int cpu;
mvpp2_stop_dev(port);
@@ -6046,14 +6863,20 @@ static int mvpp2_stop(struct net_device *dev)
/* Mask interrupts on all CPUs */
on_each_cpu(mvpp2_interrupts_mask, port, 1);
+ mvpp2_shared_interrupt_mask_unmask(port, true);
- free_irq(port->irq, port);
- for_each_present_cpu(cpu) {
- port_pcpu = per_cpu_ptr(port->pcpu, cpu);
+ if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq)
+ free_irq(port->link_irq, port);
+
+ mvpp2_irqs_deinit(port);
+ if (!port->has_tx_irqs) {
+ for_each_present_cpu(cpu) {
+ port_pcpu = per_cpu_ptr(port->pcpu, cpu);
- hrtimer_cancel(&port_pcpu->tx_done_timer);
- port_pcpu->timer_scheduled = false;
- tasklet_kill(&port_pcpu->tx_done_tasklet);
+ hrtimer_cancel(&port_pcpu->tx_done_timer);
+ port_pcpu->timer_scheduled = false;
+ tasklet_kill(&port_pcpu->tx_done_tasklet);
+ }
}
mvpp2_cleanup_rxqs(port);
mvpp2_cleanup_txqs(port);
@@ -6228,7 +7051,7 @@ static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
struct mvpp2_port *port = netdev_priv(dev);
int queue;
- for (queue = 0; queue < rxq_number; queue++) {
+ for (queue = 0; queue < port->nrxqs; queue++) {
struct mvpp2_rx_queue *rxq = port->rxqs[queue];
rxq->time_coal = c->rx_coalesce_usecs;
@@ -6237,10 +7060,18 @@ static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
mvpp2_rx_time_coal_set(port, rxq);
}
- for (queue = 0; queue < txq_number; queue++) {
+ if (port->has_tx_irqs) {
+ port->tx_time_coal = c->tx_coalesce_usecs;
+ mvpp2_tx_time_coal_set(port);
+ }
+
+ for (queue = 0; queue < port->ntxqs; queue++) {
struct mvpp2_tx_queue *txq = port->txqs[queue];
txq->done_pkts_coal = c->tx_max_coalesced_frames;
+
+ if (port->has_tx_irqs)
+ mvpp2_tx_pkts_coal_set(port, txq);
}
return 0;
@@ -6365,6 +7196,129 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
.set_link_ksettings = phy_ethtool_set_link_ksettings,
};
+/* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
+ * had a single IRQ defined per-port.
+ */
+static int mvpp2_simple_queue_vectors_init(struct mvpp2_port *port,
+ struct device_node *port_node)
+{
+ struct mvpp2_queue_vector *v = &port->qvecs[0];
+
+ v->first_rxq = 0;
+ v->nrxqs = port->nrxqs;
+ v->type = MVPP2_QUEUE_VECTOR_SHARED;
+ v->sw_thread_id = 0;
+ v->sw_thread_mask = *cpumask_bits(cpu_online_mask);
+ v->port = port;
+ v->irq = irq_of_parse_and_map(port_node, 0);
+ if (v->irq <= 0)
+ return -EINVAL;
+ netif_napi_add(port->dev, &v->napi, mvpp2_poll,
+ NAPI_POLL_WEIGHT);
+
+ port->nqvecs = 1;
+
+ return 0;
+}
+
+static int mvpp2_multi_queue_vectors_init(struct mvpp2_port *port,
+ struct device_node *port_node)
+{
+ struct mvpp2_queue_vector *v;
+ int i, ret;
+
+ port->nqvecs = num_possible_cpus();
+ if (queue_mode == MVPP2_QDIST_SINGLE_MODE)
+ port->nqvecs += 1;
+
+ for (i = 0; i < port->nqvecs; i++) {
+ char irqname[16];
+
+ v = port->qvecs + i;
+
+ v->port = port;
+ v->type = MVPP2_QUEUE_VECTOR_PRIVATE;
+ v->sw_thread_id = i;
+ v->sw_thread_mask = BIT(i);
+
+ snprintf(irqname, sizeof(irqname), "tx-cpu%d", i);
+
+ if (queue_mode == MVPP2_QDIST_MULTI_MODE) {
+ v->first_rxq = i * MVPP2_DEFAULT_RXQ;
+ v->nrxqs = MVPP2_DEFAULT_RXQ;
+ } else if (queue_mode == MVPP2_QDIST_SINGLE_MODE &&
+ i == (port->nqvecs - 1)) {
+ v->first_rxq = 0;
+ v->nrxqs = port->nrxqs;
+ v->type = MVPP2_QUEUE_VECTOR_SHARED;
+ strncpy(irqname, "rx-shared", sizeof(irqname));
+ }
+
+ v->irq = of_irq_get_byname(port_node, irqname);
+ if (v->irq <= 0) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ netif_napi_add(port->dev, &v->napi, mvpp2_poll,
+ NAPI_POLL_WEIGHT);
+ }
+
+ return 0;
+
+err:
+ for (i = 0; i < port->nqvecs; i++)
+ irq_dispose_mapping(port->qvecs[i].irq);
+ return ret;
+}
+
+static int mvpp2_queue_vectors_init(struct mvpp2_port *port,
+ struct device_node *port_node)
+{
+ if (port->has_tx_irqs)
+ return mvpp2_multi_queue_vectors_init(port, port_node);
+ else
+ return mvpp2_simple_queue_vectors_init(port, port_node);
+}
+
+static void mvpp2_queue_vectors_deinit(struct mvpp2_port *port)
+{
+ int i;
+
+ for (i = 0; i < port->nqvecs; i++)
+ irq_dispose_mapping(port->qvecs[i].irq);
+}
+
+/* Configure Rx queue group interrupt for this port */
+static void mvpp2_rx_irqs_setup(struct mvpp2_port *port)
+{
+ struct mvpp2 *priv = port->priv;
+ u32 val;
+ int i;
+
+ if (priv->hw_version == MVPP21) {
+ mvpp2_write(priv, MVPP21_ISR_RXQ_GROUP_REG(port->id),
+ port->nrxqs);
+ return;
+ }
+
+ /* Handle the more complicated PPv2.2 case */
+ for (i = 0; i < port->nqvecs; i++) {
+ struct mvpp2_queue_vector *qv = port->qvecs + i;
+
+ if (!qv->nrxqs)
+ continue;
+
+ val = qv->sw_thread_id;
+ val |= port->id << MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET;
+ mvpp2_write(priv, MVPP22_ISR_RXQ_GROUP_INDEX_REG, val);
+
+ val = qv->first_rxq;
+ val |= qv->nrxqs << MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET;
+ mvpp2_write(priv, MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG, val);
+ }
+}
+
/* Initialize port HW */
static int mvpp2_port_init(struct mvpp2_port *port)
{
@@ -6373,15 +7327,22 @@ static int mvpp2_port_init(struct mvpp2_port *port)
struct mvpp2_txq_pcpu *txq_pcpu;
int queue, cpu, err;
- if (port->first_rxq + rxq_number >
+ /* Checks for hardware constraints */
+ if (port->first_rxq + port->nrxqs >
MVPP2_MAX_PORTS * priv->max_port_rxqs)
return -EINVAL;
+ if (port->nrxqs % 4 || (port->nrxqs > priv->max_port_rxqs) ||
+ (port->ntxqs > MVPP2_MAX_TXQ))
+ return -EINVAL;
+
/* Disable port */
mvpp2_egress_disable(port);
mvpp2_port_disable(port);
- port->txqs = devm_kcalloc(dev, txq_number, sizeof(*port->txqs),
+ port->tx_time_coal = MVPP2_TXDONE_COAL_USEC;
+
+ port->txqs = devm_kcalloc(dev, port->ntxqs, sizeof(*port->txqs),
GFP_KERNEL);
if (!port->txqs)
return -ENOMEM;
@@ -6389,7 +7350,7 @@ static int mvpp2_port_init(struct mvpp2_port *port)
/* Associate physical Tx queues to this port and initialize.
* The mapping is predefined.
*/
- for (queue = 0; queue < txq_number; queue++) {
+ for (queue = 0; queue < port->ntxqs; queue++) {
int queue_phy_id = mvpp2_txq_phys(port->id, queue);
struct mvpp2_tx_queue *txq;
@@ -6416,7 +7377,7 @@ static int mvpp2_port_init(struct mvpp2_port *port)
port->txqs[queue] = txq;
}
- port->rxqs = devm_kcalloc(dev, rxq_number, sizeof(*port->rxqs),
+ port->rxqs = devm_kcalloc(dev, port->nrxqs, sizeof(*port->rxqs),
GFP_KERNEL);
if (!port->rxqs) {
err = -ENOMEM;
@@ -6424,7 +7385,7 @@ static int mvpp2_port_init(struct mvpp2_port *port)
}
/* Allocate and initialize Rx queue for this port */
- for (queue = 0; queue < rxq_number; queue++) {
+ for (queue = 0; queue < port->nrxqs; queue++) {
struct mvpp2_rx_queue *rxq;
/* Map physical Rx queue to port's logical Rx queue */
@@ -6441,22 +7402,10 @@ static int mvpp2_port_init(struct mvpp2_port *port)
port->rxqs[queue] = rxq;
}
- /* Configure Rx queue group interrupt for this port */
- if (priv->hw_version == MVPP21) {
- mvpp2_write(priv, MVPP21_ISR_RXQ_GROUP_REG(port->id),
- rxq_number);
- } else {
- u32 val;
-
- val = (port->id << MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET);
- mvpp2_write(priv, MVPP22_ISR_RXQ_GROUP_INDEX_REG, val);
-
- val = (rxq_number << MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET);
- mvpp2_write(priv, MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG, val);
- }
+ mvpp2_rx_irqs_setup(port);
/* Create Rx descriptor rings */
- for (queue = 0; queue < rxq_number; queue++) {
+ for (queue = 0; queue < port->nrxqs; queue++) {
struct mvpp2_rx_queue *rxq = port->rxqs[queue];
rxq->size = port->rx_ring_size;
@@ -6484,7 +7433,7 @@ static int mvpp2_port_init(struct mvpp2_port *port)
return 0;
err_free_percpu:
- for (queue = 0; queue < txq_number; queue++) {
+ for (queue = 0; queue < port->ntxqs; queue++) {
if (!port->txqs[queue])
continue;
free_percpu(port->txqs[queue]->pcpu);
@@ -6492,35 +7441,93 @@ err_free_percpu:
return err;
}
+/* Checks whether the port DT description declares the TX interrupts. On
+ * PPv2.1 there are no such interrupts. On PPv2.2 they are available, but
+ * we need to keep support for old DTs.
+ */
+static bool mvpp2_port_has_tx_irqs(struct mvpp2 *priv,
+ struct device_node *port_node)
+{
+ char *irqs[5] = { "rx-shared", "tx-cpu0", "tx-cpu1",
+ "tx-cpu2", "tx-cpu3" };
+ int ret, i;
+
+ if (priv->hw_version == MVPP21)
+ return false;
+
+ for (i = 0; i < 5; i++) {
+ ret = of_property_match_string(port_node, "interrupt-names",
+ irqs[i]);
+ if (ret < 0)
+ return false;
+ }
+
+ return true;
+}
+
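+/* Illustrative DT fragment (assumed binding) that makes the check above
+ * succeed:
+ *
+ *   interrupt-names = "rx-shared", "tx-cpu0", "tx-cpu1",
+ *                     "tx-cpu2", "tx-cpu3";
+ */
+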
+static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
+ struct device_node *port_node,
+ char **mac_from)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ char hw_mac_addr[ETH_ALEN] = {0};
+ const char *dt_mac_addr;
+
+ dt_mac_addr = of_get_mac_address(port_node);
+ if (dt_mac_addr && is_valid_ether_addr(dt_mac_addr)) {
+ *mac_from = "device tree";
+ ether_addr_copy(dev->dev_addr, dt_mac_addr);
+ return;
+ }
+
+ if (priv->hw_version == MVPP21) {
+ mvpp21_get_mac_address(port, hw_mac_addr);
+ if (is_valid_ether_addr(hw_mac_addr)) {
+ *mac_from = "hardware";
+ ether_addr_copy(dev->dev_addr, hw_mac_addr);
+ return;
+ }
+ }
+
+ *mac_from = "random";
+ eth_hw_addr_random(dev);
+}
+
/* Ports initialization */
static int mvpp2_port_probe(struct platform_device *pdev,
struct device_node *port_node,
struct mvpp2 *priv)
{
struct device_node *phy_node;
+ struct phy *comphy;
struct mvpp2_port *port;
struct mvpp2_port_pcpu *port_pcpu;
struct net_device *dev;
struct resource *res;
- const char *dt_mac_addr;
- const char *mac_from;
- char hw_mac_addr[ETH_ALEN] = {0};
+ char *mac_from = "";
+ unsigned int ntxqs, nrxqs;
+ bool has_tx_irqs;
u32 id;
int features;
int phy_mode;
int err, i, cpu;
- dev = alloc_etherdev_mqs(sizeof(*port), txq_number, rxq_number);
+ has_tx_irqs = mvpp2_port_has_tx_irqs(priv, port_node);
+
+ if (!has_tx_irqs)
+ queue_mode = MVPP2_QDIST_SINGLE_MODE;
+
+ ntxqs = MVPP2_MAX_TXQ;
+ if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_MULTI_MODE)
+ nrxqs = MVPP2_DEFAULT_RXQ * num_possible_cpus();
+ else
+ nrxqs = MVPP2_DEFAULT_RXQ;
+
+ dev = alloc_etherdev_mqs(sizeof(*port), ntxqs, nrxqs);
if (!dev)
return -ENOMEM;
phy_node = of_parse_phandle(port_node, "phy", 0);
- if (!phy_node) {
- dev_err(&pdev->dev, "missing phy\n");
- err = -ENODEV;
- goto err_free_netdev;
- }
-
phy_mode = of_get_phy_mode(port_node);
if (phy_mode < 0) {
dev_err(&pdev->dev, "incorrect phy mode\n");
@@ -6528,6 +7535,15 @@ static int mvpp2_port_probe(struct platform_device *pdev,
goto err_free_netdev;
}
+ comphy = devm_of_phy_get(&pdev->dev, port_node, NULL);
+ if (IS_ERR(comphy)) {
+ if (PTR_ERR(comphy) == -EPROBE_DEFER) {
+ err = -EPROBE_DEFER;
+ goto err_free_netdev;
+ }
+ comphy = NULL;
+ }
+
if (of_property_read_u32(port_node, "port-id", &id)) {
err = -EINVAL;
dev_err(&pdev->dev, "missing port-id value\n");
@@ -6540,25 +7556,37 @@ static int mvpp2_port_probe(struct platform_device *pdev,
dev->ethtool_ops = &mvpp2_eth_tool_ops;
port = netdev_priv(dev);
+ port->dev = dev;
+ port->ntxqs = ntxqs;
+ port->nrxqs = nrxqs;
+ port->priv = priv;
+ port->has_tx_irqs = has_tx_irqs;
- port->irq = irq_of_parse_and_map(port_node, 0);
- if (port->irq <= 0) {
- err = -EINVAL;
+ err = mvpp2_queue_vectors_init(port, port_node);
+ if (err)
goto err_free_netdev;
+
+ port->link_irq = of_irq_get_byname(port_node, "link");
+ if (port->link_irq == -EPROBE_DEFER) {
+ err = -EPROBE_DEFER;
+ goto err_deinit_qvecs;
}
+ if (port->link_irq <= 0)
+ /* the link irq is optional */
+ port->link_irq = 0;
if (of_property_read_bool(port_node, "marvell,loopback"))
port->flags |= MVPP2_F_LOOPBACK;
- port->priv = priv;
port->id = id;
if (priv->hw_version == MVPP21)
- port->first_rxq = port->id * rxq_number;
+ port->first_rxq = port->id * port->nrxqs;
else
port->first_rxq = port->id * priv->max_port_rxqs;
port->phy_node = phy_node;
port->phy_interface = phy_mode;
+ port->comphy = comphy;
if (priv->hw_version == MVPP21) {
res = platform_get_resource(pdev, IORESOURCE_MEM, 2 + id);
@@ -6572,7 +7600,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
&port->gop_id)) {
err = -EINVAL;
dev_err(&pdev->dev, "missing gop-port-id value\n");
- goto err_free_irq;
+ goto err_deinit_qvecs;
}
port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id);
@@ -6585,25 +7613,10 @@ static int mvpp2_port_probe(struct platform_device *pdev,
goto err_free_irq;
}
- dt_mac_addr = of_get_mac_address(port_node);
- if (dt_mac_addr && is_valid_ether_addr(dt_mac_addr)) {
- mac_from = "device tree";
- ether_addr_copy(dev->dev_addr, dt_mac_addr);
- } else {
- if (priv->hw_version == MVPP21)
- mvpp21_get_mac_address(port, hw_mac_addr);
- if (is_valid_ether_addr(hw_mac_addr)) {
- mac_from = "hardware";
- ether_addr_copy(dev->dev_addr, hw_mac_addr);
- } else {
- mac_from = "random";
- eth_hw_addr_random(dev);
- }
- }
+ mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from);
port->tx_ring_size = MVPP2_MAX_TXD;
port->rx_ring_size = MVPP2_MAX_RXD;
- port->dev = dev;
SET_NETDEV_DEV(dev, &pdev->dev);
err = mvpp2_port_init(port);
@@ -6612,7 +7625,6 @@ static int mvpp2_port_probe(struct platform_device *pdev,
goto err_free_stats;
}
- mvpp2_port_mii_set(port);
mvpp2_port_periodic_xon_disable(port);
if (priv->hw_version == MVPP21)
@@ -6626,20 +7638,22 @@ static int mvpp2_port_probe(struct platform_device *pdev,
goto err_free_txq_pcpu;
}
- for_each_present_cpu(cpu) {
- port_pcpu = per_cpu_ptr(port->pcpu, cpu);
+ if (!port->has_tx_irqs) {
+ for_each_present_cpu(cpu) {
+ port_pcpu = per_cpu_ptr(port->pcpu, cpu);
- hrtimer_init(&port_pcpu->tx_done_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL_PINNED);
- port_pcpu->tx_done_timer.function = mvpp2_hr_timer_cb;
- port_pcpu->timer_scheduled = false;
+ hrtimer_init(&port_pcpu->tx_done_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_PINNED);
+ port_pcpu->tx_done_timer.function = mvpp2_hr_timer_cb;
+ port_pcpu->timer_scheduled = false;
- tasklet_init(&port_pcpu->tx_done_tasklet, mvpp2_tx_proc_cb,
- (unsigned long)dev);
+ tasklet_init(&port_pcpu->tx_done_tasklet,
+ mvpp2_tx_proc_cb,
+ (unsigned long)dev);
+ }
}
- netif_napi_add(dev, &port->napi, mvpp2_poll, NAPI_POLL_WEIGHT);
- features = NETIF_F_SG | NETIF_F_IP_CSUM;
+ features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
dev->features = features | NETIF_F_RXCSUM;
dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
dev->vlan_features |= features;
@@ -6662,12 +7676,15 @@ static int mvpp2_port_probe(struct platform_device *pdev,
err_free_port_pcpu:
free_percpu(port->pcpu);
err_free_txq_pcpu:
- for (i = 0; i < txq_number; i++)
+ for (i = 0; i < port->ntxqs; i++)
free_percpu(port->txqs[i]->pcpu);
err_free_stats:
free_percpu(port->stats);
err_free_irq:
- irq_dispose_mapping(port->irq);
+ if (port->link_irq)
+ irq_dispose_mapping(port->link_irq);
+err_deinit_qvecs:
+ mvpp2_queue_vectors_deinit(port);
err_free_netdev:
of_node_put(phy_node);
free_netdev(dev);
@@ -6683,9 +7700,11 @@ static void mvpp2_port_remove(struct mvpp2_port *port)
of_node_put(port->phy_node);
free_percpu(port->pcpu);
free_percpu(port->stats);
- for (i = 0; i < txq_number; i++)
+ for (i = 0; i < port->ntxqs; i++)
free_percpu(port->txqs[i]->pcpu);
- irq_dispose_mapping(port->irq);
+ mvpp2_queue_vectors_deinit(port);
+ if (port->link_irq)
+ irq_dispose_mapping(port->link_irq);
free_netdev(port->dev);
}
@@ -6800,13 +7819,6 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
int err, i;
u32 val;
- /* Checks for hardware constraints */
- if (rxq_number % 4 || (rxq_number > priv->max_port_rxqs) ||
- (txq_number > MVPP2_MAX_TXQ)) {
- dev_err(&pdev->dev, "invalid queue size parameter\n");
- return -EINVAL;
- }
-
/* MBUS windows configuration */
dram_target_info = mv_mbus_dram_info();
if (dram_target_info)
@@ -6836,8 +7848,7 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
for_each_present_cpu(i) {
priv->aggr_txqs[i].id = i;
priv->aggr_txqs[i].size = MVPP2_AGGR_TXQ_SIZE;
- err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i],
- MVPP2_AGGR_TXQ_SIZE, i, priv);
+ err = mvpp2_aggr_txq_init(pdev, &priv->aggr_txqs[i], i, priv);
if (err < 0)
return err;
}
@@ -6845,23 +7856,6 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
/* Rx Fifo Init */
mvpp2_rx_fifo_init(priv);
- /* Reset Rx queue group interrupt configuration */
- for (i = 0; i < MVPP2_MAX_PORTS; i++) {
- if (priv->hw_version == MVPP21) {
- mvpp2_write(priv, MVPP21_ISR_RXQ_GROUP_REG(i),
- rxq_number);
- continue;
- } else {
- u32 val;
-
- val = (i << MVPP22_ISR_RXQ_GROUP_INDEX_GROUP_OFFSET);
- mvpp2_write(priv, MVPP22_ISR_RXQ_GROUP_INDEX_REG, val);
-
- val = (rxq_number << MVPP22_ISR_RXQ_SUB_GROUP_SIZE_OFFSET);
- mvpp2_write(priv, MVPP22_ISR_RXQ_SUB_GROUP_CONFIG_REG, val);
- }
- }
-
if (priv->hw_version == MVPP21)
writel(MVPP2_EXT_GLOBAL_CTRL_DEFAULT,
priv->lms_base + MVPP2_MNG_EXTENDED_GLOBAL_CTRL_REG);
@@ -6892,7 +7886,7 @@ static int mvpp2_probe(struct platform_device *pdev)
struct mvpp2 *priv;
struct resource *res;
void __iomem *base;
- int port_count, cpu;
+ int port_count, i;
int err;
priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
@@ -6917,14 +7911,25 @@ static int mvpp2_probe(struct platform_device *pdev)
priv->iface_base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(priv->iface_base))
return PTR_ERR(priv->iface_base);
+
+ priv->sysctrl_base =
+ syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "marvell,system-controller");
+ if (IS_ERR(priv->sysctrl_base))
+		/* The system controller regmap is optional for DT
+ * compatibility reasons. When not provided, the
+ * configuration of the GoP relies on the
+ * firmware/bootloader.
+ */
+ priv->sysctrl_base = NULL;
}
- for_each_present_cpu(cpu) {
+ for (i = 0; i < MVPP2_MAX_THREADS; i++) {
u32 addr_space_sz;
addr_space_sz = (priv->hw_version == MVPP21 ?
MVPP21_ADDR_SPACE_SZ : MVPP22_ADDR_SPACE_SZ);
- priv->cpu_base[cpu] = base + cpu * addr_space_sz;
+ priv->swth_base[i] = base + i * addr_space_sz;
}
if (priv->hw_version == MVPP21)
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 5d7d94d..8a835e8 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -3516,7 +3516,7 @@ static const char *skge_board_name(const struct skge_hw *hw)
if (skge_chips[i].id == hw->chip_id)
return skge_chips[i].name;
- snprintf(buf, sizeof buf, "chipid 0x%x", hw->chip_id);
+ snprintf(buf, sizeof(buf), "chipid 0x%x", hw->chip_id);
return buf;
}
diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig
index 698bb89..f9149d2 100644
--- a/drivers/net/ethernet/mediatek/Kconfig
+++ b/drivers/net/ethernet/mediatek/Kconfig
@@ -7,11 +7,11 @@ config NET_VENDOR_MEDIATEK
if NET_VENDOR_MEDIATEK
config NET_MEDIATEK_SOC
- tristate "MediaTek MT7623 Gigabit ethernet support"
- depends on NET_VENDOR_MEDIATEK && (MACH_MT7623 || MACH_MT2701)
+ tristate "MediaTek SoC Gigabit Ethernet support"
+ depends on NET_VENDOR_MEDIATEK
select PHYLIB
---help---
This driver supports the gigabit ethernet MACs in the
- MediaTek MT2701/MT7623 chipset family.
+ MediaTek SoC family.
endif #NET_VENDOR_MEDIATEK
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index e588a0c..5e81a72 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -53,7 +53,8 @@ static const struct mtk_ethtool_stats {
};
static const char * const mtk_clks_source_name[] = {
- "ethif", "esw", "gp1", "gp2", "trgpll"
+ "ethif", "esw", "gp0", "gp1", "gp2", "trgpll", "sgmii_tx250m",
+ "sgmii_rx250m", "sgmii_cdr_ref", "sgmii_cdr_fb", "sgmii_ck", "eth2pll"
};
void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
@@ -163,6 +164,47 @@ static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed)
mtk_w32(eth, val, TRGMII_TCK_CTRL);
}
+static void mtk_gmac_sgmii_hw_setup(struct mtk_eth *eth, int mac_id)
+{
+ u32 val;
+
+	/* Set up the link timer and QPHY power-up inside SGMIISYS */
+ regmap_write(eth->sgmiisys, SGMSYS_PCS_LINK_TIMER,
+ SGMII_LINK_TIMER_DEFAULT);
+
+ regmap_read(eth->sgmiisys, SGMSYS_SGMII_MODE, &val);
+ val |= SGMII_REMOTE_FAULT_DIS;
+ regmap_write(eth->sgmiisys, SGMSYS_SGMII_MODE, val);
+
+ regmap_read(eth->sgmiisys, SGMSYS_PCS_CONTROL_1, &val);
+ val |= SGMII_AN_RESTART;
+ regmap_write(eth->sgmiisys, SGMSYS_PCS_CONTROL_1, val);
+
+ regmap_read(eth->sgmiisys, SGMSYS_QPHY_PWR_STATE_CTRL, &val);
+ val &= ~SGMII_PHYA_PWD;
+ regmap_write(eth->sgmiisys, SGMSYS_QPHY_PWR_STATE_CTRL, val);
+
+	/* Configure the MUX to select which GMAC uses the SGMII interface */
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_DUAL_GMAC_SHARED_SGMII)) {
+ regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+ val &= ~SYSCFG0_SGMII_MASK;
+ val |= !mac_id ? SYSCFG0_SGMII_GMAC1 : SYSCFG0_SGMII_GMAC2;
+ regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
+
+ dev_info(eth->dev, "setup shared sgmii for gmac=%d\n",
+ mac_id);
+ }
+
+	/* Set up GMAC1 to go through the SGMII path when the SoC also
+	 * supports ESW on GMAC1
+	 */
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_GMAC1_ESW | MTK_GMAC1_SGMII) &&
+ !mac_id) {
+ mtk_w32(eth, 0, MTK_MAC_MISC);
+		dev_info(eth->dev, "setup gmac1 going through sgmii\n");
+ }
+}
+
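As a side note, each read/modify/write pair in mtk_gmac_sgmii_hw_setup() could be collapsed into a single regmap_update_bits() call; a minimal sketch of the MUX selection in that style (helper name hypothetical, behaviour assumed equivalent to the hunk above):

	static void mtk_sgmii_select_gmac(struct mtk_eth *eth, int mac_id)
	{
		u32 val = !mac_id ? SYSCFG0_SGMII_GMAC1 : SYSCFG0_SGMII_GMAC2;

		/* clear SYSCFG0_SGMII_MASK and set the chosen GMAC in one call */
		regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
				   SYSCFG0_SGMII_MASK, val);
	}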
static void mtk_phy_link_adjust(struct net_device *dev)
{
struct mtk_mac *mac = netdev_priv(dev);
@@ -185,7 +227,8 @@ static void mtk_phy_link_adjust(struct net_device *dev)
break;
};
- if (mac->id == 0 && !mac->trgmii)
+ if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_GMAC1_TRGMII) &&
+ !mac->id && !mac->trgmii)
mtk_gmac0_rgmii_adjust(mac->hw, dev->phydev->speed);
if (dev->phydev->link)
@@ -269,6 +312,7 @@ static int mtk_phy_connect(struct net_device *dev)
if (!np)
return -ENODEV;
+ mac->ge_mode = 0;
switch (of_get_phy_mode(np)) {
case PHY_INTERFACE_MODE_TRGMII:
mac->trgmii = true;
@@ -276,7 +320,10 @@ static int mtk_phy_connect(struct net_device *dev)
case PHY_INTERFACE_MODE_RGMII_RXID:
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII:
- mac->ge_mode = 0;
+ break;
+ case PHY_INTERFACE_MODE_SGMII:
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII))
+ mtk_gmac_sgmii_hw_setup(eth, mac->id);
break;
case PHY_INTERFACE_MODE_MII:
mac->ge_mode = 1;
@@ -1032,7 +1079,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
unsigned int done[MTK_MAX_DEVS];
unsigned int bytes[MTK_MAX_DEVS];
u32 cpu, dma;
- static int condition;
int total = 0, i;
memset(done, 0, sizeof(done));
@@ -1056,10 +1102,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
mac = 1;
skb = tx_buf->skb;
- if (!skb) {
- condition = 1;
+ if (!skb)
break;
- }
if (skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC) {
bytes[mac] += skb->len;
@@ -1241,9 +1285,19 @@ static void mtk_tx_clean(struct mtk_eth *eth)
static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
{
- struct mtk_rx_ring *ring = &eth->rx_ring[ring_no];
+ struct mtk_rx_ring *ring;
int rx_data_len, rx_dma_size;
int i;
+ u32 offset = 0;
+
+ if (rx_flag == MTK_RX_FLAGS_QDMA) {
+ if (ring_no)
+ return -EINVAL;
+ ring = &eth->rx_ring_qdma;
+ offset = 0x1000;
+ } else {
+ ring = &eth->rx_ring[ring_no];
+ }
if (rx_flag == MTK_RX_FLAGS_HWLRO) {
rx_data_len = MTK_MAX_LRO_RX_LENGTH;
@@ -1293,17 +1347,16 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
*/
wmb();
- mtk_w32(eth, ring->phys, MTK_PRX_BASE_PTR_CFG(ring_no));
- mtk_w32(eth, rx_dma_size, MTK_PRX_MAX_CNT_CFG(ring_no));
- mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
- mtk_w32(eth, MTK_PST_DRX_IDX_CFG(ring_no), MTK_PDMA_RST_IDX);
+ mtk_w32(eth, ring->phys, MTK_PRX_BASE_PTR_CFG(ring_no) + offset);
+ mtk_w32(eth, rx_dma_size, MTK_PRX_MAX_CNT_CFG(ring_no) + offset);
+ mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg + offset);
+ mtk_w32(eth, MTK_PST_DRX_IDX_CFG(ring_no), MTK_PDMA_RST_IDX + offset);
return 0;
}
-static void mtk_rx_clean(struct mtk_eth *eth, int ring_no)
+static void mtk_rx_clean(struct mtk_eth *eth, struct mtk_rx_ring *ring)
{
- struct mtk_rx_ring *ring = &eth->rx_ring[ring_no];
int i;
if (ring->data && ring->dma) {
@@ -1629,6 +1682,10 @@ static int mtk_dma_init(struct mtk_eth *eth)
if (err)
return err;
+ err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_QDMA);
+ if (err)
+ return err;
+
err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_NORMAL);
if (err)
return err;
@@ -1668,12 +1725,13 @@ static void mtk_dma_free(struct mtk_eth *eth)
eth->phy_scratch_ring = 0;
}
mtk_tx_clean(eth);
- mtk_rx_clean(eth, 0);
+ mtk_rx_clean(eth, &eth->rx_ring[0]);
+ mtk_rx_clean(eth, &eth->rx_ring_qdma);
if (eth->hwlro) {
mtk_hwlro_rx_uninit(eth);
for (i = 1; i < MTK_MAX_RX_RING_NUM; i++)
- mtk_rx_clean(eth, i);
+ mtk_rx_clean(eth, &eth->rx_ring[i]);
}
kfree(eth->scratch_head);
@@ -1740,7 +1798,9 @@ static int mtk_start_dma(struct mtk_eth *eth)
mtk_w32(eth,
MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
- MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO,
+ MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO |
+ MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
+ MTK_RX_BT_32DWORDS,
MTK_QDMA_GLO_CFG);
mtk_w32(eth,
@@ -1837,9 +1897,36 @@ static void ethsys_reset(struct mtk_eth *eth, u32 reset_bits)
mdelay(10);
}
+static void mtk_clk_disable(struct mtk_eth *eth)
+{
+ int clk;
+
+ for (clk = MTK_CLK_MAX - 1; clk >= 0; clk--)
+ clk_disable_unprepare(eth->clks[clk]);
+}
+
+static int mtk_clk_enable(struct mtk_eth *eth)
+{
+ int clk, ret;
+
+	for (clk = 0; clk < MTK_CLK_MAX; clk++) {
+ ret = clk_prepare_enable(eth->clks[clk]);
+ if (ret)
+ goto err_disable_clks;
+ }
+
+ return 0;
+
+err_disable_clks:
+ while (--clk >= 0)
+ clk_disable_unprepare(eth->clks[clk]);
+
+ return ret;
+}
+
static int mtk_hw_init(struct mtk_eth *eth)
{
- int i, val;
+ int i, val, ret;
if (test_and_set_bit(MTK_HW_INIT, &eth->state))
return 0;
@@ -1847,10 +1934,10 @@ static int mtk_hw_init(struct mtk_eth *eth)
pm_runtime_enable(eth->dev);
pm_runtime_get_sync(eth->dev);
- clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]);
- clk_prepare_enable(eth->clks[MTK_CLK_ESW]);
- clk_prepare_enable(eth->clks[MTK_CLK_GP1]);
- clk_prepare_enable(eth->clks[MTK_CLK_GP2]);
+ ret = mtk_clk_enable(eth);
+ if (ret)
+ goto err_disable_pm;
+
ethsys_reset(eth, RSTCTRL_FE);
ethsys_reset(eth, RSTCTRL_PPE);
@@ -1918,6 +2005,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
}
return 0;
+
+err_disable_pm:
+ pm_runtime_put_sync(eth->dev);
+ pm_runtime_disable(eth->dev);
+
+ return ret;
}
static int mtk_hw_deinit(struct mtk_eth *eth)
@@ -1925,10 +2018,7 @@ static int mtk_hw_deinit(struct mtk_eth *eth)
if (!test_and_clear_bit(MTK_HW_INIT, &eth->state))
return 0;
- clk_disable_unprepare(eth->clks[MTK_CLK_GP2]);
- clk_disable_unprepare(eth->clks[MTK_CLK_GP1]);
- clk_disable_unprepare(eth->clks[MTK_CLK_ESW]);
- clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]);
+ mtk_clk_disable(eth);
pm_runtime_put_sync(eth->dev);
pm_runtime_disable(eth->dev);
@@ -2395,6 +2485,7 @@ static int mtk_get_chip_id(struct mtk_eth *eth, u32 *chip_id)
static bool mtk_is_hwlro_supported(struct mtk_eth *eth)
{
switch (eth->chip_id) {
+ case MT7622_ETH:
case MT7623_ETH:
return true;
}
@@ -2406,6 +2497,7 @@ static int mtk_probe(struct platform_device *pdev)
{
struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
struct device_node *mac_np;
+ const struct of_device_id *match;
struct mtk_eth *eth;
int err;
int i;
@@ -2414,6 +2506,9 @@ static int mtk_probe(struct platform_device *pdev)
if (!eth)
return -ENOMEM;
+ match = of_match_device(of_mtk_match, &pdev->dev);
+ eth->soc = (struct mtk_soc_data *)match->data;
+
eth->dev = &pdev->dev;
eth->base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(eth->base))
@@ -2430,6 +2525,16 @@ static int mtk_probe(struct platform_device *pdev)
return PTR_ERR(eth->ethsys);
}
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
+ eth->sgmiisys =
+ syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "mediatek,sgmiisys");
+ if (IS_ERR(eth->sgmiisys)) {
+ dev_err(&pdev->dev, "no sgmiisys regmap found\n");
+ return PTR_ERR(eth->sgmiisys);
+ }
+ }
+
eth->pctl = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
"mediatek,pctl");
if (IS_ERR(eth->pctl)) {
@@ -2450,7 +2555,12 @@ static int mtk_probe(struct platform_device *pdev)
if (IS_ERR(eth->clks[i])) {
if (PTR_ERR(eth->clks[i]) == -EPROBE_DEFER)
return -EPROBE_DEFER;
- return -ENODEV;
+ if (eth->soc->required_clks & BIT(i)) {
+ dev_err(&pdev->dev, "clock %s not found\n",
+ mtk_clks_source_name[i]);
+ return -EINVAL;
+ }
+ eth->clks[i] = NULL;
}
}
@@ -2553,8 +2663,25 @@ static int mtk_remove(struct platform_device *pdev)
return 0;
}
+static const struct mtk_soc_data mt2701_data = {
+ .caps = MTK_GMAC1_TRGMII,
+ .required_clks = MT7623_CLKS_BITMAP
+};
+
+static const struct mtk_soc_data mt7622_data = {
+ .caps = MTK_DUAL_GMAC_SHARED_SGMII | MTK_GMAC1_ESW,
+ .required_clks = MT7622_CLKS_BITMAP
+};
+
+static const struct mtk_soc_data mt7623_data = {
+ .caps = MTK_GMAC1_TRGMII,
+ .required_clks = MT7623_CLKS_BITMAP
+};
+
const struct of_device_id of_mtk_match[] = {
- { .compatible = "mediatek,mt2701-eth" },
+ { .compatible = "mediatek,mt2701-eth", .data = &mt2701_data},
+ { .compatible = "mediatek,mt7622-eth", .data = &mt7622_data},
+ { .compatible = "mediatek,mt7623-eth", .data = &mt7623_data},
{},
};
MODULE_DEVICE_TABLE(of, of_mtk_match);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 5868a09..3d3c24a 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -302,6 +302,9 @@
#define PHY_IAC_REG_SHIFT 25
#define PHY_IAC_TIMEOUT HZ
+#define MTK_MAC_MISC 0x1000c
+#define MTK_MUX_TO_ESW BIT(0)
+
/* Mac control registers */
#define MTK_MAC_MCR(x) (0x10100 + (x * 0x100))
#define MAC_MCR_MAX_RX_1536 BIT(24)
@@ -357,11 +360,15 @@
#define ETHSYS_CHIPID0_3 0x0
#define ETHSYS_CHIPID4_7 0x4
#define MT7623_ETH 7623
+#define MT7622_ETH 7622
/* ethernet subsystem config register */
#define ETHSYS_SYSCFG0 0x14
#define SYSCFG0_GE_MASK 0x3
#define SYSCFG0_GE_MODE(x, y) (x << (12 + (y * 2)))
+#define SYSCFG0_SGMII_MASK (3 << 8)
+#define SYSCFG0_SGMII_GMAC1 ((2 << 8) & GENMASK(9, 8))
+#define SYSCFG0_SGMII_GMAC2 ((3 << 8) & GENMASK(9, 8))
/* ethernet subsystem clock register */
#define ETHSYS_CLKCFG0 0x2c
@@ -372,6 +379,23 @@
#define RSTCTRL_FE BIT(6)
#define RSTCTRL_PPE BIT(31)
+/* SGMII subsystem config registers */
+/* Register to restart auto-negotiation */
+#define SGMSYS_PCS_CONTROL_1 0x0
+#define SGMII_AN_RESTART BIT(9)
+
+/* Register for the programmable link timer; the unit is 2 * 8ns */
+#define SGMSYS_PCS_LINK_TIMER 0x18
+#define SGMII_LINK_TIMER_DEFAULT (0x186a0 & GENMASK(19, 0))
+
+/* Register to control remote fault */
+#define SGMSYS_SGMII_MODE 0x20
+#define SGMII_REMOTE_FAULT_DIS BIT(8)
+
+/* Register to power up QPHY */
+#define SGMSYS_QPHY_PWR_STATE_CTRL 0xe8
+#define SGMII_PHYA_PWD BIT(4)
+
struct mtk_rx_dma {
unsigned int rxd1;
unsigned int rxd2;
@@ -437,12 +461,31 @@ enum mtk_tx_flags {
enum mtk_clks_map {
MTK_CLK_ETHIF,
MTK_CLK_ESW,
+ MTK_CLK_GP0,
MTK_CLK_GP1,
MTK_CLK_GP2,
MTK_CLK_TRGPLL,
+ MTK_CLK_SGMII_TX_250M,
+ MTK_CLK_SGMII_RX_250M,
+ MTK_CLK_SGMII_CDR_REF,
+ MTK_CLK_SGMII_CDR_FB,
+ MTK_CLK_SGMII_CK,
+ MTK_CLK_ETH2PLL,
MTK_CLK_MAX
};
+#define MT7623_CLKS_BITMAP (BIT(MTK_CLK_ETHIF) | BIT(MTK_CLK_ESW) | \
+ BIT(MTK_CLK_GP1) | BIT(MTK_CLK_GP2) | \
+ BIT(MTK_CLK_TRGPLL))
+#define MT7622_CLKS_BITMAP (BIT(MTK_CLK_ETHIF) | BIT(MTK_CLK_ESW) | \
+ BIT(MTK_CLK_GP0) | BIT(MTK_CLK_GP1) | \
+ BIT(MTK_CLK_GP2) | \
+ BIT(MTK_CLK_SGMII_TX_250M) | \
+ BIT(MTK_CLK_SGMII_RX_250M) | \
+ BIT(MTK_CLK_SGMII_CDR_REF) | \
+ BIT(MTK_CLK_SGMII_CDR_FB) | \
+ BIT(MTK_CLK_SGMII_CK) | \
+ BIT(MTK_CLK_ETH2PLL))
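+
+/* Example of how these bitmaps are consumed in mtk_probe(): a clock that
+ * fails to look up is fatal only when its bit is set for the matched SoC.
+ * BIT(MTK_CLK_SGMII_CK) is absent from MT7623_CLKS_BITMAP, so on MT7623 a
+ * missing "sgmii_ck" clock is tolerated and its clks[] slot stays NULL,
+ * while a missing "trgpll" aborts the probe.
+ */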
enum mtk_dev_state {
MTK_HW_INIT,
MTK_RESETTING
@@ -489,6 +532,7 @@ struct mtk_tx_ring {
enum mtk_rx_flags {
MTK_RX_FLAGS_NORMAL = 0,
MTK_RX_FLAGS_HWLRO,
+ MTK_RX_FLAGS_QDMA,
};
/* struct mtk_rx_ring - This struct holds info describing a RX ring
@@ -511,6 +555,28 @@ struct mtk_rx_ring {
u32 crx_idx_reg;
};
+#define MTK_TRGMII BIT(0)
+#define MTK_GMAC1_TRGMII (BIT(1) | MTK_TRGMII)
+#define MTK_ESW BIT(4)
+#define MTK_GMAC1_ESW (BIT(5) | MTK_ESW)
+#define MTK_SGMII BIT(8)
+#define MTK_GMAC1_SGMII (BIT(9) | MTK_SGMII)
+#define MTK_GMAC2_SGMII (BIT(10) | MTK_SGMII)
+#define MTK_DUAL_GMAC_SHARED_SGMII (BIT(11) | MTK_GMAC1_SGMII | \
+ MTK_GMAC2_SGMII)
+#define MTK_HAS_CAPS(caps, _x) (((caps) & (_x)) == (_x))
+
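+/* Worked example (values from the defines above): an MT7622-like
+ * caps = MTK_DUAL_GMAC_SHARED_SGMII | MTK_GMAC1_ESW sets bits
+ * 4, 5, 8, 9, 10 and 11, so:
+ *   MTK_HAS_CAPS(caps, MTK_SGMII)        -> true  (bit 8 is included)
+ *   MTK_HAS_CAPS(caps, MTK_GMAC2_SGMII)  -> true  (bits 10 and 8 are set)
+ *   MTK_HAS_CAPS(caps, MTK_GMAC1_TRGMII) -> false (bits 0 and 1 are clear)
+ * The composite bits deliberately include their parent bits, so the
+ * test matches transitively.
+ */
+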
+/* struct mtk_soc_data - This is the structure holding all differences
+ *			  among various platforms
+ * @caps:		Flags showing the extra capabilities of the SoC
+ * @required_clks:	Bitmap of the clocks required on the target SoC
+ */
+struct mtk_soc_data {
+ u32 caps;
+ u32 required_clks;
+};
+
/* currently no SoC has more than 2 macs */
#define MTK_MAX_DEVS 2
@@ -529,11 +595,14 @@ struct mtk_rx_ring {
* @msg_enable: Ethtool msg level
* @ethsys: The register map pointing at the range used to setup
* MII modes
+ * @sgmiisys: The register map pointing at the range used to setup
+ * SGMII modes
* @pctl: The register map pointing at the range used to setup
* GMAC port drive/slew values
* @dma_refcnt: track how many netdevs are using the DMA engine
- * @tx_ring: Pointer to the memore holding info about the TX ring
- * @rx_ring: Pointer to the memore holding info about the RX ring
+ * @tx_ring: Pointer to the memory holding info about the TX ring
+ * @rx_ring: Pointer to the memory holding info about the RX ring
+ * @rx_ring_qdma: Pointer to the memory holding info about the QDMA RX ring
* @tx_napi: The TX NAPI struct
* @rx_napi: The RX NAPI struct
* @scratch_ring: Newer SoCs need memory for a second HW managed TX ring
@@ -542,7 +611,8 @@ struct mtk_rx_ring {
* @clks: clock array for all clocks required
* @mii_bus: If there is a bus we need to create an instance for it
* @pending_work: The workqueue used to reset the dma ring
- * @state Initialization and runtime state of the device.
+ * @state: Initialization and runtime state of the device
+ * @soc:	Holds the SoC-specific data
*/
struct mtk_eth {
@@ -558,12 +628,14 @@ struct mtk_eth {
u32 msg_enable;
unsigned long sysclk;
struct regmap *ethsys;
+ struct regmap *sgmiisys;
struct regmap *pctl;
u32 chip_id;
bool hwlro;
atomic_t dma_refcnt;
struct mtk_tx_ring tx_ring;
struct mtk_rx_ring rx_ring[MTK_MAX_RX_RING_NUM];
+ struct mtk_rx_ring rx_ring_qdma;
struct napi_struct tx_napi;
struct napi_struct rx_napi;
struct mtk_tx_dma *scratch_ring;
@@ -574,6 +646,8 @@ struct mtk_eth {
struct mii_bus *mii_bus;
struct work_struct pending_work;
unsigned long state;
+
+ const struct mtk_soc_data *soc;
};
/* struct mtk_mac - the structure that holds the info about the MACs of the
diff --git a/drivers/net/ethernet/mellanox/Kconfig b/drivers/net/ethernet/mellanox/Kconfig
index 84a2007..872548c 100644
--- a/drivers/net/ethernet/mellanox/Kconfig
+++ b/drivers/net/ethernet/mellanox/Kconfig
@@ -5,9 +5,10 @@
config NET_VENDOR_MELLANOX
bool "Mellanox devices"
default y
- depends on PCI
+ depends on PCI || I2C
---help---
- If you have a network (Ethernet) card belonging to this class, say Y.
+ If you have a network (Ethernet or RDMA) device belonging to this
+ class, say Y.
Note that the answer to this question doesn't directly affect the
kernel: saying N will just cause the configurator to skip all
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index b651c12..6dabd98 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -186,7 +186,7 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
bitmap->effective_len = bitmap->avail;
spin_lock_init(&bitmap->lock);
bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) *
- sizeof (long), GFP_KERNEL);
+ sizeof(long), GFP_KERNEL);
if (!bitmap->table)
return -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 78b89ce..6a9086d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1958,19 +1958,19 @@ static void mlx4_allocate_port_vpps(struct mlx4_dev *dev, int port)
int i;
int err;
int num_vfs;
- u16 availible_vpp;
+ u16 available_vpp;
u8 vpp_param[MLX4_NUM_UP];
struct mlx4_qos_manager *port_qos;
struct mlx4_priv *priv = mlx4_priv(dev);
- err = mlx4_ALLOCATE_VPP_get(dev, port, &availible_vpp, vpp_param);
+ err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param);
if (err) {
- mlx4_info(dev, "Failed query availible VPPs\n");
+ mlx4_info(dev, "Failed query available VPPs\n");
return;
}
port_qos = &priv->mfunc.master.qos_ctl[port];
- num_vfs = (availible_vpp /
+ num_vfs = (available_vpp /
bitmap_weight(port_qos->priority_bm, MLX4_NUM_UP));
for (i = 0; i < MLX4_NUM_UP; i++) {
@@ -1985,14 +1985,14 @@ static void mlx4_allocate_port_vpps(struct mlx4_dev *dev, int port)
}
/* Query actual allocated VPP, just to make sure */
- err = mlx4_ALLOCATE_VPP_get(dev, port, &availible_vpp, vpp_param);
+ err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param);
if (err) {
- mlx4_info(dev, "Failed query availible VPPs\n");
+ mlx4_info(dev, "Failed query available VPPs\n");
return;
}
port_qos->num_of_qos_vfs = num_vfs;
- mlx4_dbg(dev, "Port %d Availible VPPs %d\n", port, availible_vpp);
+ mlx4_dbg(dev, "Port %d Available VPPs %d\n", port, available_vpp);
for (i = 0; i < MLX4_NUM_UP; i++)
mlx4_dbg(dev, "Port %d UP %d Allocated %d VPPs\n", port, i,
@@ -2637,7 +2637,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev)
int err = 0;
priv->cmd.context = kmalloc(priv->cmd.max_cmds *
- sizeof (struct mlx4_cmd_context),
+ sizeof(struct mlx4_cmd_context),
GFP_KERNEL);
if (!priv->cmd.context)
return -ENOMEM;
@@ -2695,7 +2695,7 @@ struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
{
struct mlx4_cmd_mailbox *mailbox;
- mailbox = kmalloc(sizeof *mailbox, GFP_KERNEL);
+ mailbox = kmalloc(sizeof(*mailbox), GFP_KERNEL);
if (!mailbox)
return ERR_PTR(-ENOMEM);
@@ -2891,7 +2891,7 @@ static int mlx4_set_vport_qos(struct mlx4_priv *priv, int slave, int port,
memset(vpp_qos, 0, sizeof(struct mlx4_vport_qos_param) * MLX4_NUM_UP);
if (slave > port_qos->num_of_qos_vfs) {
- mlx4_info(dev, "No availible VPP resources for this VF\n");
+ mlx4_info(dev, "No available VPP resources for this VF\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index f849eec..1e487ac 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -209,12 +209,10 @@ int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
cq->moder_cnt, cq->moder_time);
}
-int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
+void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
{
mlx4_cq_arm(&cq->mcq, MLX4_CQ_DB_REQ_NOT, priv->mdev->uar_map,
&priv->mdev->uar_lock);
-
- return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 2b0cbca..686e18d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -147,7 +147,7 @@ void mlx4_en_update_loopback_state(struct net_device *dev,
mutex_unlock(&priv->mdev->state_lock);
}
-static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
+static void mlx4_en_get_profile(struct mlx4_en_dev *mdev)
{
struct mlx4_en_profile *params = &mdev->profile;
int i;
@@ -176,8 +176,6 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
params->prof[i].rss_rings = 0;
params->prof[i].inline_thold = inline_thold;
}
-
- return 0;
}
static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port)
@@ -309,10 +307,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
}
/* Build device profile according to supplied module parameters */
- if (mlx4_en_get_profile(mdev)) {
- mlx4_err(mdev, "Bad module parameters, aborting\n");
- goto err_mr;
- }
+ mlx4_en_get_profile(mdev);
/* Configure which ports to start according to module parameters */
mdev->port_cnt = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index e3e6d9f..9c218f1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -130,19 +130,20 @@ out:
return err;
}
-static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle,
- u32 chain_index, __be16 proto,
- struct tc_to_netdev *tc)
+static int __mlx4_en_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
{
- if (tc->type != TC_SETUP_MQPRIO)
- return -EINVAL;
+ struct tc_mqprio_qopt *mqprio = type_data;
+
+ if (type != TC_SETUP_MQPRIO)
+ return -EOPNOTSUPP;
- if (tc->mqprio->num_tc && tc->mqprio->num_tc != MLX4_EN_NUM_UP_HIGH)
+ if (mqprio->num_tc && mqprio->num_tc != MLX4_EN_NUM_UP_HIGH)
return -EINVAL;
- tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
- return mlx4_en_alloc_tx_queue_per_tc(dev, tc->mqprio->num_tc);
+ return mlx4_en_alloc_tx_queue_per_tc(dev, mqprio->num_tc);
}
#ifdef CONFIG_RFS_ACCEL
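The new ndo_setup_tc() convention seen in the hunk above dispatches on an enum tc_setup_type and receives an opaque type_data pointer, returning -EOPNOTSUPP for offload types the driver does not handle. A minimal sketch of a dispatcher in this style (driver names hypothetical):

	static int foo_setup_tc(struct net_device *dev, enum tc_setup_type type,
				void *type_data)
	{
		switch (type) {
		case TC_SETUP_MQPRIO:
			return foo_setup_mqprio(dev, type_data);
		default:
			return -EOPNOTSUPP;	/* unsupported, not a hard error */
		}
	}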
@@ -732,6 +733,21 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn,
return __mlx4_replace_mac(dev, priv->port, qpn, new_mac_u64);
}
+static void mlx4_en_update_user_mac(struct mlx4_en_priv *priv,
+ unsigned char new_mac[ETH_ALEN + 2])
+{
+ struct mlx4_en_dev *mdev = priv->mdev;
+ int err;
+
+ if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_USER_MAC_EN))
+ return;
+
+ err = mlx4_SET_PORT_user_mac(mdev->dev, priv->port, new_mac);
+ if (err)
+ en_err(priv, "Failed to pass user MAC(%pM) to Firmware for port %d, with error %d\n",
+ new_mac, priv->port, err);
+}
+
static int mlx4_en_do_set_mac(struct mlx4_en_priv *priv,
unsigned char new_mac[ETH_ALEN + 2])
{
@@ -766,8 +782,12 @@ static int mlx4_en_set_mac(struct net_device *dev, void *addr)
mutex_lock(&mdev->state_lock);
memcpy(new_mac, saddr->sa_data, ETH_ALEN);
err = mlx4_en_do_set_mac(priv, new_mac);
- if (!err)
- memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
+ if (err)
+ goto out;
+
+ memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN);
+ mlx4_en_update_user_mac(priv, new_mac);
+out:
mutex_unlock(&mdev->state_lock);
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index 86d2d42..5a47f96 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -44,7 +44,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
struct mlx4_en_dev *mdev = priv->mdev;
struct net_device *dev = priv->dev;
- memset(context, 0, sizeof *context);
+ memset(context, 0, sizeof(*context));
context->flags = cpu_to_be32(7 << 16 | rss << MLX4_RSS_QPC_FLAG_OFFSET);
context->pd = cpu_to_be32(mdev->priv_pdn);
context->mtu_msgmax = 0xff;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index ec24c40..b97a55c8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -1056,7 +1056,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
}
qp->event = mlx4_en_sqp_event;
- memset(context, 0, sizeof *context);
+ memset(context, 0, sizeof(*context));
mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0,
qpn, ring->cqn, -1, context);
context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index a81db25..8a32a8f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -644,7 +644,7 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
void *fragptr)
{
struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
- int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;
+ int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof(*inl);
unsigned int hlen = skb_headlen(skb);
if (skb->len <= spc) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 07406cf..6f57c05 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -259,7 +259,7 @@ int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port)
if (!s_slave->active)
return 0;
- memset(&eqe, 0, sizeof eqe);
+ memset(&eqe, 0, sizeof(eqe));
eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE;
@@ -276,7 +276,7 @@ int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port)
	/* don't send if we don't have that slave */
if (dev->persist->num_vfs < slave)
return 0;
- memset(&eqe, 0, sizeof eqe);
+ memset(&eqe, 0, sizeof(eqe));
eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO;
@@ -295,7 +295,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port,
	/* don't send if we don't have that slave */
if (dev->persist->num_vfs < slave)
return 0;
- memset(&eqe, 0, sizeof eqe);
+ memset(&eqe, 0, sizeof(eqe));
eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE;
eqe.subtype = port_subtype_change;
@@ -432,7 +432,7 @@ int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr)
{
struct mlx4_eqe eqe;
- memset(&eqe, 0, sizeof eqe);
+ memset(&eqe, 0, sizeof(eqe));
eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO;
@@ -726,7 +726,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
}
memcpy(&priv->mfunc.master.comm_arm_bit_vector,
eqe->event.comm_channel_arm.bit_vec,
- sizeof eqe->event.comm_channel_arm.bit_vec);
+ sizeof(eqe->event.comm_channel_arm.bit_vec));
queue_work(priv->mfunc.master.comm_wq,
&priv->mfunc.master.comm_work);
break;
@@ -984,15 +984,15 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
*/
npages = PAGE_ALIGN(eq->nent * dev->caps.eqe_size) / PAGE_SIZE;
- eq->page_list = kmalloc(npages * sizeof *eq->page_list,
- GFP_KERNEL);
+ eq->page_list = kmalloc_array(npages, sizeof(*eq->page_list),
+ GFP_KERNEL);
if (!eq->page_list)
goto err_out;
for (i = 0; i < npages; ++i)
eq->page_list[i].buf = NULL;
- dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ dma_list = kmalloc_array(npages, sizeof(*dma_list), GFP_KERNEL);
if (!dma_list)
goto err_out_free;
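kmalloc_array() is preferred over an open-coded kmalloc(n * size, ...) because it checks the multiplication for overflow and returns NULL rather than a short allocation. A contrived sketch of the failure mode it guards against (values hypothetical):

	/* On a 32-bit size_t, 0x40000000 * 8 wraps to 0; kmalloc_array()
	 * detects the overflow and returns NULL instead of succeeding
	 * with a zero-byte allocation. */
	void *p = kmalloc_array(0x40000000, 8, GFP_KERNEL);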
@@ -1161,7 +1161,7 @@ int mlx4_alloc_eq_table(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs,
- sizeof *priv->eq_table.eq, GFP_KERNEL);
+ sizeof(*priv->eq_table.eq), GFP_KERNEL);
if (!priv->eq_table.eq)
return -ENOMEM;
@@ -1180,7 +1180,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
int i;
priv->eq_table.uar_map = kcalloc(mlx4_num_eq_uar(dev),
- sizeof *priv->eq_table.uar_map,
+ sizeof(*priv->eq_table.uar_map),
GFP_KERNEL);
if (!priv->eq_table.uar_map) {
err = -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 041c0ed..16c0994 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -36,6 +36,7 @@
#include <linux/mlx4/cmd.h>
#include <linux/module.h>
#include <linux/cache.h>
+#include <linux/kernel.h>
#include "fw.h"
#include "icm.h"
@@ -57,7 +58,7 @@ MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)");
do { \
void *__p = (char *) (source) + (offset); \
u64 val; \
- switch (sizeof (dest)) { \
+ switch (sizeof(dest)) { \
case 1: (dest) = *(u8 *) __p; break; \
case 2: (dest) = be16_to_cpup(__p); break; \
case 4: (dest) = be32_to_cpup(__p); break; \
@@ -162,6 +163,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[35] = "Diag counters per port",
[36] = "QinQ VST mode support",
[37] = "sl to vl mapping table change event support",
+ [38] = "user MAC support",
};
int i;
@@ -679,22 +681,22 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,
if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) {
MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET);
- func_cap->qp0_qkey = qkey;
+ func_cap->spec_qps.qp0_qkey = qkey;
} else {
- func_cap->qp0_qkey = 0;
+ func_cap->spec_qps.qp0_qkey = 0;
}
MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL);
- func_cap->qp0_tunnel_qpn = size & 0xFFFFFF;
+ func_cap->spec_qps.qp0_tunnel = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY);
- func_cap->qp0_proxy_qpn = size & 0xFFFFFF;
+ func_cap->spec_qps.qp0_proxy = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL);
- func_cap->qp1_tunnel_qpn = size & 0xFFFFFF;
+ func_cap->spec_qps.qp1_tunnel = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY);
- func_cap->qp1_proxy_qpn = size & 0xFFFFFF;
+ func_cap->spec_qps.qp1_proxy = size & 0xFFFFFF;
if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_NIC_INFO)
MLX4_GET(func_cap->phys_port_id, outbox,
@@ -778,6 +780,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52
#define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55
#define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56
+#define QUERY_DEV_CAP_USER_MAC_EN_OFFSET 0x5C
#define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET 0x5D
#define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61
#define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62
@@ -949,6 +952,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET);
dev_cap->max_sq_desc_sz = size;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_USER_MAC_EN_OFFSET);
+ if (field & (1 << 2))
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_USER_MAC_EN;
MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET);
if (field & 0x1)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP;
@@ -1534,7 +1540,7 @@ int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt)
for (i = 0; i < mlx4_icm_size(&iter) >> lg; ++i) {
if (virt != -1) {
pages[nent * 2] = cpu_to_be64(virt);
- virt += 1 << lg;
+ virt += 1ULL << lg;
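+			/* 1ULL keeps the shift in 64 bits: with a plain int
+			 * literal, lg >= 31 overflows the 32-bit shift
+			 * before the result is widened to the u64 virt.
+			 */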
}
pages[nent * 2 + 1] =
@@ -2450,14 +2456,14 @@ int mlx4_config_dev_retrieval(struct mlx4_dev *dev,
csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT1_BIT_OFFSET) &
CONFIG_DEV_RX_CSUM_MODE_MASK;
- if (csum_mask >= sizeof(config_dev_csum_flags)/sizeof(config_dev_csum_flags[0]))
+ if (csum_mask >= ARRAY_SIZE(config_dev_csum_flags))
return -EINVAL;
params->rx_csum_flags_port_1 = config_dev_csum_flags[csum_mask];
csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET) &
CONFIG_DEV_RX_CSUM_MODE_MASK;
- if (csum_mask >= sizeof(config_dev_csum_flags)/sizeof(config_dev_csum_flags[0]))
+ if (csum_mask >= ARRAY_SIZE(config_dev_csum_flags))
return -EINVAL;
params->rx_csum_flags_port_2 = config_dev_csum_flags[csum_mask];
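Beyond brevity, the kernel's ARRAY_SIZE() also adds a compile-time check (via __must_be_array()) that its argument really is an array, so the sizeof division can never be applied to a decayed pointer by mistake. A small illustration:

	int tbl[8];
	int *p = tbl;

	ARRAY_SIZE(tbl);	/* 8 */
	ARRAY_SIZE(p);		/* build error: p is a pointer, not an array */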
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index b52ba01..cd6399c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -144,11 +144,7 @@ struct mlx4_func_cap {
int max_eq;
int reserved_eq;
int mcg_quota;
- u32 qp0_qkey;
- u32 qp0_tunnel_qpn;
- u32 qp0_proxy_qpn;
- u32 qp1_tunnel_qpn;
- u32 qp1_proxy_qpn;
+ struct mlx4_spec_qps spec_qps;
u32 reserved_lkey;
u8 physical_port;
u8 flags0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
index 8f2fde0..3a09d71 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c
@@ -65,7 +65,7 @@ struct mlx4_set_port_scheduler_context {
/* Granular Qos (per VF) section */
struct mlx4_alloc_vpp_param {
- __be32 availible_vpp;
+ __be32 available_vpp;
__be32 vpp_p_up[MLX4_NUM_UP];
};
@@ -157,7 +157,7 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER);
int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port,
- u16 *availible_vpp, u8 *vpp_p_up)
+ u16 *available_vpp, u8 *vpp_p_up)
{
int i;
int err;
@@ -179,7 +179,7 @@ int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port,
goto out;
/* Total number of supported VPPs */
- *availible_vpp = (u16)be32_to_cpu(out_param->availible_vpp);
+ *available_vpp = (u16)be32_to_cpu(out_param->available_vpp);
for (i = 0; i < MLX4_NUM_UP; i++)
vpp_p_up[i] = (u8)be32_to_cpu(out_param->vpp_p_up[i]);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
index ac1f331..58299757 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h
@@ -84,23 +84,23 @@ int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc);
int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
u8 *pg, u16 *ratelimit);
/**
- * mlx4_ALLOCATE_VPP_get - Query port VPP availible resources and allocation.
- * Before distribution of VPPs to priorities, only availible_vpp is returned.
+ * mlx4_ALLOCATE_VPP_get - Query port VPP available resources and allocation.
+ * Before distribution of VPPs to priorities, only available_vpp is returned.
* After initialization it returns the distribution of VPPs among priorities.
*
* @dev: mlx4_dev.
* @port: Physical port number.
- * @availible_vpp: Pointer to variable where number of availible VPPs is stored
+ * @available_vpp: Pointer to variable where number of available VPPs is stored
* @vpp_p_up: Distribution of VPPs to priorities is stored in this array
*
* Returns 0 on success or a negative mlx4_core errno code.
**/
int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port,
- u16 *availible_vpp, u8 *vpp_p_up);
+ u16 *available_vpp, u8 *vpp_p_up);
/**
 * mlx4_ALLOCATE_VPP_set - Distribution of VPPs among different priorities.
 * The total number of VPPs assigned across all priorities for a port must
 * not exceed
- * the value reported by availible_vpp in mlx4_ALLOCATE_VPP_get.
+ * the value reported by available_vpp in mlx4_ALLOCATE_VPP_get.
* VPP allocation is allowed only after the port type has been set,
* and while no QPs are open for this port.
*
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 5a7816e..a822f7a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -400,7 +400,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
- table->icm = kcalloc(num_icm, sizeof *table->icm, GFP_KERNEL);
+ table->icm = kcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL);
if (!table->icm)
return -ENOMEM;
table->virt = virt;
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h
index dee67fa..c9169a4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.h
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.h
@@ -39,8 +39,8 @@
#include <linux/mutex.h>
#define MLX4_ICM_CHUNK_LEN \
- ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \
- (sizeof (struct scatterlist)))
+ ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
+ (sizeof(struct scatterlist)))
enum {
MLX4_ICM_PAGE_SHIFT = 12,
diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c
index e00f627..2edcce9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/intf.c
+++ b/drivers/net/ethernet/mellanox/mlx4/intf.c
@@ -53,7 +53,7 @@ static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv)
{
struct mlx4_device_context *dev_ctx;
- dev_ctx = kmalloc(sizeof *dev_ctx, GFP_KERNEL);
+ dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL);
if (!dev_ctx)
return;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index a594bfd..e61c99e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -34,6 +34,7 @@
*/
#include <linux/module.h>
+#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
@@ -121,7 +122,7 @@ static char mlx4_version[] =
DRV_NAME ": Mellanox ConnectX core driver v"
DRV_VERSION "\n";
-static struct mlx4_profile default_profile = {
+static const struct mlx4_profile default_profile = {
.num_qp = 1 << 18,
.num_srq = 1 << 16,
.rdmarc_per_qp = 1 << 4,
@@ -131,7 +132,7 @@ static struct mlx4_profile default_profile = {
.num_mtt = 1 << 20, /* It is really num mtt segements */
};
-static struct mlx4_profile low_mem_profile = {
+static const struct mlx4_profile low_mem_profile = {
.num_qp = 1 << 17,
.num_srq = 1 << 6,
.rdmarc_per_qp = 1 << 4,
@@ -819,38 +820,93 @@ static void slave_adjust_steering_mode(struct mlx4_dev *dev,
mlx4_steering_mode_str(dev->caps.steering_mode));
}
+static void mlx4_slave_destroy_special_qp_cap(struct mlx4_dev *dev)
+{
+ kfree(dev->caps.spec_qps);
+ dev->caps.spec_qps = NULL;
+}
+
+static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev)
+{
+ struct mlx4_func_cap *func_cap = NULL;
+ struct mlx4_caps *caps = &dev->caps;
+ int i, err = 0;
+
+ func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
+ caps->spec_qps = kcalloc(caps->num_ports, sizeof(*caps->spec_qps), GFP_KERNEL);
+
+ if (!func_cap || !caps->spec_qps) {
+ mlx4_err(dev, "Failed to allocate memory for special qps cap\n");
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ for (i = 1; i <= caps->num_ports; ++i) {
+ err = mlx4_QUERY_FUNC_CAP(dev, i, func_cap);
+ if (err) {
+ mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
+ i, err);
+ goto err_mem;
+ }
+ caps->spec_qps[i - 1] = func_cap->spec_qps;
+ caps->port_mask[i] = caps->port_type[i];
+ caps->phys_port_id[i] = func_cap->phys_port_id;
+ err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
+ &caps->gid_table_len[i],
+ &caps->pkey_table_len[i]);
+ if (err) {
+ mlx4_err(dev, "QUERY_PORT command failed for port %d, aborting (%d)\n",
+ i, err);
+ goto err_mem;
+ }
+ }
+
+err_mem:
+ if (err)
+ mlx4_slave_destroy_special_qp_cap(dev);
+ kfree(func_cap);
+ return err;
+}
+
static int mlx4_slave_cap(struct mlx4_dev *dev)
{
int err;
u32 page_size;
- struct mlx4_dev_cap dev_cap;
- struct mlx4_func_cap func_cap;
- struct mlx4_init_hca_param hca_param;
- u8 i;
+ struct mlx4_dev_cap *dev_cap = NULL;
+ struct mlx4_func_cap *func_cap = NULL;
+ struct mlx4_init_hca_param *hca_param = NULL;
+
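+	/* These query structs are fairly large, so they are allocated
+	 * from the heap rather than the stack to keep this function's
+	 * frame size small.
+	 */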
+ hca_param = kzalloc(sizeof(*hca_param), GFP_KERNEL);
+ func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
+ dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
+ if (!hca_param || !func_cap || !dev_cap) {
+ mlx4_err(dev, "Failed to allocate memory for slave_cap\n");
+ err = -ENOMEM;
+ goto free_mem;
+ }
- memset(&hca_param, 0, sizeof(hca_param));
- err = mlx4_QUERY_HCA(dev, &hca_param);
+ err = mlx4_QUERY_HCA(dev, hca_param);
if (err) {
mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
- return err;
+ goto free_mem;
}
/* fail if the hca has an unknown global capability
* at this time global_caps should be always zeroed
*/
- if (hca_param.global_caps) {
+ if (hca_param->global_caps) {
mlx4_err(dev, "Unknown hca global capabilities\n");
- return -EINVAL;
+ err = -EINVAL;
+ goto free_mem;
}
- dev->caps.hca_core_clock = hca_param.hca_core_clock;
+ dev->caps.hca_core_clock = hca_param->hca_core_clock;
- memset(&dev_cap, 0, sizeof(dev_cap));
- dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
- err = mlx4_dev_cap(dev, &dev_cap);
+ dev->caps.max_qp_dest_rdma = 1 << hca_param->log_rd_per_qp;
+ err = mlx4_dev_cap(dev, dev_cap);
if (err) {
mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
- return err;
+ goto free_mem;
}
err = mlx4_QUERY_FW(dev);
@@ -862,21 +918,23 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
if (page_size > PAGE_SIZE) {
mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
page_size, PAGE_SIZE);
- return -ENODEV;
+ err = -ENODEV;
+ goto free_mem;
}
/* Set uar_page_shift for VF */
- dev->uar_page_shift = hca_param.uar_page_sz + 12;
+ dev->uar_page_shift = hca_param->uar_page_sz + 12;
/* Make sure the master uar page size is valid */
if (dev->uar_page_shift > PAGE_SHIFT) {
mlx4_err(dev,
"Invalid configuration: uar page size is larger than system page size\n");
- return -ENODEV;
+ err = -ENODEV;
+ goto free_mem;
}
/* Set reserved_uars based on the uar_page_shift */
- mlx4_set_num_reserved_uars(dev, &dev_cap);
+ mlx4_set_num_reserved_uars(dev, dev_cap);
/* Although uar page size in FW differs from system page size,
* upper software layers (mlx4_ib, mlx4_en and part of mlx4_core)
@@ -884,34 +942,35 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
*/
dev->caps.uar_page_size = PAGE_SIZE;
- memset(&func_cap, 0, sizeof(func_cap));
- err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
+ err = mlx4_QUERY_FUNC_CAP(dev, 0, func_cap);
if (err) {
mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
err);
- return err;
+ goto free_mem;
}
- if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
+ if ((func_cap->pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
PF_CONTEXT_BEHAVIOUR_MASK) {
mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
- func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK);
- return -EINVAL;
- }
-
- dev->caps.num_ports = func_cap.num_ports;
- dev->quotas.qp = func_cap.qp_quota;
- dev->quotas.srq = func_cap.srq_quota;
- dev->quotas.cq = func_cap.cq_quota;
- dev->quotas.mpt = func_cap.mpt_quota;
- dev->quotas.mtt = func_cap.mtt_quota;
- dev->caps.num_qps = 1 << hca_param.log_num_qps;
- dev->caps.num_srqs = 1 << hca_param.log_num_srqs;
- dev->caps.num_cqs = 1 << hca_param.log_num_cqs;
- dev->caps.num_mpts = 1 << hca_param.log_mpt_sz;
- dev->caps.num_eqs = func_cap.max_eq;
- dev->caps.reserved_eqs = func_cap.reserved_eq;
- dev->caps.reserved_lkey = func_cap.reserved_lkey;
+ func_cap->pf_context_behaviour,
+ PF_CONTEXT_BEHAVIOUR_MASK);
+ err = -EINVAL;
+ goto free_mem;
+ }
+
+ dev->caps.num_ports = func_cap->num_ports;
+ dev->quotas.qp = func_cap->qp_quota;
+ dev->quotas.srq = func_cap->srq_quota;
+ dev->quotas.cq = func_cap->cq_quota;
+ dev->quotas.mpt = func_cap->mpt_quota;
+ dev->quotas.mtt = func_cap->mtt_quota;
+ dev->caps.num_qps = 1 << hca_param->log_num_qps;
+ dev->caps.num_srqs = 1 << hca_param->log_num_srqs;
+ dev->caps.num_cqs = 1 << hca_param->log_num_cqs;
+ dev->caps.num_mpts = 1 << hca_param->log_mpt_sz;
+ dev->caps.num_eqs = func_cap->max_eq;
+ dev->caps.reserved_eqs = func_cap->reserved_eq;
+ dev->caps.reserved_lkey = func_cap->reserved_lkey;
dev->caps.num_pds = MLX4_NUM_PDS;
dev->caps.num_mgms = 0;
dev->caps.num_amgms = 0;
@@ -919,43 +978,16 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
if (dev->caps.num_ports > MLX4_MAX_PORTS) {
mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
dev->caps.num_ports, MLX4_MAX_PORTS);
- return -ENODEV;
+ err = -ENODEV;
+ goto free_mem;
}
mlx4_replace_zero_macs(dev);
- dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
- dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
- dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
- dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
- dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
-
- if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
- !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
- !dev->caps.qp0_qkey) {
- err = -ENOMEM;
- goto err_mem;
- }
-
- for (i = 1; i <= dev->caps.num_ports; ++i) {
- err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap);
- if (err) {
- mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
- i, err);
- goto err_mem;
- }
- dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey;
- dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
- dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
- dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
- dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
- dev->caps.port_mask[i] = dev->caps.port_type[i];
- dev->caps.phys_port_id[i] = func_cap.phys_port_id;
- err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
- &dev->caps.gid_table_len[i],
- &dev->caps.pkey_table_len[i]);
- if (err)
- goto err_mem;
+ err = mlx4_slave_special_qp_cap(dev);
+ if (err) {
+		mlx4_err(dev, "Set special QP caps failed, aborting\n");
+ goto free_mem;
}
if (dev->caps.uar_page_size * (dev->caps.num_uars -
@@ -970,7 +1002,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
goto err_mem;
}
- if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
+ if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
dev->caps.eqe_size = 64;
dev->caps.eqe_factor = 1;
} else {
@@ -978,20 +1010,20 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
dev->caps.eqe_factor = 0;
}
- if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
+ if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
dev->caps.cqe_size = 64;
dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
} else {
dev->caps.cqe_size = 32;
}
- if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
- dev->caps.eqe_size = hca_param.eqe_size;
+ if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
+ dev->caps.eqe_size = hca_param->eqe_size;
dev->caps.eqe_factor = 0;
}
- if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
- dev->caps.cqe_size = hca_param.cqe_size;
+ if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
+ dev->caps.cqe_size = hca_param->cqe_size;
/* User still need to know when CQE > 32B */
dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
}
@@ -999,31 +1031,27 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
mlx4_warn(dev, "Timestamping is not supported in slave mode\n");
- slave_adjust_steering_mode(dev, &dev_cap, &hca_param);
+ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_USER_MAC_EN;
+ mlx4_dbg(dev, "User MAC FW update is not supported in slave mode\n");
+
+ slave_adjust_steering_mode(dev, dev_cap, hca_param);
mlx4_dbg(dev, "RSS support for IP fragments is %s\n",
- hca_param.rss_ip_frags ? "on" : "off");
+ hca_param->rss_ip_frags ? "on" : "off");
- if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
+ if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
dev->caps.bf_reg_size)
dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;
- if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
+ if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;
- return 0;
-
err_mem:
- kfree(dev->caps.qp0_qkey);
- kfree(dev->caps.qp0_tunnel);
- kfree(dev->caps.qp0_proxy);
- kfree(dev->caps.qp1_tunnel);
- kfree(dev->caps.qp1_proxy);
- dev->caps.qp0_qkey = NULL;
- dev->caps.qp0_tunnel = NULL;
- dev->caps.qp0_proxy = NULL;
- dev->caps.qp1_tunnel = NULL;
- dev->caps.qp1_proxy = NULL;
-
+ if (err)
+ mlx4_slave_destroy_special_qp_cap(dev);
+free_mem:
+ kfree(hca_param);
+ kfree(func_cap);
+ kfree(dev_cap);
return err;
}
@@ -2399,7 +2427,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
}
priv->eq_table.inta_pin = adapter.inta_pin;
- memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
+ memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));
return 0;
@@ -2407,13 +2435,8 @@ unmap_bf:
unmap_internal_clock(dev);
unmap_bf_area(dev);
- if (mlx4_is_slave(dev)) {
- kfree(dev->caps.qp0_qkey);
- kfree(dev->caps.qp0_tunnel);
- kfree(dev->caps.qp0_proxy);
- kfree(dev->caps.qp1_tunnel);
- kfree(dev->caps.qp1_proxy);
- }
+ if (mlx4_is_slave(dev))
+ mlx4_slave_destroy_special_qp_cap(dev);
err_close:
if (mlx4_is_slave(dev))
@@ -2870,7 +2893,7 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
dev->caps.num_eqs - dev->caps.reserved_eqs,
MAX_MSIX);
- entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
+ entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
if (!entries)
goto no_msi;
@@ -3597,13 +3620,8 @@ err_master_mfunc:
mlx4_multi_func_cleanup(dev);
}
- if (mlx4_is_slave(dev)) {
- kfree(dev->caps.qp0_qkey);
- kfree(dev->caps.qp0_tunnel);
- kfree(dev->caps.qp0_proxy);
- kfree(dev->caps.qp1_tunnel);
- kfree(dev->caps.qp1_proxy);
- }
+ if (mlx4_is_slave(dev))
+ mlx4_slave_destroy_special_qp_cap(dev);
err_close:
mlx4_close_hca(dev);
@@ -3659,7 +3677,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
	 * per port, we must limit the number of VFs to 63 (since there are
* 128 MACs)
*/
- for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
+ for (i = 0; i < ARRAY_SIZE(nvfs) && i < num_vfs_argc;
total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
if (nvfs[i] < 0) {
@@ -3668,7 +3686,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
goto err_disable_pdev;
}
}
- for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
+ for (i = 0; i < ARRAY_SIZE(prb_vf) && i < probe_vfs_argc;
i++) {
prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
@@ -3747,11 +3765,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
if (total_vfs) {
unsigned vfs_offset = 0;
- for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
+ for (i = 0; i < ARRAY_SIZE(nvfs) &&
vfs_offset + nvfs[i] < extended_func_num(pdev);
vfs_offset += nvfs[i], i++)
;
- if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
+ if (i == ARRAY_SIZE(nvfs)) {
err = -ENODEV;
goto err_release_regions;
}
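
The sizeof(x)/sizeof(x[0]) conversions in these hunks use the kernel's ARRAY_SIZE() helper from include/linux/kernel.h, which adds a compile-time check that its argument really is an array and not a pointer:

	#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))

Passing a pointer to the open-coded form silently yields a wrong element count; __must_be_array() turns that mistake into a build error.
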
@@ -3783,7 +3801,6 @@ err_release_regions:
err_disable_pdev:
mlx4_pci_disable_device(&priv->dev);
- pci_set_drvdata(pdev, NULL);
return err;
}
@@ -3944,11 +3961,7 @@ static void mlx4_unload_one(struct pci_dev *pdev)
if (!mlx4_is_slave(dev))
mlx4_free_ownership(dev);
- kfree(dev->caps.qp0_qkey);
- kfree(dev->caps.qp0_tunnel);
- kfree(dev->caps.qp0_proxy);
- kfree(dev->caps.qp1_tunnel);
- kfree(dev->caps.qp1_proxy);
+ mlx4_slave_destroy_special_qp_cap(dev);
kfree(dev->dev_vfs);
mlx4_clean_dev(dev);
@@ -3998,7 +4011,6 @@ static void mlx4_remove_one(struct pci_dev *pdev)
devlink_unregister(devlink);
kfree(dev->persist);
devlink_free(devlink);
- pci_set_drvdata(pdev, NULL);
}
static int restore_current_port_types(struct mlx4_dev *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 0710b36..4c5306d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -162,7 +162,7 @@ static int new_steering_entry(struct mlx4_dev *dev, u8 port,
return -EINVAL;
s_steer = &mlx4_priv(dev)->steer[port - 1];
- new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL);
+ new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
if (!new_entry)
return -ENOMEM;
@@ -175,7 +175,7 @@ static int new_steering_entry(struct mlx4_dev *dev, u8 port,
*/
pqp = get_promisc_qp(dev, port, steer, qpn);
if (pqp) {
- dqp = kmalloc(sizeof *dqp, GFP_KERNEL);
+ dqp = kmalloc(sizeof(*dqp), GFP_KERNEL);
if (!dqp) {
err = -ENOMEM;
goto out_alloc;
@@ -274,7 +274,7 @@ static int existing_steering_entry(struct mlx4_dev *dev, u8 port,
}
/* add the qp as a duplicate on this index */
- dqp = kmalloc(sizeof *dqp, GFP_KERNEL);
+ dqp = kmalloc(sizeof(*dqp), GFP_KERNEL);
if (!dqp)
return -ENOMEM;
dqp->qpn = qpn;
@@ -443,7 +443,7 @@ static int add_promisc_qp(struct mlx4_dev *dev, u8 port,
goto out_mutex;
}
- pqp = kmalloc(sizeof *pqp, GFP_KERNEL);
+ pqp = kmalloc(sizeof(*pqp), GFP_KERNEL);
if (!pqp) {
err = -ENOMEM;
goto out_mutex;
@@ -514,7 +514,7 @@ static int add_promisc_qp(struct mlx4_dev *dev, u8 port,
/* add the new qpn to list of promisc qps */
list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]);
/* now need to add all the promisc qps to default entry */
- memset(mgm, 0, sizeof *mgm);
+ memset(mgm, 0, sizeof(*mgm));
members_count = 0;
list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) {
if (members_count == dev->caps.num_qp_per_mgm) {
@@ -1144,7 +1144,7 @@ int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
index += dev->caps.num_mgms;
new_entry = 1;
- memset(mgm, 0, sizeof *mgm);
+ memset(mgm, 0, sizeof(*mgm));
memcpy(mgm->gid, gid, 16);
}
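
The mcg.c hunks are purely stylistic: checkpatch.pl prefers sizeof with parentheses, and the sizeof(*ptr) form stays correct if the pointee's type ever changes. A tiny illustration with a hypothetical struct name:

	struct foo *entry;	/* hypothetical type, for illustration only */

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	/* vs. kzalloc(sizeof(struct foo), GFP_KERNEL): the sizeof(*entry)
	 * form keeps allocating the right amount even if entry is later
	 * redeclared as a different type. */
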
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 852d00a..c68da19 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -807,6 +807,8 @@ struct mlx4_set_port_general_context {
u8 phv_en;
u8 reserved6[5];
__be16 user_mtu;
+ u16 reserved7;
+ u8 user_mac[6];
};
struct mlx4_set_port_rqp_calc_context {
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index d350b21..fdb3ad0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -685,7 +685,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
int cq_idx);
void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
-int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
+void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
void mlx4_en_tx_irq(struct mlx4_cq *mcq);
u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 24282cd..c7c0764 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -106,9 +106,9 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
- buddy->bits = kcalloc(buddy->max_order + 1, sizeof (long *),
+ buddy->bits = kcalloc(buddy->max_order + 1, sizeof(long *),
GFP_KERNEL);
- buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
+ buddy->num_free = kcalloc(buddy->max_order + 1, sizeof(*buddy->num_free),
GFP_KERNEL);
if (!buddy->bits || !buddy->num_free)
goto err_out;
@@ -703,13 +703,13 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
return -ENOMEM;
dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle,
- npages * sizeof (u64), DMA_TO_DEVICE);
+ npages * sizeof(u64), DMA_TO_DEVICE);
for (i = 0; i < npages; ++i)
mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle,
- npages * sizeof (u64), DMA_TO_DEVICE);
+ npages * sizeof(u64), DMA_TO_DEVICE);
return 0;
}
@@ -1052,7 +1052,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
return -EINVAL;
/* All MTTs must fit in the same page */
- if (max_pages * sizeof *fmr->mtts > PAGE_SIZE)
+ if (max_pages * sizeof(*fmr->mtts) > PAGE_SIZE)
return -EINVAL;
fmr->page_shift = page_shift;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 4e36e28..3ef3406 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -52,6 +52,7 @@
#define MLX4_FLAG2_V_IGNORE_FCS_MASK BIT(1)
#define MLX4_FLAG2_V_USER_MTU_MASK BIT(5)
+#define MLX4_FLAG2_V_USER_MAC_MASK BIT(6)
#define MLX4_FLAG_V_MTU_MASK BIT(0)
#define MLX4_FLAG_V_PPRX_MASK BIT(1)
#define MLX4_FLAG_V_PPTX_MASK BIT(2)
@@ -1700,6 +1701,30 @@ int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu)
}
EXPORT_SYMBOL(mlx4_SET_PORT_user_mtu);
+int mlx4_SET_PORT_user_mac(struct mlx4_dev *dev, u8 port, u8 *user_mac)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_set_port_general_context *context;
+ u32 in_mod;
+ int err;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ context = mailbox->buf;
+ context->flags2 |= MLX4_FLAG2_V_USER_MAC_MASK;
+ memcpy(context->user_mac, user_mac, sizeof(context->user_mac));
+
+ in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+ err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
+ MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_user_mac);
+
int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value)
{
struct mlx4_cmd_mailbox *mailbox;
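
The new mlx4_SET_PORT_user_mac() export mirrors mlx4_SET_PORT_user_mtu() just above it: fill the general port context, set the FLAG2 user-MAC bit, and issue SET_PORT as a native command (so only the PF issues it). A hedged caller sketch — the mlx4_en MAC-change path is the likely consumer, but the call site is not part of this diff, and the priv/mdev names follow mlx4_en conventions:

	u8 new_mac[ETH_ALEN];
	int err;

	ether_addr_copy(new_mac, dev->dev_addr);
	err = mlx4_SET_PORT_user_mac(mdev->dev, priv->port, new_mac);
	if (err)
		en_err(priv, "Failed to pass user MAC to firmware, port %d\n",
		       priv->port);
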
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 5e5b447..728a2fb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -174,7 +174,7 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn));
*(__be32 *) mailbox->buf = cpu_to_be32(optpar);
- memcpy(mailbox->buf + 8, context, sizeof *context);
+ memcpy(mailbox->buf + 8, context, sizeof(*context));
((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn =
cpu_to_be32(qp->qpn);
@@ -845,24 +845,21 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
/* In mfunc, calculate proxy and tunnel qp offsets for the PF here,
	 * since the PF does not call mlx4_slave_cap() */
- dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
- dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
- dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
- dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
-
- if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
- !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
+ dev->caps.spec_qps = kcalloc(dev->caps.num_ports,
+ sizeof(*dev->caps.spec_qps),
+ GFP_KERNEL);
+ if (!dev->caps.spec_qps) {
err = -ENOMEM;
goto err_mem;
}
for (k = 0; k < dev->caps.num_ports; k++) {
- dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn +
+ dev->caps.spec_qps[k].qp0_proxy = dev->phys_caps.base_proxy_sqpn +
8 * mlx4_master_func_num(dev) + k;
- dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX;
- dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn +
+ dev->caps.spec_qps[k].qp0_tunnel = dev->caps.spec_qps[k].qp0_proxy + 8 * MLX4_MFUNC_MAX;
+ dev->caps.spec_qps[k].qp1_proxy = dev->phys_caps.base_proxy_sqpn +
8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k;
- dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX;
+ dev->caps.spec_qps[k].qp1_tunnel = dev->caps.spec_qps[k].qp1_proxy + 8 * MLX4_MFUNC_MAX;
}
}
@@ -874,12 +871,8 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
return err;
err_mem:
- kfree(dev->caps.qp0_tunnel);
- kfree(dev->caps.qp0_proxy);
- kfree(dev->caps.qp1_tunnel);
- kfree(dev->caps.qp1_proxy);
- dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
- dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
+ kfree(dev->caps.spec_qps);
+ dev->caps.spec_qps = NULL;
mlx4_cleanup_qp_zones(dev);
return err;
}
@@ -908,7 +901,7 @@ int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_WRAPPED);
if (!err)
- memcpy(context, mailbox->buf + 8, sizeof *context);
+ memcpy(context, mailbox->buf + 8, sizeof(*context));
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
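
The qp.c rework collapses the four parallel qp0/qp1 proxy and tunnel arrays into a single per-port array, dev->caps.spec_qps. Judging by the field accesses above, the element type is presumably along these lines (declared in the mlx4 headers, which are not shown in this diff):

	struct mlx4_spec_qps {
		u32 qp0_qkey;
		u32 qp0_proxy;
		u32 qp0_tunnel;
		u32 qp1_proxy;
		u32 qp1_tunnel;
	};

One kcalloc()/kfree() pair then replaces five, which is what lets the repeated five-line cleanup blocks earlier in the diff shrink to a single mlx4_slave_destroy_special_qp_cap() call.
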
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 215e21c..fabb533 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1040,7 +1040,7 @@ static struct res_common *alloc_qp_tr(int id)
{
struct res_qp *ret;
- ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
@@ -1058,7 +1058,7 @@ static struct res_common *alloc_mtt_tr(int id, int order)
{
struct res_mtt *ret;
- ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
@@ -1074,7 +1074,7 @@ static struct res_common *alloc_mpt_tr(int id, int key)
{
struct res_mpt *ret;
- ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
@@ -1089,7 +1089,7 @@ static struct res_common *alloc_eq_tr(int id)
{
struct res_eq *ret;
- ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
@@ -1103,7 +1103,7 @@ static struct res_common *alloc_cq_tr(int id)
{
struct res_cq *ret;
- ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
@@ -1118,7 +1118,7 @@ static struct res_common *alloc_srq_tr(int id)
{
struct res_srq *ret;
- ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
@@ -1133,7 +1133,7 @@ static struct res_common *alloc_counter_tr(int id, int port)
{
struct res_counter *ret;
- ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
@@ -1148,7 +1148,7 @@ static struct res_common *alloc_xrcdn_tr(int id)
{
struct res_xrcdn *ret;
- ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
@@ -1162,7 +1162,7 @@ static struct res_common *alloc_fs_rule_tr(u64 id, int qpn)
{
struct res_fs_rule *ret;
- ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
@@ -1274,7 +1274,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count,
struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
struct rb_root *root = &tracker->res_tree[type];
- res_arr = kzalloc(count * sizeof *res_arr, GFP_KERNEL);
+ res_arr = kcalloc(count, sizeof(*res_arr), GFP_KERNEL);
if (!res_arr)
return -ENOMEM;
@@ -2027,7 +2027,7 @@ static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port,
if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port))
return -EINVAL;
- res = kzalloc(sizeof *res, GFP_KERNEL);
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
if (!res) {
mlx4_release_resource(dev, slave, RES_MAC, 1, port);
return -ENOMEM;
@@ -4020,7 +4020,7 @@ static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp,
struct res_gid *res;
int err;
- res = kzalloc(sizeof *res, GFP_KERNEL);
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
if (!res)
return -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 5aee059..fdaef00 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -34,6 +34,27 @@ config MLX5_CORE_EN
---help---
Ethernet support in Mellanox Technologies ConnectX-4 NIC.
+config MLX5_MPFS
+ bool "Mellanox Technologies MLX5 MPFS support"
+ depends on MLX5_CORE_EN
+ default y
+ ---help---
+ Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS)
+	  support in ConnectX NICs. MPFS is required when multi-PF configuration
+	  is enabled, to allow passing user-configured unicast MAC addresses to
+	  the requesting PF.
+
+config MLX5_ESWITCH
+ bool "Mellanox Technologies MLX5 SRIOV E-Switch support"
+ depends on MLX5_CORE_EN
+ default y
+ ---help---
+	  Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NICs.
+	  The E-Switch provides internal SRIOV packet steering and switching for
+	  the enabled VFs and the PF, in two available modes:
+	  Legacy SRIOV mode (L2 MAC/VLAN steering based).
+	  Switchdev mode (e-switch offloads).
+
config MLX5_CORE_EN_DCB
bool "Data Center Bridging (DCB) Support"
default y
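
Both new options follow the usual pattern of compiling optional code out entirely while leaving callers unchanged: the header is expected to provide static inline no-op stubs when the option is off. A sketch of what lib/mpfs.h presumably provides (the stub bodies are an assumption, not shown in this diff):

	#ifdef CONFIG_MLX5_MPFS
	int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac);
	int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac);
	#else
	static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
	{ return 0; }
	static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
	{ return 0; }
	#endif
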
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 9d17e4e..87a3099 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -4,17 +4,21 @@ subdir-ccflags-y += -I$(src)
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
- fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o
+ fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \
+ diag/fs_tracepoint.o
mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o
mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
fpga/ipsec.o
-mlx5_core-$(CONFIG_MLX5_CORE_EN) += eswitch.o eswitch_offloads.o \
- en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
- en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
- en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
+ en_tx.o en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
+ en_arfs.o en_fs_ethtool.o en_selftest.o
+
+mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
+
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o en_rep.o en_tc.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o
@@ -22,3 +26,5 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o
mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
en_accel/ipsec_stats.o
+
+CFLAGS_tracepoint.o := -I$(src)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 3c95f7f..47239bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -258,6 +258,7 @@ EXPORT_SYMBOL_GPL(mlx5_db_alloc);
void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
{
u32 db_per_page = PAGE_SIZE / cache_line_size();
+
mutex_lock(&dev->priv.pgdir_mutex);
__set_bit(db->index, db->u.pgdir->bitmap);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 1acbb72..1fffdeb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -802,7 +802,6 @@ static void cmd_work_handler(struct work_struct *work)
bool poll_cmd = ent->polling;
int alloc_ret;
-
sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
down(sem);
if (!ent->page_queue) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index a62f4b6..ff60cf7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -45,11 +45,70 @@ struct mlx5_device_context {
unsigned long state;
};
+struct mlx5_delayed_event {
+ struct list_head list;
+ struct mlx5_core_dev *dev;
+ enum mlx5_dev_event event;
+ unsigned long param;
+};
+
enum {
MLX5_INTERFACE_ADDED,
MLX5_INTERFACE_ATTACHED,
};
+static void add_delayed_event(struct mlx5_priv *priv,
+ struct mlx5_core_dev *dev,
+ enum mlx5_dev_event event,
+ unsigned long param)
+{
+ struct mlx5_delayed_event *delayed_event;
+
+ delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC);
+ if (!delayed_event) {
+		mlx5_core_err(dev, "event %d is lost\n", event);
+ return;
+ }
+
+ mlx5_core_dbg(dev, "Accumulating event %d\n", event);
+ delayed_event->dev = dev;
+ delayed_event->event = event;
+ delayed_event->param = param;
+ list_add_tail(&delayed_event->list, &priv->waiting_events_list);
+}
+
+static void fire_delayed_event_locked(struct mlx5_device_context *dev_ctx,
+ struct mlx5_core_dev *dev,
+ struct mlx5_priv *priv)
+{
+ struct mlx5_delayed_event *de;
+ struct mlx5_delayed_event *n;
+
+ /* stop delaying events */
+ priv->is_accum_events = false;
+
+	/* fire all accumulated events before the new event is delivered */
+ list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) {
+ dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
+ list_del(&de->list);
+ kfree(de);
+ }
+}
+
+static void cleanup_delayed_events(struct mlx5_priv *priv)
+{
+ struct mlx5_delayed_event *de;
+ struct mlx5_delayed_event *n;
+
+ spin_lock_irq(&priv->ctx_lock);
+ priv->is_accum_events = false;
+ list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) {
+ list_del(&de->list);
+ kfree(de);
+ }
+ spin_unlock_irq(&priv->ctx_lock);
+}
+
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
@@ -63,6 +122,12 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
return;
dev_ctx->intf = intf;
+	/* Accumulate events that may arrive after mlx5_ib calls
+	 * ib_register_device(), until this interface is added to the
+	 * events list.
+	 */
+
+ priv->is_accum_events = true;
+
dev_ctx->context = intf->add(dev);
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
if (intf->attach)
@@ -71,6 +136,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
if (dev_ctx->context) {
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
+
+ fire_delayed_event_locked(dev_ctx, dev, priv);
+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (dev_ctx->intf->pfault) {
if (priv->pfault) {
@@ -84,6 +152,8 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
spin_unlock_irq(&priv->ctx_lock);
} else {
kfree(dev_ctx);
+ /* delete all accumulated events */
+		cleanup_delayed_events(priv);
}
}
@@ -341,6 +411,9 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
spin_lock_irqsave(&priv->ctx_lock, flags);
+ if (priv->is_accum_events)
+ add_delayed_event(priv, dev, event, param);
+
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf->event)
dev_ctx->intf->event(dev, dev_ctx->context, event, param);
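
Taken together, the dev.c changes close a window in which events raised between intf->add() (where mlx5_ib registers its IB device) and the ctx_list insertion were simply lost. The shape of the fix, reduced to its essentials (a sketch mirroring the hunks above):

	priv->is_accum_events = true;		/* queue instead of deliver */
	dev_ctx->context = intf->add(dev);
	...
	spin_lock_irq(&priv->ctx_lock);
	list_add_tail(&dev_ctx->list, &priv->ctx_list);
	fire_delayed_event_locked(dev_ctx, dev, priv);	/* replay the backlog */
	spin_unlock_irq(&priv->ctx_lock);

Because both add_delayed_event() (called from mlx5_core_event() under ctx_lock) and the replay run under the same lock, the interface being added sees each event once, in arrival order.
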
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile
new file mode 100644
index 0000000..d8e1711
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
new file mode 100644
index 0000000..0be4575
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include "fs_tracepoint.h"
+#include <linux/stringify.h>
+
+#define DECLARE_MASK_VAL(type, name) struct {type m; type v; } name
+#define MASK_VAL(type, spec, name, mask, val, fld) \
+ DECLARE_MASK_VAL(type, name) = \
+ {.m = MLX5_GET(spec, mask, fld),\
+ .v = MLX5_GET(spec, val, fld)}
+#define MASK_VAL_BE(type, spec, name, mask, val, fld) \
+ DECLARE_MASK_VAL(type, name) = \
+ {.m = MLX5_GET_BE(type, spec, mask, fld),\
+ .v = MLX5_GET_BE(type, spec, val, fld)}
+#define GET_MASKED_VAL(name) (name.m & name.v)
+
+#define GET_MASK_VAL(name, type, mask, val, fld) \
+ (name.m = MLX5_GET(type, mask, fld), \
+ name.v = MLX5_GET(type, val, fld), \
+ name.m & name.v)
+#define PRINT_MASKED_VAL(name, p, format) { \
+ if (name.m) \
+ trace_seq_printf(p, __stringify(name) "=" format " ", name.v); \
+ }
+#define PRINT_MASKED_VALP(name, cast, p, format) { \
+ if (name.m) \
+ trace_seq_printf(p, __stringify(name) "=" format " ", \
+ (cast)&name.v);\
+ }
+
+static void print_lyr_2_4_hdrs(struct trace_seq *p,
+ const u32 *mask, const u32 *value)
+{
+#define MASK_VAL_L2(type, name, fld) \
+ MASK_VAL(type, fte_match_set_lyr_2_4, name, mask, value, fld)
+ DECLARE_MASK_VAL(u64, smac) = {
+ .m = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0),
+ .v = MLX5_GET(fte_match_set_lyr_2_4, value, smac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, value, smac_15_0)};
+ DECLARE_MASK_VAL(u64, dmac) = {
+ .m = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0),
+ .v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 |
+ MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)};
+ MASK_VAL_L2(u16, ethertype, ethertype);
+
+ PRINT_MASKED_VALP(smac, u8 *, p, "%pM");
+ PRINT_MASKED_VALP(dmac, u8 *, p, "%pM");
+ PRINT_MASKED_VAL(ethertype, p, "%04x");
+
+ if (ethertype.m == 0xffff) {
+ if (ethertype.v == ETH_P_IP) {
+#define MASK_VAL_L2_BE(type, name, fld) \
+ MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld)
+ MASK_VAL_L2_BE(u32, src_ipv4,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MASK_VAL_L2_BE(u32, dst_ipv4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p,
+ "%pI4");
+ PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p,
+ "%pI4");
+ } else if (ethertype.v == ETH_P_IPV6) {
+ static const struct in6_addr full_ones = {
+ .in6_u.u6_addr32 = {htonl(0xffffffff),
+ htonl(0xffffffff),
+ htonl(0xffffffff),
+ htonl(0xffffffff)},
+ };
+ DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
+ DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
+
+ memcpy(src_ipv6.m.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(src_ipv6.m));
+ memcpy(dst_ipv6.m.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(dst_ipv6.m));
+ memcpy(src_ipv6.v.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ sizeof(src_ipv6.v));
+ memcpy(dst_ipv6.v.in6_u.u6_addr8,
+ MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ sizeof(dst_ipv6.v));
+
+ if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones)))
+ trace_seq_printf(p, "src_ipv6=%pI6 ",
+ &src_ipv6.v);
+ if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones)))
+ trace_seq_printf(p, "dst_ipv6=%pI6 ",
+ &dst_ipv6.v);
+ }
+ }
+
+#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\
+ MASK_VAL_L2(type, name, fld); \
+ PRINT_MASKED_VAL(name, p, format); \
+}
+
+ PRINT_MASKED_VAL_L2(u8, ip_protocol, ip_protocol, p, "%02x");
+ PRINT_MASKED_VAL_L2(u16, tcp_flags, tcp_flags, p, "%x");
+ PRINT_MASKED_VAL_L2(u16, tcp_sport, tcp_sport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, tcp_dport, tcp_dport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, udp_sport, udp_sport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, udp_dport, udp_dport, p, "%u");
+ PRINT_MASKED_VAL_L2(u16, first_vid, first_vid, p, "%04x");
+ PRINT_MASKED_VAL_L2(u8, first_prio, first_prio, p, "%x");
+ PRINT_MASKED_VAL_L2(u8, first_cfi, first_cfi, p, "%d");
+ PRINT_MASKED_VAL_L2(u8, ip_dscp, ip_dscp, p, "%02x");
+ PRINT_MASKED_VAL_L2(u8, ip_ecn, ip_ecn, p, "%x");
+ PRINT_MASKED_VAL_L2(u8, cvlan_tag, cvlan_tag, p, "%d");
+ PRINT_MASKED_VAL_L2(u8, svlan_tag, svlan_tag, p, "%d");
+ PRINT_MASKED_VAL_L2(u8, frag, frag, p, "%d");
+}
+
+static void print_misc_parameters_hdrs(struct trace_seq *p,
+ const u32 *mask, const u32 *value)
+{
+#define MASK_VAL_MISC(type, name, fld) \
+ MASK_VAL(type, fte_match_set_misc, name, mask, value, fld)
+#define PRINT_MASKED_VAL_MISC(type, name, fld, p, format) {\
+ MASK_VAL_MISC(type, name, fld); \
+ PRINT_MASKED_VAL(name, p, format); \
+}
+ DECLARE_MASK_VAL(u64, gre_key) = {
+ .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 |
+ MLX5_GET(fte_match_set_misc, mask, gre_key_l),
+ .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 |
+ MLX5_GET(fte_match_set_misc, value, gre_key_l)};
+
+ PRINT_MASKED_VAL(gre_key, p, "%llu");
+ PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u");
+ PRINT_MASKED_VAL_MISC(u16, source_port, source_port, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, outer_second_prio, outer_second_prio,
+ p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, outer_second_cfi, outer_second_cfi, p, "%u");
+ PRINT_MASKED_VAL_MISC(u16, outer_second_vid, outer_second_vid, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_prio, inner_second_prio,
+ p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_cfi, inner_second_cfi, p, "%u");
+ PRINT_MASKED_VAL_MISC(u16, inner_second_vid, inner_second_vid, p, "%u");
+
+ PRINT_MASKED_VAL_MISC(u8, outer_second_cvlan_tag,
+ outer_second_cvlan_tag, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_cvlan_tag,
+ inner_second_cvlan_tag, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, outer_second_svlan_tag,
+ outer_second_svlan_tag, p, "%u");
+ PRINT_MASKED_VAL_MISC(u8, inner_second_svlan_tag,
+ inner_second_svlan_tag, p, "%u");
+
+ PRINT_MASKED_VAL_MISC(u8, gre_protocol, gre_protocol, p, "%u");
+
+ PRINT_MASKED_VAL_MISC(u32, vxlan_vni, vxlan_vni, p, "%u");
+ PRINT_MASKED_VAL_MISC(u32, outer_ipv6_flow_label, outer_ipv6_flow_label,
+ p, "%x");
+ PRINT_MASKED_VAL_MISC(u32, inner_ipv6_flow_label, inner_ipv6_flow_label,
+ p, "%x");
+}
+
+const char *parse_fs_hdrs(struct trace_seq *p,
+ u8 match_criteria_enable,
+ const u32 *mask_outer,
+ const u32 *mask_misc,
+ const u32 *mask_inner,
+ const u32 *value_outer,
+ const u32 *value_misc,
+ const u32 *value_inner)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
+ trace_seq_printf(p, "[outer] ");
+ print_lyr_2_4_hdrs(p, mask_outer, value_outer);
+ }
+
+ if (match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
+ trace_seq_printf(p, "[misc] ");
+ print_misc_parameters_hdrs(p, mask_misc, value_misc);
+ }
+ if (match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
+ trace_seq_printf(p, "[inner] ");
+ print_lyr_2_4_hdrs(p, mask_inner, value_inner);
+ }
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+const char *parse_fs_dst(struct trace_seq *p,
+ const struct mlx5_flow_destination *dst,
+ u32 counter_id)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ switch (dst->type) {
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ trace_seq_printf(p, "vport=%u\n", dst->vport_num);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ trace_seq_printf(p, "ft=%p\n", dst->ft);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_TIR:
+ trace_seq_printf(p, "tir=%u\n", dst->tir_num);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_COUNTER:
+ trace_seq_printf(p, "counter_id=%u\n", counter_id);
+ break;
+ }
+
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fte);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_rule);
+EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_rule);
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
new file mode 100644
index 0000000..1e3a6c3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(_MLX5_FS_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_FS_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+#include "../fs_core.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#define __parse_fs_hdrs(match_criteria_enable, mouter, mmisc, minner, vouter, \
+ vinner, vmisc) \
+ parse_fs_hdrs(p, match_criteria_enable, mouter, mmisc, minner, vouter,\
+ vinner, vmisc)
+
+const char *parse_fs_hdrs(struct trace_seq *p,
+ u8 match_criteria_enable,
+ const u32 *mask_outer,
+ const u32 *mask_misc,
+ const u32 *mask_inner,
+ const u32 *value_outer,
+ const u32 *value_misc,
+ const u32 *value_inner);
+
+#define __parse_fs_dst(dst, counter_id) \
+ parse_fs_dst(p, (const struct mlx5_flow_destination *)dst, counter_id)
+
+const char *parse_fs_dst(struct trace_seq *p,
+ const struct mlx5_flow_destination *dst,
+ u32 counter_id);
+
+TRACE_EVENT(mlx5_fs_add_fg,
+ TP_PROTO(const struct mlx5_flow_group *fg),
+ TP_ARGS(fg),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_group *, fg)
+ __field(const struct mlx5_flow_table *, ft)
+ __field(u32, start_index)
+ __field(u32, end_index)
+ __field(u32, id)
+ __field(u8, mask_enable)
+ __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc))
+ ),
+ TP_fast_assign(
+ __entry->fg = fg;
+ fs_get_obj(__entry->ft, fg->node.parent);
+ __entry->start_index = fg->start_index;
+ __entry->end_index = fg->start_index + fg->max_ftes;
+ __entry->id = fg->id;
+ __entry->mask_enable = fg->mask.match_criteria_enable;
+ memcpy(__entry->mask_outer,
+ MLX5_ADDR_OF(fte_match_param,
+ &fg->mask.match_criteria,
+ outer_headers),
+ sizeof(__entry->mask_outer));
+ memcpy(__entry->mask_inner,
+ MLX5_ADDR_OF(fte_match_param,
+ &fg->mask.match_criteria,
+ inner_headers),
+ sizeof(__entry->mask_inner));
+ memcpy(__entry->mask_misc,
+ MLX5_ADDR_OF(fte_match_param,
+ &fg->mask.match_criteria,
+ misc_parameters),
+ sizeof(__entry->mask_misc));
+
+ ),
+ TP_printk("fg=%p ft=%p id=%u start=%u end=%u bit_mask=%02x %s\n",
+ __entry->fg, __entry->ft, __entry->id,
+ __entry->start_index, __entry->end_index,
+ __entry->mask_enable,
+ __parse_fs_hdrs(__entry->mask_enable,
+ __entry->mask_outer,
+ __entry->mask_misc,
+ __entry->mask_inner,
+ __entry->mask_outer,
+ __entry->mask_misc,
+ __entry->mask_inner))
+ );
+
+TRACE_EVENT(mlx5_fs_del_fg,
+ TP_PROTO(const struct mlx5_flow_group *fg),
+ TP_ARGS(fg),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_group *, fg)
+ __field(u32, id)
+ ),
+ TP_fast_assign(
+ __entry->fg = fg;
+ __entry->id = fg->id;
+
+ ),
+ TP_printk("fg=%p id=%u\n",
+ __entry->fg, __entry->id)
+ );
+
+#define ACTION_FLAGS \
+ {MLX5_FLOW_CONTEXT_ACTION_ALLOW, "ALLOW"},\
+ {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\
+ {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\
+ {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\
+ {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"}
+
+TRACE_EVENT(mlx5_fs_set_fte,
+ TP_PROTO(const struct fs_fte *fte, bool new_fte),
+ TP_ARGS(fte, new_fte),
+ TP_STRUCT__entry(
+ __field(const struct fs_fte *, fte)
+ __field(const struct mlx5_flow_group *, fg)
+ __field(u32, group_index)
+ __field(u32, index)
+ __field(u32, action)
+ __field(u32, flow_tag)
+ __field(u8, mask_enable)
+ __field(bool, new_fte)
+ __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc))
+ __array(u32, value_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, value_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4))
+ __array(u32, value_misc, MLX5_ST_SZ_DW(fte_match_set_misc))
+ ),
+ TP_fast_assign(
+ __entry->fte = fte;
+ __entry->new_fte = new_fte;
+ fs_get_obj(__entry->fg, fte->node.parent);
+ __entry->group_index = __entry->fg->id;
+ __entry->index = fte->index;
+ __entry->action = fte->action;
+ __entry->mask_enable = __entry->fg->mask.match_criteria_enable;
+ __entry->flow_tag = fte->flow_tag;
+ memcpy(__entry->mask_outer,
+ MLX5_ADDR_OF(fte_match_param,
+ &__entry->fg->mask.match_criteria,
+ outer_headers),
+ sizeof(__entry->mask_outer));
+ memcpy(__entry->mask_inner,
+ MLX5_ADDR_OF(fte_match_param,
+ &__entry->fg->mask.match_criteria,
+ inner_headers),
+ sizeof(__entry->mask_inner));
+ memcpy(__entry->mask_misc,
+ MLX5_ADDR_OF(fte_match_param,
+ &__entry->fg->mask.match_criteria,
+ misc_parameters),
+ sizeof(__entry->mask_misc));
+ memcpy(__entry->value_outer,
+ MLX5_ADDR_OF(fte_match_param,
+ &fte->val,
+ outer_headers),
+ sizeof(__entry->value_outer));
+ memcpy(__entry->value_inner,
+ MLX5_ADDR_OF(fte_match_param,
+ &fte->val,
+ inner_headers),
+ sizeof(__entry->value_inner));
+ memcpy(__entry->value_misc,
+ MLX5_ADDR_OF(fte_match_param,
+ &fte->val,
+ misc_parameters),
+ sizeof(__entry->value_misc));
+
+ ),
+ TP_printk("op=%s fte=%p fg=%p index=%u group_index=%u action=<%s> flow_tag=%x %s\n",
+ __entry->new_fte ? "add" : "set",
+ __entry->fte, __entry->fg, __entry->index,
+ __entry->group_index, __print_flags(__entry->action, "|",
+ ACTION_FLAGS),
+ __entry->flow_tag,
+ __parse_fs_hdrs(__entry->mask_enable,
+ __entry->mask_outer,
+ __entry->mask_misc,
+ __entry->mask_inner,
+ __entry->value_outer,
+ __entry->value_misc,
+ __entry->value_inner))
+ );
+
+TRACE_EVENT(mlx5_fs_del_fte,
+ TP_PROTO(const struct fs_fte *fte),
+ TP_ARGS(fte),
+ TP_STRUCT__entry(
+ __field(const struct fs_fte *, fte)
+ __field(u32, index)
+ ),
+ TP_fast_assign(
+ __entry->fte = fte;
+ __entry->index = fte->index;
+
+ ),
+ TP_printk("fte=%p index=%u\n",
+ __entry->fte, __entry->index)
+ );
+
+TRACE_EVENT(mlx5_fs_add_rule,
+ TP_PROTO(const struct mlx5_flow_rule *rule),
+ TP_ARGS(rule),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_rule *, rule)
+ __field(const struct fs_fte *, fte)
+ __field(u32, sw_action)
+ __field(u32, index)
+ __field(u32, counter_id)
+ __array(u8, destination, sizeof(struct mlx5_flow_destination))
+ ),
+ TP_fast_assign(
+ __entry->rule = rule;
+ fs_get_obj(__entry->fte, rule->node.parent);
+ __entry->index = __entry->fte->dests_size - 1;
+ __entry->sw_action = rule->sw_action;
+ memcpy(__entry->destination,
+ &rule->dest_attr,
+ sizeof(__entry->destination));
+ if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
+ rule->dest_attr.counter)
+ __entry->counter_id =
+ rule->dest_attr.counter->id;
+ ),
+ TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n",
+ __entry->rule, __entry->fte, __entry->index,
+ __print_flags(__entry->sw_action, "|", ACTION_FLAGS),
+ __parse_fs_dst(__entry->destination, __entry->counter_id))
+ );
+
+TRACE_EVENT(mlx5_fs_del_rule,
+ TP_PROTO(const struct mlx5_flow_rule *rule),
+ TP_ARGS(rule),
+ TP_STRUCT__entry(
+ __field(const struct mlx5_flow_rule *, rule)
+ __field(const struct fs_fte *, fte)
+ ),
+ TP_fast_assign(
+ __entry->rule = rule;
+ fs_get_obj(__entry->fte, rule->node.parent);
+ ),
+ TP_printk("rule=%p fte=%p\n",
+ __entry->rule, __entry->fte)
+ );
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE fs_tracepoint
+#include <trace/define_trace.h>
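
Each TRACE_EVENT() above expands into a trace_<name>() static inline that is a near-free no-op until the event is enabled. A hedged sketch of how fs_core.c (the presumed call site, not shown in this diff) would instrument flow-group creation — the -I$(src) added in the Makefile is what makes the include path resolve:

	#include "diag/fs_tracepoint.h"

	/* after the flow group is successfully created */
	trace_mlx5_fs_add_fg(fg);
	...
	/* and on teardown */
	trace_mlx5_fs_del_fg(fg);

The EXPORT_TRACEPOINT_SYMBOL() lines in fs_tracepoint.c make these tracepoints attachable from other modules as well.
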
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 040d1af..cc13d3d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -291,10 +291,11 @@ struct mlx5e_tstamp {
enum {
MLX5E_RQ_STATE_ENABLED,
- MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
MLX5E_RQ_STATE_AM,
};
+#define MLX5E_TEST_BIT(state, nr) (state & BIT(nr))
+
struct mlx5e_cq {
/* data path - accessed per cqe */
struct mlx5_cqwq wq;
@@ -342,7 +343,6 @@ enum {
struct mlx5e_sq_wqe_info {
u8 opcode;
- u8 num_wqebbs;
};
struct mlx5e_txqsq {
@@ -418,13 +418,8 @@ struct mlx5e_xdpsq {
struct mlx5e_icosq {
/* data path */
- /* dirtied @completion */
- u16 cc;
-
/* dirtied @xmit */
u16 pc ____cacheline_aligned_in_smp;
- u32 dma_fifo_pc;
- u16 prev_cc;
struct mlx5e_cq cq;
@@ -438,7 +433,6 @@ struct mlx5e_icosq {
void __iomem *uar_map;
u32 sqn;
u16 edge;
- struct device *pdev;
__be32 mkey_be;
unsigned long state;
@@ -507,7 +501,7 @@ struct mlx5e_rx_am { /* Adaptive Moderation */
*/
#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
-#define MLX5E_CACHE_SIZE (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
+#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
struct mlx5e_page_cache {
u32 head;
u32 tail;
@@ -516,7 +510,7 @@ struct mlx5e_page_cache {
struct mlx5e_rq;
typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
-typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq*, struct mlx5e_rx_wqe*, u16);
+typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
struct mlx5e_rq {
@@ -527,21 +521,26 @@ struct mlx5e_rq {
struct {
struct mlx5e_wqe_frag_info *frag_info;
u32 frag_sz; /* max possible skb frag_sz */
- bool page_reuse;
- bool xdp_xmit;
+ union {
+ bool page_reuse;
+ bool xdp_xmit;
+ };
} wqe;
struct {
struct mlx5e_mpw_info *info;
void *mtt_no_align;
+ u16 num_strides;
+ u8 log_stride_sz;
+ bool umr_in_progress;
} mpwqe;
};
struct {
+ u16 headroom;
u8 page_order;
- u32 wqe_sz; /* wqe data buffer size */
u8 map_dir; /* dma map direction */
} buff;
- __be32 mkey_be;
+ struct mlx5e_channel *channel;
struct device *pdev;
struct net_device *netdev;
struct mlx5e_tstamp *tstamp;
@@ -550,12 +549,11 @@ struct mlx5e_rq {
struct mlx5e_page_cache page_cache;
mlx5e_fp_handle_rx_cqe handle_rx_cqe;
- mlx5e_fp_alloc_wqe alloc_wqe;
+ mlx5e_fp_post_rx_wqes post_wqes;
mlx5e_fp_dealloc_wqe dealloc_wqe;
unsigned long state;
int ix;
- u16 rx_headroom;
struct mlx5e_rx_am am; /* Adaptive Moderation */
@@ -565,19 +563,13 @@ struct mlx5e_rq {
/* control */
struct mlx5_wq_ctrl wq_ctrl;
+ __be32 mkey_be;
u8 wq_type;
- u32 mpwqe_stride_sz;
- u32 mpwqe_num_strides;
u32 rqn;
- struct mlx5e_channel *channel;
struct mlx5_core_dev *mdev;
struct mlx5_core_mkey umr_mkey;
} ____cacheline_aligned_in_smp;
-enum channel_flags {
- MLX5E_CHANNEL_NAPI_SCHED = 1,
-};
-
struct mlx5e_channel {
/* data path */
struct mlx5e_rq rq;
@@ -589,7 +581,9 @@ struct mlx5e_channel {
struct net_device *netdev;
__be32 mkey_be;
u8 num_tc;
- unsigned long flags;
+
+ /* data path - accessed per napi poll */
+ struct irq_desc *irq_desc;
/* control */
struct mlx5e_priv *priv;
@@ -620,6 +614,12 @@ enum mlx5e_traffic_types {
MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
};
+enum mlx5e_tunnel_types {
+ MLX5E_TT_IPV4_GRE,
+ MLX5E_TT_IPV6_GRE,
+ MLX5E_NUM_TUNNEL_TT,
+};
+
enum {
MLX5E_STATE_ASYNC_EVENTS_ENABLED,
MLX5E_STATE_OPENED,
@@ -679,6 +679,7 @@ struct mlx5e_l2_table {
struct mlx5e_ttc_table {
struct mlx5e_flow_table ft;
struct mlx5_flow_handle *rules[MLX5E_NUM_TT];
+ struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT];
};
#define ARFS_HASH_SHIFT BITS_PER_BYTE
@@ -711,6 +712,7 @@ enum {
MLX5E_VLAN_FT_LEVEL = 0,
MLX5E_L2_FT_LEVEL,
MLX5E_TTC_FT_LEVEL,
+ MLX5E_INNER_TTC_FT_LEVEL,
MLX5E_ARFS_FT_LEVEL
};
@@ -736,6 +738,7 @@ struct mlx5e_flow_steering {
struct mlx5e_vlan_table vlan;
struct mlx5e_l2_table l2;
struct mlx5e_ttc_table ttc;
+ struct mlx5e_ttc_table inner_ttc;
struct mlx5e_arfs_tables arfs;
};
@@ -769,6 +772,7 @@ struct mlx5e_priv {
u32 tisn[MLX5E_MAX_NUM_TC];
struct mlx5e_rqt indir_rqt;
struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
+ struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS];
u32 tx_rates[MLX5E_MAX_NUM_SQS];
int hard_mtu;
@@ -839,11 +843,9 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
-int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
-int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
+bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq);
void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
-void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq);
void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
void mlx5e_rx_am(struct mlx5e_rq *rq);
@@ -903,7 +905,7 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
struct mlx5e_redirect_rqt_param rrp);
void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
enum mlx5e_traffic_types tt,
- void *tirc);
+ void *tirc, bool inner);
int mlx5e_open_locked(struct net_device *netdev);
int mlx5e_close_locked(struct net_device *netdev);
@@ -922,8 +924,7 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
-void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
- u32 *indirection_rqt, int len,
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
int num_channels);
int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
@@ -932,6 +933,12 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params, u8 rq_type);
+static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+{
+ return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
+}
+
static inline
struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index f559401..d12e9fc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -176,7 +176,6 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv)
int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
{
-
switch (sset) {
case ETH_SS_STATS:
return NUM_SW_COUNTERS +
@@ -207,7 +206,7 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset)
return mlx5e_ethtool_get_sset_count(priv, sset);
}
-static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
+static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data)
{
int i, j, tc, prio, idx = 0;
unsigned long pfc_combined;
@@ -242,10 +241,22 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
pport_phy_statistical_stats_desc[i].format);
+ for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_eth_ext_stats_desc[i].format);
+
for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
pcie_perf_stats_desc[i].format);
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pcie_perf_stats_desc64[i].format);
+
+ for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pcie_perf_stall_stats_desc[i].format);
+
for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
sprintf(data + (idx++) * ETH_GSTRING_LEN,
@@ -297,8 +308,7 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
priv->channel_tc2txq[i][tc]);
}
-void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv,
- uint32_t stringset, uint8_t *data)
+void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data)
{
int i;
@@ -320,8 +330,7 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv,
}
}
-static void mlx5e_get_strings(struct net_device *dev,
- uint32_t stringset, uint8_t *data)
+static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -373,10 +382,22 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
pport_phy_statistical_stats_desc, i);
+ for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters,
+ pport_eth_ext_stats_desc, i);
+
for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++)
data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
pcie_perf_stats_desc, i);
+ for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++)
+ data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters,
+ pcie_perf_stats_desc64, i);
+
+ for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++)
+ data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
+ pcie_perf_stall_stats_desc, i);
+
for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
@@ -642,8 +663,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
new_channels.params = priv->channels.params;
new_channels.params.num_channels = count;
if (!netif_is_rxfh_configured(priv->netdev))
- mlx5e_build_default_indir_rqt(priv->mdev,
- new_channels.params.indirection_rqt,
+ mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt,
MLX5E_INDIR_RQT_SIZE, count);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
@@ -966,24 +986,27 @@ static u8 get_connector_port(u32 eth_proto, u8 connector_type)
if (connector_type && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER)
return ptys2connector_type[connector_type];
- if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_SR)
- | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4)
- | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4)
- | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) {
- return PORT_FIBRE;
+ if (eth_proto &
+ (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) |
+ MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) |
+ MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) |
+ MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) {
+ return PORT_FIBRE;
}
- if (eth_proto & (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4)
- | MLX5E_PROT_MASK(MLX5E_10GBASE_CR)
- | MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) {
- return PORT_DA;
+ if (eth_proto &
+ (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) |
+ MLX5E_PROT_MASK(MLX5E_10GBASE_CR) |
+ MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) {
+ return PORT_DA;
}
- if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4)
- | MLX5E_PROT_MASK(MLX5E_10GBASE_KR)
- | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4)
- | MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) {
- return PORT_NONE;
+ if (eth_proto &
+ (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) |
+ MLX5E_PROT_MASK(MLX5E_10GBASE_KR) |
+ MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) |
+ MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) {
+ return PORT_NONE;
}
return PORT_OTHER;
@@ -1190,9 +1213,18 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
memset(tirc, 0, ctxlen);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc);
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
}
+
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ memset(tirc, 0, ctxlen);
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
+ mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen);
+ }
}
static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index dfccb53..f11fd07 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -36,6 +36,7 @@
#include <linux/tcp.h>
#include <linux/mlx5/fs.h>
#include "en.h"
+#include "lib/mpfs.h"
static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
struct mlx5e_l2_rule *ai, int type);
@@ -65,6 +66,7 @@ struct mlx5e_l2_hash_node {
struct hlist_node hlist;
u8 action;
struct mlx5e_l2_rule ai;
+ bool mpfs;
};
static inline int mlx5e_hash_l2(u8 *addr)
@@ -362,17 +364,30 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
struct mlx5e_l2_hash_node *hn)
{
- switch (hn->action) {
+ u8 action = hn->action;
+ int l2_err = 0;
+
+ switch (action) {
case MLX5E_ACTION_ADD:
mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH);
+ if (!is_multicast_ether_addr(hn->ai.addr)) {
+ l2_err = mlx5_mpfs_add_mac(priv->mdev, hn->ai.addr);
+ hn->mpfs = !l2_err;
+ }
hn->action = MLX5E_ACTION_NONE;
break;
case MLX5E_ACTION_DEL:
+ if (!is_multicast_ether_addr(hn->ai.addr) && hn->mpfs)
+ l2_err = mlx5_mpfs_del_mac(priv->mdev, hn->ai.addr);
mlx5e_del_l2_flow_rule(priv, &hn->ai);
mlx5e_del_l2_from_hash(hn);
break;
}
+
+ if (l2_err)
+ netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n",
+ action == MLX5E_ACTION_ADD ? "add" : "del", hn->ai.addr, l2_err);
}
static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv)
@@ -593,12 +608,21 @@ static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc)
ttc->rules[i] = NULL;
}
}
+
+ for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
+ mlx5_del_flow_rules(ttc->tunnel_rules[i]);
+ ttc->tunnel_rules[i] = NULL;
+ }
+ }
}
-static struct {
+struct mlx5e_etype_proto {
u16 etype;
u8 proto;
-} ttc_rules[] = {
+};
+
+static struct mlx5e_etype_proto ttc_rules[] = {
[MLX5E_TT_IPV4_TCP] = {
.etype = ETH_P_IP,
.proto = IPPROTO_TCP,
@@ -645,6 +669,28 @@ static struct {
},
};
+static struct mlx5e_etype_proto ttc_tunnel_rules[] = {
+ [MLX5E_TT_IPV4_GRE] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_GRE,
+ },
+ [MLX5E_TT_IPV6_GRE] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_GRE,
+ },
+};
+
+static u8 mlx5e_etype_to_ipv(u16 ethertype)
+{
+ if (ethertype == ETH_P_IP)
+ return 4;
+
+ if (ethertype == ETH_P_IPV6)
+ return 6;
+
+ return 0;
+}
+
static struct mlx5_flow_handle *
mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
struct mlx5_flow_table *ft,
@@ -652,10 +698,12 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
u16 etype,
u8 proto)
{
+ int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
int err = 0;
+ u8 ipv;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
@@ -666,7 +714,13 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
}
- if (etype) {
+
+ ipv = mlx5e_etype_to_ipv(etype);
+ if (match_ipv_outer && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
+ } else if (etype) {
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
@@ -708,6 +762,20 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv)
goto del_rules;
}
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return 0;
+
+ rules = ttc->tunnel_rules;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = priv->fs.inner_ttc.ft.t;
+ for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
+ rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
+ ttc_tunnel_rules[tt].etype,
+ ttc_tunnel_rules[tt].proto);
+ if (IS_ERR(rules[tt]))
+ goto del_rules;
+ }
+
return 0;
del_rules:
@@ -718,13 +786,23 @@ del_rules:
}
#define MLX5E_TTC_NUM_GROUPS 3
-#define MLX5E_TTC_GROUP1_SIZE BIT(3)
-#define MLX5E_TTC_GROUP2_SIZE BIT(1)
-#define MLX5E_TTC_GROUP3_SIZE BIT(0)
+#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT)
+#define MLX5E_TTC_GROUP2_SIZE BIT(1)
+#define MLX5E_TTC_GROUP3_SIZE BIT(0)
#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\
MLX5E_TTC_GROUP2_SIZE +\
MLX5E_TTC_GROUP3_SIZE)
-static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc)
+
+#define MLX5E_INNER_TTC_NUM_GROUPS 3
+#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3)
+#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1)
+#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0)
+#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\
+ MLX5E_INNER_TTC_GROUP2_SIZE +\
+ MLX5E_INNER_TTC_GROUP3_SIZE)
+
+static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc,
+ bool use_ipv)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5e_flow_table *ft = &ttc->ft;
@@ -746,7 +824,10 @@ static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc)
/* L4 Group */
mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
- MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ if (use_ipv)
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
+ else
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
MLX5_SET_CFG(in, start_flow_index, ix);
ix += MLX5E_TTC_GROUP1_SIZE;
@@ -787,6 +868,190 @@ err:
return err;
}
+static struct mlx5_flow_handle *
+mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest,
+ u16 etype, u8 proto)
+{
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ipv = mlx5e_etype_to_ipv(etype);
+ if (etype && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
+ }
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv)
+{
+ struct mlx5_flow_destination dest;
+ struct mlx5_flow_handle **rules;
+ struct mlx5e_ttc_table *ttc;
+ struct mlx5_flow_table *ft;
+ int err;
+ int tt;
+
+ ttc = &priv->fs.inner_ttc;
+ ft = ttc->ft.t;
+ rules = ttc->rules;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
+ if (tt == MLX5E_TT_ANY)
+ dest.tir_num = priv->direct_tir[0].tirn;
+ else
+ dest.tir_num = priv->inner_indir_tir[tt].tirn;
+
+ rules[tt] = mlx5e_generate_inner_ttc_rule(priv, ft, &dest,
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rules[tt]))
+ goto del_rules;
+ }
+
+ return 0;
+
+del_rules:
+ err = PTR_ERR(rules[tt]);
+ rules[tt] = NULL;
+ mlx5e_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5e_flow_table *ft = &ttc->ft;
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+ if (!ft->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ft->g);
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_INNER_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_INNER_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5E_INNER_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+ if (IS_ERR(ft->g[ft->num_groups]))
+ goto err;
+ ft->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ft->g[ft->num_groups]);
+ ft->g[ft->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_flow_table *ft = &ttc->ft;
+ int err;
+
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return 0;
+
+ ft_attr.max_fte = MLX5E_INNER_TTC_TABLE_SIZE;
+ ft_attr.level = MLX5E_INNER_TTC_FT_LEVEL;
+ ft_attr.prio = MLX5E_NIC_PRIO;
+
+ ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ if (IS_ERR(ft->t)) {
+ err = PTR_ERR(ft->t);
+ ft->t = NULL;
+ return err;
+ }
+
+ err = mlx5e_create_inner_ttc_table_groups(ttc);
+ if (err)
+ goto err;
+
+ err = mlx5e_generate_inner_ttc_table_rules(priv);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ mlx5e_destroy_flow_table(ft);
+ return err;
+}
+
+static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv)
+{
+ struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc;
+
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return;
+
+ mlx5e_cleanup_ttc_rules(ttc);
+ mlx5e_destroy_flow_table(&ttc->ft);
+}
+
void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv)
{
struct mlx5e_ttc_table *ttc = &priv->fs.ttc;
@@ -797,6 +1062,7 @@ void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv)
int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
{
+ bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
struct mlx5e_ttc_table *ttc = &priv->fs.ttc;
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5e_flow_table *ft = &ttc->ft;
@@ -813,7 +1079,7 @@ int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
return err;
}
- err = mlx5e_create_ttc_table_groups(ttc);
+ err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer);
if (err)
goto err;
@@ -1139,11 +1405,18 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
}
+ err = mlx5e_create_inner_ttc_table(priv);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
+ err);
+ goto err_destroy_arfs_tables;
+ }
+
err = mlx5e_create_ttc_table(priv);
if (err) {
netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
err);
- goto err_destroy_arfs_tables;
+ goto err_destroy_inner_ttc_table;
}
err = mlx5e_create_l2_table(priv);
@@ -1168,6 +1441,8 @@ err_destroy_l2_table:
mlx5e_destroy_l2_table(priv);
err_destroy_ttc_table:
mlx5e_destroy_ttc_table(priv);
+err_destroy_inner_ttc_table:
+ mlx5e_destroy_inner_ttc_table(priv);
err_destroy_arfs_tables:
mlx5e_arfs_destroy_tables(priv);
@@ -1179,6 +1454,7 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
mlx5e_destroy_vlan_table(priv);
mlx5e_destroy_l2_table(priv);
mlx5e_destroy_ttc_table(priv);
+ mlx5e_destroy_inner_ttc_table(priv);
mlx5e_arfs_destroy_tables(priv);
mlx5e_ethtool_cleanup_steering(priv);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d75f309..dfc2972 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -213,6 +213,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
s->rx_cache_full += rq_stats->cache_full;
s->rx_cache_empty += rq_stats->cache_empty;
s->rx_cache_busy += rq_stats->cache_busy;
+ s->rx_cache_waive += rq_stats->cache_waive;
for (j = 0; j < priv->channels.params.num_tc; j++) {
sq_stats = &c->sq[j].stats;
@@ -293,6 +294,12 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv, bool full)
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
}
+ if (MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters)) {
+ out = pstats->eth_ext_counters;
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
+ mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+ }
+
MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
out = pstats->per_prio_counters[prio];
@@ -593,12 +600,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
}
rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
- rq->rx_headroom = params->rq_headroom;
+ rq->buff.headroom = params->rq_headroom;
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
+ rq->post_wqes = mlx5e_post_rx_mpwqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe;
@@ -615,11 +622,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
- rq->mpwqe_stride_sz = BIT(params->mpwqe_log_stride_sz);
- rq->mpwqe_num_strides = BIT(params->mpwqe_log_num_strides);
+ rq->mpwqe.log_stride_sz = params->mpwqe_log_stride_sz;
+ rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides);
- rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
- byte_count = rq->buff.wqe_sz;
+ byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
err = mlx5e_create_rq_umr_mkey(mdev, rq);
if (err)
@@ -639,7 +645,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
err = -ENOMEM;
goto err_rq_wq_destroy;
}
- rq->alloc_wqe = mlx5e_alloc_rx_wqe;
+ rq->post_wqes = mlx5e_post_rx_wqes;
rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
#ifdef CONFIG_MLX5_EN_IPSEC
@@ -655,18 +661,17 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
- rq->buff.wqe_sz = params->lro_en ?
+ byte_count = params->lro_en ?
params->lro_wqe_sz :
MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu);
#ifdef CONFIG_MLX5_EN_IPSEC
if (MLX5_IPSEC_DEV(mdev))
- rq->buff.wqe_sz += MLX5E_METADATA_ETHER_LEN;
+ byte_count += MLX5E_METADATA_ETHER_LEN;
#endif
rq->wqe.page_reuse = !params->xdp_prog && !params->lro_en;
- byte_count = rq->buff.wqe_sz;
/* calc the required page order */
- rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->rx_headroom + byte_count);
+ rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->buff.headroom + byte_count);
npages = DIV_ROUND_UP(rq->wqe.frag_sz, PAGE_SIZE);
rq->buff.page_order = order_base_2(npages);
@@ -677,6 +682,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
for (i = 0; i < wq_sz; i++) {
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
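+ /* for striding RQ the WQE address is constant per index
+  * (its UMR MTT offset), so program it once here
+  */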
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+ u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, i) << PAGE_SHIFT;
+
+ wqe->data.addr = cpu_to_be64(dma_offset);
+ }
+
wqe->data.byte_count = cpu_to_be32(byte_count);
wqe->data.lkey = rq->mkey_be;
}
@@ -883,7 +894,8 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
u16 wqe_ix;
/* UMR WQE (if in progress) is always at wq->head */
- if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
+ if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+ rq->mpwqe.umr_in_progress)
mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
while (!mlx5_wq_ll_is_empty(wq)) {
@@ -926,7 +938,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
goto err_destroy_rq;
if (params->rx_am_enabled)
- set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
+ c->rq.state |= BIT(MLX5E_RQ_STATE_AM);
return 0;
@@ -946,7 +958,6 @@ static void mlx5e_activate_rq(struct mlx5e_rq *rq)
set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
- sq->db.ico_wqe[pi].num_wqebbs = 1;
nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
}
@@ -1048,7 +1059,6 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
struct mlx5_core_dev *mdev = c->mdev;
int err;
- sq->pdev = c->pdev;
sq->mkey_be = c->mkey_be;
sq->channel = c;
sq->uar_map = mdev->mlx5e_res.bfreg.map;
@@ -1752,7 +1762,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_cq_moder icocq_moder = {0, 0};
struct net_device *netdev = priv->netdev;
struct mlx5e_channel *c;
+ unsigned int irq;
int err;
+ int eqn;
c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
if (!c)
@@ -1768,6 +1780,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->num_tc = params->num_tc;
c->xdp = !!params->xdp_prog;
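+ /* cache this channel's IRQ descriptor; NAPI uses its affinity mask to detect IRQ migration */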
+ mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
+ c->irq_desc = irq_to_desc(irq);
+
netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq);
@@ -2345,9 +2360,10 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params,
enum mlx5e_traffic_types tt,
- void *tirc)
+ void *tirc, bool inner)
{
- void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+ void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) :
+ MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
MLX5_HASH_FIELD_SEL_DST_IP)
@@ -2496,6 +2512,21 @@ free_in:
return err;
}
+static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
+ enum mlx5e_traffic_types tt,
+ u32 *tirc)
+{
+ MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
+
+ mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
+
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+ MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
+ MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1);
+
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true);
+}
+
static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu)
{
struct mlx5_core_dev *mdev = priv->mdev;
@@ -2583,12 +2614,6 @@ static void mlx5e_build_channels_tx_maps(struct mlx5e_priv *priv)
}
}
-static bool mlx5e_is_eswitch_vport_mngr(struct mlx5_core_dev *mdev)
-{
- return (MLX5_CAP_GEN(mdev, vport_group_manager) &&
- MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH);
-}
-
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
{
int num_txqs = priv->channels.num * priv->channels.params.num_tc;
@@ -2602,7 +2627,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
mlx5e_activate_channels(&priv->channels);
netif_tx_start_all_queues(priv->netdev);
- if (mlx5e_is_eswitch_vport_mngr(priv->mdev))
+ if (MLX5_VPORT_MANAGER(priv->mdev))
mlx5e_add_sqs_fwd_rules(priv);
mlx5e_wait_channels_min_rx_wqes(&priv->channels);
@@ -2613,7 +2638,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
{
mlx5e_redirect_rqts_to_drop(priv);
- if (mlx5e_is_eswitch_vport_mngr(priv->mdev))
+ if (MLX5_VPORT_MANAGER(priv->mdev))
mlx5e_remove_sqs_fwd_rules(priv);
/* FIXME: This is a W/A only for tx timeout watch dog false alarm when
@@ -2690,6 +2715,8 @@ int mlx5e_open(struct net_device *netdev)
mutex_lock(&priv->state_lock);
err = mlx5e_open_locked(netdev);
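+ /* keep the physical port admin state in sync with the netdev state */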
+ if (!err)
+ mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
mutex_unlock(&priv->state_lock);
return err;
@@ -2724,6 +2751,7 @@ int mlx5e_close(struct net_device *netdev)
return -ENODEV;
mutex_lock(&priv->state_lock);
+ mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN);
err = mlx5e_close_locked(netdev);
mutex_unlock(&priv->state_lock);
@@ -2864,7 +2892,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv,
MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn);
- mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc);
+ mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false);
}
static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
@@ -2883,6 +2911,7 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv)
struct mlx5e_tir *tir;
void *tirc;
int inlen;
+ int i = 0;
int err;
u32 *in;
int tt;
@@ -2898,16 +2927,36 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv)
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
mlx5e_build_indir_tir_ctx(priv, tt, tirc);
err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
- if (err)
- goto err_destroy_tirs;
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
+ goto err_destroy_inner_tirs;
+ }
}
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ goto out;
+
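+ /* also create TIRs that hash on the inner packet headers;
+  * the inner TTC table steers tunneled traffic to them
+  */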
+ for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) {
+ memset(in, 0, inlen);
+ tir = &priv->inner_indir_tir[i];
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ mlx5e_build_inner_indir_tir_ctx(priv, i, tirc);
+ err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err);
+ goto err_destroy_inner_tirs;
+ }
+ }
+
+out:
kvfree(in);
return 0;
-err_destroy_tirs:
- mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
+err_destroy_inner_tirs:
+ for (i--; i >= 0; i--)
+ mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
+
for (tt--; tt >= 0; tt--)
mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]);
@@ -2961,6 +3010,12 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]);
+
+ if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ return;
+
+ for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
+ mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
}
void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
@@ -3000,12 +3055,16 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
return 0;
}
-static int mlx5e_setup_tc(struct net_device *netdev, u8 tc)
+static int mlx5e_setup_tc_mqprio(struct net_device *netdev,
+ struct tc_mqprio_qopt *mqprio)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_channels new_channels = {};
+ u8 tc = mqprio->num_tc;
int err = 0;
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
if (tc && tc != MLX5E_MAX_NUM_TC)
return -EINVAL;
@@ -3029,39 +3088,42 @@ out:
return err;
}
-static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
- u32 chain_index, __be16 proto,
- struct tc_to_netdev *tc)
+#ifdef CONFIG_MLX5_ESWITCH
+static int mlx5e_setup_tc_cls_flower(struct net_device *dev,
+ struct tc_cls_flower_offload *cls_flower)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
- goto mqprio;
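+ /* only clsact ingress on the default chain can be offloaded */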
+ if (!is_classid_clsact_ingress(cls_flower->common.classid) ||
+ cls_flower->common.chain_index)
+ return -EOPNOTSUPP;
- if (chain_index)
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return mlx5e_configure_flower(priv, cls_flower);
+ case TC_CLSFLOWER_DESTROY:
+ return mlx5e_delete_flower(priv, cls_flower);
+ case TC_CLSFLOWER_STATS:
+ return mlx5e_stats_flower(priv, cls_flower);
+ default:
return -EOPNOTSUPP;
+ }
+}
+#endif
- switch (tc->type) {
+static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+#ifdef CONFIG_MLX5_ESWITCH
case TC_SETUP_CLSFLOWER:
- switch (tc->cls_flower->command) {
- case TC_CLSFLOWER_REPLACE:
- return mlx5e_configure_flower(priv, proto, tc->cls_flower);
- case TC_CLSFLOWER_DESTROY:
- return mlx5e_delete_flower(priv, tc->cls_flower);
- case TC_CLSFLOWER_STATS:
- return mlx5e_stats_flower(priv, tc->cls_flower);
- }
+ return mlx5e_setup_tc_cls_flower(dev, type_data);
+#endif
+ case TC_SETUP_MQPRIO:
+ return mlx5e_setup_tc_mqprio(dev, type_data);
default:
return -EOPNOTSUPP;
}
-
-mqprio:
- if (tc->type != TC_SETUP_MQPRIO)
- return -EINVAL;
-
- tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
-
- return mlx5e_setup_tc(dev, tc->mqprio->num_tc);
}
static void
@@ -3358,6 +3420,7 @@ static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
}
+#ifdef CONFIG_MLX5_ESWITCH
static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -3460,6 +3523,7 @@ static int mlx5e_get_vf_stats(struct net_device *dev,
return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
vf_stats);
}
+#endif
static void mlx5e_add_vxlan_port(struct net_device *netdev,
struct udp_tunnel_info *ti)
@@ -3489,13 +3553,13 @@ static void mlx5e_del_vxlan_port(struct net_device *netdev,
mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 0);
}
-static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv,
- struct sk_buff *skb,
- netdev_features_t features)
+static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
+ struct sk_buff *skb,
+ netdev_features_t features)
{
struct udphdr *udph;
- u16 proto;
- u16 port = 0;
+ u8 proto;
+ u16 port;
switch (vlan_get_protocol(skb)) {
case htons(ETH_P_IP):
@@ -3508,14 +3572,17 @@ static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv,
goto out;
}
- if (proto == IPPROTO_UDP) {
+ switch (proto) {
+ case IPPROTO_GRE:
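+ /* GRE offload does not depend on a configured tunnel port */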
+ return features;
+ case IPPROTO_UDP:
udph = udp_hdr(skb);
port = be16_to_cpu(udph->dest);
- }
- /* Verify if UDP port is being offloaded by HW */
- if (port && mlx5e_vxlan_lookup_port(priv, port))
- return features;
+ /* Verify if UDP port is being offloaded by HW */
+ if (mlx5e_vxlan_lookup_port(priv, port))
+ return features;
+ }
out:
/* Disable CSUM and GSO if the udp dport is not offloaded by HW */
@@ -3539,7 +3606,7 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
/* Validate if the tunneled packet is being offloaded by HW */
if (skb->encapsulation &&
(features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
- return mlx5e_vxlan_features_check(priv, skb, features);
+ return mlx5e_tunnel_features_check(priv, skb, features);
return features;
}
@@ -3636,7 +3703,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
/* napi_schedule in case we have missed anything */
- set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags);
napi_schedule(&c->napi);
if (old_prog)
@@ -3693,11 +3759,11 @@ static void mlx5e_netpoll(struct net_device *dev)
}
#endif
-static const struct net_device_ops mlx5e_netdev_ops_basic = {
+static const struct net_device_ops mlx5e_netdev_ops = {
.ndo_open = mlx5e_open,
.ndo_stop = mlx5e_close,
.ndo_start_xmit = mlx5e_xmit,
- .ndo_setup_tc = mlx5e_ndo_setup_tc,
+ .ndo_setup_tc = mlx5e_setup_tc,
.ndo_select_queue = mlx5e_select_queue,
.ndo_get_stats64 = mlx5e_get_stats,
.ndo_set_rx_mode = mlx5e_set_rx_mode,
@@ -3708,6 +3774,9 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = {
.ndo_change_mtu = mlx5e_change_mtu,
.ndo_do_ioctl = mlx5e_ioctl,
.ndo_set_tx_maxrate = mlx5e_set_tx_maxrate,
+ .ndo_udp_tunnel_add = mlx5e_add_vxlan_port,
+ .ndo_udp_tunnel_del = mlx5e_del_vxlan_port,
+ .ndo_features_check = mlx5e_features_check,
#ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = mlx5e_rx_flow_steer,
#endif
@@ -3716,29 +3785,8 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = {
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = mlx5e_netpoll,
#endif
-};
-
-static const struct net_device_ops mlx5e_netdev_ops_sriov = {
- .ndo_open = mlx5e_open,
- .ndo_stop = mlx5e_close,
- .ndo_start_xmit = mlx5e_xmit,
- .ndo_setup_tc = mlx5e_ndo_setup_tc,
- .ndo_select_queue = mlx5e_select_queue,
- .ndo_get_stats64 = mlx5e_get_stats,
- .ndo_set_rx_mode = mlx5e_set_rx_mode,
- .ndo_set_mac_address = mlx5e_set_mac,
- .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid,
- .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid,
- .ndo_set_features = mlx5e_set_features,
- .ndo_change_mtu = mlx5e_change_mtu,
- .ndo_do_ioctl = mlx5e_ioctl,
- .ndo_udp_tunnel_add = mlx5e_add_vxlan_port,
- .ndo_udp_tunnel_del = mlx5e_del_vxlan_port,
- .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate,
- .ndo_features_check = mlx5e_features_check,
-#ifdef CONFIG_RFS_ACCEL
- .ndo_rx_flow_steer = mlx5e_rx_flow_steer,
-#endif
+#ifdef CONFIG_MLX5_ESWITCH
+ /* SRIOV E-Switch NDOs */
.ndo_set_vf_mac = mlx5e_set_vf_mac,
.ndo_set_vf_vlan = mlx5e_set_vf_vlan,
.ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk,
@@ -3747,13 +3795,9 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
.ndo_get_vf_config = mlx5e_get_vf_config,
.ndo_set_vf_link_state = mlx5e_set_vf_link_state,
.ndo_get_vf_stats = mlx5e_get_vf_stats,
- .ndo_tx_timeout = mlx5e_tx_timeout,
- .ndo_xdp = mlx5e_xdp,
-#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_poll_controller = mlx5e_netpoll,
-#endif
.ndo_has_offload_stats = mlx5e_has_offload_stats,
.ndo_get_offload_stats = mlx5e_get_offload_stats,
+#endif
};
static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -3790,8 +3834,7 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/;
}
-void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
- u32 *indirection_rqt, int len,
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
int num_channels)
{
int i;
@@ -3934,7 +3977,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
/* RSS */
params->rss_hfunc = ETH_RSS_HASH_XOR;
netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key));
- mlx5e_build_default_indir_rqt(mdev, params->indirection_rqt,
+ mlx5e_build_default_indir_rqt(params->indirection_rqt,
MLX5E_INDIR_RQT_SIZE, max_channels);
}
@@ -3973,9 +4016,11 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
}
}
+#if IS_ENABLED(CONFIG_NET_SWITCHDEV) && IS_ENABLED(CONFIG_MLX5_ESWITCH)
static const struct switchdev_ops mlx5e_switchdev_ops = {
.switchdev_port_attr_get = mlx5e_attr_get,
};
+#endif
static void mlx5e_build_nic_netdev(struct net_device *netdev)
{
@@ -3986,15 +4031,12 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
SET_NETDEV_DEV(netdev, &mdev->pdev->dev);
- if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
- netdev->netdev_ops = &mlx5e_netdev_ops_sriov;
+ netdev->netdev_ops = &mlx5e_netdev_ops;
+
#ifdef CONFIG_MLX5_CORE_EN_DCB
- if (MLX5_CAP_GEN(mdev, qos))
- netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+ if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos))
+ netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
#endif
- } else {
- netdev->netdev_ops = &mlx5e_netdev_ops_basic;
- }
netdev->watchdog_timeo = 15 * HZ;
@@ -4017,20 +4059,32 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
- if (mlx5e_vxlan_allowed(mdev)) {
- netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
- NETIF_F_GSO_UDP_TUNNEL_CSUM |
- NETIF_F_GSO_PARTIAL;
+ if (mlx5e_vxlan_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
+ netdev->hw_features |= NETIF_F_GSO_PARTIAL;
netdev->hw_enc_features |= NETIF_F_IP_CSUM;
netdev->hw_enc_features |= NETIF_F_IPV6_CSUM;
netdev->hw_enc_features |= NETIF_F_TSO;
netdev->hw_enc_features |= NETIF_F_TSO6;
- netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
- netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM |
- NETIF_F_GSO_PARTIAL;
+ netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
+ }
+
+ if (mlx5e_vxlan_allowed(mdev)) {
+ netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_GSO_UDP_TUNNEL_CSUM;
netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
}
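+ /* advertise GRE segmentation offloads when the device supports stateless GRE */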
+ if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
+ netdev->hw_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ netdev->hw_enc_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ netdev->gso_partial_features |= NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE_CSUM;
+ }
+
mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled);
if (fcs_supported)
@@ -4066,8 +4120,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
mlx5e_set_netdev_dev_addr(netdev);
-#ifdef CONFIG_NET_SWITCHDEV
- if (MLX5_CAP_GEN(mdev, vport_group_manager))
+#if IS_ENABLED(CONFIG_NET_SWITCHDEV) && IS_ENABLED(CONFIG_MLX5_ESWITCH)
+ if (MLX5_VPORT_MANAGER(mdev))
netdev->switchdev_ops = &mlx5e_switchdev_ops;
#endif
@@ -4199,6 +4253,10 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
mlx5e_init_l2_addr(priv);
+ /* mark the link as currently not needed by the driver */
+ if (!netif_running(netdev))
+ mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN);
+
/* MTU range: 68 - hw-specific max */
netdev->min_mtu = ETH_MIN_MTU;
mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
@@ -4209,7 +4267,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
mlx5e_enable_async_events(priv);
- if (MLX5_CAP_GEN(mdev, vport_group_manager))
+ if (MLX5_VPORT_MANAGER(priv->mdev))
mlx5e_register_vport_reps(priv);
if (netdev->reg_state != NETREG_REGISTERED)
@@ -4243,7 +4301,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->set_rx_mode_work);
- if (MLX5_CAP_GEN(mdev, vport_group_manager))
+ if (MLX5_VPORT_MANAGER(priv->mdev))
mlx5e_unregister_vport_reps(priv);
mlx5e_disable_async_events(priv);
@@ -4416,32 +4474,29 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
static void *mlx5e_add(struct mlx5_core_dev *mdev)
{
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- int total_vfs = MLX5_TOTAL_VPORTS(mdev);
- struct mlx5e_rep_priv *rpriv = NULL;
+ struct net_device *netdev;
+ void *rpriv = NULL;
void *priv;
- int vport;
int err;
- struct net_device *netdev;
err = mlx5e_check_required_hca_cap(mdev);
if (err)
return NULL;
- if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
- rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+#ifdef CONFIG_MLX5_ESWITCH
+ if (MLX5_VPORT_MANAGER(mdev)) {
+ rpriv = mlx5e_alloc_nic_rep_priv(mdev);
if (!rpriv) {
- mlx5_core_warn(mdev,
- "Not creating net device, Failed to alloc rep priv data\n");
+ mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n");
return NULL;
}
- rpriv->rep = &esw->offloads.vport_reps[0];
}
+#endif
netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv);
if (!netdev) {
mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
- goto err_unregister_reps;
+ goto err_free_rpriv;
}
priv = netdev_priv(netdev);
@@ -4462,14 +4517,9 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
err_detach:
mlx5e_detach(mdev, priv);
-
err_destroy_netdev:
mlx5e_destroy_netdev(priv);
-
-err_unregister_reps:
- for (vport = 1; vport < total_vfs; vport++)
- mlx5_eswitch_unregister_vport_rep(esw, vport);
-
+err_free_rpriv:
kfree(rpriv);
return NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 45e60be..45e03c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -613,15 +613,18 @@ static int mlx5e_rep_open(struct net_device *dev)
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
int err;
- err = mlx5e_open(dev);
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_open_locked(dev);
if (err)
- return err;
+ goto unlock;
- err = mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP);
- if (!err)
+ if (!mlx5_eswitch_set_vport_state(esw, rep->vport,
+ MLX5_ESW_VPORT_ADMIN_STATE_UP))
netif_carrier_on(dev);
- return 0;
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return err;
}
static int mlx5e_rep_close(struct net_device *dev)
@@ -630,10 +633,13 @@ static int mlx5e_rep_close(struct net_device *dev)
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ int ret;
+ mutex_lock(&priv->state_lock);
(void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
-
- return mlx5e_close(dev);
+ ret = mlx5e_close_locked(dev);
+ mutex_unlock(&priv->state_lock);
+ return ret;
}
static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
@@ -651,37 +657,42 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
return 0;
}
-static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
- u32 chain_index, __be16 proto,
- struct tc_to_netdev *tc)
+static int
+mlx5e_rep_setup_tc_cls_flower(struct net_device *dev,
+ struct tc_cls_flower_offload *cls_flower)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+ if (!is_classid_clsact_ingress(cls_flower->common.classid) ||
+ cls_flower->common.chain_index)
return -EOPNOTSUPP;
- if (tc->egress_dev) {
+ if (cls_flower->egress_dev) {
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct net_device *uplink_dev = mlx5_eswitch_get_uplink_netdev(esw);
- return uplink_dev->netdev_ops->ndo_setup_tc(uplink_dev, handle,
- chain_index,
- proto, tc);
+ dev = mlx5_eswitch_get_uplink_netdev(esw);
+ return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
+ cls_flower);
}
- if (chain_index)
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return mlx5e_configure_flower(priv, cls_flower);
+ case TC_CLSFLOWER_DESTROY:
+ return mlx5e_delete_flower(priv, cls_flower);
+ case TC_CLSFLOWER_STATS:
+ return mlx5e_stats_flower(priv, cls_flower);
+ default:
return -EOPNOTSUPP;
+ }
+}
- switch (tc->type) {
+static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
case TC_SETUP_CLSFLOWER:
- switch (tc->cls_flower->command) {
- case TC_CLSFLOWER_REPLACE:
- return mlx5e_configure_flower(priv, proto, tc->cls_flower);
- case TC_CLSFLOWER_DESTROY:
- return mlx5e_delete_flower(priv, tc->cls_flower);
- case TC_CLSFLOWER_STATS:
- return mlx5e_stats_flower(priv, tc->cls_flower);
- }
+ return mlx5e_rep_setup_tc_cls_flower(dev, type_data);
default:
return -EOPNOTSUPP;
}
@@ -773,7 +784,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
.ndo_stop = mlx5e_rep_close,
.ndo_start_xmit = mlx5e_xmit,
.ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
- .ndo_setup_tc = mlx5e_rep_ndo_setup_tc,
+ .ndo_setup_tc = mlx5e_rep_setup_tc,
.ndo_get_stats64 = mlx5e_rep_get_stats,
.ndo_has_offload_stats = mlx5e_has_offload_stats,
.ndo_get_offload_stats = mlx5e_get_offload_stats,
@@ -913,7 +924,7 @@ static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev)
return MLX5E_PORT_REPRESENTOR_NCH;
}
-static struct mlx5e_profile mlx5e_rep_profile = {
+static const struct mlx5e_profile mlx5e_rep_profile = {
.init = mlx5e_init_rep,
.init_rx = mlx5e_init_rep_rx,
.cleanup_rx = mlx5e_cleanup_rep_rx,
@@ -1099,3 +1110,16 @@ void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv)
mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */
mlx5_eswitch_unregister_vport_rep(esw, 0); /* UPLINK PF*/
}
+
+void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv;
+
+ rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+ if (!rpriv)
+ return NULL;
+
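+ /* vport_reps[0] is the uplink (PF) representor */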
+ rpriv->rep = &esw->offloads.vport_reps[0];
+ return rpriv;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index a0a1a7a..5659ed9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -38,6 +38,7 @@
#include "eswitch.h"
#include "en.h"
+#ifdef CONFIG_MLX5_ESWITCH
struct mlx5e_neigh_update_table {
struct rhashtable neigh_ht;
/* Save the neigh hash entries in a list in addition to the hash table
@@ -123,6 +124,7 @@ struct mlx5e_encap_entry {
int encap_size;
};
+void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev);
void mlx5e_register_vport_reps(struct mlx5e_priv *priv);
void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv);
bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
@@ -141,5 +143,12 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e);
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {}
+static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {}
+static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
+static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {}
+#endif
#endif /* __MLX5E_REP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 7344433..f1dd638 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -163,7 +163,7 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
static inline bool mlx5e_page_is_reserved(struct page *page)
{
- return page_is_pfmemalloc(page) || page_to_nid(page) != numa_node_id();
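+ /* numa_mem_id() maps memoryless nodes to their nearest node with memory */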
+ return page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id();
}
static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
@@ -177,8 +177,10 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
return false;
}
- if (unlikely(page_is_pfmemalloc(dma_info->page)))
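+ /* don't cache pfmemalloc or remote-node pages; account them as waived */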
+ if (unlikely(mlx5e_page_is_reserved(dma_info->page))) {
+ rq->stats.cache_waive++;
return false;
+ }
cache->page_cache[cache->tail] = *dma_info;
cache->tail = tail_next;
@@ -252,7 +254,7 @@ static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq,
!mlx5e_page_is_reserved(wi->di.page);
}
-int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
+static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
{
struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[ix];
@@ -263,8 +265,7 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
wi->offset = 0;
}
- wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset +
- rq->rx_headroom);
+ wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset + rq->buff.headroom);
return 0;
}
@@ -296,7 +297,7 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
{
- return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
+ return rq->mpwqe.num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
}
static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq,
@@ -305,7 +306,7 @@ static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq,
u32 page_idx, u32 frag_offset,
u32 len)
{
- unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);
+ unsigned int truesize = ALIGN(len, BIT(rq->mpwqe.log_stride_sz));
dma_sync_single_for_cpu(rq->pdev,
wi->umr.dma_info[page_idx].addr + frag_offset,
@@ -358,7 +359,6 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
/* fill sq edge with nops to avoid wqe wrap around */
while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
- sq->db.ico_wqe[pi].num_wqebbs = 1;
mlx5e_post_nop(wq, sq->sqn, &sq->pc);
}
@@ -369,41 +369,35 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
MLX5_OPCODE_UMR);
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
- sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs;
sq->pc += num_wqebbs;
mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
}
static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
- struct mlx5e_rx_wqe *wqe,
u16 ix)
{
struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
- u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT;
int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
+ struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
int err;
int i;
- for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
- struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];
-
+ for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
err = mlx5e_page_alloc_mapped(rq, dma_info);
if (unlikely(err))
goto err_unmap;
wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
page_ref_add(dma_info->page, pg_strides);
- wi->skbs_frags[i] = 0;
}
+ memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * MLX5_MPWRQ_PAGES_PER_WQE);
wi->consumed_strides = 0;
- wqe->data.addr = cpu_to_be64(dma_offset);
return 0;
err_unmap:
while (--i >= 0) {
- struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];
-
+ dma_info--;
page_ref_sub(dma_info->page, pg_strides);
mlx5e_page_release(rq, dma_info, true);
}
@@ -414,27 +408,21 @@ err_unmap:
void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
{
int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
+ struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0];
int i;
- for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
- struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];
-
+ for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) {
page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]);
mlx5e_page_release(rq, dma_info, true);
}
}
-void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
+static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
{
struct mlx5_wq_ll *wq = &rq->wq;
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
- clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
-
- if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) {
- mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
- return;
- }
+ rq->mpwqe.umr_in_progress = false;
mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
@@ -444,16 +432,18 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
mlx5_wq_ll_update_db_record(wq);
}
-int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
+static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
int err;
- err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix);
- if (unlikely(err))
+ err = mlx5e_alloc_rx_umr_mpwqe(rq, ix);
+ if (unlikely(err)) {
+ rq->stats.buff_alloc_err++;
return err;
- set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
+ }
+ rq->mpwqe.umr_in_progress = true;
mlx5e_post_umr_wqe(rq, ix);
- return -EBUSY;
+ return 0;
}
void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
@@ -463,94 +453,150 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
mlx5e_free_rx_mpwqe(rq, wi);
}
-#define RQ_CANNOT_POST(rq) \
- (!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state) || \
- test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
-
bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
{
struct mlx5_wq_ll *wq = &rq->wq;
+ int err;
- if (unlikely(RQ_CANNOT_POST(rq)))
+ if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED)))
return false;
- while (!mlx5_wq_ll_is_full(wq)) {
+ if (mlx5_wq_ll_is_full(wq))
+ return false;
+
+ do {
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
- int err;
- err = rq->alloc_wqe(rq, wqe, wq->head);
- if (err == -EBUSY)
- return true;
+ err = mlx5e_alloc_rx_wqe(rq, wqe, wq->head);
if (unlikely(err)) {
rq->stats.buff_alloc_err++;
break;
}
mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
- }
+ } while (!mlx5_wq_ll_is_full(wq));
/* ensure wqes are visible to device before updating doorbell record */
dma_wmb();
mlx5_wq_ll_update_db_record(wq);
- return !mlx5_wq_ll_is_full(wq);
+ return !!err;
+}
+
+static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
+ struct mlx5e_icosq *sq,
+ struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1;
+ struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci];
+
+ mlx5_cqwq_pop(&cq->wq);
+
+ if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) {
+ WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n",
+ cqe->op_own);
+ return;
+ }
+
+ if (likely(icowi->opcode == MLX5_OPCODE_UMR)) {
+ mlx5e_post_rx_mpwqe(rq);
+ return;
+ }
+
+ if (unlikely(icowi->opcode != MLX5_OPCODE_NOP))
+ WARN_ONCE(true,
+ "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n",
+ icowi->opcode);
+}
+
+static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
+{
+ struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
+ struct mlx5_cqe64 *cqe;
+
+ if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED)))
+ return;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (likely(!cqe))
+ return;
+
+ /* by design, there's only a single cqe */
+ mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe);
+
+ mlx5_cqwq_update_db_record(&cq->wq);
+}
+
+bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
+{
+ struct mlx5_wq_ll *wq = &rq->wq;
+
+ if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED)))
+ return false;
+
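+ /* poll the ICOSQ: a completed UMR WQE clears umr_in_progress
+  * and pushes the new MPWQE to HW
+  */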
+ mlx5e_poll_ico_cq(&rq->channel->icosq.cq, rq);
+
+ if (mlx5_wq_ll_is_full(wq))
+ return false;
+
+ if (!rq->mpwqe.umr_in_progress)
+ mlx5e_alloc_rx_mpwqe(rq, wq->head);
+
+ return true;
}
static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
u32 cqe_bcnt)
{
struct ethhdr *eth = (struct ethhdr *)(skb->data);
- struct iphdr *ipv4;
- struct ipv6hdr *ipv6;
struct tcphdr *tcp;
int network_depth = 0;
__be16 proto;
u16 tot_len;
+ void *ip_p;
u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
- int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) ||
- (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));
+ u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
+ (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
skb->mac_len = ETH_HLEN;
proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
- ipv4 = (struct iphdr *)(skb->data + network_depth);
- ipv6 = (struct ipv6hdr *)(skb->data + network_depth);
tot_len = cqe_bcnt - network_depth;
+ ip_p = skb->data + network_depth;
if (proto == htons(ETH_P_IP)) {
- tcp = (struct tcphdr *)(skb->data + network_depth +
- sizeof(struct iphdr));
- ipv6 = NULL;
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
- } else {
- tcp = (struct tcphdr *)(skb->data + network_depth +
- sizeof(struct ipv6hdr));
- ipv4 = NULL;
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
- }
-
- if (get_cqe_lro_tcppsh(cqe))
- tcp->psh = 1;
+ struct iphdr *ipv4 = ip_p;
- if (tcp_ack) {
- tcp->ack = 1;
- tcp->ack_seq = cqe->lro_ack_seq_num;
- tcp->window = cqe->lro_tcp_win;
- }
+ tcp = ip_p + sizeof(struct iphdr);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
- if (ipv4) {
ipv4->ttl = cqe->lro_min_ttl;
ipv4->tot_len = cpu_to_be16(tot_len);
ipv4->check = 0;
ipv4->check = ip_fast_csum((unsigned char *)ipv4,
ipv4->ihl);
} else {
+ struct ipv6hdr *ipv6 = ip_p;
+
+ tcp = ip_p + sizeof(struct ipv6hdr);
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+
ipv6->hop_limit = cqe->lro_min_ttl;
ipv6->payload_len = cpu_to_be16(tot_len -
sizeof(struct ipv6hdr));
}
+
+ tcp->psh = get_cqe_lro_tcppsh(cqe);
+
+ if (tcp_ack) {
+ tcp->ack = 1;
+ tcp->ack_seq = cqe->lro_ack_seq_num;
+ tcp->window = cqe->lro_tcp_win;
+ }
}
static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
@@ -776,9 +822,9 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
{
struct mlx5e_dma_info *di = &wi->di;
+ u16 rx_headroom = rq->buff.headroom;
struct sk_buff *skb;
void *va, *data;
- u16 rx_headroom = rq->rx_headroom;
bool consumed;
u32 frag_size;
@@ -857,6 +903,7 @@ wq_ll_pop:
&wqe->next.next_wqe_index);
}
+#ifdef CONFIG_MLX5_ESWITCH
void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
struct net_device *netdev = rq->netdev;
@@ -901,6 +948,7 @@ wq_ll_pop:
mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
&wqe->next.next_wqe_index);
}
+#endif
static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
struct mlx5_cqe64 *cqe,
@@ -909,7 +957,7 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
struct sk_buff *skb)
{
u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
- u32 wqe_offset = stride_ix * rq->mpwqe_stride_sz;
+ u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
u32 head_offset = wqe_offset & (PAGE_SIZE - 1);
u32 page_idx = wqe_offset >> PAGE_SHIFT;
u32 head_page_idx = page_idx;
@@ -977,7 +1025,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
napi_gro_receive(rq->cq.napi, skb);
mpwrq_cqe_out:
- if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
+ if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
return;
mlx5e_free_rx_mpwqe(rq, wi);
@@ -987,21 +1035,23 @@ mpwrq_cqe_out:
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
- struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
+ struct mlx5e_xdpsq *xdpsq;
+ struct mlx5_cqe64 *cqe;
int work_done = 0;
- if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
+ if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED)))
return 0;
if (cq->decmprs_left)
work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
- for (; work_done < budget; work_done++) {
- struct mlx5_cqe64 *cqe = mlx5_cqwq_get_cqe(&cq->wq);
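+ /* peek at the first CQE; return early when the CQ is empty */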
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
+ return 0;
- if (!cqe)
- break;
+ xdpsq = &rq->xdpsq;
+ do {
if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
work_done +=
mlx5e_decompress_cqes_start(rq, cq,
@@ -1012,7 +1062,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
mlx5_cqwq_pop(&cq->wq);
rq->handle_rx_cqe(rq, cqe);
- }
+ } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
if (xdpsq->db.doorbell) {
mlx5e_xmit_xdp_doorbell(xdpsq);
@@ -1030,13 +1080,18 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
struct mlx5e_xdpsq *sq;
+ struct mlx5_cqe64 *cqe;
struct mlx5e_rq *rq;
u16 sqcc;
int i;
sq = container_of(cq, struct mlx5e_xdpsq, cq);
- if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED)))
+ return false;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
return false;
rq = container_of(sq, struct mlx5e_rq, xdpsq);
@@ -1046,15 +1101,11 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
*/
sqcc = sq->cc;
- for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) {
- struct mlx5_cqe64 *cqe;
+ i = 0;
+ do {
u16 wqe_counter;
bool last_wqe;
- cqe = mlx5_cqwq_get_cqe(&cq->wq);
- if (!cqe)
- break;
-
mlx5_cqwq_pop(&cq->wq);
wqe_counter = be16_to_cpu(cqe->wqe_counter);
@@ -1072,7 +1123,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
/* Recycle RX page */
mlx5e_page_release(rq, di, true);
} while (!last_wqe);
- }
+ } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
mlx5_cqwq_update_db_record(&cq->wq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index e65517e..6d199ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -47,7 +47,7 @@
struct counter_desc {
char format[ETH_GSTRING_LEN];
- int offset; /* Byte offset */
+ size_t offset; /* Byte offset */
};
struct mlx5e_sw_stats {
@@ -84,6 +84,7 @@ struct mlx5e_sw_stats {
u64 rx_cache_full;
u64 rx_cache_empty;
u64 rx_cache_busy;
+ u64 rx_cache_waive;
/* Special handling counters */
u64 link_down_events_phy;
@@ -123,6 +124,7 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) },
};
@@ -216,6 +218,12 @@ static const struct counter_desc vport_stats_desc[] = {
MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \
counter_set.eth_per_prio_grp_data_layout.c##_high)
#define NUM_PPORT_PRIO 8
+#define PPORT_ETH_EXT_OFF(c) \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
+#define PPORT_ETH_EXT_GET(pstats, c) \
+ MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \
+ counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
struct mlx5e_pport_stats {
__be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
@@ -224,6 +232,7 @@ struct mlx5e_pport_stats {
__be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)];
__be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
__be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
+ __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)];
};
static const struct counter_desc pport_802_3_stats_desc[] = {
@@ -290,12 +299,22 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
{ "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
};
+static const struct counter_desc pport_eth_ext_stats_desc[] = {
+ { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) },
+};
+
#define PCIE_PERF_OFF(c) \
MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c)
#define PCIE_PERF_GET(pcie_stats, c) \
MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \
counter_set.pcie_perf_cntrs_grp_data_layout.c)
+#define PCIE_PERF_OFF64(c) \
+ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high)
+#define PCIE_PERF_GET64(pcie_stats, c) \
+ MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \
+ counter_set.pcie_perf_cntrs_grp_data_layout.c##_high)
+
struct mlx5e_pcie_stats {
__be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)];
};
@@ -305,6 +324,17 @@ static const struct counter_desc pcie_perf_stats_desc[] = {
{ "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) },
};
+static const struct counter_desc pcie_perf_stats_desc64[] = {
+ { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) },
+};
+
+static const struct counter_desc pcie_perf_stall_stats_desc[] = {
+ { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) },
+ { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) },
+ { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) },
+ { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) },
+};
+
struct mlx5e_rq_stats {
u64 packets;
u64 bytes;
@@ -326,6 +356,7 @@ struct mlx5e_rq_stats {
u64 cache_full;
u64 cache_empty;
u64 cache_busy;
+ u64 cache_waive;
};
static const struct counter_desc rq_stats_desc[] = {
@@ -349,6 +380,7 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
};
struct mlx5e_sq_stats {
@@ -397,17 +429,29 @@ static const struct counter_desc sq_stats_desc[] = {
#define NUM_PCIE_PERF_COUNTERS(priv) \
(ARRAY_SIZE(pcie_perf_stats_desc) * \
MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group))
+#define NUM_PCIE_PERF_COUNTERS64(priv) \
+ (ARRAY_SIZE(pcie_perf_stats_desc64) * \
+ MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt))
+#define NUM_PCIE_PERF_STALL_COUNTERS(priv) \
+ (ARRAY_SIZE(pcie_perf_stall_stats_desc) * \
+ MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled))
#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \
ARRAY_SIZE(pport_per_prio_traffic_stats_desc)
#define NUM_PPORT_PER_PRIO_PFC_COUNTERS \
ARRAY_SIZE(pport_per_prio_pfc_stats_desc)
+#define NUM_PPORT_ETH_EXT_COUNTERS(priv) \
+ (ARRAY_SIZE(pport_eth_ext_stats_desc) * \
+ MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters))
#define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_802_3_COUNTERS + \
NUM_PPORT_2863_COUNTERS + \
NUM_PPORT_2819_COUNTERS + \
NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \
NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \
- NUM_PPORT_PRIO)
-#define NUM_PCIE_COUNTERS(priv) NUM_PCIE_PERF_COUNTERS(priv)
+ NUM_PPORT_PRIO + \
+ NUM_PPORT_ETH_EXT_COUNTERS(priv))
+#define NUM_PCIE_COUNTERS(priv) (NUM_PCIE_PERF_COUNTERS(priv) + \
+ NUM_PCIE_PERF_COUNTERS64(priv) +\
+ NUM_PCIE_PERF_STALL_COUNTERS(priv))
#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc)
#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 7f282e8..da503e6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1326,7 +1326,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
LIST_HEAD(actions);
int err;
- if (tc_no_actions(exts))
+ if (!tcf_exts_has_actions(exts))
return -EINVAL;
attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
@@ -1837,7 +1837,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
bool encap = false;
int err = 0;
- if (tc_no_actions(exts))
+ if (!tcf_exts_has_actions(exts))
return -EINVAL;
memset(attr, 0, sizeof(*attr));
@@ -1937,7 +1937,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
return err;
}
-int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
+int mlx5e_configure_flower(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index ecbe30d..c14c263 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -33,12 +33,15 @@
#ifndef __MLX5_EN_TC_H__
#define __MLX5_EN_TC_H__
+#include <net/pkt_cls.h>
+
#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
+#ifdef CONFIG_MLX5_ESWITCH
int mlx5e_tc_init(struct mlx5e_priv *priv);
void mlx5e_tc_cleanup(struct mlx5e_priv *priv);
-int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
+int mlx5e_configure_flower(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f);
int mlx5e_delete_flower(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f);
@@ -60,4 +63,10 @@ static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
return atomic_read(&priv->fs.tc.ht.nelems);
}
+#else /* CONFIG_MLX5_ESWITCH */
+static inline int mlx5e_tc_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tc_cleanup(struct mlx5e_priv *priv) {}
+static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; }
+#endif
+
#endif /* __MLX5_EN_TC_H__ */
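
The header now compiles the TC offload entry points out entirely when CONFIG_MLX5_ESWITCH is disabled, giving callers inline no-op stubs instead of #ifdefs at every call site. A generic sketch of the idiom, with hypothetical names:

    #ifdef CONFIG_FOO
    int foo_init(struct foo_priv *priv);
    void foo_cleanup(struct foo_priv *priv);
    #else
    static inline int foo_init(struct foo_priv *priv) { return 0; }
    static inline void foo_cleanup(struct foo_priv *priv) {}
    #endif
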
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 31353e5..fee43e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -394,6 +394,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_txqsq *sq;
+ struct mlx5_cqe64 *cqe;
u32 dma_fifo_cc;
u32 nbytes;
u16 npkts;
@@ -402,7 +403,11 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
sq = container_of(cq, struct mlx5e_txqsq, cq);
- if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+ if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED)))
+ return false;
+
+ cqe = mlx5_cqwq_get_cqe(&cq->wq);
+ if (!cqe)
return false;
npkts = 0;
@@ -416,15 +421,11 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
/* avoid dirtying sq cache line every cqe */
dma_fifo_cc = sq->dma_fifo_cc;
- for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) {
- struct mlx5_cqe64 *cqe;
+ i = 0;
+ do {
u16 wqe_counter;
bool last_wqe;
- cqe = mlx5_cqwq_get_cqe(&cq->wq);
- if (!cqe)
- break;
-
mlx5_cqwq_pop(&cq->wq);
wqe_counter = be16_to_cpu(cqe->wqe_counter);
@@ -467,7 +468,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
sqcc += wi->num_wqebbs;
napi_consume_skb(skb, napi_budget);
} while (!last_wqe);
- }
+
+ } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
mlx5_cqwq_update_db_record(&cq->wq);
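
The TX CQ poll loop is restructured from a for loop with an early break into a do/while that fetches the first CQE before entering the loop: an empty CQ now returns without touching the rest of the SQ state, and the budget check and the next-CQE fetch share a single loop condition. A self-contained sketch of the control flow, assuming hypothetical get_cqe()/process() helpers where get_cqe() returns NULL on an empty queue:

    static bool poll_cq(struct cq *cq, int budget)
    {
            struct cqe *cqe = get_cqe(cq);
            int polled = 0;

            if (!cqe)
                    return false;        /* empty CQ: nothing touched */
            do {
                    process(cqe);        /* consume one completion */
            } while (++polled < budget && (cqe = get_cqe(cq)));

            return polled == budget;     /* true: there may be more work */
    }
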
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 92db28a..e906b75 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -30,66 +30,18 @@
* SOFTWARE.
*/
+#include <linux/irq.h>
#include "en.h"
-static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
- struct mlx5e_icosq *sq,
- struct mlx5_cqe64 *cqe,
- u16 *sqcc)
+static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
{
- struct mlx5_wq_cyc *wq = &sq->wq;
- u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1;
- struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci];
- struct mlx5e_rq *rq = &sq->channel->rq;
-
- prefetch(rq);
- mlx5_cqwq_pop(&cq->wq);
- *sqcc += icowi->num_wqebbs;
-
- if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) {
- WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n",
- cqe->op_own);
- return;
- }
-
- if (likely(icowi->opcode == MLX5_OPCODE_UMR)) {
- mlx5e_post_rx_mpwqe(rq);
- return;
- }
-
- if (unlikely(icowi->opcode != MLX5_OPCODE_NOP))
- WARN_ONCE(true,
- "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n",
- icowi->opcode);
-}
-
-static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
-{
- struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
- struct mlx5_cqe64 *cqe;
- u16 sqcc;
-
- if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
- return;
-
- cqe = mlx5_cqwq_get_cqe(&cq->wq);
- if (likely(!cqe))
- return;
+ int current_cpu = smp_processor_id();
+ const struct cpumask *aff;
+ struct irq_data *idata;
- /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
- * otherwise a cq overrun may occur
- */
- sqcc = sq->cc;
-
- /* by design, there's only a single cqe */
- mlx5e_poll_ico_single_cqe(cq, sq, cqe, &sqcc);
-
- mlx5_cqwq_update_db_record(&cq->wq);
-
- /* ensure cq space is freed before enabling more cqes */
- wmb();
-
- sq->cc = sqcc;
+ idata = irq_desc_get_irq_data(c->irq_desc);
+ aff = irq_data_get_affinity_mask(idata);
+ return cpumask_test_cpu(current_cpu, aff);
}
int mlx5e_napi_poll(struct napi_struct *napi, int budget)
@@ -100,8 +52,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
int work_done;
int i;
- clear_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags);
-
for (i = 0; i < c->num_tc; i++)
busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
@@ -111,25 +61,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
busy |= work_done == budget;
- mlx5e_poll_ico_cq(&c->icosq.cq);
+ busy |= c->rq.post_wqes(&c->rq);
- busy |= mlx5e_post_rx_wqes(&c->rq);
-
- if (busy)
- return budget;
-
- napi_complete_done(napi, work_done);
+ if (busy) {
+ if (likely(mlx5e_channel_no_affinity_change(c)))
+ return budget;
+ if (work_done == budget)
+ work_done--;
+ }
- /* avoid losing completion event during/after polling cqs */
- if (test_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags)) {
- napi_schedule(napi);
+ if (unlikely(!napi_complete_done(napi, work_done)))
return work_done;
- }
for (i = 0; i < c->num_tc; i++)
mlx5e_cq_arm(&c->sq[i].cq);
- if (test_bit(MLX5E_RQ_STATE_AM, &c->rq.state))
+ if (MLX5E_TEST_BIT(c->rq.state, MLX5E_RQ_STATE_AM))
mlx5e_rx_am(&c->rq);
mlx5e_cq_arm(&c->rq.cq);
@@ -143,7 +90,6 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq)
struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
cq->event_ctr++;
- set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags);
napi_schedule(cq->napi);
}
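
mlx5e_channel_no_affinity_change() above replaces the NAPI_SCHED flag dance: as long as the polling CPU is still in the channel IRQ's affinity mask, a busy channel keeps returning the full budget; once the IRQ has been migrated, polling completes so NAPI can be rescheduled on the new CPU. A condensed sketch of the check (irq_desc_get_irq_data(), irq_data_get_affinity_mask() and cpumask_test_cpu() are existing kernel APIs):

    static bool on_irq_affine_cpu(struct irq_desc *desc)
    {
            const struct cpumask *aff =
                    irq_data_get_affinity_mask(irq_desc_get_irq_data(desc));

            return cpumask_test_cpu(smp_processor_id(), aff);
    }
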
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index edd11eb..fc606bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -36,9 +36,7 @@
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"
#include "fpga/core.h"
-#ifdef CONFIG_MLX5_CORE_EN
#include "eswitch.h"
-#endif
enum {
MLX5_EQE_SIZE = sizeof(struct mlx5_eqe),
@@ -193,6 +191,7 @@ static void eq_update_ci(struct mlx5_eq *eq, int arm)
{
__be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
+
__raw_writel((__force u32)cpu_to_be32(val), addr);
/* We still want ordering, just not swabbing, so add a barrier */
mb();
@@ -483,11 +482,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
}
break;
-#ifdef CONFIG_MLX5_CORE_EN
case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
break;
-#endif
case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
mlx5_port_module_event(dev, eqe);
@@ -702,9 +699,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
int err;
- if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
- MLX5_CAP_GEN(dev, vport_group_manager) &&
- mlx5_core_is_pf(dev))
+ if (MLX5_VPORT_MANAGER(dev))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 5b41f69..c77f4c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -46,19 +46,13 @@ enum {
MLX5_ACTION_DEL = 2,
};
-/* E-Switch UC L2 table hash node */
-struct esw_uc_addr {
- struct l2addr_node node;
- u32 table_index;
- u32 vport;
-};
-
/* Vport UC/MC hash node */
struct vport_addr {
struct l2addr_node node;
u8 action;
u32 vport;
- struct mlx5_flow_handle *flow_rule; /* SRIOV only */
+ struct mlx5_flow_handle *flow_rule;
+ bool mpfs; /* UC MAC was added to MPFS */
/* A flag indicating that mac was added due to mc promiscuous vport */
bool mc_promisc;
};
@@ -154,81 +148,6 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in));
}
-/* HW L2 Table (MPFS) management */
-static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index,
- u8 *mac, u8 vlan_valid, u16 vlan)
-{
- u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
- u8 *in_mac_addr;
-
- MLX5_SET(set_l2_table_entry_in, in, opcode,
- MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
- MLX5_SET(set_l2_table_entry_in, in, table_index, index);
- MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid);
- MLX5_SET(set_l2_table_entry_in, in, vlan, vlan);
-
- in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
- ether_addr_copy(&in_mac_addr[2], mac);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
-{
- u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
-
- MLX5_SET(delete_l2_table_entry_in, in, opcode,
- MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
- MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix)
-{
- int err = 0;
-
- *ix = find_first_zero_bit(l2_table->bitmap, l2_table->size);
- if (*ix >= l2_table->size)
- err = -ENOSPC;
- else
- __set_bit(*ix, l2_table->bitmap);
-
- return err;
-}
-
-static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix)
-{
- __clear_bit(ix, l2_table->bitmap);
-}
-
-static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac,
- u8 vlan_valid, u16 vlan,
- u32 *index)
-{
- struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table;
- int err;
-
- err = alloc_l2_table_index(l2_table, index);
- if (err)
- return err;
-
- err = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan);
- if (err)
- free_l2_table_index(l2_table, *index);
-
- return err;
-}
-
-static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index)
-{
- struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table;
-
- del_l2_table_entry_cmd(dev, index);
- free_l2_table_index(l2_table, index);
-}
-
/* E-Switch FDB */
static struct mlx5_flow_handle *
__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
@@ -455,65 +374,60 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
- struct hlist_head *hash = esw->l2_table.l2_hash;
- struct esw_uc_addr *esw_uc;
u8 *mac = vaddr->node.addr;
u32 vport = vaddr->vport;
int err;
- esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr);
- if (esw_uc) {
+ /* Skip mlx5_mpfs_add_mac for PFs,
+ * it is already done by the PF netdev in mlx5e_execute_l2_action
+ */
+ if (!vport)
+ goto fdb_add;
+
+ err = mlx5_mpfs_add_mac(esw->dev, mac);
+ if (err) {
esw_warn(esw->dev,
- "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n",
- mac, vport, esw_uc->vport);
- return -EEXIST;
+ "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n",
+ mac, vport, err);
+ return err;
}
+ vaddr->mpfs = true;
- esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL);
- if (!esw_uc)
- return -ENOMEM;
- esw_uc->vport = vport;
-
- err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index);
- if (err)
- goto abort;
-
+fdb_add:
/* SRIOV is enabled: Forward UC MAC to vport */
if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY)
vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
- esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n",
- vport, mac, esw_uc->table_index, vaddr->flow_rule);
- return err;
-abort:
- l2addr_hash_del(esw_uc);
- return err;
+ esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
+ vport, mac, vaddr->flow_rule);
+
+ return 0;
}
static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
- struct hlist_head *hash = esw->l2_table.l2_hash;
- struct esw_uc_addr *esw_uc;
u8 *mac = vaddr->node.addr;
u32 vport = vaddr->vport;
+ int err = 0;
- esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr);
- if (!esw_uc || esw_uc->vport != vport) {
- esw_debug(esw->dev,
- "MAC(%pM) doesn't belong to vport (%d)\n",
- mac, vport);
- return -EINVAL;
- }
- esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n",
- vport, mac, esw_uc->table_index, vaddr->flow_rule);
+ /* Skip mlx5_mpfs_del_mac for PFs,
+ * it is already done by the PF netdev in mlx5e_execute_l2_action
+ */
+ if (!vport || !vaddr->mpfs)
+ goto fdb_del;
- del_l2_table_entry(esw->dev, esw_uc->table_index);
+ err = mlx5_mpfs_del_mac(esw->dev, mac);
+ if (err)
+ esw_warn(esw->dev,
+ "Failed to del L2 table mac(%pM) for vport(%d), err(%d)\n",
+ mac, vport, err);
+ vaddr->mpfs = false;
+fdb_del:
if (vaddr->flow_rule)
mlx5_del_flow_rules(vaddr->flow_rule);
vaddr->flow_rule = NULL;
- l2addr_hash_del(esw_uc);
return 0;
}
@@ -1611,13 +1525,14 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
}
/* Public E-Switch API */
+#define ESW_ALLOWED(esw) ((esw) && MLX5_VPORT_MANAGER((esw)->dev))
+
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
{
int err;
int i, enabled_events;
- if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
- MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ if (!ESW_ALLOWED(esw))
return 0;
if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) ||
@@ -1634,7 +1549,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
esw->mode = mode;
- esw_disable_vport(esw, 0);
if (mode == SRIOV_LEGACY)
err = esw_create_legacy_fdb_table(esw, nvfs + 1);
@@ -1647,7 +1561,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
if (err)
esw_warn(esw->dev, "Failed to create eswitch TSAR");
- enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE;
+ /* Don't enable vport events when in SRIOV_OFFLOADS mode, since:
+ * 1. L2 table (MPFS) is programmed by the PF/VF representor netdevs' set_rx_mode
+ * 2. FDB/Eswitch is programmed by user space tools
+ */
+ enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0;
for (i = 0; i <= nvfs; i++)
esw_enable_vport(esw, i, enabled_events);
@@ -1656,7 +1574,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
return 0;
abort:
- esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
esw->mode = SRIOV_NONE;
return err;
}
@@ -1667,9 +1584,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
int nvports;
int i;
- if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
- MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH ||
- esw->mode == SRIOV_NONE)
+ if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE)
return;
esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n",
@@ -1692,44 +1607,21 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
esw_offloads_cleanup(esw, nvports);
esw->mode = SRIOV_NONE;
- /* VPORT 0 (PF) must be enabled back with non-sriov configuration */
- esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
-}
-
-void mlx5_eswitch_attach(struct mlx5_eswitch *esw)
-{
- if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
- MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
- return;
-
- esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
- /* VF Vports will be enabled when SRIOV is enabled */
-}
-
-void mlx5_eswitch_detach(struct mlx5_eswitch *esw)
-{
- if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
- MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
- return;
-
- esw_disable_vport(esw, 0);
}
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
{
- int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
int total_vports = MLX5_TOTAL_VPORTS(dev);
struct mlx5_eswitch *esw;
int vport_num;
int err;
- if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
- MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ if (!MLX5_VPORT_MANAGER(dev))
return 0;
esw_info(dev,
- "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n",
- total_vports, l2_table_size,
+ "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
+ total_vports,
MLX5_MAX_UC_PER_VPORT(dev),
MLX5_MAX_MC_PER_VPORT(dev));
@@ -1739,14 +1631,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
esw->dev = dev;
- esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size),
- sizeof(uintptr_t), GFP_KERNEL);
- if (!esw->l2_table.bitmap) {
- err = -ENOMEM;
- goto abort;
- }
- esw->l2_table.size = l2_table_size;
-
esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
if (!esw->work_queue) {
err = -ENOMEM;
@@ -1797,7 +1681,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
abort:
if (esw->work_queue)
destroy_workqueue(esw->work_queue);
- kfree(esw->l2_table.bitmap);
kfree(esw->vports);
kfree(esw->offloads.vport_reps);
kfree(esw);
@@ -1806,15 +1689,13 @@ abort:
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
{
- if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
- MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ if (!esw || !MLX5_VPORT_MANAGER(esw->dev))
return;
esw_info(esw->dev, "cleanup\n");
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
- kfree(esw->l2_table.bitmap);
kfree(esw->offloads.vport_reps);
kfree(esw->vports);
kfree(esw);
@@ -1838,8 +1719,6 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
}
/* Vport Administration */
-#define ESW_ALLOWED(esw) \
- (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev))
#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
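
With the L2 table code moved to lib/mpfs.c, the eswitch UC add/del paths reduce to a symmetric pair: only non-PF vports program MPFS (vport 0 is handled by the PF netdev itself), and vaddr->mpfs records whether an entry was taken so teardown undoes exactly what setup did. A condensed sketch with hypothetical function names and error reporting trimmed:

    static int uc_addr_add(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
    {
            int err = 0;

            if (vaddr->vport) {          /* vport 0 is the PF, skip it */
                    err = mlx5_mpfs_add_mac(esw->dev, vaddr->node.addr);
                    if (!err)
                            vaddr->mpfs = true;
            }
            return err;
    }

    static void uc_addr_del(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
    {
            if (!vaddr->mpfs)
                    return;
            mlx5_mpfs_del_mac(esw->dev, vaddr->node.addr);
            vaddr->mpfs = false;
    }
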
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 834a330..565c8b7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -37,6 +37,15 @@
#include <linux/if_link.h>
#include <net/devlink.h>
#include <linux/mlx5/device.h>
+#include "lib/mpfs.h"
+
+enum {
+ SRIOV_NONE,
+ SRIOV_LEGACY,
+ SRIOV_OFFLOADS
+};
+
+#ifdef CONFIG_MLX5_ESWITCH
#define MLX5_MAX_UC_PER_VPORT(dev) \
(1 << MLX5_CAP_GEN(dev, log_max_current_uc_list))
@@ -44,9 +53,6 @@
#define MLX5_MAX_MC_PER_VPORT(dev) \
(1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
-#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE))
-#define MLX5_L2_ADDR_HASH(addr) (addr[5])
-
#define FDB_UPLINK_VPORT 0xffff
#define MLX5_MIN_BW_SHARE 1
@@ -54,48 +60,6 @@
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit)
-/* L2 -mac address based- hash helpers */
-struct l2addr_node {
- struct hlist_node hlist;
- u8 addr[ETH_ALEN];
-};
-
-#define for_each_l2hash_node(hn, tmp, hash, i) \
- for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \
- hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
-
-#define l2addr_hash_find(hash, mac, type) ({ \
- int ix = MLX5_L2_ADDR_HASH(mac); \
- bool found = false; \
- type *ptr = NULL; \
- \
- hlist_for_each_entry(ptr, &hash[ix], node.hlist) \
- if (ether_addr_equal(ptr->node.addr, mac)) {\
- found = true; \
- break; \
- } \
- if (!found) \
- ptr = NULL; \
- ptr; \
-})
-
-#define l2addr_hash_add(hash, mac, type, gfp) ({ \
- int ix = MLX5_L2_ADDR_HASH(mac); \
- type *ptr = NULL; \
- \
- ptr = kzalloc(sizeof(type), gfp); \
- if (ptr) { \
- ether_addr_copy(ptr->node.addr, mac); \
- hlist_add_head(&ptr->node.hlist, &hash[ix]);\
- } \
- ptr; \
-})
-
-#define l2addr_hash_del(ptr) ({ \
- hlist_del(&ptr->node.hlist); \
- kfree(ptr); \
-})
-
struct vport_ingress {
struct mlx5_flow_table *acl;
struct mlx5_flow_group *allow_untagged_spoofchk_grp;
@@ -150,12 +114,6 @@ struct mlx5_vport {
u16 enabled_events;
};
-struct mlx5_l2_table {
- struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE];
- u32 size;
- unsigned long *bitmap;
-};
-
struct mlx5_eswitch_fdb {
void *fdb;
union {
@@ -175,12 +133,6 @@ struct mlx5_eswitch_fdb {
};
};
-enum {
- SRIOV_NONE,
- SRIOV_LEGACY,
- SRIOV_OFFLOADS
-};
-
struct mlx5_esw_sq {
struct mlx5_flow_handle *send_to_vport_rule;
struct list_head list;
@@ -222,7 +174,6 @@ struct esw_mc_addr { /* SRIOV only */
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
- struct mlx5_l2_table l2_table;
struct mlx5_eswitch_fdb fdb_table;
struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
struct workqueue_struct *work_queue;
@@ -250,8 +201,6 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
-void mlx5_eswitch_attach(struct mlx5_eswitch *esw);
-void mlx5_eswitch_detach(struct mlx5_eswitch *esw);
void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
@@ -345,4 +294,13 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
#define esw_debug(dev, format, ...) \
mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
+#else /* CONFIG_MLX5_ESWITCH */
+/* eswitch API stubs */
+static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
+static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
+static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
+static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+#endif /* CONFIG_MLX5_ESWITCH */
+
#endif /* __MLX5_ESWITCH_H__ */
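
The repeated three-clause capability check is collapsed throughout this series into the MLX5_VPORT_MANAGER(dev) predicate. The macro itself lives in the core header rather than in these hunks; judging from the checks it replaces, its expansion is equivalent to:

    #define MLX5_VPORT_MANAGER(mdev)                                      \
            (MLX5_CAP_GEN(mdev, vport_group_manager) &&                   \
             (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
             mlx5_core_is_pf(mdev))
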
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 5bc0593..d9fd857 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -433,6 +433,8 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
struct mlx5_flow_table *fdb = NULL;
int esw_size, err = 0;
u32 flags = 0;
+ u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
if (!root_ns) {
@@ -443,9 +445,9 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n",
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
- MLX5_CAP_GEN(dev, max_flow_counter), ESW_OFFLOADS_NUM_GROUPS);
+ max_flow_counter, ESW_OFFLOADS_NUM_GROUPS);
- esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * ESW_OFFLOADS_NUM_GROUPS,
+ esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS,
1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
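
max_flow_counter outgrew 16 bits, so the firmware now reports it split across two capability fields that the driver stitches back together. The composition is ordinary hi/lo joining; a generic sketch with a hypothetical helper:

    static u32 join_split_cap(u16 hi, u16 lo)
    {
            return ((u32)hi << 16) | lo;
    }
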
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index e750f07..e0d0efd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -263,7 +263,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id);
in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
match_value);
- memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
+ memcpy(in_match_value, &fte->val, sizeof(fte->val));
in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
@@ -359,7 +359,7 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id)
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
{
u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0};
u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
@@ -374,7 +374,7 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id)
return err;
}
-int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id)
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id)
{
u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0};
u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0};
@@ -385,7 +385,7 @@ int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id)
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
-int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
u64 *packets, u64 *bytes)
{
u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
@@ -409,14 +409,14 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
}
struct mlx5_cmd_fc_bulk {
- u16 id;
+ u32 id;
int num;
int outlen;
u32 out[0];
};
struct mlx5_cmd_fc_bulk *
-mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num)
+mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num)
{
struct mlx5_cmd_fc_bulk *b;
int outlen =
@@ -453,7 +453,7 @@ mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
}
void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
- struct mlx5_cmd_fc_bulk *b, u16 id,
+ struct mlx5_cmd_fc_bulk *b, u32 id,
u64 *packets, u64 *bytes)
{
int index = id - b->id;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 0f98a7c..c6d7bdf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -74,20 +74,20 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
u32 underlay_qpn);
-int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id);
-int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id);
-int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
+int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
+int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
u64 *packets, u64 *bytes);
struct mlx5_cmd_fc_bulk;
struct mlx5_cmd_fc_bulk *
-mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num);
+mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num);
void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b);
int
mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b);
void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
- struct mlx5_cmd_fc_bulk *b, u16 id,
+ struct mlx5_cmd_fc_bulk *b, u32 id,
u64 *packets, u64 *bytes);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index e8690fe..5a7bea6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -36,6 +36,7 @@
#include "mlx5_core.h"
#include "fs_core.h"
#include "fs_cmd.h"
+#include "diag/fs_tracepoint.h"
#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
sizeof(struct init_tree_node))
@@ -82,8 +83,8 @@
#define ETHTOOL_PRIO_NUM_LEVELS 1
#define ETHTOOL_NUM_PRIOS 11
#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
-/* Vlan, mac, ttc, aRFS */
-#define KERNEL_NIC_PRIO_NUM_LEVELS 4
+/* Vlan, mac, ttc, inner ttc, aRFS */
+#define KERNEL_NIC_PRIO_NUM_LEVELS 5
#define KERNEL_NIC_NUM_PRIOS 1
/* One more level for tc */
#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
@@ -150,6 +151,23 @@ enum fs_i_mutex_lock_class {
FS_MUTEX_CHILD
};
+static const struct rhashtable_params rhash_fte = {
+ .key_len = FIELD_SIZEOF(struct fs_fte, val),
+ .key_offset = offsetof(struct fs_fte, val),
+ .head_offset = offsetof(struct fs_fte, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
+
+static const struct rhashtable_params rhash_fg = {
+ .key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask),
+ .key_offset = offsetof(struct mlx5_flow_group, mask),
+ .head_offset = offsetof(struct mlx5_flow_group, hash),
+ .automatic_shrinking = true,
+ .min_size = 1,
+};
+
static void del_rule(struct fs_node *node);
static void del_flow_table(struct fs_node *node);
static void del_flow_group(struct fs_node *node);
@@ -255,71 +273,77 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
return NULL;
}
-static bool masked_memcmp(void *mask, void *val1, void *val2, size_t size)
+static bool check_last_reserved(const u32 *match_criteria)
{
- unsigned int i;
+ char *match_criteria_reserved =
+ MLX5_ADDR_OF(fte_match_param, match_criteria, MLX5_FTE_MATCH_PARAM_RESERVED);
- for (i = 0; i < size; i++, mask++, val1++, val2++)
- if ((*((u8 *)val1) & (*(u8 *)mask)) !=
- ((*(u8 *)val2) & (*(u8 *)mask)))
- return false;
-
- return true;
+ return !match_criteria_reserved[0] &&
+ !memcmp(match_criteria_reserved, match_criteria_reserved + 1,
+ MLX5_FLD_SZ_BYTES(fte_match_param,
+ MLX5_FTE_MATCH_PARAM_RESERVED) - 1);
}
-static bool compare_match_value(struct mlx5_flow_group_mask *mask,
- void *fte_param1, void *fte_param2)
+static bool check_valid_mask(u8 match_criteria_enable, const u32 *match_criteria)
{
- if (mask->match_criteria_enable &
- 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) {
- void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
- fte_param1, outer_headers);
- void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
- fte_param2, outer_headers);
- void *fte_mask = MLX5_ADDR_OF(fte_match_param,
- mask->match_criteria, outer_headers);
+ if (match_criteria_enable & ~(
+ (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) |
+ (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) |
+ (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)))
+ return false;
- if (!masked_memcmp(fte_mask, fte_match1, fte_match2,
- MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
+ if (!(match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)) {
+ char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
+ match_criteria, outer_headers);
+
+ if (fg_type_mask[0] ||
+ memcmp(fg_type_mask, fg_type_mask + 1,
+ MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1))
return false;
}
- if (mask->match_criteria_enable &
- 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) {
- void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
- fte_param1, misc_parameters);
- void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
- fte_param2, misc_parameters);
- void *fte_mask = MLX5_ADDR_OF(fte_match_param,
- mask->match_criteria, misc_parameters);
+ if (!(match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS)) {
+ char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
+ match_criteria, misc_parameters);
- if (!masked_memcmp(fte_mask, fte_match1, fte_match2,
- MLX5_ST_SZ_BYTES(fte_match_set_misc)))
+ if (fg_type_mask[0] ||
+ memcmp(fg_type_mask, fg_type_mask + 1,
+ MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
return false;
}
- if (mask->match_criteria_enable &
- 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) {
- void *fte_match1 = MLX5_ADDR_OF(fte_match_param,
- fte_param1, inner_headers);
- void *fte_match2 = MLX5_ADDR_OF(fte_match_param,
- fte_param2, inner_headers);
- void *fte_mask = MLX5_ADDR_OF(fte_match_param,
- mask->match_criteria, inner_headers);
+ if (!(match_criteria_enable &
+ 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)) {
+ char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
+ match_criteria, inner_headers);
- if (!masked_memcmp(fte_mask, fte_match1, fte_match2,
- MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4)))
+ if (fg_type_mask[0] ||
+ memcmp(fg_type_mask, fg_type_mask + 1,
+ MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1))
return false;
}
- return true;
+
+ return check_last_reserved(match_criteria);
}
-static bool compare_match_criteria(u8 match_criteria_enable1,
- u8 match_criteria_enable2,
- void *mask1, void *mask2)
+static bool check_valid_spec(const struct mlx5_flow_spec *spec)
{
- return match_criteria_enable1 == match_criteria_enable2 &&
- !memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param));
+ int i;
+
+ if (!check_valid_mask(spec->match_criteria_enable, spec->match_criteria)) {
+ pr_warn("mlx5_core: Match criteria given mismatches match_criteria_enable\n");
+ return false;
+ }
+
+ for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++)
+ if (spec->match_value[i] & ~spec->match_criteria[i]) {
+ pr_warn("mlx5_core: match_value differs from match_criteria\n");
+ return false;
+ }
+
+ return check_last_reserved(spec->match_value);
}
static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
@@ -360,6 +384,8 @@ static void del_flow_table(struct fs_node *node)
err = mlx5_cmd_destroy_flow_table(dev, ft);
if (err)
mlx5_core_warn(dev, "flow steering can't destroy ft\n");
+ ida_destroy(&ft->fte_allocator);
+ rhltable_destroy(&ft->fgs_hash);
fs_get_obj(prio, ft->node.parent);
prio->num_ft--;
}
@@ -370,22 +396,16 @@ static void del_rule(struct fs_node *node)
struct mlx5_flow_table *ft;
struct mlx5_flow_group *fg;
struct fs_fte *fte;
- u32 *match_value;
int modify_mask;
struct mlx5_core_dev *dev = get_dev(node);
- int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
int err;
bool update_fte = false;
- match_value = kvzalloc(match_len, GFP_KERNEL);
- if (!match_value)
- return;
-
fs_get_obj(rule, node);
fs_get_obj(fte, rule->node.parent);
fs_get_obj(fg, fte->node.parent);
- memcpy(match_value, fte->val, sizeof(fte->val));
fs_get_obj(ft, fg->node.parent);
+ trace_mlx5_fs_del_rule(rule);
list_del(&rule->node.list);
if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
mutex_lock(&rule->dest_attr.ft->lock);
@@ -414,7 +434,18 @@ out:
"%s can't del rule fg id=%d fte_index=%d\n",
__func__, fg->id, fte->index);
}
- kvfree(match_value);
+}
+
+static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg)
+{
+ struct mlx5_flow_table *ft;
+ int ret;
+
+ ret = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, rhash_fte);
+ WARN_ON(ret);
+ fte->status = 0;
+ fs_get_obj(ft, fg->node.parent);
+ ida_simple_remove(&ft->fte_allocator, fte->index);
}
static void del_fte(struct fs_node *node)
@@ -428,6 +459,7 @@ static void del_fte(struct fs_node *node)
fs_get_obj(fte, node);
fs_get_obj(fg, fte->node.parent);
fs_get_obj(ft, fg->node.parent);
+ trace_mlx5_fs_del_fte(fte);
dev = get_dev(&ft->node);
err = mlx5_cmd_delete_fte(dev, ft,
@@ -437,8 +469,7 @@ static void del_fte(struct fs_node *node)
"flow steering can't delete fte in index %d of flow group id %d\n",
fte->index, fg->id);
- fte->status = 0;
- fg->num_ftes--;
+ destroy_fte(fte, fg);
}
static void del_flow_group(struct fs_node *node)
@@ -446,14 +477,21 @@ static void del_flow_group(struct fs_node *node)
struct mlx5_flow_group *fg;
struct mlx5_flow_table *ft;
struct mlx5_core_dev *dev;
+ int err;
fs_get_obj(fg, node);
fs_get_obj(ft, fg->node.parent);
dev = get_dev(&ft->node);
+ trace_mlx5_fs_del_fg(fg);
if (ft->autogroup.active)
ft->autogroup.num_groups--;
+ rhashtable_destroy(&fg->ftes_hash);
+ err = rhltable_remove(&ft->fgs_hash,
+ &fg->hash,
+ rhash_fg);
+ WARN_ON(err);
if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
fg->id, ft->id);
@@ -488,10 +526,17 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in)
u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
create_fg_in,
match_criteria_enable);
+ int ret;
+
fg = kzalloc(sizeof(*fg), GFP_KERNEL);
if (!fg)
return ERR_PTR(-ENOMEM);
+ ret = rhashtable_init(&fg->ftes_hash, &rhash_fte);
+ if (ret) {
+ kfree(fg);
+ return ERR_PTR(ret);
+ }
fg->mask.match_criteria_enable = match_criteria_enable;
memcpy(&fg->mask.match_criteria, match_criteria,
sizeof(fg->mask.match_criteria));
@@ -509,10 +554,17 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft
u32 flags)
{
struct mlx5_flow_table *ft;
+ int ret;
ft = kzalloc(sizeof(*ft), GFP_KERNEL);
if (!ft)
- return NULL;
+ return ERR_PTR(-ENOMEM);
+
+ ret = rhltable_init(&ft->fgs_hash, &rhash_fg);
+ if (ret) {
+ kfree(ft);
+ return ERR_PTR(ret);
+ }
ft->level = level;
ft->node.type = FS_TYPE_FLOW_TABLE;
@@ -523,6 +575,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft
ft->flags = flags;
INIT_LIST_HEAD(&ft->fwd_rules);
mutex_init(&ft->lock);
+ ida_init(&ft->fte_allocator);
return ft;
}
@@ -812,8 +865,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0,
root->table_type,
op_mod, ft_attr->flags);
- if (!ft) {
- err = -ENOMEM;
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
goto unlock_root;
}
@@ -839,6 +892,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
destroy_ft:
mlx5_cmd_destroy_flow_table(root->dev, ft);
free_ft:
+ ida_destroy(&ft->fte_allocator);
kfree(ft);
unlock_root:
mutex_unlock(&root->chain_lock);
@@ -924,11 +978,13 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *
if (IS_ERR(fg))
return fg;
+ err = rhltable_insert(&ft->fgs_hash, &fg->hash, rhash_fg);
+ if (err)
+ goto err_free_fg;
+
err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
- if (err) {
- kfree(fg);
- return ERR_PTR(err);
- }
+ if (err)
+ goto err_remove_fg;
if (ft->autogroup.active)
ft->autogroup.num_groups++;
@@ -938,14 +994,33 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *
/* Add node to group list */
list_add(&fg->node.list, prev_fg);
+ trace_mlx5_fs_add_fg(fg);
return fg;
+
+err_remove_fg:
+ WARN_ON(rhltable_remove(&ft->fgs_hash,
+ &fg->hash,
+ rhash_fg));
+err_free_fg:
+ rhashtable_destroy(&fg->ftes_hash);
+ kfree(fg);
+
+ return ERR_PTR(err);
}
struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
u32 *fg_in)
{
+ void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+ fg_in, match_criteria);
+ u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
+ fg_in,
+ match_criteria_enable);
struct mlx5_flow_group *fg;
+ if (!check_valid_mask(match_criteria_enable, match_criteria))
+ return ERR_PTR(-EINVAL);
+
if (ft->autogroup.active)
return ERR_PTR(-EPERM);
@@ -1102,43 +1177,38 @@ free_handle:
return ERR_PTR(err);
}
-/* Assumed fg is locked */
-static unsigned int get_free_fte_index(struct mlx5_flow_group *fg,
- struct list_head **prev)
-{
- struct fs_fte *fte;
- unsigned int start = fg->start_index;
-
- if (prev)
- *prev = &fg->node.children;
-
- /* assumed list is sorted by index */
- fs_for_each_fte(fte, fg) {
- if (fte->index != start)
- return start;
- start++;
- if (prev)
- *prev = &fte->node.list;
- }
-
- return start;
-}
-
-/* prev is output, prev->next = new_fte */
static struct fs_fte *create_fte(struct mlx5_flow_group *fg,
u32 *match_value,
- struct mlx5_flow_act *flow_act,
- struct list_head **prev)
+ struct mlx5_flow_act *flow_act)
{
+ struct mlx5_flow_table *ft;
struct fs_fte *fte;
int index;
+ int ret;
+
+ fs_get_obj(ft, fg->node.parent);
+ index = ida_simple_get(&ft->fte_allocator, fg->start_index,
+ fg->start_index + fg->max_ftes,
+ GFP_KERNEL);
+ if (index < 0)
+ return ERR_PTR(index);
- index = get_free_fte_index(fg, prev);
fte = alloc_fte(flow_act, match_value, index);
- if (IS_ERR(fte))
- return fte;
+ if (IS_ERR(fte)) {
+ ret = PTR_ERR(fte);
+ goto err_alloc;
+ }
+ ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte);
+ if (ret)
+ goto err_hash;
return fte;
+
+err_hash:
+ kfree(fte);
+err_alloc:
+ ida_simple_remove(&ft->fte_allocator, index);
+ return ERR_PTR(ret);
}
static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
@@ -1226,79 +1296,104 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
return NULL;
}
+static bool check_conflicting_actions(u32 action1, u32 action2)
+{
+ u32 xored_actions = action1 ^ action2;
+
+ /* if one rule only wants to count, it's ok */
+ if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT ||
+ action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ return false;
+
+ if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+ MLX5_FLOW_CONTEXT_ACTION_DECAP))
+ return true;
+
+ return false;
+}
+
+static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act)
+{
+ if (check_conflicting_actions(flow_act->action, fte->action)) {
+ mlx5_core_warn(get_dev(&fte->node),
+ "Found two FTEs with conflicting actions\n");
+ return -EEXIST;
+ }
+
+ if (fte->flow_tag != flow_act->flow_tag) {
+ mlx5_core_warn(get_dev(&fte->node),
+ "FTE flow tag %u already exists with different flow tag %u\n",
+ fte->flow_tag,
+ flow_act->flow_tag);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
u32 *match_value,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dest,
- int dest_num)
+ int dest_num,
+ struct fs_fte *fte)
{
struct mlx5_flow_handle *handle;
struct mlx5_flow_table *ft;
- struct list_head *prev;
- struct fs_fte *fte;
int i;
- nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT);
- fs_for_each_fte(fte, fg) {
+ if (fte) {
+ int old_action;
+ int ret;
+
nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
- if (compare_match_value(&fg->mask, match_value, &fte->val) &&
- (flow_act->action & fte->action)) {
- int old_action = fte->action;
-
- if (fte->flow_tag != flow_act->flow_tag) {
- mlx5_core_warn(get_dev(&fte->node),
- "FTE flow tag %u already exists with different flow tag %u\n",
- fte->flow_tag,
- flow_act->flow_tag);
- handle = ERR_PTR(-EEXIST);
- goto unlock_fte;
- }
+ ret = check_conflicting_ftes(fte, flow_act);
+ if (ret) {
+ handle = ERR_PTR(ret);
+ goto unlock_fte;
+ }
- fte->action |= flow_act->action;
- handle = add_rule_fte(fte, fg, dest, dest_num,
- old_action != flow_act->action);
- if (IS_ERR(handle)) {
- fte->action = old_action;
- goto unlock_fte;
- } else {
- goto add_rules;
- }
+ old_action = fte->action;
+ fte->action |= flow_act->action;
+ handle = add_rule_fte(fte, fg, dest, dest_num,
+ old_action != flow_act->action);
+ if (IS_ERR(handle)) {
+ fte->action = old_action;
+ goto unlock_fte;
+ } else {
+ trace_mlx5_fs_set_fte(fte, false);
+ goto add_rules;
}
- unlock_ref_node(&fte->node);
}
fs_get_obj(ft, fg->node.parent);
- if (fg->num_ftes >= fg->max_ftes) {
- handle = ERR_PTR(-ENOSPC);
- goto unlock_fg;
- }
- fte = create_fte(fg, match_value, flow_act, &prev);
- if (IS_ERR(fte)) {
- handle = (void *)fte;
- goto unlock_fg;
- }
+ fte = create_fte(fg, match_value, flow_act);
+ if (IS_ERR(fte))
+ return (void *)fte;
tree_init_node(&fte->node, 0, del_fte);
nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
handle = add_rule_fte(fte, fg, dest, dest_num, false);
if (IS_ERR(handle)) {
unlock_ref_node(&fte->node);
+ destroy_fte(fte, fg);
kfree(fte);
- goto unlock_fg;
+ return handle;
}
- fg->num_ftes++;
-
tree_add_node(&fte->node, &fg->node);
- list_add(&fte->node.list, prev);
+ /* fte list isn't sorted */
+ list_add_tail(&fte->node.list, &fg->node.children);
+ trace_mlx5_fs_set_fte(fte, true);
add_rules:
for (i = 0; i < handle->num_rules; i++) {
- if (atomic_read(&handle->rule[i]->node.refcount) == 1)
+ if (atomic_read(&handle->rule[i]->node.refcount) == 1) {
tree_add_node(&handle->rule[i]->node, &fte->node);
+ trace_mlx5_fs_add_rule(handle->rule[i]);
+ }
}
unlock_fte:
unlock_ref_node(&fte->node);
-unlock_fg:
- unlock_ref_node(&fg->node);
return handle;
}
@@ -1347,6 +1442,96 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
}
static struct mlx5_flow_handle *
+try_add_to_existing_fg(struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int dest_num)
+{
+ struct mlx5_flow_group *g;
+ struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT);
+ struct rhlist_head *tmp, *list;
+ struct match_list {
+ struct list_head list;
+ struct mlx5_flow_group *g;
+ } match_list, *iter;
+ LIST_HEAD(match_head);
+
+ rcu_read_lock();
+ /* Collect all fgs which has a matching match_criteria */
+ list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg);
+ rhl_for_each_entry_rcu(g, tmp, list, hash) {
+ struct match_list *curr_match;
+
+ if (likely(list_empty(&match_head))) {
+ match_list.g = g;
+ list_add_tail(&match_list.list, &match_head);
+ continue;
+ }
+ curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
+
+ if (!curr_match) {
+ rcu_read_unlock();
+ rule = ERR_PTR(-ENOMEM);
+ goto free_list;
+ }
+ curr_match->g = g;
+ list_add_tail(&curr_match->list, &match_head);
+ }
+ rcu_read_unlock();
+
+ /* Try to find a fg that already contains a matching fte */
+ list_for_each_entry(iter, &match_head, list) {
+ struct fs_fte *fte;
+
+ g = iter->g;
+ nested_lock_ref_node(&g->node, FS_MUTEX_PARENT);
+ fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value,
+ rhash_fte);
+ if (fte) {
+ rule = add_rule_fg(g, spec->match_value,
+ flow_act, dest, dest_num, fte);
+ unlock_ref_node(&g->node);
+ goto free_list;
+ }
+ unlock_ref_node(&g->node);
+ }
+
+ /* No group with matching fte found. Try to add a new fte to any
+ * matching fg.
+ */
+ list_for_each_entry(iter, &match_head, list) {
+ g = iter->g;
+
+ nested_lock_ref_node(&g->node, FS_MUTEX_PARENT);
+ rule = add_rule_fg(g, spec->match_value,
+ flow_act, dest, dest_num, NULL);
+ if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) {
+ unlock_ref_node(&g->node);
+ goto free_list;
+ }
+ unlock_ref_node(&g->node);
+ }
+
+free_list:
+ if (!list_empty(&match_head)) {
+ struct match_list *match_tmp;
+
+ /* The most common case is having one FG. Since we want to
+ * optimize this case, we save the first on the stack.
+ * Therefore, no need to free it.
+ */
+ list_del(&list_first_entry(&match_head, typeof(*iter), list)->list);
+ list_for_each_entry_safe(iter, match_tmp, &match_head, list) {
+ list_del(&iter->list);
+ kfree(iter);
+ }
+ }
+
+ return rule;
+}
+
+static struct mlx5_flow_handle *
_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
struct mlx5_flow_spec *spec,
struct mlx5_flow_act *flow_act,
@@ -1358,22 +1543,18 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
struct mlx5_flow_handle *rule;
int i;
+ if (!check_valid_spec(spec))
+ return ERR_PTR(-EINVAL);
+
for (i = 0; i < dest_num; i++) {
if (!dest_is_valid(&dest[i], flow_act->action, ft))
return ERR_PTR(-EINVAL);
}
nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
- fs_for_each_fg(g, ft)
- if (compare_match_criteria(g->mask.match_criteria_enable,
- spec->match_criteria_enable,
- g->mask.match_criteria,
- spec->match_criteria)) {
- rule = add_rule_fg(g, spec->match_value,
- flow_act, dest, dest_num);
- if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC)
- goto unlock;
- }
+ rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num);
+ if (!IS_ERR(rule))
+ goto unlock;
g = create_autogroup(ft, spec->match_criteria_enable,
spec->match_criteria);
@@ -1382,7 +1563,8 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
goto unlock;
}
- rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num);
+ rule = add_rule_fg(g, spec->match_value, flow_act, dest,
+ dest_num, NULL);
if (IS_ERR(rule)) {
/* Remove assumes refcount > 0 and autogroup creates a group
* with a refcount = 0.
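
The linear fs_for_each_fg()/fs_for_each_fte() scans are replaced with hashed lookups: flow groups sit in an rhltable keyed by the match criteria (several groups may share a mask, hence the list variant), and each group's FTEs sit in an rhashtable keyed by the match value. A self-contained sketch of the rhashtable half, with a hypothetical item type (the rhashtable calls are the real kernel API):

    struct item {
            u32               key[4];
            struct rhash_head hash;
    };

    static const struct rhashtable_params item_params = {
            .key_len             = FIELD_SIZEOF(struct item, key),
            .key_offset          = offsetof(struct item, key),
            .head_offset         = offsetof(struct item, hash),
            .automatic_shrinking = true,
    };

    /* lookup by value, insert on miss */
    it = rhashtable_lookup_fast(&ht, key, item_params);
    if (!it)
            err = rhashtable_insert_fast(&ht, &new_item->hash, item_params);
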
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 990acee..5509a752 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -34,6 +34,7 @@
#define _MLX5_FS_CORE_
#include <linux/mlx5/fs.h>
+#include <linux/rhashtable.h>
enum fs_node_type {
FS_TYPE_NAMESPACE,
@@ -118,6 +119,8 @@ struct mlx5_flow_table {
/* FWD rules that point on this flow table */
struct list_head fwd_rules;
u32 flags;
+ struct ida fte_allocator;
+ struct rhltable fgs_hash;
};
struct mlx5_fc_cache {
@@ -136,17 +139,29 @@ struct mlx5_fc {
u64 lastpackets;
u64 lastbytes;
- u16 id;
+ u32 id;
bool deleted;
bool aging;
struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
};
+#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_600
+/* Calculate the fte_match_param length without the reserved length.
+ * Make sure the reserved field is the last one.
+ */
+#define MLX5_ST_SZ_DW_MATCH_PARAM \
+ ((MLX5_BYTE_OFF(fte_match_param, MLX5_FTE_MATCH_PARAM_RESERVED) / sizeof(u32)) + \
+ BUILD_BUG_ON_ZERO(MLX5_ST_SZ_BYTES(fte_match_param) != \
+ MLX5_FLD_SZ_BYTES(fte_match_param, \
+ MLX5_FTE_MATCH_PARAM_RESERVED) +\
+ MLX5_BYTE_OFF(fte_match_param, \
+ MLX5_FTE_MATCH_PARAM_RESERVED)))
+
/* Type of children is mlx5_flow_rule */
struct fs_fte {
struct fs_node node;
- u32 val[MLX5_ST_SZ_DW(fte_match_param)];
+ u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
u32 dests_size;
u32 flow_tag;
u32 index;
@@ -155,6 +170,7 @@ struct fs_fte {
u32 modify_id;
enum fs_fte_status status;
struct mlx5_fc *counter;
+ struct rhash_head hash;
};
/* Type of children is mlx5_flow_table/namespace */
@@ -174,7 +190,7 @@ struct mlx5_flow_namespace {
struct mlx5_flow_group_mask {
u8 match_criteria_enable;
- u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)];
+ u32 match_criteria[MLX5_ST_SZ_DW_MATCH_PARAM];
};
/* Type of children is fs_fte */
@@ -183,8 +199,9 @@ struct mlx5_flow_group {
struct mlx5_flow_group_mask mask;
u32 start_index;
u32 max_ftes;
- u32 num_ftes;
u32 id;
+ struct rhashtable ftes_hash;
+ struct rhlist_head hash;
};
struct mlx5_flow_root_namespace {
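
MLX5_ST_SZ_DW_MATCH_PARAM trims the reserved tail off the match-param arrays and embeds a BUILD_BUG_ON_ZERO() so the build breaks if the reserved field ever stops being the last one. The idiom works because BUILD_BUG_ON_ZERO(cond) evaluates to 0 when cond is false and to a compile error when it is true, so the assertion folds into a constant size expression. A generic sketch with hypothetical names:

    #define USABLE_SIZE \
            (TAIL_OFFSET + BUILD_BUG_ON_ZERO(TOTAL_SIZE != TAIL_OFFSET + TAIL_SIZE))
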
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 6507d8a..89d1f865 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -38,6 +38,8 @@
#include "fs_cmd.h"
#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
+/* Max number of counters to query in bulk read is 32K */
+#define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
/* locking scheme:
*
@@ -90,16 +92,21 @@ static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter)
rb_insert_color(&counter->node, root);
}
+/* The function returns the last node that was queried, so the caller
+ * can keep calling it until all counters are queried.
+ */
static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
struct mlx5_fc *first,
- u16 last_id)
+ u32 last_id)
{
struct mlx5_cmd_fc_bulk *b;
struct rb_node *node = NULL;
- u16 afirst_id;
+ u32 afirst_id;
int num;
int err;
- int max_bulk = 1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk);
+
+ int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
+ (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
/* first id must be aligned to 4 when using bulk query */
afirst_id = first->id & ~0x3;
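
Bulk counter reads are now capped at 32K entries even when log_max_flow_counter_bulk advertises more, and the first counter id keeps its required alignment to 4. Condensed from the hunk above, the sizing logic is:

    int max_bulk  = min_t(int, MLX5_SW_MAX_COUNTERS_BULK,        /* 1 << 15 */
                          1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk));
    u32 afirst_id = first->id & ~0x3;                            /* 4-aligned */
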
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index eb04e97..43c126c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -39,10 +39,11 @@ static void mlx5i_get_drvinfo(struct net_device *dev,
struct mlx5e_priv *priv = mlx5i_epriv(dev);
mlx5e_ethtool_get_drvinfo(priv, drvinfo);
+ strlcpy(drvinfo->driver, DRIVER_NAME "[ib_ipoib]",
+ sizeof(drvinfo->driver));
}
-static void mlx5i_get_strings(struct net_device *dev,
- uint32_t stringset, uint8_t *data)
+static void mlx5i_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
struct mlx5e_priv *priv = mlx5i_epriv(dev);
@@ -129,17 +130,123 @@ static int mlx5i_flash_device(struct net_device *netdev,
return mlx5e_ethtool_flash_device(priv, flash);
}
+enum mlx5_ptys_width {
+ MLX5_PTYS_WIDTH_1X = 1 << 0,
+ MLX5_PTYS_WIDTH_2X = 1 << 1,
+ MLX5_PTYS_WIDTH_4X = 1 << 2,
+ MLX5_PTYS_WIDTH_8X = 1 << 3,
+ MLX5_PTYS_WIDTH_12X = 1 << 4,
+};
+
+static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width)
+{
+ switch (width) {
+ case MLX5_PTYS_WIDTH_1X: return 1;
+ case MLX5_PTYS_WIDTH_2X: return 2;
+ case MLX5_PTYS_WIDTH_4X: return 4;
+ case MLX5_PTYS_WIDTH_8X: return 8;
+ case MLX5_PTYS_WIDTH_12X: return 12;
+ default: return -1;
+ }
+}
+
+enum mlx5_ptys_rate {
+ MLX5_PTYS_RATE_SDR = 1 << 0,
+ MLX5_PTYS_RATE_DDR = 1 << 1,
+ MLX5_PTYS_RATE_QDR = 1 << 2,
+ MLX5_PTYS_RATE_FDR10 = 1 << 3,
+ MLX5_PTYS_RATE_FDR = 1 << 4,
+ MLX5_PTYS_RATE_EDR = 1 << 5,
+ MLX5_PTYS_RATE_HDR = 1 << 6,
+};
+
+static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate)
+{
+ switch (rate) {
+ case MLX5_PTYS_RATE_SDR: return 2500;
+ case MLX5_PTYS_RATE_DDR: return 5000;
+ case MLX5_PTYS_RATE_QDR:
+ case MLX5_PTYS_RATE_FDR10: return 10000;
+ case MLX5_PTYS_RATE_FDR: return 14000;
+ case MLX5_PTYS_RATE_EDR: return 25000;
+ case MLX5_PTYS_RATE_HDR: return 50000;
+ default: return -1;
+ }
+}
+
+static int mlx5i_get_port_settings(struct net_device *netdev,
+ u16 *ib_link_width_oper, u16 *ib_proto_oper)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
+ int ret;
+
+ ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1);
+ if (ret)
+ return ret;
+
+ *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper);
+ *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
+
+ return 0;
+}
+
+static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper)
+{
+ int rate, width;
+
+ rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper);
+ if (rate < 0)
+ return -EINVAL;
+ width = mlx5_ptys_width_enum_to_int(ib_link_width_oper);
+ if (width < 0)
+ return -EINVAL;
+
+ return rate * width;
+}
+
+static int mlx5i_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ u16 ib_link_width_oper;
+ u16 ib_proto_oper;
+ int speed, ret;
+
+ ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper);
+ if (ret)
+ return ret;
+
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
+ ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
+
+ speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper);
+ if (speed < 0)
+ return -EINVAL;
+
+ link_ksettings->base.duplex = DUPLEX_FULL;
+ link_ksettings->base.port = PORT_OTHER;
+
+ link_ksettings->base.autoneg = AUTONEG_DISABLE;
+
+ link_ksettings->base.speed = speed;
+
+ return 0;
+}
+
const struct ethtool_ops mlx5i_ethtool_ops = {
- .get_drvinfo = mlx5i_get_drvinfo,
- .get_strings = mlx5i_get_strings,
- .get_sset_count = mlx5i_get_sset_count,
- .get_ethtool_stats = mlx5i_get_ethtool_stats,
- .get_ringparam = mlx5i_get_ringparam,
- .set_ringparam = mlx5i_set_ringparam,
- .flash_device = mlx5i_flash_device,
- .get_channels = mlx5i_get_channels,
- .set_channels = mlx5i_set_channels,
- .get_coalesce = mlx5i_get_coalesce,
- .set_coalesce = mlx5i_set_coalesce,
- .get_ts_info = mlx5i_get_ts_info,
+ .get_drvinfo = mlx5i_get_drvinfo,
+ .get_strings = mlx5i_get_strings,
+ .get_sset_count = mlx5i_get_sset_count,
+ .get_ethtool_stats = mlx5i_get_ethtool_stats,
+ .get_ringparam = mlx5i_get_ringparam,
+ .set_ringparam = mlx5i_set_ringparam,
+ .flash_device = mlx5i_flash_device,
+ .get_channels = mlx5i_get_channels,
+ .set_channels = mlx5i_set_channels,
+ .get_coalesce = mlx5i_get_coalesce,
+ .set_coalesce = mlx5i_set_coalesce,
+ .get_ts_info = mlx5i_get_ts_info,
+ .get_link_ksettings = mlx5i_get_link_ksettings,
+ .get_link = ethtool_op_get_link,
};
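
get_link_ksettings derives the IPoIB speed from the PTYS register as lane rate times lane width. For example, an EDR link (25000 Mb/s per lane) running at 4X width reports 100000; using the helpers defined above:

    int speed = mlx5_ptys_rate_enum_to_int(MLX5_PTYS_RATE_EDR) *
                mlx5_ptys_width_enum_to_int(MLX5_PTYS_WIDTH_4X);
    /* 25000 * 4 = 100000 Mb/s */
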
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
new file mode 100644
index 0000000..7cb6712
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include "mlx5_core.h"
+#include "lib/mpfs.h"
+
+/* HW L2 Table (MPFS) management */
+static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac)
+{
+ u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
+ u8 *in_mac_addr;
+
+ MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
+ MLX5_SET(set_l2_table_entry_in, in, table_index, index);
+
+ in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
+ ether_addr_copy(&in_mac_addr[2], mac);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
+{
+ u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
+
+ MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
+ MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+/* UC L2 table hash node */
+struct l2table_node {
+ struct l2addr_node node;
+ u32 index; /* index in HW l2 table */
+};
+
+struct mlx5_mpfs {
+ struct hlist_head hash[MLX5_L2_ADDR_HASH_SIZE];
+ struct mutex lock; /* Synchronize l2 table access */
+ u32 size;
+ unsigned long *bitmap;
+};
+
+static int alloc_l2table_index(struct mlx5_mpfs *l2table, u32 *ix)
+{
+ int err = 0;
+
+ *ix = find_first_zero_bit(l2table->bitmap, l2table->size);
+ if (*ix >= l2table->size)
+ err = -ENOSPC;
+ else
+ __set_bit(*ix, l2table->bitmap);
+
+ return err;
+}
+
+static void free_l2table_index(struct mlx5_mpfs *l2table, u32 ix)
+{
+ __clear_bit(ix, l2table->bitmap);
+}
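
The index allocator is a plain first-fit bitmap: find the first clear bit, set it, and hand the position out. A self-contained userspace model of the same pattern, with the kernel's find_first_zero_bit()/__set_bit()/__clear_bit() replaced by hand-rolled equivalents:

#include <stdio.h>

#define TABLE_SIZE 64
#define BITS (sizeof(unsigned long) * 8)
static unsigned long bitmap[(TABLE_SIZE + BITS - 1) / BITS];

/* first-fit scan, as in alloc_l2table_index() */
static int alloc_index(void)
{
	for (int i = 0; i < TABLE_SIZE; i++)
		if (!(bitmap[i / BITS] & (1UL << (i % BITS)))) {
			bitmap[i / BITS] |= 1UL << (i % BITS);
			return i;
		}
	return -1;	/* -ENOSPC in the driver */
}

static void free_index(int i)
{
	bitmap[i / BITS] &= ~(1UL << (i % BITS));
}

int main(void)
{
	int a = alloc_index(), b = alloc_index();

	printf("a=%d b=%d\n", a, b);		/* a=0 b=1 */
	free_index(a);
	printf("reused=%d\n", alloc_index());	/* reused=0: first-fit */
	return 0;
}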
+
+int mlx5_mpfs_init(struct mlx5_core_dev *dev)
+{
+ int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
+ struct mlx5_mpfs *mpfs;
+
+ if (!MLX5_VPORT_MANAGER(dev))
+ return 0;
+
+ mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL);
+ if (!mpfs)
+ return -ENOMEM;
+
+ mutex_init(&mpfs->lock);
+ mpfs->size = l2table_size;
+ mpfs->bitmap = kcalloc(BITS_TO_LONGS(l2table_size),
+ sizeof(uintptr_t), GFP_KERNEL);
+ if (!mpfs->bitmap) {
+ kfree(mpfs);
+ return -ENOMEM;
+ }
+
+ dev->priv.mpfs = mpfs;
+ return 0;
+}
+
+void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev)
+{
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+
+ if (!MLX5_VPORT_MANAGER(dev))
+ return;
+
+ WARN_ON(!hlist_empty(mpfs->hash));
+ kfree(mpfs->bitmap);
+ kfree(mpfs);
+}
+
+int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
+{
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+ struct l2table_node *l2addr;
+ u32 index;
+ int err;
+
+ if (!MLX5_VPORT_MANAGER(dev))
+ return 0;
+
+ mutex_lock(&mpfs->lock);
+
+ l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
+ if (l2addr) {
+ err = -EEXIST;
+ goto abort;
+ }
+
+ err = alloc_l2table_index(mpfs, &index);
+ if (err)
+ goto abort;
+
+ l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL);
+ if (!l2addr) {
+ free_l2table_index(mpfs, index);
+ err = -ENOMEM;
+ goto abort;
+ }
+
+ l2addr->index = index;
+ err = set_l2table_entry_cmd(dev, index, mac);
+ if (err) {
+ l2addr_hash_del(l2addr);
+ free_l2table_index(mpfs, index);
+ }
+
+ mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index);
+abort:
+ mutex_unlock(&mpfs->lock);
+ return err;
+}
+
+int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
+{
+ struct mlx5_mpfs *mpfs = dev->priv.mpfs;
+ struct l2table_node *l2addr;
+ int err = 0;
+ u32 index;
+
+ if (!MLX5_VPORT_MANAGER(dev))
+ return 0;
+
+ mutex_lock(&mpfs->lock);
+
+ l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node);
+ if (!l2addr) {
+ err = -ENOENT;
+ goto unlock;
+ }
+
+ index = l2addr->index;
+ del_l2table_entry_cmd(dev, index);
+ l2addr_hash_del(l2addr);
+ free_l2table_index(mpfs, index);
+ mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index);
+unlock:
+ mutex_unlock(&mpfs->lock);
+ return err;
+}
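
Callers are expected to install each function's unicast MACs in this shared table before traffic to them can be steered. A hedged sketch of the call pattern (the helper name and the -EEXIST tolerance are illustrative, not part of this patch):

/* illustrative only; assumes an mlx5 netdev rx-mode path */
static int example_sync_uc_addr(struct mlx5_core_dev *mdev, u8 *mac)
{
	int err;

	err = mlx5_mpfs_add_mac(mdev, mac);
	if (err && err != -EEXIST)	/* already present is tolerable here */
		return err;
	return 0;
}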
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
new file mode 100644
index 0000000..4a7b2c3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_MPFS_H__
+#define __MLX5_MPFS_H__
+
+#include <linux/if_ether.h>
+#include <linux/mlx5/device.h>
+
+/* L2 (MAC address based) hash helpers */
+#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE))
+#define MLX5_L2_ADDR_HASH(addr) (addr[5])
+
+struct l2addr_node {
+ struct hlist_node hlist;
+ u8 addr[ETH_ALEN];
+};
+
+#define for_each_l2hash_node(hn, tmp, hash, i) \
+ for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \
+ hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist)
+
+#define l2addr_hash_find(hash, mac, type) ({ \
+ int ix = MLX5_L2_ADDR_HASH(mac); \
+ bool found = false; \
+ type *ptr = NULL; \
+ \
+ hlist_for_each_entry(ptr, &(hash)[ix], node.hlist) \
+ if (ether_addr_equal(ptr->node.addr, mac)) {\
+ found = true; \
+ break; \
+ } \
+ if (!found) \
+ ptr = NULL; \
+ ptr; \
+})
+
+#define l2addr_hash_add(hash, mac, type, gfp) ({ \
+ int ix = MLX5_L2_ADDR_HASH(mac); \
+ type *ptr = NULL; \
+ \
+ ptr = kzalloc(sizeof(type), gfp); \
+ if (ptr) { \
+ ether_addr_copy(ptr->node.addr, mac); \
+ hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\
+ } \
+ ptr; \
+})
+
+#define l2addr_hash_del(ptr) ({ \
+ hlist_del(&(ptr)->node.hlist); \
+ kfree(ptr); \
+})
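
These statement-expression macros implement a fixed 256-bucket hash keyed on the last MAC byte. A self-contained userspace analogue of the add/find pattern (singly linked buckets stand in for hlist; deletion omitted for brevity):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define HASH_SIZE 256			/* BIT(BITS_PER_BYTE) */
#define HASH(mac) ((mac)[5])		/* bucket = last MAC byte */

struct node {
	struct node *next;
	unsigned char addr[6];
};

static struct node *hash[HASH_SIZE];

static struct node *hash_find(const unsigned char *mac)
{
	struct node *n;

	for (n = hash[HASH(mac)]; n; n = n->next)
		if (!memcmp(n->addr, mac, 6))
			return n;
	return NULL;
}

static struct node *hash_add(const unsigned char *mac)
{
	struct node *n = calloc(1, sizeof(*n));

	if (n) {
		memcpy(n->addr, mac, 6);
		n->next = hash[HASH(mac)];
		hash[HASH(mac)] = n;
	}
	return n;
}

int main(void)
{
	unsigned char mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

	hash_add(mac);
	printf("found: %s\n", hash_find(mac) ? "yes" : "no");
	return 0;
}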
+
+#ifdef CONFIG_MLX5_MPFS
+int mlx5_mpfs_init(struct mlx5_core_dev *dev);
+void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev);
+int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac);
+int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac);
+#else /* #ifndef CONFIG_MLX5_MPFS */
+static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {}
+static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
+static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
+#endif
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 8c4b45ef5..0d2c8dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -54,9 +54,8 @@
#include <net/devlink.h>
#include "mlx5_core.h"
#include "fs_core.h"
-#ifdef CONFIG_MLX5_CORE_EN
+#include "lib/mpfs.h"
#include "eswitch.h"
-#endif
#include "lib/mlx5.h"
#include "fpga/core.h"
#include "accel/ipsec.h"
@@ -788,7 +787,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
return -EOPNOTSUPP;
}
-
static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
struct pci_dev *pdev = dev->pdev;
@@ -897,13 +895,17 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto err_tables_cleanup;
}
-#ifdef CONFIG_MLX5_CORE_EN
+ err = mlx5_mpfs_init(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init l2 table %d\n", err);
+ goto err_rl_cleanup;
+ }
+
err = mlx5_eswitch_init(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init eswitch %d\n", err);
- goto err_rl_cleanup;
+ goto err_mpfs_cleanup;
}
-#endif
err = mlx5_sriov_init(dev);
if (err) {
@@ -922,13 +924,11 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
err_sriov_cleanup:
mlx5_sriov_cleanup(dev);
err_eswitch_cleanup:
-#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
-
+err_mpfs_cleanup:
+ mlx5_mpfs_cleanup(dev);
err_rl_cleanup:
-#endif
mlx5_cleanup_rl_table(dev);
-
err_tables_cleanup:
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
@@ -946,9 +946,8 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
mlx5_fpga_cleanup(dev);
mlx5_sriov_cleanup(dev);
-#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_cleanup(dev->priv.eswitch);
-#endif
+ mlx5_mpfs_cleanup(dev);
mlx5_cleanup_rl_table(dev);
mlx5_cleanup_reserved_gids(dev);
mlx5_cleanup_mkey_table(dev);
@@ -1106,10 +1105,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_fs;
}
-#ifdef CONFIG_MLX5_CORE_EN
- mlx5_eswitch_attach(dev->priv.eswitch);
-#endif
-
err = mlx5_sriov_attach(dev);
if (err) {
dev_err(&pdev->dev, "sriov init failed %d\n", err);
@@ -1152,9 +1147,6 @@ err_fpga_start:
mlx5_sriov_detach(dev);
err_sriov:
-#ifdef CONFIG_MLX5_CORE_EN
- mlx5_eswitch_detach(dev->priv.eswitch);
-#endif
mlx5_cleanup_fs(dev);
err_fs:
@@ -1225,9 +1217,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_fpga_device_stop(dev);
mlx5_sriov_detach(dev);
-#ifdef CONFIG_MLX5_CORE_EN
- mlx5_eswitch_detach(dev->priv.eswitch);
-#endif
mlx5_cleanup_fs(dev);
free_comp_eqs(dev);
mlx5_stop_eqs(dev);
@@ -1258,7 +1247,7 @@ struct mlx5_core_event_handler {
};
static const struct devlink_ops mlx5_devlink_ops = {
-#ifdef CONFIG_MLX5_CORE_EN
+#ifdef CONFIG_MLX5_ESWITCH
.eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
.eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
.eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
@@ -1298,6 +1287,9 @@ static int init_one(struct pci_dev *pdev,
mutex_init(&dev->pci_status_mutex);
mutex_init(&dev->intf_state_mutex);
+ INIT_LIST_HEAD(&priv->waiting_events_list);
+ priv->is_accum_events = false;
+
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
err = init_srcu_struct(&priv->pfault_srcu);
if (err) {
@@ -1352,7 +1344,6 @@ clean_srcu:
cleanup_srcu_struct(&priv->pfault_srcu);
clean_dev:
#endif
- pci_set_drvdata(pdev, NULL);
devlink_free(devlink);
return err;
@@ -1379,7 +1370,6 @@ static void remove_one(struct pci_dev *pdev)
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
cleanup_srcu_struct(&priv->pfault_srcu);
#endif
- pci_set_drvdata(pdev, NULL);
devlink_free(devlink);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 01d637d..b7c2900 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -43,6 +43,10 @@
#define DRIVER_VERSION "5.0-0"
#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
+#define MLX5_VPORT_MANAGER(mdev) \
+ (MLX5_CAP_GEN(mdev, vport_group_manager) && \
+ (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
+ mlx5_core_is_pf(mdev))
extern uint mlx5_core_debug_mask;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 28d8472..6c48e99 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -34,9 +34,7 @@
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
-#ifdef CONFIG_MLX5_CORE_EN
#include "eswitch.h"
-#endif
bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
{
@@ -90,14 +88,12 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
return -EBUSY;
}
-#ifdef CONFIG_MLX5_CORE_EN
err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
if (err) {
mlx5_core_warn(dev,
"failed to enable eswitch SRIOV (%d)\n", err);
return err;
}
-#endif
for (vf = 0; vf < num_vfs; vf++) {
err = mlx5_core_enable_hca(dev, vf + 1);
@@ -117,7 +113,6 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
}
}
mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf);
-
}
return 0;
@@ -130,11 +125,7 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
int vf;
if (!sriov->enabled_vfs)
-#ifdef CONFIG_MLX5_CORE_EN
- goto disable_sriov_resources;
-#else
- return;
-#endif
+ goto out;
for (vf = 0; vf < sriov->num_vfs; vf++) {
if (!sriov->vfs_ctx[vf].enabled)
@@ -148,10 +139,8 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
sriov->enabled_vfs--;
}
-#ifdef CONFIG_MLX5_CORE_EN
-disable_sriov_resources:
+out:
mlx5_eswitch_disable_sriov(dev->priv.eswitch);
-#endif
if (mlx5_wait_for_vf_pages(dev))
mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 695adff..d56eea3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -75,6 +75,7 @@ config MLXSW_SPECTRUM
depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q
depends on PSAMPLE || PSAMPLE=n
depends on BRIDGE || BRIDGE=n
+ depends on IPV6 || IPV6=n
select PARMAN
select MLXFW
default m
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 62fc42f..891ff41 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -16,8 +16,9 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_switchdev.o spectrum_router.o \
spectrum_kvdl.o spectrum_acl_tcam.o \
spectrum_acl.o spectrum_flower.o \
- spectrum_cnt.o spectrum_dpipe.o \
- spectrum_fid.o
+ spectrum_cnt.o spectrum_fid.o \
+ spectrum_ipip.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
+mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
mlxsw_minimal-objs := minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index affe84e..9d5e7cf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -667,7 +667,7 @@ static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core,
int err;
dev_dbg(mlxsw_core->bus_info->dev, "EMAD reg access (tid=%llx,reg_id=%x(%s),type=%s)\n",
- trans->tid, reg->id, mlxsw_reg_id_str(reg->id),
+ tid, reg->id, mlxsw_reg_id_str(reg->id),
mlxsw_core_reg_access_type_str(type));
skb = mlxsw_emad_alloc(mlxsw_core, reg->len);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
index 9807ef8..f6963b0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h
@@ -57,6 +57,9 @@ enum mlxsw_afk_element {
MLXSW_AFK_ELEMENT_VID,
MLXSW_AFK_ELEMENT_PCP,
MLXSW_AFK_ELEMENT_TCP_FLAGS,
+ MLXSW_AFK_ELEMENT_IP_TTL_,
+ MLXSW_AFK_ELEMENT_IP_ECN,
+ MLXSW_AFK_ELEMENT_IP_DSCP,
MLXSW_AFK_ELEMENT_MAX,
};
@@ -104,6 +107,9 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12),
MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3),
MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9),
+ MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x14, 0, 8),
+ MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x14, 9, 2),
+ MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x14, 11, 6),
MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32),
MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32),
MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 1bd34d9..cc27c5d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5,6 +5,7 @@
* Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
* Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
+ * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -3679,15 +3680,17 @@ enum mlxsw_reg_htgt_trap_group {
MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP,
- MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF,
MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP,
- MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS,
MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE,
MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
};
/* reg_htgt_trap_group
@@ -3952,10 +3955,12 @@ MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2);
*/
MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8);
-static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en)
+static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en,
+ bool ipv6_en)
{
MLXSW_REG_ZERO(rgcr, payload);
mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en);
+ mlxsw_reg_rgcr_ipv6_en_set(payload, ipv6_en);
}
/* RITR - Router Interface Table Register
@@ -3988,16 +3993,18 @@ MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1);
MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1);
enum mlxsw_reg_ritr_if_type {
+ /* VLAN interface. */
MLXSW_REG_RITR_VLAN_IF,
+ /* FID interface. */
MLXSW_REG_RITR_FID_IF,
+ /* Sub-port interface. */
MLXSW_REG_RITR_SP_IF,
+ /* Loopback Interface. */
+ MLXSW_REG_RITR_LOOPBACK_IF,
};
/* reg_ritr_type
- * Router interface type.
- * 0 - VLAN interface.
- * 1 - FID interface.
- * 2 - Sub-port interface.
+ * Router interface type as per enum mlxsw_reg_ritr_if_type.
* Access: RW
*/
MLXSW_ITEM32(reg, ritr, type, 0x00, 23, 3);
@@ -4125,6 +4132,67 @@ MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16);
*/
MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12);
+/* Loopback Interface */
+
+enum mlxsw_reg_ritr_loopback_protocol {
+ /* IPinIP IPv4 underlay Unicast */
+ MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4,
+ /* IPinIP IPv6 underlay Unicast */
+ MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6,
+};
+
+/* reg_ritr_loopback_protocol
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_protocol, 0x08, 28, 4);
+
+enum mlxsw_reg_ritr_loopback_ipip_type {
+ /* Tunnel is IPinIP. */
+ MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_IP,
+ /* Tunnel is GRE, no key. */
+ MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP,
+ /* Tunnel is GRE, with a key. */
+ MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP,
+};
+
+/* reg_ritr_loopback_ipip_type
+ * Encapsulation type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_type, 0x10, 24, 4);
+
+enum mlxsw_reg_ritr_loopback_ipip_options {
+ /* The key is defined by gre_key. */
+ MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
+};
+
+/* reg_ritr_loopback_ipip_options
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_options, 0x10, 20, 4);
+
+/* reg_ritr_loopback_ipip_uvr
+ * Underlay Virtual Router ID.
+ * Range is 0..cap_max_virtual_routers-1.
+ * Reserved for Spectrum-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16);
+
+/* reg_ritr_loopback_ipip_usip*
+ * Encapsulation Underlay source IP.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, ritr, loopback_ipip_usip6, 0x18, 16);
+MLXSW_ITEM32(reg, ritr, loopback_ipip_usip4, 0x24, 0, 32);
+
+/* reg_ritr_loopback_ipip_gre_key
+ * GRE Key.
+ * Reserved when ipip_type is not IP_IN_GRE_KEY_IN_IP.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, loopback_ipip_gre_key, 0x28, 0, 32);
+
/* Shared between ingress/egress */
enum mlxsw_reg_ritr_counter_set_type {
/* No Count. */
@@ -4195,24 +4263,54 @@ static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag,
static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
enum mlxsw_reg_ritr_if_type type,
- u16 rif, u16 vr_id, u16 mtu,
- const char *mac)
+ u16 rif, u16 vr_id, u16 mtu)
{
bool op = enable ? MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL;
MLXSW_REG_ZERO(ritr, payload);
mlxsw_reg_ritr_enable_set(payload, enable);
mlxsw_reg_ritr_ipv4_set(payload, 1);
+ mlxsw_reg_ritr_ipv6_set(payload, 1);
mlxsw_reg_ritr_type_set(payload, type);
mlxsw_reg_ritr_op_set(payload, op);
mlxsw_reg_ritr_rif_set(payload, rif);
mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
+ mlxsw_reg_ritr_ipv6_fe_set(payload, 1);
mlxsw_reg_ritr_lb_en_set(payload, 1);
mlxsw_reg_ritr_virtual_router_set(payload, vr_id);
mlxsw_reg_ritr_mtu_set(payload, mtu);
+}
+
+static inline void mlxsw_reg_ritr_mac_pack(char *payload, const char *mac)
+{
mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
}
+static inline void
+mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload,
+ enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
+ enum mlxsw_reg_ritr_loopback_ipip_options options,
+ u16 uvr_id, u32 gre_key)
+{
+ mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type);
+ mlxsw_reg_ritr_loopback_ipip_options_set(payload, options);
+ mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id);
+ mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key);
+}
+
+static inline void
+mlxsw_reg_ritr_loopback_ipip4_pack(char *payload,
+ enum mlxsw_reg_ritr_loopback_ipip_type ipip_type,
+ enum mlxsw_reg_ritr_loopback_ipip_options options,
+ u16 uvr_id, u32 usip, u32 gre_key)
+{
+ mlxsw_reg_ritr_loopback_protocol_set(payload,
+ MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4);
+ mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options,
+ uvr_id, gre_key);
+ mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip);
+}
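
Creating an IPv4 GRE underlay loopback RIF thus becomes a generic RITR pack, the loopback-specific pack, and one register write. A hedged in-driver sketch (not standalone: MLXSW_REG_RITR_LEN, MLXSW_REG() and mlxsw_reg_write() are pre-existing mlxsw infrastructure):

static int example_create_gre_lb_rif(struct mlxsw_core *core, u16 rif,
				     u16 vr_id, u16 uvr_id, u32 usip,
				     u32 gre_key)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];

	/* generic RIF fields first, then the loopback-specific ones */
	mlxsw_reg_ritr_pack(ritr_pl, true, MLXSW_REG_RITR_LOOPBACK_IF,
			    rif, vr_id, 0 /* mtu; illustrative */);
	mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl,
		MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP,
		MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
		uvr_id, usip, gre_key);
	return mlxsw_reg_write(core, MLXSW_REG(ritr), ritr_pl);
}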
+
/* RATR - Router Adjacency Table Register
* --------------------------------------
* The RATR register is used to configure the Router Adjacency (next-hop)
@@ -4268,6 +4366,38 @@ MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1);
*/
MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1);
+enum mlxsw_reg_ratr_type {
+ /* Ethernet */
+ MLXSW_REG_RATR_TYPE_ETHERNET,
+ /* IPoIB Unicast without GRH.
+ * Reserved for Spectrum.
+ */
+ MLXSW_REG_RATR_TYPE_IPOIB_UC,
+ /* IPoIB Unicast with GRH. Supported only in table 0 (Ethernet unicast
+ * adjacency).
+ * Reserved for Spectrum.
+ */
+ MLXSW_REG_RATR_TYPE_IPOIB_UC_W_GRH,
+ /* IPoIB Multicast.
+ * Reserved for Spectrum.
+ */
+ MLXSW_REG_RATR_TYPE_IPOIB_MC,
+ /* MPLS.
+ * Reserved for SwitchX/-2.
+ */
+ MLXSW_REG_RATR_TYPE_MPLS,
+ /* IPinIP Encap.
+ * Reserved for SwitchX/-2.
+ */
+ MLXSW_REG_RATR_TYPE_IPIP,
+};
+
+/* reg_ratr_type
+ * Adjacency entry type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, type, 0x04, 28, 4);
+
/* reg_ratr_adjacency_index_low
* Bits 15:0 of index into the adjacency table.
* For SwitchX and SwitchX-2, the adjacency table is linear and
@@ -4297,17 +4427,17 @@ enum mlxsw_reg_ratr_trap_action {
*/
MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4);
-enum mlxsw_reg_ratr_trap_id {
- MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0,
- MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1,
-};
-
/* reg_ratr_adjacency_index_high
* Bits 23:16 of the adjacency_index.
* Access: Index
*/
MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8);
+enum mlxsw_reg_ratr_trap_id {
+ MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0,
+ MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1,
+};
+
/* reg_ratr_trap_id
* Trap ID to be reported to CPU.
* Trap-ID is RTR_EGRESS0 or RTR_EGRESS1.
@@ -4322,14 +4452,44 @@ MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8);
*/
MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6);
+enum mlxsw_reg_ratr_ipip_type {
+ /* IPv4, address set by mlxsw_reg_ratr_ipip_ipv4_udip. */
+ MLXSW_REG_RATR_IPIP_TYPE_IPV4,
+ /* IPv6, address set by mlxsw_reg_ratr_ipip_ipv6_ptr. */
+ MLXSW_REG_RATR_IPIP_TYPE_IPV6,
+};
+
+/* reg_ratr_ipip_type
+ * Underlay destination ip type.
+ * Note: the type field must match the protocol of the router interface.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, ipip_type, 0x10, 16, 4);
+
+/* reg_ratr_ipip_ipv4_udip
+ * Underlay ipv4 dip.
+ * Reserved when ipip_type is IPv6.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32);
+
+/* reg_ratr_ipip_ipv6_ptr
+ * Pointer to IPv6 underlay destination ip address.
+ * For Spectrum: Pointer to KVD linear space.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24);
+
static inline void
mlxsw_reg_ratr_pack(char *payload,
enum mlxsw_reg_ratr_op op, bool valid,
+ enum mlxsw_reg_ratr_type type,
u32 adjacency_index, u16 egress_rif)
{
MLXSW_REG_ZERO(ratr, payload);
mlxsw_reg_ratr_op_set(payload, op);
mlxsw_reg_ratr_v_set(payload, valid);
+ mlxsw_reg_ratr_type_set(payload, type);
mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index);
mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16);
mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif);
@@ -4341,6 +4501,12 @@ static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload,
mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac);
}
+static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip)
+{
+ mlxsw_reg_ratr_ipip_type_set(payload, MLXSW_REG_RATR_IPIP_TYPE_IPV4);
+ mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip);
+}
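
With the new type field, an IPinIP adjacency entry is packed like an Ethernet one, plus the underlay destination. A hedged in-driver sketch (the RATR op enum is pre-existing driver infrastructure):

static int example_write_ipip_adj(struct mlxsw_core *core, u32 adj_index,
				  u16 lb_rif, u32 remote_ip4)
{
	char ratr_pl[MLXSW_REG_RATR_LEN];

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, MLXSW_REG_RATR_TYPE_IPIP,
			    adj_index, lb_rif);
	mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, remote_ip4);
	return mlxsw_reg_write(core, MLXSW_REG(ratr), ratr_pl);
}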
+
/* RICNT - Router Interface Counter Register
* -----------------------------------------
* The RICNT register retrieves per port performance counters
@@ -4712,12 +4878,13 @@ MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8);
/* reg_ralue_dip*
* The prefix of the route or of the marker that the object of the LPM
* is compared with. The most significant bits of the dip are the prefix.
- * The list significant bits must be '0' if the prefix_len is smaller
+ * The least significant bits must be '0' if the prefix_len is smaller
* than 128 for IPv6 or smaller than 32 for IPv4.
* IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved.
* Access: Index
*/
MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32);
+MLXSW_ITEM_BUF(reg, ralue, dip6, 0x0C, 16);
enum mlxsw_reg_ralue_entry_type {
MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1,
@@ -4806,7 +4973,7 @@ MLXSW_ITEM32(reg, ralue, ecmp_size, 0x28, 0, 13);
*/
MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16);
-/* reg_ralue_v
+/* reg_ralue_ip2me_v
* Valid bit for the tunnel_ptr field.
* If valid = 0 then trap to CPU as IP2ME trap ID.
* If valid = 1 and the packet format allows NVE or IPinIP tunnel
@@ -4816,15 +4983,15 @@ MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16);
* Only relevant in case of IP2ME action.
* Access: RW
*/
-MLXSW_ITEM32(reg, ralue, v, 0x24, 31, 1);
+MLXSW_ITEM32(reg, ralue, ip2me_v, 0x24, 31, 1);
-/* reg_ralue_tunnel_ptr
+/* reg_ralue_ip2me_tunnel_ptr
* Tunnel Pointer for NVE or IPinIP tunnel decapsulation.
* For Spectrum, pointer to KVD Linear.
* Only relevant in case of IP2ME action.
* Access: RW
*/
-MLXSW_ITEM32(reg, ralue, tunnel_ptr, 0x24, 0, 24);
+MLXSW_ITEM32(reg, ralue, ip2me_tunnel_ptr, 0x24, 0, 24);
static inline void mlxsw_reg_ralue_pack(char *payload,
enum mlxsw_reg_ralxx_protocol protocol,
@@ -4851,6 +5018,16 @@ static inline void mlxsw_reg_ralue_pack4(char *payload,
mlxsw_reg_ralue_dip4_set(payload, dip);
}
+static inline void mlxsw_reg_ralue_pack6(char *payload,
+ enum mlxsw_reg_ralxx_protocol protocol,
+ enum mlxsw_reg_ralue_op op,
+ u16 virtual_router, u8 prefix_len,
+ const void *dip)
+{
+ mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len);
+ mlxsw_reg_ralue_dip6_memcpy_to(payload, dip);
+}
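
An IPv6 route insert then differs from the IPv4 one only in how the dip is packed. A hedged in-driver sketch pairing the new pack6 helper with the pre-existing remote (adjacency) action pack:

static int example_write_ipv6_route(struct mlxsw_core *core, u16 vr_id,
				    const struct in6_addr *dip, u8 prefix_len,
				    u32 adj_index, u16 ecmp_size)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	mlxsw_reg_ralue_pack6(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV6,
			      MLXSW_REG_RALUE_OP_WRITE_WRITE,
			      vr_id, prefix_len, dip);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl,
					MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
					adj_index, ecmp_size);
	return mlxsw_reg_write(core, MLXSW_REG(ralue), ralue_pl);
}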
+
static inline void
mlxsw_reg_ralue_act_remote_pack(char *payload,
enum mlxsw_reg_ralue_trap_action trap_action,
@@ -4883,6 +5060,15 @@ mlxsw_reg_ralue_act_ip2me_pack(char *payload)
MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);
}
+static inline void
+mlxsw_reg_ralue_act_ip2me_tun_pack(char *payload, u32 tunnel_ptr)
+{
+ mlxsw_reg_ralue_action_type_set(payload,
+ MLXSW_REG_RALUE_ACTION_TYPE_IP2ME);
+ mlxsw_reg_ralue_ip2me_v_set(payload, 1);
+ mlxsw_reg_ralue_ip2me_tunnel_ptr_set(payload, tunnel_ptr);
+}
+
/* RAUHT - Router Algorithmic LPM Unicast Host Table Register
* ----------------------------------------------------------
* The RAUHT register is used to configure and query the Unicast Host table in
@@ -4954,6 +5140,7 @@ MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16);
* Access: Index
*/
MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32);
+MLXSW_ITEM_BUF(reg, rauht, dip6, 0x10, 16);
enum mlxsw_reg_rauht_trap_action {
MLXSW_REG_RAUHT_TRAP_ACTION_NOP,
@@ -4982,6 +5169,15 @@ enum mlxsw_reg_rauht_trap_id {
*/
MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9);
+enum mlxsw_reg_flow_counter_set_type {
+ /* No count */
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00,
+ /* Count packets and bytes */
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03,
+ /* Count only packets */
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05,
+};
+
/* reg_rauht_counter_set_type
* Counter set type for flow counters
* Access: RW
@@ -5018,6 +5214,23 @@ static inline void mlxsw_reg_rauht_pack4(char *payload,
mlxsw_reg_rauht_dip4_set(payload, dip);
}
+static inline void mlxsw_reg_rauht_pack6(char *payload,
+ enum mlxsw_reg_rauht_op op, u16 rif,
+ const char *mac, const char *dip)
+{
+ mlxsw_reg_rauht_pack(payload, op, rif, mac);
+ mlxsw_reg_rauht_type_set(payload, MLXSW_REG_RAUHT_TYPE_IPV6);
+ mlxsw_reg_rauht_dip6_memcpy_to(payload, dip);
+}
+
+static inline void mlxsw_reg_rauht_pack_counter(char *payload,
+ u64 counter_index)
+{
+ mlxsw_reg_rauht_counter_index_set(payload, counter_index);
+ mlxsw_reg_rauht_counter_set_type_set(payload,
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES);
+}
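
Attaching a flow counter to an IPv6 neighbour entry composes the two new helpers. A hedged in-driver sketch (the RAUHT op enum is pre-existing infrastructure):

static int example_write_ipv6_neigh(struct mlxsw_core *core, u16 rif,
				    const char *mac, const char *dip6,
				    u64 counter_index)
{
	char rauht_pl[MLXSW_REG_RAUHT_LEN];

	mlxsw_reg_rauht_pack6(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD,
			      rif, mac, dip6);
	mlxsw_reg_rauht_pack_counter(rauht_pl, counter_index);
	return mlxsw_reg_write(core, MLXSW_REG(rauht), rauht_pl);
}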
+
/* RALEU - Router Algorithmic LPM ECMP Update Register
* ---------------------------------------------------
* The register enables updating the ECMP section in the action for multiple
@@ -5216,6 +5429,30 @@ MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0,
MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0,
32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false);
+#define MLXSW_REG_RAUHTD_IPV6_ENT_LEN 0x20
+
+/* reg_rauhtd_ipv6_ent_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the
+ * specific entry.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1,
+ MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv6_ent_rif
+ * Router interface.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0,
+ 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false);
+
+/* reg_rauhtd_ipv6_ent_dip
+ * Destination IPv6 address.
+ * Access: RO
+ */
+MLXSW_ITEM_BUF_INDEXED(reg, rauhtd, ipv6_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN,
+ 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x10);
+
static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload,
int ent_index, u16 *p_rif,
u32 *p_dip)
@@ -5224,6 +5461,141 @@ static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload,
*p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index);
}
+static inline void mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload,
+ int rec_index, u16 *p_rif,
+ char *p_dip)
+{
+ *p_rif = mlxsw_reg_rauhtd_ipv6_ent_rif_get(payload, rec_index);
+ mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip);
+}
+
+/* RTDP - Routing Tunnel Decap Properties Register
+ * -----------------------------------------------
+ * The RTDP register is used for configuring the tunnel decap properties of NVE
+ * and IPinIP.
+ */
+#define MLXSW_REG_RTDP_ID 0x8020
+#define MLXSW_REG_RTDP_LEN 0x44
+
+MLXSW_REG_DEFINE(rtdp, MLXSW_REG_RTDP_ID, MLXSW_REG_RTDP_LEN);
+
+enum mlxsw_reg_rtdp_type {
+ MLXSW_REG_RTDP_TYPE_NVE,
+ MLXSW_REG_RTDP_TYPE_IPIP,
+};
+
+/* reg_rtdp_type
+ * Type of the RTDP entry as per enum mlxsw_reg_rtdp_type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4);
+
+/* reg_rtdp_tunnel_index
+ * Index to the Decap entry.
+ * For Spectrum, Index to KVD Linear.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24);
+
+/* IPinIP */
+
+/* reg_rtdp_ipip_irif
+ * Ingress Router Interface for the overlay router
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_irif, 0x04, 16, 16);
+
+enum mlxsw_reg_rtdp_ipip_sip_check {
+ /* No sip checks. */
+ MLXSW_REG_RTDP_IPIP_SIP_CHECK_NO,
+ /* Filter packet if underlay is not IPv4 or if underlay SIP does not
+ * equal ipv4_usip.
+ */
+ MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
+ /* Filter packet if underlay is not IPv6 or if underlay SIP does not
+ * equal ipv6_usip.
+ */
+ MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6 = 3,
+};
+
+/* reg_rtdp_ipip_sip_check
+ * SIP check to perform. If decapsulation fails this check, the packet is
+ * trapped with trap_id IPIP_DECAP_ERROR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_sip_check, 0x04, 0, 3);
+
+/* If set, allow decapsulation of IPinIP (without GRE). */
+#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_IPIP BIT(0)
+/* If set, allow decapsulation of IPinGREinIP without a key. */
+#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE BIT(1)
+/* If set, allow decapsulation of IPinGREinIP with a key. */
+#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY BIT(2)
+
+/* reg_rtdp_ipip_type_check
+ * Flags as per MLXSW_REG_RTDP_IPIP_TYPE_CHECK_*. If decapsulation fails this
+ * check, the packet is trapped with trap_id IPIP_DECAP_ERROR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_type_check, 0x08, 24, 3);
+
+/* reg_rtdp_ipip_gre_key_check
+ * Whether GRE key should be checked. When check is enabled:
+ * - A packet received as IPinIP (without GRE) will always pass.
+ * - A packet received as IPinGREinIP without a key will not pass the check.
+ * - A packet received as IPinGREinIP with a key will pass the check only if the
+ * key in the packet is equal to expected_gre_key.
+ * If decapsulation fails the GRE key check, the packet is trapped with
+ * trap_id IPIP_DECAP_ERROR.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_gre_key_check, 0x08, 23, 1);
+
+/* reg_rtdp_ipip_ipv4_usip
+ * Underlay IPv4 address for ipv4 source address check.
+ * Reserved when sip_check is not '1'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_ipv4_usip, 0x0C, 0, 32);
+
+/* reg_rtdp_ipip_ipv6_usip_ptr
+ * Pointer to the IPv6 underlay source address against which the SIP check
+ * is performed, as configured by RIPS. For Spectrum, the index is to the
+ * KVD linear.
+ * Reserved when sip_check is not MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_ipv6_usip_ptr, 0x10, 0, 24);
+
+/* reg_rtdp_ipip_expected_gre_key
+ * GRE key for checking.
+ * Reserved when gre_key_check is '0'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rtdp, ipip_expected_gre_key, 0x14, 0, 32);
+
+static inline void mlxsw_reg_rtdp_pack(char *payload,
+ enum mlxsw_reg_rtdp_type type,
+ u32 tunnel_index)
+{
+ MLXSW_REG_ZERO(rtdp, payload);
+ mlxsw_reg_rtdp_type_set(payload, type);
+ mlxsw_reg_rtdp_tunnel_index_set(payload, tunnel_index);
+}
+
+static inline void
+mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif,
+ enum mlxsw_reg_rtdp_ipip_sip_check sip_check,
+ unsigned int type_check, bool gre_key_check,
+ u32 ipv4_usip, u32 expected_gre_key)
+{
+ mlxsw_reg_rtdp_ipip_irif_set(payload, irif);
+ mlxsw_reg_rtdp_ipip_sip_check_set(payload, sip_check);
+ mlxsw_reg_rtdp_ipip_type_check_set(payload, type_check);
+ mlxsw_reg_rtdp_ipip_gre_key_check_set(payload, gre_key_check);
+ mlxsw_reg_rtdp_ipip_ipv4_usip_set(payload, ipv4_usip);
+ mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key);
+}
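
A decap entry for a keyed GRE tunnel with strict IPv4 SIP checking would then be programmed as below; a hedged in-driver sketch using only the helpers added in this hunk:

static int example_config_gre_decap(struct mlxsw_core *core, u32 kvdl_index,
				    u16 ul_rif, u32 ul_sip, u32 gre_key)
{
	char rtdp_pl[MLXSW_REG_RTDP_LEN];

	mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, kvdl_index);
	mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, ul_rif,
				  MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
				  MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY,
				  true, ul_sip, gre_key);
	return mlxsw_reg_write(core, MLXSW_REG(rtdp), rtdp_pl);
}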
+
/* MFCR - Management Fan Control Register
* --------------------------------------
* This register controls the settings of the Fan Speed PWM mechanism.
@@ -5982,15 +6354,6 @@ static inline void mlxsw_reg_mpsc_pack(char *payload, u8 local_port, bool e,
MLXSW_REG_DEFINE(mgpc, MLXSW_REG_MGPC_ID, MLXSW_REG_MGPC_LEN);
-enum mlxsw_reg_mgpc_counter_set_type {
- /* No count */
- MLXSW_REG_MGPC_COUNTER_SET_TYPE_NO_COUT = 0x00,
- /* Count packets and bytes */
- MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03,
- /* Count only packets */
- MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS = 0x05,
-};
-
/* reg_mgpc_counter_set_type
* Counter set type.
* Access: OP
@@ -6030,7 +6393,7 @@ MLXSW_ITEM64(reg, mgpc, packet_counter, 0x10, 0, 64);
static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index,
enum mlxsw_reg_mgpc_opcode opcode,
- enum mlxsw_reg_mgpc_counter_set_type set_type)
+ enum mlxsw_reg_flow_counter_set_type set_type)
{
MLXSW_REG_ZERO(mgpc, payload);
mlxsw_reg_mgpc_counter_index_set(payload, counter_index);
@@ -6494,6 +6857,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(rgcr),
MLXSW_REG(ritr),
MLXSW_REG(ratr),
+ MLXSW_REG(rtdp),
MLXSW_REG(ricnt),
MLXSW_REG(ralta),
MLXSW_REG(ralst),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index c6a3e61b..ed7cd6c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -58,6 +58,7 @@
#include <net/tc_act/tc_mirred.h>
#include <net/netevent.h>
#include <net/tc_act/tc_sample.h>
+#include <net/addrconf.h>
#include "spectrum.h"
#include "pci.h"
@@ -381,12 +382,14 @@ int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
int err;
mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP,
- MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES);
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl);
if (err)
return err;
- *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl);
- *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl);
+ if (packets)
+ *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl);
+ if (bytes)
+ *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl);
return 0;
}
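
Making both out-pointers optional lets callers that care about only one statistic skip the other. A minimal sketch of such a caller (the helper name is illustrative):

static int example_get_packets(struct mlxsw_sp *mlxsw_sp,
			       unsigned int counter_index, u64 *packets)
{
	/* bytes is now optional, so NULL can be passed for it */
	return mlxsw_sp_flow_counter_get(mlxsw_sp, counter_index,
					 packets, NULL);
}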
@@ -396,7 +399,7 @@ static int mlxsw_sp_flow_counter_clear(struct mlxsw_sp *mlxsw_sp,
char mgpc_pl[MLXSW_REG_MGPC_LEN];
mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_CLEAR,
- MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES);
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl);
}
@@ -1616,16 +1619,16 @@ mlxsw_sp_port_del_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port)
}
static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
- __be16 protocol,
- struct tc_cls_matchall_offload *cls,
+ struct tc_cls_matchall_offload *f,
bool ingress)
{
struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
+ __be16 protocol = f->common.protocol;
const struct tc_action *a;
LIST_HEAD(actions);
int err;
- if (!tc_single_action(cls->exts)) {
+ if (!tcf_exts_has_one_action(f->exts)) {
netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n");
return -EOPNOTSUPP;
}
@@ -1633,9 +1636,9 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL);
if (!mall_tc_entry)
return -ENOMEM;
- mall_tc_entry->cookie = cls->cookie;
+ mall_tc_entry->cookie = f->cookie;
- tcf_exts_to_list(cls->exts, &actions);
+ tcf_exts_to_list(f->exts, &actions);
a = list_first_entry(&actions, struct tc_action, list);
if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) {
@@ -1647,7 +1650,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
mirror, a, ingress);
} else if (is_tcf_sample(a) && protocol == htons(ETH_P_ALL)) {
mall_tc_entry->type = MLXSW_SP_PORT_MALL_SAMPLE;
- err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, cls,
+ err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, f,
a, ingress);
} else {
err = -EOPNOTSUPP;
@@ -1665,12 +1668,12 @@ err_add_action:
}
static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
- struct tc_cls_matchall_offload *cls)
+ struct tc_cls_matchall_offload *f)
{
struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
mall_tc_entry = mlxsw_sp_port_mall_tc_entry_find(mlxsw_sp_port,
- cls->cookie);
+ f->cookie);
if (!mall_tc_entry) {
netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n");
return;
@@ -1692,49 +1695,72 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
kfree(mall_tc_entry);
}
-static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle,
- u32 chain_index, __be16 proto,
- struct tc_to_netdev *tc)
+static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_cls_matchall_offload *f)
{
- struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
- bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
+ bool ingress;
- if (chain_index)
+ if (is_classid_clsact_ingress(f->common.classid))
+ ingress = true;
+ else if (is_classid_clsact_egress(f->common.classid))
+ ingress = false;
+ else
return -EOPNOTSUPP;
- switch (tc->type) {
- case TC_SETUP_MATCHALL:
- switch (tc->cls_mall->command) {
- case TC_CLSMATCHALL_REPLACE:
- return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port,
- proto,
- tc->cls_mall,
- ingress);
- case TC_CLSMATCHALL_DESTROY:
- mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port,
- tc->cls_mall);
- return 0;
- default:
- return -EOPNOTSUPP;
- }
- case TC_SETUP_CLSFLOWER:
- switch (tc->cls_flower->command) {
- case TC_CLSFLOWER_REPLACE:
- return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress,
- proto, tc->cls_flower);
- case TC_CLSFLOWER_DESTROY:
- mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress,
- tc->cls_flower);
- return 0;
- case TC_CLSFLOWER_STATS:
- return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress,
- tc->cls_flower);
- default:
- return -EOPNOTSUPP;
- }
+ if (f->common.chain_index)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_CLSMATCHALL_REPLACE:
+ return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, f,
+ ingress);
+ case TC_CLSMATCHALL_DESTROY:
+ mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, f);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_cls_flower_offload *f)
+{
+ bool ingress;
+
+ if (is_classid_clsact_ingress(f->common.classid))
+ ingress = true;
+ else if (is_classid_clsact_egress(f->common.classid))
+ ingress = false;
+ else
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, f);
+ case TC_CLSFLOWER_DESTROY:
+ mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, f);
+ return 0;
+ case TC_CLSFLOWER_STATS:
+ return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress, f);
+ default:
+ return -EOPNOTSUPP;
}
+}
- return -EOPNOTSUPP;
+static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ switch (type) {
+ case TC_SETUP_CLSMATCHALL:
+ return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data);
+ case TC_SETUP_CLSFLOWER:
+ return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
}
static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
@@ -3333,15 +3359,48 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false),
MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false),
MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false),
+ MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU, IPV6_MLD,
+ false),
+ MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
+ false),
+ MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, IPV6_MLD,
+ false),
+ MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
+ false),
/* L3 traps */
- MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
- MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
- MLXSW_SP_RXL_NO_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
- MLXSW_SP_RXL_MARK(OSPF, TRAP_TO_CPU, OSPF, false),
- MLXSW_SP_RXL_NO_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
- MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
- MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false),
- MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false),
+ MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
+ MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
+ false),
+ MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, ROUTER_EXP,
+ false),
+ MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false),
+ MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false),
+ MLXSW_SP_RXL_MARK(IPV6_DHCP, TRAP_TO_CPU, DHCP, false),
+ MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
+ MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false),
+ MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false),
+ MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
+ false),
+ MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
+ false),
+ MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
+ false),
+ MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
+ false),
+ MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false),
+ MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP,
+ false),
+ MLXSW_SP_RXL_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, HOST_MISS, false),
+ MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false),
+ MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
/* PKT Sample trap */
MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
false, SP_IP2ME, DISCARD),
@@ -3376,15 +3435,17 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
burst_size = 7;
break;
case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
rate = 16 * 1024;
burst_size = 10;
break;
- case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
- case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
rate = 1024;
burst_size = 7;
break;
@@ -3433,21 +3494,23 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
priority = 5;
tc = 5;
break;
- case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
priority = 4;
tc = 4;
break;
case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
priority = 3;
tc = 3;
break;
case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
priority = 2;
tc = 2;
break;
- case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
priority = 1;
@@ -3694,7 +3757,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_fids_fini(mlxsw_sp);
}
-static struct mlxsw_config_profile mlxsw_sp_config_profile = {
+static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
.used_max_vepa_channels = 1,
.max_vepa_channels = 0,
.used_max_mid = 1,
@@ -4363,6 +4426,10 @@ static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = {
.priority = 10, /* Must be called before FIB notifier block */
};
+static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = {
+ .notifier_call = mlxsw_sp_inet6addr_event,
+};
+
static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
.notifier_call = mlxsw_sp_router_netevent_event,
};
@@ -4383,6 +4450,7 @@ static int __init mlxsw_sp_module_init(void)
register_netdevice_notifier(&mlxsw_sp_netdevice_nb);
register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
+ register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
err = mlxsw_core_driver_register(&mlxsw_sp_driver);
@@ -4399,6 +4467,7 @@ err_pci_driver_register:
mlxsw_core_driver_unregister(&mlxsw_sp_driver);
err_core_driver_register:
unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+ unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
return err;
@@ -4409,6 +4478,7 @@ static void __exit mlxsw_sp_module_exit(void)
mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver);
mlxsw_core_driver_unregister(&mlxsw_sp_driver);
unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+ unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 5ef98d4..84ce83a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -77,6 +77,7 @@ enum mlxsw_sp_rif_type {
MLXSW_SP_RIF_TYPE_SUBPORT,
MLXSW_SP_RIF_TYPE_VLAN,
MLXSW_SP_RIF_TYPE_FID,
+ MLXSW_SP_RIF_TYPE_IPIP_LB, /* IP-in-IP loopback. */
MLXSW_SP_RIF_TYPE_MAX,
};
@@ -384,6 +385,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev);
int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
unsigned long event, void *ptr);
+int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
+ unsigned long event, void *ptr);
int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
struct netdev_notifier_changeupper_info *info);
void
@@ -415,6 +418,7 @@ struct mlxsw_sp_acl_profile_ops {
int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
struct net_device *dev, bool ingress);
void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv);
+ u16 (*ruleset_group_id)(void *ruleset_priv);
size_t rule_priv_size;
int (*rule_add)(struct mlxsw_sp *mlxsw_sp,
void *ruleset_priv, void *rule_priv,
@@ -438,11 +442,16 @@ struct mlxsw_sp_acl_ruleset;
/* spectrum_acl.c */
struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl);
struct mlxsw_sp_acl_ruleset *
-mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
- struct net_device *dev, bool ingress,
+mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, struct net_device *dev,
+ bool ingress, u32 chain_index,
+ enum mlxsw_sp_acl_profile profile);
+struct mlxsw_sp_acl_ruleset *
+mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, struct net_device *dev,
+ bool ingress, u32 chain_index,
enum mlxsw_sp_acl_profile profile);
void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_ruleset *ruleset);
+u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset);
struct mlxsw_sp_acl_rule_info *
mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl);
@@ -506,7 +515,7 @@ extern const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops;
/* spectrum_flower.c */
int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
- __be16 protocol, struct tc_cls_flower_offload *f);
+ struct tc_cls_flower_offload *f);
void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
struct tc_cls_flower_offload *f);
int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 01a1501..4b2455e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -74,6 +74,7 @@ struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl)
struct mlxsw_sp_acl_ruleset_ht_key {
struct net_device *dev; /* dev this ruleset is bound to */
bool ingress;
+ u32 chain_index;
const struct mlxsw_sp_acl_profile_ops *ops;
};
@@ -163,7 +164,8 @@ static void mlxsw_sp_acl_ruleset_destroy(struct mlxsw_sp *mlxsw_sp,
static int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_ruleset *ruleset,
- struct net_device *dev, bool ingress)
+ struct net_device *dev, bool ingress,
+ u32 chain_index)
{
const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
@@ -171,13 +173,20 @@ static int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
ruleset->ht_key.dev = dev;
ruleset->ht_key.ingress = ingress;
+ ruleset->ht_key.chain_index = chain_index;
err = rhashtable_insert_fast(&acl->ruleset_ht, &ruleset->ht_node,
mlxsw_sp_acl_ruleset_ht_params);
if (err)
return err;
- err = ops->ruleset_bind(mlxsw_sp, ruleset->priv, dev, ingress);
- if (err)
- goto err_ops_ruleset_bind;
+ if (!ruleset->ht_key.chain_index) {
+ /* Only the ruleset with chain index 0, the implicit one,
+ * needs to be directly bound to the device. The rest of the
+ * rulesets are bound by the "goto" action set.
+ */
+ err = ops->ruleset_bind(mlxsw_sp, ruleset->priv, dev, ingress);
+ if (err)
+ goto err_ops_ruleset_bind;
+ }
return 0;
err_ops_ruleset_bind:
@@ -192,7 +201,8 @@ static void mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
- ops->ruleset_unbind(mlxsw_sp, ruleset->priv);
+ if (!ruleset->ht_key.chain_index)
+ ops->ruleset_unbind(mlxsw_sp, ruleset->priv);
rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node,
mlxsw_sp_acl_ruleset_ht_params);
}
@@ -211,14 +221,48 @@ static void mlxsw_sp_acl_ruleset_ref_dec(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_acl_ruleset_destroy(mlxsw_sp, ruleset);
}
+static struct mlxsw_sp_acl_ruleset *
+__mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp_acl *acl, struct net_device *dev,
+ bool ingress, u32 chain_index,
+ const struct mlxsw_sp_acl_profile_ops *ops)
+{
+ struct mlxsw_sp_acl_ruleset_ht_key ht_key;
+
+ memset(&ht_key, 0, sizeof(ht_key));
+ ht_key.dev = dev;
+ ht_key.ingress = ingress;
+ ht_key.chain_index = chain_index;
+ ht_key.ops = ops;
+ return rhashtable_lookup_fast(&acl->ruleset_ht, &ht_key,
+ mlxsw_sp_acl_ruleset_ht_params);
+}
+
struct mlxsw_sp_acl_ruleset *
-mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
- struct net_device *dev, bool ingress,
+mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, struct net_device *dev,
+ bool ingress, u32 chain_index,
+ enum mlxsw_sp_acl_profile profile)
+{
+ const struct mlxsw_sp_acl_profile_ops *ops;
+ struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
+ struct mlxsw_sp_acl_ruleset *ruleset;
+
+ ops = acl->ops->profile_ops(mlxsw_sp, profile);
+ if (!ops)
+ return ERR_PTR(-EINVAL);
+ ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, dev, ingress,
+ chain_index, ops);
+ if (!ruleset)
+ return ERR_PTR(-ENOENT);
+ return ruleset;
+}
+
+struct mlxsw_sp_acl_ruleset *
+mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, struct net_device *dev,
+ bool ingress, u32 chain_index,
enum mlxsw_sp_acl_profile profile)
{
const struct mlxsw_sp_acl_profile_ops *ops;
struct mlxsw_sp_acl *acl = mlxsw_sp->acl;
- struct mlxsw_sp_acl_ruleset_ht_key ht_key;
struct mlxsw_sp_acl_ruleset *ruleset;
int err;
@@ -226,12 +270,8 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
if (!ops)
return ERR_PTR(-EINVAL);
- memset(&ht_key, 0, sizeof(ht_key));
- ht_key.dev = dev;
- ht_key.ingress = ingress;
- ht_key.ops = ops;
- ruleset = rhashtable_lookup_fast(&acl->ruleset_ht, &ht_key,
- mlxsw_sp_acl_ruleset_ht_params);
+ ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, dev, ingress,
+ chain_index, ops);
if (ruleset) {
mlxsw_sp_acl_ruleset_ref_inc(ruleset);
return ruleset;
@@ -239,7 +279,8 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
ruleset = mlxsw_sp_acl_ruleset_create(mlxsw_sp, ops);
if (IS_ERR(ruleset))
return ruleset;
- err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, ruleset, dev, ingress);
+ err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, ruleset, dev,
+ ingress, chain_index);
if (err)
goto err_ruleset_bind;
return ruleset;
@@ -255,6 +296,13 @@ void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset);
}
+u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset)
+{
+ const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
+
+ return ops->ruleset_group_id(ruleset->priv);
+}
+
static int
mlxsw_sp_acl_rulei_counter_alloc(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_rule_info *rulei)
@@ -369,7 +417,7 @@ int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
local_port = mlxsw_sp_port->local_port;
in_port = false;
} else {
- /* If out_dev is NULL, the called wants to
+ /* If out_dev is NULL, the caller wants to
* set forward to ingress port.
*/
local_port = 0;
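
A side note on the lookup helper factored out above: because rhashtable treats the key as an opaque byte string, the memset() before filling in ht_key is load-bearing. A minimal sketch of the idiom, using hypothetical toy_* names rather than driver code:

#include <stdbool.h>
#include <string.h>

/* Toy illustration with hypothetical toy_* types, not driver code:
 * an rhashtable-style lookup hashes and compares this key as raw
 * bytes, so struct padding (e.g. after the bool) must be zeroed for
 * lookups to match, hence the memset() before the fields are set.
 */
struct toy_ruleset_key {
	void *dev;
	bool ingress;		/* compiler padding follows */
	unsigned int chain_index;
	const void *ops;
};

static void toy_key_init(struct toy_ruleset_key *key, void *dev,
			 bool ingress, unsigned int chain_index,
			 const void *ops)
{
	memset(key, 0, sizeof(*key));	/* make padding deterministic */
	key->dev = dev;
	key->ingress = ingress;
	key->chain_index = chain_index;
	key->ops = ops;
}
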
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
index 85d5001..fb80318 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h
@@ -70,6 +70,9 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = {
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = {
MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32),
+ MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2),
+ MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8),
+ MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6),
MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index 61a10f1..50b40de 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -295,6 +295,12 @@ mlxsw_sp_acl_tcam_group_unbind(struct mlxsw_sp *mlxsw_sp,
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbt), ppbt_pl);
}
+static u16
+mlxsw_sp_acl_tcam_group_id(struct mlxsw_sp_acl_tcam_group *group)
+{
+ return group->id;
+}
+
static unsigned int
mlxsw_sp_acl_tcam_region_prio(struct mlxsw_sp_acl_tcam_region *region)
{
@@ -984,6 +990,9 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
MLXSW_AFK_ELEMENT_VID,
MLXSW_AFK_ELEMENT_PCP,
MLXSW_AFK_ELEMENT_TCP_FLAGS,
+ MLXSW_AFK_ELEMENT_IP_TTL_,
+ MLXSW_AFK_ELEMENT_IP_ECN,
+ MLXSW_AFK_ELEMENT_IP_DSCP,
};
static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = {
@@ -1060,6 +1069,14 @@ mlxsw_sp_acl_tcam_flower_ruleset_unbind(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_acl_tcam_group_unbind(mlxsw_sp, &ruleset->group);
}
+static u16
+mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv)
+{
+ struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
+
+ return mlxsw_sp_acl_tcam_group_id(&ruleset->group);
+}
+
static int
mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp,
void *ruleset_priv, void *rule_priv,
@@ -1096,6 +1113,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = {
.ruleset_del = mlxsw_sp_acl_tcam_flower_ruleset_del,
.ruleset_bind = mlxsw_sp_acl_tcam_flower_ruleset_bind,
.ruleset_unbind = mlxsw_sp_acl_tcam_flower_ruleset_unbind,
+ .ruleset_group_id = mlxsw_sp_acl_tcam_flower_ruleset_group_id,
.rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_rule),
.rule_add = mlxsw_sp_acl_tcam_flower_rule_add,
.rule_del = mlxsw_sp_acl_tcam_flower_rule_del,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index af2c65a..51e6846 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -74,6 +74,9 @@ static struct devlink_dpipe_header mlxsw_sp_dpipe_header_metadata = {
static struct devlink_dpipe_header *mlxsw_dpipe_headers[] = {
&mlxsw_sp_dpipe_header_metadata,
+ &devlink_dpipe_header_ethernet,
+ &devlink_dpipe_header_ipv4,
+ &devlink_dpipe_header_ipv6,
};
static struct devlink_dpipe_headers mlxsw_sp_dpipe_headers = {
@@ -114,26 +117,6 @@ static int mlxsw_sp_dpipe_table_erif_matches_dump(void *priv,
return devlink_dpipe_match_put(skb, &match);
}
-static void mlxsw_sp_erif_entry_clear(struct devlink_dpipe_entry *entry)
-{
- unsigned int value_count, value_index;
- struct devlink_dpipe_value *value;
-
- value = entry->action_values;
- value_count = entry->action_values_count;
- for (value_index = 0; value_index < value_count; value_index++) {
- kfree(value[value_index].value);
- kfree(value[value_index].mask);
- }
-
- value = entry->match_values;
- value_count = entry->match_values_count;
- for (value_index = 0; value_index < value_count; value_index++) {
- kfree(value[value_index].value);
- kfree(value[value_index].mask);
- }
-}
-
static void
mlxsw_sp_erif_match_action_prepare(struct devlink_dpipe_match *match,
struct devlink_dpipe_action *action)
@@ -215,8 +198,8 @@ static int mlxsw_sp_erif_entry_get(struct mlxsw_sp *mlxsw_sp,
}
static int
-mlxsw_sp_table_erif_entries_dump(void *priv, bool counters_enabled,
- struct devlink_dpipe_dump_ctx *dump_ctx)
+mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx)
{
struct devlink_dpipe_value match_value, action_value;
struct devlink_dpipe_action action = {0};
@@ -270,16 +253,16 @@ start_again:
goto start_again;
rtnl_unlock();
- mlxsw_sp_erif_entry_clear(&entry);
+ devlink_dpipe_entry_clear(&entry);
return 0;
err_entry_append:
err_entry_get:
rtnl_unlock();
- mlxsw_sp_erif_entry_clear(&entry);
+ devlink_dpipe_entry_clear(&entry);
return err;
}
-static int mlxsw_sp_table_erif_counters_update(void *priv, bool enable)
+static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable)
{
struct mlxsw_sp *mlxsw_sp = priv;
int i;
@@ -301,24 +284,29 @@ static int mlxsw_sp_table_erif_counters_update(void *priv, bool enable)
return 0;
}
+static u64 mlxsw_sp_dpipe_table_erif_size_get(void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ return MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+}
+
static struct devlink_dpipe_table_ops mlxsw_sp_erif_ops = {
.matches_dump = mlxsw_sp_dpipe_table_erif_matches_dump,
.actions_dump = mlxsw_sp_dpipe_table_erif_actions_dump,
- .entries_dump = mlxsw_sp_table_erif_entries_dump,
- .counters_set_update = mlxsw_sp_table_erif_counters_update,
+ .entries_dump = mlxsw_sp_dpipe_table_erif_entries_dump,
+ .counters_set_update = mlxsw_sp_dpipe_table_erif_counters_update,
+ .size_get = mlxsw_sp_dpipe_table_erif_size_get,
};
static int mlxsw_sp_dpipe_erif_table_init(struct mlxsw_sp *mlxsw_sp)
{
struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
- u64 table_size;
- table_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
return devlink_dpipe_table_register(devlink,
MLXSW_SP_DPIPE_TABLE_NAME_ERIF,
&mlxsw_sp_erif_ops,
- mlxsw_sp, table_size,
- false);
+ mlxsw_sp, false);
}
static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp)
@@ -328,6 +316,516 @@ static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp)
devlink_dpipe_table_unregister(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF);
}
+static int mlxsw_sp_dpipe_table_host_matches_dump(struct sk_buff *skb, int type)
+{
+ struct devlink_dpipe_match match = {0};
+ int err;
+
+ match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match.header = &mlxsw_sp_dpipe_header_metadata;
+ match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+
+ err = devlink_dpipe_match_put(skb, &match);
+ if (err)
+ return err;
+
+ switch (type) {
+ case AF_INET:
+ match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match.header = &devlink_dpipe_header_ipv4;
+ match.field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP;
+ break;
+ case AF_INET6:
+ match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match.header = &devlink_dpipe_header_ipv6;
+ match.field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP;
+ break;
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ return devlink_dpipe_match_put(skb, &match);
+}
+
+static int
+mlxsw_sp_dpipe_table_host4_matches_dump(void *priv, struct sk_buff *skb)
+{
+ return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET);
+}
+
+static int
+mlxsw_sp_dpipe_table_host_actions_dump(void *priv, struct sk_buff *skb)
+{
+ struct devlink_dpipe_action action = {0};
+
+ action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action.header = &devlink_dpipe_header_ethernet;
+ action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC;
+
+ return devlink_dpipe_action_put(skb, &action);
+}
+
+enum mlxsw_sp_dpipe_table_host_match {
+ MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF,
+ MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP,
+ MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT,
+};
+
+static void
+mlxsw_sp_dpipe_table_host_match_action_prepare(struct devlink_dpipe_match *matches,
+ struct devlink_dpipe_action *action,
+ int type)
+{
+ struct devlink_dpipe_match *match;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF];
+ match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match->header = &mlxsw_sp_dpipe_header_metadata;
+ match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP];
+ match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ switch (type) {
+ case AF_INET:
+ match->header = &devlink_dpipe_header_ipv4;
+ match->field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP;
+ break;
+ case AF_INET6:
+ match->header = &devlink_dpipe_header_ipv6;
+ match->field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP;
+ break;
+ default:
+ WARN_ON(1);
+ return;
+ }
+
+ action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action->header = &devlink_dpipe_header_ethernet;
+ action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC;
+}
+
+static int
+mlxsw_sp_dpipe_table_host_entry_prepare(struct devlink_dpipe_entry *entry,
+ struct devlink_dpipe_value *match_values,
+ struct devlink_dpipe_match *matches,
+ struct devlink_dpipe_value *action_value,
+ struct devlink_dpipe_action *action,
+ int type)
+{
+ struct devlink_dpipe_value *match_value;
+ struct devlink_dpipe_match *match;
+
+ entry->match_values = match_values;
+ entry->match_values_count = MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT;
+
+ entry->action_values = action_value;
+ entry->action_values_count = 1;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF];
+ match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF];
+
+ match_value->match = match;
+ match_value->value_size = sizeof(u32);
+ match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+ if (!match_value->value)
+ return -ENOMEM;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP];
+ match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP];
+
+ match_value->match = match;
+ switch (type) {
+ case AF_INET:
+ match_value->value_size = sizeof(u32);
+ break;
+ case AF_INET6:
+ match_value->value_size = sizeof(struct in6_addr);
+ break;
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+ if (!match_value->value)
+ return -ENOMEM;
+
+ action_value->action = action;
+ action_value->value_size = sizeof(u64);
+ action_value->value = kmalloc(action_value->value_size, GFP_KERNEL);
+ if (!action_value->value)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void
+__mlxsw_sp_dpipe_table_host_entry_fill(struct devlink_dpipe_entry *entry,
+ struct mlxsw_sp_rif *rif,
+ unsigned char *ha, void *dip)
+{
+ struct devlink_dpipe_value *value;
+ u32 *rif_value;
+ u8 *ha_value;
+
+ /* Set Match RIF index */
+ value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF];
+
+ rif_value = value->value;
+ *rif_value = mlxsw_sp_rif_index(rif);
+ value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif);
+ value->mapping_valid = true;
+
+ /* Set Match DIP */
+ value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP];
+ memcpy(value->value, dip, value->value_size);
+
+ /* Set Action DMAC */
+ value = entry->action_values;
+ ha_value = value->value;
+ ether_addr_copy(ha_value, ha);
+}
+
+static void
+mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ struct mlxsw_sp_rif *rif)
+{
+ unsigned char *ha;
+ u32 dip;
+
+ ha = mlxsw_sp_neigh_entry_ha(neigh_entry);
+ dip = mlxsw_sp_neigh4_entry_dip(neigh_entry);
+ __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, &dip);
+}
+
+static void
+mlxsw_sp_dpipe_table_host6_entry_fill(struct devlink_dpipe_entry *entry,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ struct mlxsw_sp_rif *rif)
+{
+ struct in6_addr *dip;
+ unsigned char *ha;
+
+ ha = mlxsw_sp_neigh_entry_ha(neigh_entry);
+ dip = mlxsw_sp_neigh6_entry_dip(neigh_entry);
+
+ __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, dip);
+}
+
+static void
+mlxsw_sp_dpipe_table_host_entry_fill(struct mlxsw_sp *mlxsw_sp,
+ struct devlink_dpipe_entry *entry,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ struct mlxsw_sp_rif *rif,
+ int type)
+{
+ int err;
+
+ switch (type) {
+ case AF_INET:
+ mlxsw_sp_dpipe_table_host4_entry_fill(entry, neigh_entry, rif);
+ break;
+ case AF_INET6:
+ mlxsw_sp_dpipe_table_host6_entry_fill(entry, neigh_entry, rif);
+ break;
+ default:
+ WARN_ON(1);
+ return;
+ }
+
+ err = mlxsw_sp_neigh_counter_get(mlxsw_sp, neigh_entry,
+ &entry->counter);
+ if (!err)
+ entry->counter_valid = true;
+}
+
+static int
+mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp,
+ struct devlink_dpipe_entry *entry,
+ bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx,
+ int type)
+{
+ int rif_neigh_count = 0;
+ int rif_neigh_skip = 0;
+ int neigh_count = 0;
+ int rif_count;
+ int i, j;
+ int err;
+
+ rtnl_lock();
+ i = 0;
+ rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+start_again:
+ err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
+ if (err)
+ goto err_ctx_prepare;
+ j = 0;
+ rif_neigh_skip = rif_neigh_count;
+ for (; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
+ struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+
+ if (!rif)
+ continue;
+
+ rif_neigh_count = 0;
+ mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) {
+ int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry);
+
+ if (neigh_type != type)
+ continue;
+
+ if (neigh_type == AF_INET6 &&
+ mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
+ continue;
+
+ if (rif_neigh_count < rif_neigh_skip)
+ goto skip;
+
+ mlxsw_sp_dpipe_table_host_entry_fill(mlxsw_sp, entry,
+ neigh_entry, rif,
+ type);
+ entry->index = neigh_count;
+ err = devlink_dpipe_entry_ctx_append(dump_ctx, entry);
+ if (err) {
+ if (err == -EMSGSIZE) {
+ if (!j)
+ goto err_entry_append;
+ else
+ goto out;
+ }
+ goto err_entry_append;
+ }
+ neigh_count++;
+ j++;
+skip:
+ rif_neigh_count++;
+ }
+ rif_neigh_skip = 0;
+ }
+out:
+ devlink_dpipe_entry_ctx_close(dump_ctx);
+ if (i != rif_count)
+ goto start_again;
+
+ rtnl_unlock();
+ return 0;
+
+err_ctx_prepare:
+err_entry_append:
+ rtnl_unlock();
+ return err;
+}
+
+static int
+mlxsw_sp_dpipe_table_host_entries_dump(struct mlxsw_sp *mlxsw_sp,
+ bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx,
+ int type)
+{
+ struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT];
+ struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT];
+ struct devlink_dpipe_value action_value;
+ struct devlink_dpipe_action action = {0};
+ struct devlink_dpipe_entry entry = {0};
+ int err;
+
+ memset(matches, 0, MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT *
+ sizeof(matches[0]));
+ memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT *
+ sizeof(match_values[0]));
+ memset(&action_value, 0, sizeof(action_value));
+
+ mlxsw_sp_dpipe_table_host_match_action_prepare(matches, &action, type);
+ err = mlxsw_sp_dpipe_table_host_entry_prepare(&entry, match_values,
+ matches, &action_value,
+ &action, type);
+ if (err)
+ goto out;
+
+ err = mlxsw_sp_dpipe_table_host_entries_get(mlxsw_sp, &entry,
+ counters_enabled, dump_ctx,
+ type);
+out:
+ devlink_dpipe_entry_clear(&entry);
+ return err;
+}
+
+static int
+mlxsw_sp_dpipe_table_host4_entries_dump(void *priv, bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp,
+ counters_enabled,
+ dump_ctx, AF_INET);
+}
+
+static void
+mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp,
+ bool enable, int type)
+{
+ int i;
+
+ rtnl_lock();
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
+ struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+
+ if (!rif)
+ continue;
+ mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) {
+ int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry);
+
+ if (neigh_type != type)
+ continue;
+
+ if (neigh_type == AF_INET6 &&
+ mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
+ continue;
+
+ mlxsw_sp_neigh_entry_counter_update(mlxsw_sp,
+ neigh_entry,
+ enable);
+ }
+ }
+ rtnl_unlock();
+}
+
+static int mlxsw_sp_dpipe_table_host4_counters_update(void *priv, bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET);
+ return 0;
+}
+
+static u64
+mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type)
+{
+ u64 size = 0;
+ int i;
+
+ rtnl_lock();
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
+ struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i);
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+
+ if (!rif)
+ continue;
+ mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) {
+ int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry);
+
+ if (neigh_type != type)
+ continue;
+
+ if (neigh_type == AF_INET6 &&
+ mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
+ continue;
+
+ size++;
+ }
+ }
+ rtnl_unlock();
+
+ return size;
+}
+
+static u64 mlxsw_sp_dpipe_table_host4_size_get(void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET);
+}
+
+static struct devlink_dpipe_table_ops mlxsw_sp_host4_ops = {
+ .matches_dump = mlxsw_sp_dpipe_table_host4_matches_dump,
+ .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump,
+ .entries_dump = mlxsw_sp_dpipe_table_host4_entries_dump,
+ .counters_set_update = mlxsw_sp_dpipe_table_host4_counters_update,
+ .size_get = mlxsw_sp_dpipe_table_host4_size_get,
+};
+
+static int mlxsw_sp_dpipe_host4_table_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+ return devlink_dpipe_table_register(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_HOST4,
+ &mlxsw_sp_host4_ops,
+ mlxsw_sp, false);
+}
+
+static void mlxsw_sp_dpipe_host4_table_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+ devlink_dpipe_table_unregister(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_HOST4);
+}
+
+static int
+mlxsw_sp_dpipe_table_host6_matches_dump(void *priv, struct sk_buff *skb)
+{
+ return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET6);
+}
+
+static int
+mlxsw_sp_dpipe_table_host6_entries_dump(void *priv, bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp,
+ counters_enabled,
+ dump_ctx, AF_INET6);
+}
+
+static int mlxsw_sp_dpipe_table_host6_counters_update(void *priv, bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET6);
+ return 0;
+}
+
+static u64 mlxsw_sp_dpipe_table_host6_size_get(void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET6);
+}
+
+static struct devlink_dpipe_table_ops mlxsw_sp_host6_ops = {
+ .matches_dump = mlxsw_sp_dpipe_table_host6_matches_dump,
+ .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump,
+ .entries_dump = mlxsw_sp_dpipe_table_host6_entries_dump,
+ .counters_set_update = mlxsw_sp_dpipe_table_host6_counters_update,
+ .size_get = mlxsw_sp_dpipe_table_host6_size_get,
+};
+
+static int mlxsw_sp_dpipe_host6_table_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+ return devlink_dpipe_table_register(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_HOST6,
+ &mlxsw_sp_host6_ops,
+ mlxsw_sp, false);
+}
+
+static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+ devlink_dpipe_table_unregister(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_HOST6);
+}
+
int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp)
{
struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
@@ -339,10 +837,22 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp)
return err;
err = mlxsw_sp_dpipe_erif_table_init(mlxsw_sp);
if (err)
- goto err_erif_register;
+ goto err_erif_table_init;
+
+ err = mlxsw_sp_dpipe_host4_table_init(mlxsw_sp);
+ if (err)
+ goto err_host4_table_init;
+
+ err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp);
+ if (err)
+ goto err_host6_table_init;
return 0;
-err_erif_register:
+err_host6_table_init:
+ mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp);
+err_host4_table_init:
+ mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp);
+err_erif_table_init:
devlink_dpipe_headers_unregister(priv_to_devlink(mlxsw_sp->core));
return err;
}
@@ -351,6 +861,8 @@ void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp)
{
struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+ mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp);
+ mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp);
mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp);
devlink_dpipe_headers_unregister(devlink);
}
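
All of the entries_dump callbacks above share one restartable-dump shape: append entries until the netlink message is full, flush it, and pick up at the same index with a fresh one. A compilable toy rendition of that control flow, with toy_* stand-ins for the devlink dump-context helpers (not kernel API):

#include <errno.h>

/* Toy version of the "start_again" pattern used by
 * mlxsw_sp_dpipe_table_host_entries_get(): append entries into a
 * message buffer until it fills up (-EMSGSIZE), flush it, and resume
 * from the same index with a fresh buffer. Assumes cap > 0, so at
 * least one entry fits per buffer.
 */
struct toy_ctx {
	int used;
	int cap;
};

static int toy_prepare(struct toy_ctx *c)
{
	c->used = 0;		/* fresh buffer */
	return 0;
}

static void toy_close(struct toy_ctx *c)
{
	(void)c;		/* flush the finished buffer to user space */
}

static int toy_append(struct toy_ctx *c)
{
	if (c->used == c->cap)
		return -EMSGSIZE;	/* full: caller flushes and restarts */
	c->used++;
	return 0;
}

static int toy_dump(struct toy_ctx *c, int nentries)
{
	int i = 0, err;

start_again:
	err = toy_prepare(c);
	if (err)
		return err;
	for (; i < nentries; i++) {
		err = toy_append(c);
		if (err == -EMSGSIZE)
			break;		/* flush, then continue from i */
		if (err)
			return err;
	}
	toy_close(c);
	if (i != nentries)
		goto start_again;
	return 0;
}
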
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
index d208929..283fde4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
@@ -35,9 +35,26 @@
#ifndef _MLXSW_PIPELINE_H_
#define _MLXSW_PIPELINE_H_
+#if IS_ENABLED(CONFIG_NET_DEVLINK)
+
int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp);
+#else
+
+static inline int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp)
+{
+ return 0;
+}
+
+static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp)
+{
+}
+
+#endif
+
#define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif"
+#define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4"
+#define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6"
#endif /* _MLXSW_PIPELINE_H_*/
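
The conditional stubs added above follow the usual kernel idiom of compiling a feature's API down to static inline no-ops when it is configured out, so callers stay unconditional. A self-contained sketch, with TOY_FEATURE standing in for the real Kconfig test:

/* Config-stub idiom: when the feature is compiled out, static inline
 * no-op stubs keep the call sites free of ifdefs. TOY_FEATURE stands
 * in for IS_ENABLED(CONFIG_NET_DEVLINK).
 */
#define TOY_FEATURE 0

#if TOY_FEATURE
int toy_init(void);
void toy_fini(void);
#else
static inline int toy_init(void)
{
	return 0;
}

static inline void toy_fini(void)
{
}
#endif
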
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
index 6afbe9e..bbd238e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
@@ -109,7 +109,6 @@ static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
[MLXSW_REG_SFGC_TYPE_BROADCAST] = 1,
- [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1,
[MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP] = 1,
[MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL] = 1,
[MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST] = 1,
@@ -117,6 +116,7 @@ static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = {
[MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4] = 1,
+ [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1,
};
static const int *mlxsw_sp_packet_type_sfgc_types[] = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 21bb2bf..8aace9a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -45,7 +45,7 @@
#include "core_acl_flex_keys.h"
static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
- struct net_device *dev,
+ struct net_device *dev, bool ingress,
struct mlxsw_sp_acl_rule_info *rulei,
struct tcf_exts *exts)
{
@@ -53,7 +53,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
LIST_HEAD(actions);
int err;
- if (tc_no_actions(exts))
+ if (!tcf_exts_has_actions(exts))
return 0;
/* Count action is inserted first */
@@ -71,6 +71,20 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
err = mlxsw_sp_acl_rulei_act_trap(rulei);
if (err)
return err;
+ } else if (is_tcf_gact_goto_chain(a)) {
+ u32 chain_index = tcf_gact_goto_chain_index(a);
+ struct mlxsw_sp_acl_ruleset *ruleset;
+ u16 group_id;
+
+ ruleset = mlxsw_sp_acl_ruleset_lookup(mlxsw_sp, dev,
+ ingress,
+ chain_index,
+ MLXSW_SP_ACL_PROFILE_FLOWER);
+ if (IS_ERR(ruleset))
+ return PTR_ERR(ruleset);
+
+ group_id = mlxsw_sp_acl_ruleset_group_id(ruleset);
+ mlxsw_sp_acl_rulei_act_jump(rulei, group_id);
} else if (is_tcf_mirred_egress_redirect(a)) {
int ifindex = tcf_mirred_ifindex(a);
struct net_device *out_dev;
@@ -212,11 +226,46 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_acl_rule_info *rulei,
+ struct tc_cls_flower_offload *f,
+ u16 n_proto)
+{
+ struct flow_dissector_key_ip *key, *mask;
+
+ if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP))
+ return 0;
+
+ if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) {
+ dev_err(mlxsw_sp->bus_info->dev, "IP keys supported only for IPv4/6\n");
+ return -EINVAL;
+ }
+
+ key = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ f->key);
+ mask = skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ f->mask);
+ mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_,
+ key->ttl, mask->ttl);
+
+ mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN,
+ key->tos & 0x3, mask->tos & 0x3);
+
+ mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP,
+ key->tos >> 2, mask->tos >> 2);
+
+ return 0;
+}
+
static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
- struct net_device *dev,
+ struct net_device *dev, bool ingress,
struct mlxsw_sp_acl_rule_info *rulei,
struct tc_cls_flower_offload *f)
{
+ u16 n_proto_mask = 0;
+ u16 n_proto_key = 0;
u16 addr_type = 0;
u8 ip_proto = 0;
int err;
@@ -229,12 +278,13 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
BIT(FLOW_DISSECTOR_KEY_PORTS) |
BIT(FLOW_DISSECTOR_KEY_TCP) |
+ BIT(FLOW_DISSECTOR_KEY_IP) |
BIT(FLOW_DISSECTOR_KEY_VLAN))) {
dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n");
return -EOPNOTSUPP;
}
- mlxsw_sp_acl_rulei_priority(rulei, f->prio);
+ mlxsw_sp_acl_rulei_priority(rulei, f->common.prio);
if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
struct flow_dissector_key_control *key =
@@ -253,8 +303,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
skb_flow_dissector_target(f->dissector,
FLOW_DISSECTOR_KEY_BASIC,
f->mask);
- u16 n_proto_key = ntohs(key->n_proto);
- u16 n_proto_mask = ntohs(mask->n_proto);
+ n_proto_key = ntohs(key->n_proto);
+ n_proto_mask = ntohs(mask->n_proto);
if (n_proto_key == ETH_P_ALL) {
n_proto_key = 0;
@@ -324,11 +374,16 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
if (err)
return err;
- return mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, rulei, f->exts);
+ err = mlxsw_sp_flower_parse_ip(mlxsw_sp, rulei, f,
+ n_proto_key & n_proto_mask);
+ if (err)
+ return err;
+
+ return mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, ingress,
+ rulei, f->exts);
}
int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
- __be16 protocol, struct tc_cls_flower_offload *f)
+ struct tc_cls_flower_offload *f)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct net_device *dev = mlxsw_sp_port->dev;
@@ -338,6 +393,7 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
int err;
ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, dev, ingress,
+ f->common.chain_index,
MLXSW_SP_ACL_PROFILE_FLOWER);
if (IS_ERR(ruleset))
return PTR_ERR(ruleset);
@@ -349,7 +405,7 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
}
rulei = mlxsw_sp_acl_rule_rulei(rule);
- err = mlxsw_sp_flower_parse(mlxsw_sp, dev, rulei, f);
+ err = mlxsw_sp_flower_parse(mlxsw_sp, dev, ingress, rulei, f);
if (err)
goto err_flower_parse;
@@ -381,7 +437,7 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
struct mlxsw_sp_acl_rule *rule;
ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev,
- ingress,
+ ingress, f->common.chain_index,
MLXSW_SP_ACL_PROFILE_FLOWER);
if (IS_ERR(ruleset))
return;
@@ -407,7 +463,7 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
int err;
ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev,
- ingress,
+ ingress, f->common.chain_index,
MLXSW_SP_ACL_PROFILE_FLOWER);
if (WARN_ON(IS_ERR(ruleset)))
return -EINVAL;
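
For reference alongside mlxsw_sp_flower_parse_ip() above: per RFC 2474 and RFC 3168, the IPv4 TOS byte decomposes into DSCP (upper six bits) and ECN (lower two bits), which is exactly what feeds the IP_DSCP and IP_ECN key elements. A standalone sketch, not driver code:

#include <stdint.h>

/* DSCP/ECN decomposition of the TOS byte (RFC 2474, RFC 3168). */
static inline uint8_t tos_to_dscp(uint8_t tos)
{
	return tos >> 2;	/* upper 6 bits */
}

static inline uint8_t tos_to_ecn(uint8_t tos)
{
	return tos & 0x3;	/* lower 2 bits */
}
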
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
new file mode 100644
index 0000000..702fe94
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -0,0 +1,214 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <net/ip_tunnels.h>
+
+#include "spectrum_ipip.h"
+
+static bool
+mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev)
+{
+ struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+ return !!(tun->parms.i_flags & TUNNEL_KEY);
+}
+
+static bool
+mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev)
+{
+ struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+ return !!(tun->parms.o_flags & TUNNEL_KEY);
+}
+
+static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev)
+{
+ struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+ return mlxsw_sp_ipip_netdev_has_ikey(ol_dev) ?
+ be32_to_cpu(tun->parms.i_key) : 0;
+}
+
+static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev)
+{
+ struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+ return mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
+ be32_to_cpu(tun->parms.o_key) : 0;
+}
+
+static int
+mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
+ __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev);
+ char ratr_pl[MLXSW_REG_RATR_LEN];
+
+ mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
+ true, MLXSW_REG_RATR_TYPE_IPIP,
+ adj_index, rif_index);
+ mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
+}
+
+static int
+mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp,
+ u32 tunnel_index,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev);
+ u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
+ u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev);
+ char rtdp_pl[MLXSW_REG_RTDP_LEN];
+ unsigned int type_check;
+ u32 daddr4;
+
+ mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index);
+
+ type_check = has_ikey ?
+ MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY :
+ MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE;
+
+ /* Linux demuxes tunnels based on packet SIP (which must match the
+ * tunnel remote IP). Thus configure decap so that it filters out
+ * packets that are not IPv4 or have the wrong SIP. An IPIP_DECAP_ERROR
+ * trap is generated for packets that fail this criterion. Linux then
+ * handles such packets in the slow path and generates an ICMP
+ * destination unreachable.
+ */
+ daddr4 = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev));
+ mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, rif_index,
+ MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4,
+ type_check, has_ikey, daddr4, ikey);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
+}
+
+static int
+mlxsw_sp_ipip_fib_entry_op_gre4_ralue(struct mlxsw_sp *mlxsw_sp,
+ u32 dip, u8 prefix_len, u16 ul_vr_id,
+ enum mlxsw_reg_ralue_op op,
+ u32 tunnel_index)
+{
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
+
+ mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4, op,
+ ul_vr_id, prefix_len, dip);
+ mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ enum mlxsw_reg_ralue_op op,
+ u32 tunnel_index)
+{
+ u16 ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb);
+ __be32 dip;
+ int err;
+
+ err = mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(mlxsw_sp, tunnel_index,
+ ipip_entry);
+ if (err)
+ return err;
+
+ dip = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
+ ipip_entry->ol_dev).addr4;
+ return mlxsw_sp_ipip_fib_entry_op_gre4_ralue(mlxsw_sp, be32_to_cpu(dip),
+ 32, ul_vr_id, op,
+ tunnel_index);
+}
+
+static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto,
+ const struct net_device *ol_dev)
+{
+ union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev);
+ union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev);
+ union mlxsw_sp_l3addr naddr = {0};
+
+ /* Tunnels with unset local or remote address are valid in Linux and
+ * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access
+ * (NBMA) tunnels. In principle these can be offloaded, but the driver
+ * currently doesn't support this. So punt.
+ */
+ return memcmp(&saddr, &naddr, sizeof(naddr)) &&
+ memcmp(&daddr, &naddr, sizeof(naddr));
+}
+
+static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev,
+ enum mlxsw_sp_l3proto ol_proto)
+{
+ struct ip_tunnel *tunnel = netdev_priv(ol_dev);
+ __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */
+ bool inherit_ttl = tunnel->parms.iph.ttl == 0;
+ bool inherit_tos = tunnel->parms.iph.tos & 0x1;
+
+ return (tunnel->parms.i_flags & ~okflags) == 0 &&
+ (tunnel->parms.o_flags & ~okflags) == 0 &&
+ inherit_ttl && inherit_tos &&
+ mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev);
+}
+
+static struct mlxsw_sp_rif_ipip_lb_config
+mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev)
+{
+ enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
+
+ lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ?
+ MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP :
+ MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP;
+ return (struct mlxsw_sp_rif_ipip_lb_config){
+ .lb_ipipt = lb_ipipt,
+ .okey = mlxsw_sp_ipip_netdev_okey(ol_dev),
+ .ul_protocol = MLXSW_SP_L3_PROTO_IPV4,
+ .saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4,
+ ol_dev),
+ };
+}
+
+static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = {
+ .dev_type = ARPHRD_IPGRE,
+ .ul_proto = MLXSW_SP_L3_PROTO_IPV4,
+ .nexthop_update = mlxsw_sp_ipip_nexthop_update_gre4,
+ .fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4,
+ .can_offload = mlxsw_sp_ipip_can_offload_gre4,
+ .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4,
+};
+
+const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = {
+ [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops,
+};
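
mlxsw_sp_ipip_can_offload_gre4() above leans on the Linux ip_tunnel encodings for "inherit": a TTL of 0 means the TTL is copied from the inner packet, and the low TOS bit set means the TOS is. A hedged, standalone sketch of the same predicate (parameter types are illustrative, not the kernel's __be16):

#include <stdbool.h>

/* Tunnels with a fixed TTL/TOS, or with flags beyond okflags
 * (TUNNEL_KEY), are left to the software data path.
 */
static bool gre4_params_offloadable(unsigned char ttl, unsigned char tos,
				    unsigned short i_flags,
				    unsigned short o_flags,
				    unsigned short okflags)
{
	return ttl == 0 &&		/* TTL inherited from inner packet */
	       (tos & 0x1) &&		/* TOS inherited from inner packet */
	       (i_flags & ~okflags) == 0 &&
	       (o_flags & ~okflags) == 0;
}
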
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
new file mode 100644
index 0000000..1c2db83
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -0,0 +1,79 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_IPIP_H_
+#define _MLXSW_IPIP_H_
+
+#include "spectrum_router.h"
+#include <net/ip_fib.h>
+
+enum mlxsw_sp_ipip_type {
+ MLXSW_SP_IPIP_TYPE_GRE4,
+ MLXSW_SP_IPIP_TYPE_MAX,
+};
+
+struct mlxsw_sp_ipip_entry {
+ enum mlxsw_sp_ipip_type ipipt;
+ struct net_device *ol_dev; /* Overlay. */
+ struct mlxsw_sp_rif_ipip_lb *ol_lb;
+ unsigned int ref_count; /* Number of next hops using the tunnel. */
+ struct mlxsw_sp_fib_entry *decap_fib_entry;
+ struct list_head ipip_list_node;
+};
+
+struct mlxsw_sp_ipip_ops {
+ int dev_type;
+ enum mlxsw_sp_l3proto ul_proto; /* Underlay. */
+
+ int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_ipip_entry *ipip_entry);
+
+ bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev,
+ enum mlxsw_sp_l3proto ol_proto);
+
+ /* Return a configuration for creating an overlay loopback RIF. */
+ struct mlxsw_sp_rif_ipip_lb_config
+ (*ol_loopback_config)(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev);
+
+ int (*fib_entry_op)(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ enum mlxsw_reg_ralue_op op,
+ u32 tunnel_index);
+};
+
+extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[];
+
+#endif /* _MLXSW_IPIP_H_*/
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 4b2e0fd..f0fb898 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1,9 +1,10 @@
/*
* drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
- * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
* Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
* Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
+ * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -43,18 +44,27 @@
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
+#include <linux/socket.h>
+#include <linux/route.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
+#include <net/ip6_fib.h>
#include <net/fib_rules.h>
+#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
+#include <net/addrconf.h>
+#include <net/ndisc.h>
+#include <net/ipv6.h>
+#include <net/fib_notifier.h>
#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
+#include "spectrum_ipip.h"
#include "spectrum_router.h"
struct mlxsw_sp_vr;
@@ -79,9 +89,11 @@ struct mlxsw_sp_router {
struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
struct list_head nexthop_neighs_list;
+ struct list_head ipip_list;
bool aborted;
struct notifier_block fib_nb;
const struct mlxsw_sp_rif_ops **rif_ops_arr;
+ const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};
struct mlxsw_sp_rif {
@@ -122,6 +134,17 @@ struct mlxsw_sp_rif_subport {
bool lag;
};
+struct mlxsw_sp_rif_ipip_lb {
+ struct mlxsw_sp_rif common;
+ struct mlxsw_sp_rif_ipip_lb_config lb_config;
+ u16 ul_vr_id; /* Reserved for Spectrum-2. */
+};
+
+struct mlxsw_sp_rif_params_ipip_lb {
+ struct mlxsw_sp_rif_params common;
+ struct mlxsw_sp_rif_ipip_lb_config lb_config;
+};
+
struct mlxsw_sp_rif_ops {
enum mlxsw_sp_rif_type type;
size_t rif_size;
@@ -304,7 +327,7 @@ static struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev);
-#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE)
+#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
struct mlxsw_sp_prefix_usage {
DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
@@ -314,19 +337,6 @@ struct mlxsw_sp_prefix_usage {
for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
static bool
-mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
- struct mlxsw_sp_prefix_usage *prefix_usage2)
-{
- unsigned char prefix;
-
- mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
- if (!test_bit(prefix, prefix_usage2->b))
- return false;
- }
- return true;
-}
-
-static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
struct mlxsw_sp_prefix_usage *prefix_usage2)
{
@@ -371,6 +381,14 @@ enum mlxsw_sp_fib_entry_type {
MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
+
+ /* This is a special case of local delivery, where a packet should be
+ * decapsulated on reception. Note that there is no corresponding ENCAP,
+ * because that's a type of next hop, not of FIB entry. (There can be
+ * several next hops in a REMOTE entry, and some of them may be
+ * encapsulating entries.)
+ */
+ MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
};
struct mlxsw_sp_nexthop_group;
@@ -384,11 +402,9 @@ struct mlxsw_sp_fib_node {
struct mlxsw_sp_fib_key key;
};
-struct mlxsw_sp_fib_entry_params {
- u32 tb_id;
- u32 prio;
- u8 tos;
- u8 type;
+struct mlxsw_sp_fib_entry_decap {
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ u32 tunnel_index;
};
struct mlxsw_sp_fib_entry {
@@ -397,13 +413,26 @@ struct mlxsw_sp_fib_entry {
enum mlxsw_sp_fib_entry_type type;
struct list_head nexthop_group_node;
struct mlxsw_sp_nexthop_group *nh_group;
- struct mlxsw_sp_fib_entry_params params;
- bool offloaded;
+ struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
+};
+
+struct mlxsw_sp_fib4_entry {
+ struct mlxsw_sp_fib_entry common;
+ u32 tb_id;
+ u32 prio;
+ u8 tos;
+ u8 type;
+};
+
+struct mlxsw_sp_fib6_entry {
+ struct mlxsw_sp_fib_entry common;
+ struct list_head rt6_list;
+ unsigned int nrt6;
};
-enum mlxsw_sp_l3proto {
- MLXSW_SP_L3_PROTO_IPV4,
- MLXSW_SP_L3_PROTO_IPV6,
+struct mlxsw_sp_rt6 {
+ struct list_head list;
+ struct rt6_info *rt;
};
struct mlxsw_sp_lpm_tree {
@@ -428,6 +457,7 @@ struct mlxsw_sp_vr {
u32 tb_id; /* kernel fib table id */
unsigned int rif_count;
struct mlxsw_sp_fib *fib4;
+ struct mlxsw_sp_fib *fib6;
};
static const struct rhashtable_params mlxsw_sp_fib_ht_params;
@@ -487,15 +517,15 @@ static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
-static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_lpm_tree *lpm_tree)
+static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_lpm_tree *lpm_tree)
{
char ralta_pl[MLXSW_REG_RALTA_LEN];
mlxsw_reg_ralta_pack(ralta_pl, false,
(enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
lpm_tree->id);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
static int
@@ -551,10 +581,10 @@ err_left_struct_set:
return ERR_PTR(err);
}
-static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_lpm_tree *lpm_tree)
+static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_lpm_tree *lpm_tree)
{
- return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
+ mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
static struct mlxsw_sp_lpm_tree *
@@ -571,24 +601,21 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
lpm_tree->proto == proto &&
mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
prefix_usage))
- goto inc_ref_count;
+ return lpm_tree;
}
- lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
- proto);
- if (IS_ERR(lpm_tree))
- return lpm_tree;
+ return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
+}
-inc_ref_count:
+static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
+{
lpm_tree->ref_count++;
- return lpm_tree;
}
-static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_lpm_tree *lpm_tree)
+static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_lpm_tree *lpm_tree)
{
if (--lpm_tree->ref_count == 0)
- return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
- return 0;
+ mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
}
#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
@@ -625,7 +652,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
- return !!vr->fib4;
+ return !!vr->fib4 || !!vr->fib6;
}
static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
@@ -642,13 +669,13 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
}
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_fib *fib)
+ const struct mlxsw_sp_fib *fib, u8 tree_id)
{
char raltb_pl[MLXSW_REG_RALTB_LEN];
mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
(enum mlxsw_reg_ralxx_protocol) fib->proto,
- fib->lpm_tree->id);
+ tree_id);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
@@ -694,7 +721,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
case MLXSW_SP_L3_PROTO_IPV4:
return vr->fib4;
case MLXSW_SP_L3_PROTO_IPV6:
- BUG_ON(1);
+ return vr->fib6;
}
return NULL;
}
@@ -703,6 +730,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
u32 tb_id)
{
struct mlxsw_sp_vr *vr;
+ int err;
vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
if (!vr)
@@ -710,54 +738,26 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
if (IS_ERR(vr->fib4))
return ERR_CAST(vr->fib4);
+ vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(vr->fib6)) {
+ err = PTR_ERR(vr->fib6);
+ goto err_fib6_create;
+ }
vr->tb_id = tb_id;
return vr;
-}
-static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
-{
+err_fib6_create:
mlxsw_sp_fib_destroy(vr->fib4);
vr->fib4 = NULL;
+ return ERR_PTR(err);
}
-static int
-mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib,
- struct mlxsw_sp_prefix_usage *req_prefix_usage)
+static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
{
- struct mlxsw_sp_lpm_tree *lpm_tree = fib->lpm_tree;
- struct mlxsw_sp_lpm_tree *new_tree;
- int err;
-
- if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage))
- return 0;
-
- new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
- fib->proto);
- if (IS_ERR(new_tree)) {
- /* We failed to get a tree according to the required
- * prefix usage. However, the current tree might be still good
- * for us if our requirement is subset of the prefixes used
- * in the tree.
- */
- if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
- &lpm_tree->prefix_usage))
- return 0;
- return PTR_ERR(new_tree);
- }
-
- /* Prevent packet loss by overwriting existing binding */
- fib->lpm_tree = new_tree;
- err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib);
- if (err)
- goto err_tree_bind;
- mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
-
- return 0;
-
-err_tree_bind:
- fib->lpm_tree = lpm_tree;
- mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
- return err;
+ mlxsw_sp_fib_destroy(vr->fib6);
+ vr->fib6 = NULL;
+ mlxsw_sp_fib_destroy(vr->fib4);
+ vr->fib4 = NULL;
}
static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
@@ -773,10 +773,105 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
{
- if (!vr->rif_count && list_empty(&vr->fib4->node_list))
+ if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
+ list_empty(&vr->fib6->node_list))
mlxsw_sp_vr_destroy(vr);
}
+static bool
+mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
+ enum mlxsw_sp_l3proto proto, u8 tree_id)
+{
+ struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
+
+ if (!mlxsw_sp_vr_is_used(vr))
+ return false;
+ if (fib->lpm_tree && fib->lpm_tree->id == tree_id)
+ return true;
+ return false;
+}
+
+static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib *fib,
+ struct mlxsw_sp_lpm_tree *new_tree)
+{
+ struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
+ int err;
+
+ err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
+ if (err)
+ return err;
+ fib->lpm_tree = new_tree;
+ mlxsw_sp_lpm_tree_hold(new_tree);
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
+ return 0;
+}
+
+static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib *fib,
+ struct mlxsw_sp_lpm_tree *new_tree)
+{
+ struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
+ enum mlxsw_sp_l3proto proto = fib->proto;
+ u8 old_id, new_id = new_tree->id;
+ struct mlxsw_sp_vr *vr;
+ int i, err;
+
+ if (!old_tree)
+ goto no_replace;
+ old_id = old_tree->id;
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
+ vr = &mlxsw_sp->router->vrs[i];
+ if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
+ continue;
+ err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
+ mlxsw_sp_vr_fib(vr, proto),
+ new_tree);
+ if (err)
+ goto err_tree_replace;
+ }
+
+ return 0;
+
+err_tree_replace:
+ for (i--; i >= 0; i--) {
+ vr = &mlxsw_sp->router->vrs[i];
+ if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
+ continue;
+ mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
+ mlxsw_sp_vr_fib(vr, proto),
+ old_tree);
+ }
+ return err;
+
+no_replace:
+ err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
+ if (err)
+ return err;
+ fib->lpm_tree = new_tree;
+ mlxsw_sp_lpm_tree_hold(new_tree);
+ return 0;
+}
+
+static void
+mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_l3proto proto,
+ struct mlxsw_sp_prefix_usage *req_prefix_usage)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
+ struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
+ struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
+ unsigned char prefix;
+
+ if (!mlxsw_sp_vr_is_used(vr))
+ continue;
+ mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage)
+ mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix);
+ }
+}
+
static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_sp_vr *vr;
@@ -816,6 +911,374 @@ static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
kfree(mlxsw_sp->router->vrs);
}
+static struct net_device *
+__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
+{
+ struct ip_tunnel *tun = netdev_priv(ol_dev);
+ struct net *net = dev_net(ol_dev);
+
+ return __dev_get_by_index(net, tun->parms.link);
+}
+
+static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
+{
+ struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
+
+ if (d)
+ return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
+ else
+ return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
+}
+
+static struct mlxsw_sp_rif *
+mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_rif_params *params);
+
+static struct mlxsw_sp_rif_ipip_lb *
+mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_ipip_type ipipt,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_rif_params_ipip_lb lb_params;
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+ struct mlxsw_sp_rif *rif;
+
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
+ lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
+ .common.dev = ol_dev,
+ .common.lag = false,
+ .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
+ };
+
+ rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common);
+ if (IS_ERR(rif))
+ return ERR_CAST(rif);
+ return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
+}
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_ipip_type ipipt,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ struct mlxsw_sp_ipip_entry *ret = NULL;
+
+ ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
+ if (!ipip_entry)
+ return ERR_PTR(-ENOMEM);
+
+ ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
+ ol_dev);
+ if (IS_ERR(ipip_entry->ol_lb)) {
+ ret = ERR_CAST(ipip_entry->ol_lb);
+ goto err_ol_ipip_lb_create;
+ }
+
+ ipip_entry->ipipt = ipipt;
+ ipip_entry->ol_dev = ol_dev;
+
+ return ipip_entry;
+
+err_ol_ipip_lb_create:
+ kfree(ipip_entry);
+ return ret;
+}
+
+static void
+mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ WARN_ON(ipip_entry->ref_count > 0);
+ mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
+ kfree(ipip_entry);
+}
+
+static __be32
+mlxsw_sp_ipip_netdev_saddr4(const struct net_device *ol_dev)
+{
+ struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+ return tun->parms.iph.saddr;
+}
+
+union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+ const struct net_device *ol_dev)
+{
+ switch (proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ return (union mlxsw_sp_l3addr) {
+ .addr4 = mlxsw_sp_ipip_netdev_saddr4(ol_dev),
+ };
+ case MLXSW_SP_L3_PROTO_IPV6:
+ break;
+ }
+
+ WARN_ON(1);
+ return (union mlxsw_sp_l3addr) {
+ .addr4 = 0,
+ };
+}
+
+__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev)
+{
+ struct ip_tunnel *tun = netdev_priv(ol_dev);
+
+ return tun->parms.iph.daddr;
+}
+
+union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
+ const struct net_device *ol_dev)
+{
+ switch (proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ return (union mlxsw_sp_l3addr) {
+ .addr4 = mlxsw_sp_ipip_netdev_daddr4(ol_dev),
+ };
+ case MLXSW_SP_L3_PROTO_IPV6:
+ break;
+ }
+
+ WARN_ON(1);
+ return (union mlxsw_sp_l3addr) {
+ .addr4 = 0,
+ };
+}
+
+static bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1,
+ const union mlxsw_sp_l3addr *addr2)
+{
+ return !memcmp(addr1, addr2, sizeof(*addr1));
+}
+
+static bool
+mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
+ const enum mlxsw_sp_l3proto ul_proto,
+ union mlxsw_sp_l3addr saddr,
+ u32 ul_tb_id,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
+ enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
+ union mlxsw_sp_l3addr tun_saddr;
+
+ if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
+ return false;
+
+ tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
+ return tun_ul_tb_id == ul_tb_id &&
+ mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
+}
+
+static int
+mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ u32 tunnel_index;
+ int err;
+
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
+ if (err)
+ return err;
+
+ ipip_entry->decap_fib_entry = fib_entry;
+ fib_entry->decap.ipip_entry = ipip_entry;
+ fib_entry->decap.tunnel_index = tunnel_index;
+ return 0;
+}
+
+static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ /* Unlink this FIB entry from the IPIP entry it is the decap route of. */
+ fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
+ fib_entry->decap.ipip_entry = NULL;
+ mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
+}
+
+static struct mlxsw_sp_fib_node *
+mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
+ size_t addr_len, unsigned char prefix_len);
+static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry);
+
+static void
+mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
+
+ mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+
+ mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+}
+
+static void
+mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry,
+ struct mlxsw_sp_fib_entry *decap_fib_entry)
+{
+ if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
+ ipip_entry))
+ return;
+ decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
+
+ if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
+ mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
+}
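+
+/* Life cycle of the decap route: when a local route matching the tunnel's
+ * source address appears, it is promoted from a plain trap entry to an
+ * IP-in-IP decap entry; if programming the decap entry fails, it is
+ * demoted back to a trap entry so packets still reach the CPU.
+ */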
+
+/* Given an IPIP entry, find the corresponding decap route. */
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ struct mlxsw_sp_fib_node *fib_node;
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+ struct mlxsw_sp_fib_entry *fib_entry;
+ unsigned char saddr_prefix_len;
+ union mlxsw_sp_l3addr saddr;
+ struct mlxsw_sp_fib *ul_fib;
+ struct mlxsw_sp_vr *ul_vr;
+ const void *saddrp;
+ size_t saddr_len;
+ u32 ul_tb_id;
+ u32 saddr4;
+
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
+
+ ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
+ ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
+ if (!ul_vr)
+ return NULL;
+
+ ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
+ saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
+ ipip_entry->ol_dev);
+
+ switch (ipip_ops->ul_proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ saddr4 = be32_to_cpu(saddr.addr4);
+ saddrp = &saddr4;
+ saddr_len = 4;
+ saddr_prefix_len = 32;
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ WARN_ON(1);
+ return NULL;
+ }
+
+ fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
+ saddr_prefix_len);
+ if (!fib_node || list_empty(&fib_node->entry_list))
+ return NULL;
+
+ fib_entry = list_first_entry(&fib_node->entry_list,
+ struct mlxsw_sp_fib_entry, list);
+ if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
+ return NULL;
+
+ return fib_entry;
+}
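+
+/* Note on the lookup above: the decap route is the host (/32) route for
+ * the tunnel's local address in the underlay FIB, and only the first,
+ * i.e. currently programmed, entry of the node qualifies, and only while
+ * it is of type TRAP.
+ */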
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_ipip_type ipipt,
+ struct net_device *ol_dev)
+{
+ u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
+ struct mlxsw_sp_router *router = mlxsw_sp->router;
+ struct mlxsw_sp_fib_entry *decap_fib_entry;
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ enum mlxsw_sp_l3proto ul_proto;
+ union mlxsw_sp_l3addr saddr;
+
+ list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
+ ipip_list_node) {
+ if (ipip_entry->ol_dev == ol_dev)
+ goto inc_ref_count;
+
+ /* The configuration where several tunnels have the same local
+ * address in the same underlay table needs special treatment in
+ * the HW. That is currently not implemented in the driver.
+ */
+ ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
+ saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
+ if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
+ ul_tb_id, ipip_entry))
+ return ERR_PTR(-EEXIST);
+ }
+
+ ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
+ if (IS_ERR(ipip_entry))
+ return ipip_entry;
+
+ decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
+ if (decap_fib_entry)
+ mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
+ decap_fib_entry);
+
+ list_add_tail(&ipip_entry->ipip_list_node,
+ &mlxsw_sp->router->ipip_list);
+
+inc_ref_count:
+ ++ipip_entry->ref_count;
+ return ipip_entry;
+}
+
+static void
+mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ if (--ipip_entry->ref_count == 0) {
+ list_del(&ipip_entry->ipip_list_node);
+ if (ipip_entry->decap_fib_entry)
+ mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
+ mlxsw_sp_ipip_entry_destroy(ipip_entry);
+ }
+}
+
+static bool
+mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ul_dev,
+ enum mlxsw_sp_l3proto ul_proto,
+ union mlxsw_sp_l3addr ul_dip,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+ u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
+ enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
+ struct net_device *ipip_ul_dev;
+
+ if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
+ return false;
+
+ ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
+ return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
+ ul_tb_id, ipip_entry) &&
+ (!ipip_ul_dev || ipip_ul_dev == ul_dev);
+}
+
+/* Given decap parameters, find the corresponding IPIP entry. */
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ul_dev,
+ enum mlxsw_sp_l3proto ul_proto,
+ union mlxsw_sp_l3addr ul_dip)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+
+ list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
+ ipip_list_node)
+ if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
+ ul_proto, ul_dip,
+ ipip_entry))
+ return ipip_entry;
+
+ return NULL;
+}
+
struct mlxsw_sp_neigh_key {
struct neighbour *n;
};
@@ -831,6 +1294,8 @@ struct mlxsw_sp_neigh_entry {
* this neigh entry
*/
struct list_head nexthop_neighs_list_node;
+ unsigned int counter_index;
+ bool counter_valid;
};
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
@@ -839,6 +1304,62 @@ static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
+struct mlxsw_sp_neigh_entry *
+mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ if (!neigh_entry) {
+ if (list_empty(&rif->neigh_list))
+ return NULL;
+ else
+ return list_first_entry(&rif->neigh_list,
+ typeof(*neigh_entry),
+ rif_list_node);
+ }
+ if (neigh_entry->rif_list_node.next == &rif->neigh_list)
+ return NULL;
+ return list_next_entry(neigh_entry, rif_list_node);
+}
+
+int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ return neigh_entry->key.n->tbl->family;
+}
+
+unsigned char *
+mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ return neigh_entry->ha;
+}
+
+u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ struct neighbour *n;
+
+ n = neigh_entry->key.n;
+ return ntohl(*((__be32 *) n->primary_key));
+}
+
+struct in6_addr *
+mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ struct neighbour *n;
+
+ n = neigh_entry->key.n;
+ return (struct in6_addr *) &n->primary_key;
+}
+
+int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ u64 *p_counter)
+{
+ if (!neigh_entry->counter_valid)
+ return -EINVAL;
+
+ return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
+ p_counter, NULL);
+}
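+
+/* Neighbour counters are sourced from the dpipe host tables, so they are
+ * only allocated once counters are enabled on the matching table, e.g.
+ * (illustrative invocation; the device name will differ):
+ *
+ *   devlink dpipe table set pci/0000:03:00.0 name mlxsw_host4 \
+ *           counters_enabled true
+ */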
+
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
u16 rif)
@@ -879,6 +1400,53 @@ mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_neigh_ht_params);
}
+static bool
+mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ struct devlink *devlink;
+ const char *table_name;
+
+ switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
+ case AF_INET:
+ table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
+ break;
+ case AF_INET6:
+ table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
+ break;
+ default:
+ WARN_ON(1);
+ return false;
+ }
+
+ devlink = priv_to_devlink(mlxsw_sp->core);
+ return devlink_dpipe_table_counter_enabled(devlink, table_name);
+}
+
+static void
+mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
+ return;
+
+ if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
+ return;
+
+ neigh_entry->counter_valid = true;
+}
+
+static void
+mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ if (!neigh_entry->counter_valid)
+ return;
+ mlxsw_sp_flow_counter_free(mlxsw_sp,
+ neigh_entry->counter_index);
+ neigh_entry->counter_valid = false;
+}
+
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
@@ -898,6 +1466,7 @@ mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
if (err)
goto err_neigh_entry_insert;
+ mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
return neigh_entry;
@@ -912,6 +1481,7 @@ mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry)
{
list_del(&neigh_entry->rif_list_node);
+ mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
mlxsw_sp_neigh_entry_free(neigh_entry);
}
@@ -929,8 +1499,15 @@ mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
- unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+ unsigned long interval;
+#if IS_ENABLED(CONFIG_IPV6)
+ interval = min_t(unsigned long,
+ NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
+ NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
+#else
+ interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+#endif
mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
}
@@ -965,6 +1542,44 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
neigh_release(n);
}
+#if IS_ENABLED(CONFIG_IPV6)
+static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
+ char *rauhtd_pl,
+ int rec_index)
+{
+ struct net_device *dev;
+ struct neighbour *n;
+ struct in6_addr dip;
+ u16 rif;
+
+ mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
+ (char *) &dip);
+
+ if (!mlxsw_sp->router->rifs[rif]) {
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
+ return;
+ }
+
+ dev = mlxsw_sp->router->rifs[rif]->dev;
+ n = neigh_lookup(&nd_tbl, &dip, dev);
+ if (!n) {
+ netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n",
+ &dip);
+ return;
+ }
+
+ netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+}
+#else
+static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
+ char *rauhtd_pl,
+ int rec_index)
+{
+}
+#endif
+
static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
char *rauhtd_pl,
int rec_index)
@@ -988,6 +1603,15 @@ static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
}
+static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
+ char *rauhtd_pl,
+ int rec_index)
+{
+ /* One record contains one entry. */
+ mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
+ rec_index);
+}
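+
+/* Unlike IPv4, where one RAUHTD record may carry up to four entries, an
+ * IPv6 record carries a single entry on account of the larger address.
+ */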
+
static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
char *rauhtd_pl, int rec_index)
{
@@ -997,7 +1621,8 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
rec_index);
break;
case MLXSW_REG_RAUHTD_TYPE_IPV6:
- WARN_ON_ONCE(1);
+ mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
+ rec_index);
break;
}
}
@@ -1022,22 +1647,20 @@ static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
return false;
}
-static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
+static int
+__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
+ char *rauhtd_pl,
+ enum mlxsw_reg_rauhtd_type type)
{
- char *rauhtd_pl;
- u8 num_rec;
- int i, err;
-
- rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
- if (!rauhtd_pl)
- return -ENOMEM;
+ int i, num_rec;
+ int err;
/* Make sure the neighbour's netdev isn't removed in the
* process.
*/
rtnl_lock();
do {
- mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
+ mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
rauhtd_pl);
if (err) {
@@ -1051,6 +1674,27 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
rtnl_unlock();
+ return err;
+}
+
+static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
+{
+ enum mlxsw_reg_rauhtd_type type;
+ char *rauhtd_pl;
+ int err;
+
+ rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
+ if (!rauhtd_pl)
+ return -ENOMEM;
+
+ type = MLXSW_REG_RAUHTD_TYPE_IPV4;
+ err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
+ if (err)
+ goto out;
+
+ type = MLXSW_REG_RAUHTD_TYPE_IPV6;
+ err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
+out:
kfree(rauhtd_pl);
return err;
}
@@ -1143,9 +1787,43 @@ mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
dip);
+ if (neigh_entry->counter_valid)
+ mlxsw_reg_rauht_pack_counter(rauht_pl,
+ neigh_entry->counter_index);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
+}
+
+static void
+mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ enum mlxsw_reg_rauht_op op)
+{
+ struct neighbour *n = neigh_entry->key.n;
+ char rauht_pl[MLXSW_REG_RAUHT_LEN];
+ const char *dip = n->primary_key;
+
+ mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
+ dip);
+ if (neigh_entry->counter_valid)
+ mlxsw_reg_rauht_pack_counter(rauht_pl,
+ neigh_entry->counter_index);
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
+bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
+{
+ struct neighbour *n = neigh_entry->key.n;
+
+ /* Packets with a link-local destination address are trapped
+ * after LPM lookup and never reach the neighbour table, so
+ * there is no need to program such neighbours to the device.
+ */
+ if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
+ IPV6_ADDR_LINKLOCAL)
+ return true;
+ return false;
+}
+
static void
mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_neigh_entry *neigh_entry,
@@ -1154,11 +1832,29 @@ mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
if (!adding && !neigh_entry->connected)
return;
neigh_entry->connected = adding;
- if (neigh_entry->key.n->tbl == &arp_tbl)
+ if (neigh_entry->key.n->tbl->family == AF_INET) {
mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
mlxsw_sp_rauht_op(adding));
- else
+ } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
+ if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
+ return;
+ mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
+ mlxsw_sp_rauht_op(adding));
+ } else {
WARN_ON_ONCE(1);
+ }
+}
+
+void
+mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ bool adding)
+{
+ if (adding)
+ mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
+ else
+ mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
+ mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
}
struct mlxsw_sp_neigh_event_work {
@@ -1227,7 +1923,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
p = ptr;
/* We don't care about changes in the default table. */
- if (!p->dev || p->tbl != &arp_tbl)
+ if (!p->dev || (p->tbl->family != AF_INET &&
+ p->tbl->family != AF_INET6))
return NOTIFY_DONE;
/* We are in atomic context and can't take RTNL mutex,
@@ -1246,7 +1943,7 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
case NETEVENT_NEIGH_UPDATE:
n = ptr;
- if (n->tbl != &arp_tbl)
+ if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
return NOTIFY_DONE;
mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
@@ -1307,27 +2004,23 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
}
-static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_rif *rif)
-{
- char rauht_pl[MLXSW_REG_RAUHT_LEN];
-
- mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
- rif->rif_index, rif->addr);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
-}
-
static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif)
{
struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
- mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
- rif_list_node)
+ rif_list_node) {
+ mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
+ }
}
+enum mlxsw_sp_nexthop_type {
+ MLXSW_SP_NEXTHOP_TYPE_ETH,
+ MLXSW_SP_NEXTHOP_TYPE_IPIP,
+};
+
struct mlxsw_sp_nexthop_key {
struct fib_nh *fib_nh;
};
@@ -1340,6 +2033,8 @@ struct mlxsw_sp_nexthop {
*/
struct rhash_head ht_node;
struct mlxsw_sp_nexthop_key key;
+ unsigned char gw_addr[sizeof(struct in6_addr)];
+ int ifindex;
struct mlxsw_sp_rif *rif;
u8 should_offload:1, /* set indicates this neigh is connected and
* should be put to KVD linear area of this group.
@@ -1350,17 +2045,18 @@ struct mlxsw_sp_nexthop {
update:1; /* set indicates that MAC of this neigh should be
* updated in HW
*/
- struct mlxsw_sp_neigh_entry *neigh_entry;
-};
-
-struct mlxsw_sp_nexthop_group_key {
- struct fib_info *fi;
+ enum mlxsw_sp_nexthop_type type;
+ union {
+ struct mlxsw_sp_neigh_entry *neigh_entry;
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ };
};
struct mlxsw_sp_nexthop_group {
+ void *priv;
struct rhash_head ht_node;
struct list_head fib_list; /* list of fib entries that use this group */
- struct mlxsw_sp_nexthop_group_key key;
+ struct neigh_table *neigh_tbl;
u8 adj_index_valid:1,
gateway:1; /* routes using the group use a gateway */
u32 adj_index;
@@ -1370,15 +2066,154 @@ struct mlxsw_sp_nexthop_group {
#define nh_rif nexthops[0].rif
};
+static struct fib_info *
+mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ return nh_grp->priv;
+}
+
+struct mlxsw_sp_nexthop_group_cmp_arg {
+ enum mlxsw_sp_l3proto proto;
+ union {
+ struct fib_info *fi;
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ };
+};
+
+static bool
+mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
+ const struct in6_addr *gw, int ifindex)
+{
+ int i;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ const struct mlxsw_sp_nexthop *nh;
+
+ nh = &nh_grp->nexthops[i];
+ if (nh->ifindex == ifindex &&
+ ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
+ const struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ if (nh_grp->count != fib6_entry->nrt6)
+ return false;
+
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ struct in6_addr *gw;
+ int ifindex;
+
+ ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
+ gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
+ if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex))
+ return false;
+ }
+
+ return true;
+}
+
+static int
+mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
+ const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
+
+ switch (cmp_arg->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
+ case MLXSW_SP_L3_PROTO_IPV6:
+ return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
+ cmp_arg->fib6_entry);
+ default:
+ WARN_ON(1);
+ return 1;
+ }
+}
+
+static int
+mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ return nh_grp->neigh_tbl->family;
+}
+
+static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct mlxsw_sp_nexthop_group *nh_grp = data;
+ const struct mlxsw_sp_nexthop *nh;
+ struct fib_info *fi;
+ unsigned int val;
+ int i;
+
+ switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
+ case AF_INET:
+ fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
+ return jhash(&fi, sizeof(fi), seed);
+ case AF_INET6:
+ val = nh_grp->count;
+ for (i = 0; i < nh_grp->count; i++) {
+ nh = &nh_grp->nexthops[i];
+ val ^= nh->ifindex;
+ }
+ return jhash(&val, sizeof(val), seed);
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+}
+
+static u32
+mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
+{
+ unsigned int val = fib6_entry->nrt6;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ struct net_device *dev;
+
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ dev = mlxsw_sp_rt6->rt->dst.dev;
+ val ^= dev->ifindex;
+ }
+
+ return jhash(&val, sizeof(val), seed);
+}
+
+static u32
+mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
+{
+ const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
+
+ switch (cmp_arg->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
+ case MLXSW_SP_L3_PROTO_IPV6:
+ return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+}
+
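+/* The nexthop group table is keyed differently per protocol: IPv4 groups
+ * hash on the fib_info pointer, while IPv6 groups hash on their set of
+ * nexthop ifindexes. Lookups therefore pass a
+ * struct mlxsw_sp_nexthop_group_cmp_arg and rely on hashfn/obj_hashfn
+ * producing matching hashes, with obj_cmpfn doing the final comparison.
+ */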
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
- .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
- .key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
+ .hashfn = mlxsw_sp_nexthop_group_hash,
+ .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj,
+ .obj_cmpfn = mlxsw_sp_nexthop_group_cmp,
};
static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
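+ /* IPv6 groups are compared by their gateway nexthops, so a group
+ * without a gateway cannot be meaningfully shared and is kept out
+ * of the hash table.
+ */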
+ if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
+ !nh_grp->gateway)
+ return 0;
+
return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
&nh_grp->ht_node,
mlxsw_sp_nexthop_group_ht_params);
@@ -1387,16 +2222,38 @@ static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
+ if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
+ !nh_grp->gateway)
+ return;
+
rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
&nh_grp->ht_node,
mlxsw_sp_nexthop_group_ht_params);
}
static struct mlxsw_sp_nexthop_group *
-mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_nexthop_group_key key)
+mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
+ struct fib_info *fi)
+{
+ struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
+
+ cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
+ cmp_arg.fi = fi;
+ return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
+ &cmp_arg,
+ mlxsw_sp_nexthop_group_ht_params);
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
{
- return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, &key,
+ struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
+
+ cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
+ cmp_arg.fib6_entry = fib6_entry;
+ return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
+ &cmp_arg,
mlxsw_sp_nexthop_group_ht_params);
}
@@ -1473,15 +2330,26 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
char ratr_pl[MLXSW_REG_RATR_LEN];
mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
- true, adj_index, neigh_entry->rif);
+ true, MLXSW_REG_RATR_TYPE_ETHERNET,
+ adj_index, neigh_entry->rif);
mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
+static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
+ u32 adj_index,
+ struct mlxsw_sp_nexthop *nh)
+{
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
+ return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
+}
+
static int
-mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_nexthop_group *nh_grp,
- bool reallocate)
+mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ bool reallocate)
{
u32 adj_index = nh_grp->adj_index; /* base */
struct mlxsw_sp_nexthop *nh;
@@ -1497,8 +2365,16 @@ mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
}
if (nh->update || reallocate) {
- err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
- adj_index, nh);
+ switch (nh->type) {
+ case MLXSW_SP_NEXTHOP_TYPE_ETH:
+ err = mlxsw_sp_nexthop_mac_update
+ (mlxsw_sp, adj_index, nh);
+ break;
+ case MLXSW_SP_NEXTHOP_TYPE_IPIP:
+ err = mlxsw_sp_nexthop_ipip_update
+ (mlxsw_sp, adj_index, nh);
+ break;
+ }
if (err)
return err;
nh->update = 0;
@@ -1509,9 +2385,6 @@ mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
return 0;
}
-static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry);
-
static bool
mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
const struct mlxsw_sp_fib_entry *fib_entry);
@@ -1535,6 +2408,24 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
}
static void
+mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op, int err);
+
+static void
+mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
+ struct mlxsw_sp_fib_entry *fib_entry;
+
+ list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
+ if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
+ fib_entry))
+ continue;
+ mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
+ }
+}
+
+static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
{
@@ -1556,7 +2447,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
- if (nh->should_offload ^ nh->offloaded) {
+ if (nh->should_offload != nh->offloaded) {
offload_change = true;
if (nh->should_offload)
nh->update = 1;
@@ -1568,8 +2459,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
/* Nothing was added or removed, so no need to reallocate. Just
* update MAC on existing adjacency indexes.
*/
- err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
- false);
+ err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
goto set_trap;
@@ -1596,7 +2486,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
nh_grp->adj_index_valid = 1;
nh_grp->adj_index = adj_index;
nh_grp->ecmp_size = ecmp_size;
- err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
+ err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
goto set_trap;
@@ -1621,6 +2511,10 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
goto set_trap;
}
+
+ /* Offload state within the group changed, so update the flags. */
+ mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
+
return;
set_trap:
@@ -1640,9 +2534,9 @@ set_trap:
static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
bool removing)
{
- if (!removing && !nh->should_offload)
+ if (!removing)
nh->should_offload = 1;
- else if (removing && nh->offloaded)
+ else if (nh->offloaded)
nh->should_offload = 0;
nh->update = 1;
}
@@ -1684,7 +2578,6 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh)
{
struct mlxsw_sp_neigh_entry *neigh_entry;
- struct fib_nh *fib_nh = nh->key.fib_nh;
struct neighbour *n;
u8 nud_state, dead;
int err;
@@ -1693,13 +2586,14 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
return 0;
/* Take a reference of neigh here ensuring that neigh would
- * not be detructed before the nexthop entry is finished.
+ * not be destructed before the nexthop entry is finished.
* The reference is taken either in neigh_lookup() or
* in neigh_create() in case n is not found.
*/
- n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
+ n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
if (!n) {
- n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
+ n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
+ nh->rif->dev);
if (IS_ERR(n))
return PTR_ERR(n);
neigh_event_send(n, NULL);
@@ -1761,18 +2655,131 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
neigh_release(n);
}
-static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_nexthop_group *nh_grp,
- struct mlxsw_sp_nexthop *nh,
- struct fib_nh *fib_nh)
+static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev,
+ enum mlxsw_sp_ipip_type *p_type)
+{
+ struct mlxsw_sp_router *router = mlxsw_sp->router;
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+ enum mlxsw_sp_ipip_type ipipt;
+
+ for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
+ ipip_ops = router->ipip_ops_arr[ipipt];
+ if (dev->type == ipip_ops->dev_type) {
+ if (p_type)
+ *p_type = ipipt;
+ return true;
+ }
+ }
+ return false;
+}
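+
+/* Matching is done on the netdevice type (for instance, ARPHRD_IPGRE for
+ * GRE tunnels), which each mlxsw_sp_ipip_ops instance advertises in its
+ * dev_type field.
+ */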
+
+static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_ipip_type ipipt,
+ struct mlxsw_sp_nexthop *nh,
+ struct net_device *ol_dev)
+{
+ if (!nh->nh_grp->gateway || nh->ipip_entry)
+ return 0;
+
+ nh->ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev);
+ if (IS_ERR(nh->ipip_entry))
+ return PTR_ERR(nh->ipip_entry);
+
+ __mlxsw_sp_nexthop_neigh_update(nh, false);
+ return 0;
+}
+
+static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
+
+ if (!ipip_entry)
+ return;
+
+ __mlxsw_sp_nexthop_neigh_update(nh, true);
+ mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry);
+ nh->ipip_entry = NULL;
+}
+
+static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
+ const struct fib_nh *fib_nh,
+ enum mlxsw_sp_ipip_type *p_ipipt)
{
struct net_device *dev = fib_nh->nh_dev;
- struct in_device *in_dev;
+
+ return dev &&
+ fib_nh->nh_parent->fib_type == RTN_UNICAST &&
+ mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
+}
+
+static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ switch (nh->type) {
+ case MLXSW_SP_NEXTHOP_TYPE_ETH:
+ mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_rif_fini(nh);
+ break;
+ case MLXSW_SP_NEXTHOP_TYPE_IPIP:
+ mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
+ break;
+ }
+}
+
+static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh,
+ struct fib_nh *fib_nh)
+{
+ struct mlxsw_sp_router *router = mlxsw_sp->router;
+ struct net_device *dev = fib_nh->nh_dev;
+ enum mlxsw_sp_ipip_type ipipt;
struct mlxsw_sp_rif *rif;
int err;
+ if (mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fib_nh, &ipipt) &&
+ router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
+ MLXSW_SP_L3_PROTO_IPV4)) {
+ nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
+ return mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev);
+ }
+
+ nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!rif)
+ return 0;
+
+ mlxsw_sp_nexthop_rif_init(nh, rif);
+ err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
+ if (err)
+ goto err_neigh_init;
+
+ return 0;
+
+err_neigh_init:
+ mlxsw_sp_nexthop_rif_fini(nh);
+ return err;
+}
+
+static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
+}
+
+static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ struct mlxsw_sp_nexthop *nh,
+ struct fib_nh *fib_nh)
+{
+ struct net_device *dev = fib_nh->nh_dev;
+ struct in_device *in_dev;
+ int err;
+
nh->nh_grp = nh_grp;
nh->key.fib_nh = fib_nh;
+ memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
if (err)
return err;
@@ -1785,37 +2792,29 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
fib_nh->nh_flags & RTNH_F_LINKDOWN)
return 0;
- rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
- if (!rif)
- return 0;
- mlxsw_sp_nexthop_rif_init(nh, rif);
-
- err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
+ err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
if (err)
goto err_nexthop_neigh_init;
return 0;
err_nexthop_neigh_init:
- mlxsw_sp_nexthop_rif_fini(nh);
mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
return err;
}
-static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_nexthop *nh)
+static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
{
- mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
- mlxsw_sp_nexthop_rif_fini(nh);
+ mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
-static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
- unsigned long event, struct fib_nh *fib_nh)
+static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
+ unsigned long event, struct fib_nh *fib_nh)
{
struct mlxsw_sp_nexthop_key key;
struct mlxsw_sp_nexthop *nh;
- struct mlxsw_sp_rif *rif;
if (mlxsw_sp->router->aborted)
return;
@@ -1825,18 +2824,12 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
if (WARN_ON_ONCE(!nh))
return;
- rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
- if (!rif)
- return;
-
switch (event) {
case FIB_EVENT_NH_ADD:
- mlxsw_sp_nexthop_rif_init(nh, rif);
- mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
+ mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
break;
case FIB_EVENT_NH_DEL:
- mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
- mlxsw_sp_nexthop_rif_fini(nh);
+ mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
break;
}
@@ -1849,14 +2842,20 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh, *tmp;
list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
- mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
- mlxsw_sp_nexthop_rif_fini(nh);
+ mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}
}
+static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
+ const struct fib_info *fi)
+{
+ return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
+ mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
+}
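+
+/* A route is considered a gateway route if its nexthop is a link-scoped
+ * (directly connected) gateway, or if it egresses through an IP-in-IP
+ * netdevice that the driver can offload.
+ */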
+
static struct mlxsw_sp_nexthop_group *
-mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
+mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
struct mlxsw_sp_nexthop_group *nh_grp;
struct mlxsw_sp_nexthop *nh;
@@ -1870,17 +2869,19 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
nh_grp = kzalloc(alloc_size, GFP_KERNEL);
if (!nh_grp)
return ERR_PTR(-ENOMEM);
+ nh_grp->priv = fi;
INIT_LIST_HEAD(&nh_grp->fib_list);
- nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
+ nh_grp->neigh_tbl = &arp_tbl;
+
+ nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
nh_grp->count = fi->fib_nhs;
- nh_grp->key.fi = fi;
fib_info_hold(fi);
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
fib_nh = &fi->fib_nh[i];
- err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
+ err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
if (err)
- goto err_nexthop_init;
+ goto err_nexthop4_init;
}
err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
if (err)
@@ -1889,19 +2890,19 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
return nh_grp;
err_nexthop_group_insert:
-err_nexthop_init:
+err_nexthop4_init:
for (i--; i >= 0; i--) {
nh = &nh_grp->nexthops[i];
- mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
+ mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
}
- fib_info_put(nh_grp->key.fi);
+ fib_info_put(fi);
kfree(nh_grp);
return ERR_PTR(err);
}
static void
-mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_nexthop_group *nh_grp)
+mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
{
struct mlxsw_sp_nexthop *nh;
int i;
@@ -1909,25 +2910,23 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
- mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
+ mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
}
mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
WARN_ON_ONCE(nh_grp->adj_index_valid);
- fib_info_put(nh_grp->key.fi);
+ fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
kfree(nh_grp);
}
-static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry,
- struct fib_info *fi)
+static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ struct fib_info *fi)
{
- struct mlxsw_sp_nexthop_group_key key;
struct mlxsw_sp_nexthop_group *nh_grp;
- key.fi = fi;
- nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
+ nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
if (!nh_grp) {
- nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
+ nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
if (IS_ERR(nh_grp))
return PTR_ERR(nh_grp);
}
@@ -1936,15 +2935,25 @@ static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
return 0;
}
-static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry)
+static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
{
struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
list_del(&fib_entry->nexthop_group_node);
if (!list_empty(&nh_grp->fib_list))
return;
- mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
+ mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
+}
+
+static bool
+mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_fib4_entry *fib4_entry;
+
+ fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
+ common);
+ return !fib4_entry->tos;
}
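+
+/* Routes with a non-zero TOS are not offloaded, since the device's LPM
+ * lookup does not consider TOS; they are instead programmed to trap
+ * matching packets to the CPU.
+ */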
static bool
@@ -1952,29 +2961,133 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
- if (fib_entry->params.tos)
- return false;
+ switch (fib_entry->fib_node->fib->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
+ return false;
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ break;
+ }
switch (fib_entry->type) {
case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
return !!nh_group->adj_index_valid;
case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
return !!nh_group->nh_rif;
+ case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
+ return true;
default:
return false;
}
}
-static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+static struct mlxsw_sp_nexthop *
+mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
+ const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
+{
+ int i;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+ struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+ if (nh->rif && nh->rif->dev == rt->dst.dev &&
+ ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
+ &rt->rt6i_gateway))
+ return nh;
+ }
+
+ return NULL;
+}
+
+static void
+mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+ int i;
+
+ if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
+ fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
+ nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
+ return;
+ }
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+
+ if (nh->offloaded)
+ nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
+ else
+ nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
+ }
+}
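+
+/* The RTNH_F_OFFLOAD flag set here is reported back to user space, so
+ * "ip route show" marks hardware-programmed paths with "offload".
+ */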
+
+static void
+mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
- fib_entry->offloaded = true;
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+ int i;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
+
+ nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
+ }
+}
+
+static void
+mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+ common);
+
+ if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
+ list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
+ list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
+ return;
+ }
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+ struct mlxsw_sp_nexthop *nh;
+
+ nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
+ if (nh && nh->offloaded)
+ mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
+ else
+ mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
+ }
+}
+
+static void
+mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
+ common);
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+ rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
+ }
+}
+
+static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
+{
switch (fib_entry->fib_node->fib->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- fib_info_offload_inc(fib_entry->nh_group->key.fi);
+ mlxsw_sp_fib4_entry_offload_set(fib_entry);
break;
case MLXSW_SP_L3_PROTO_IPV6:
- WARN_ON_ONCE(1);
+ mlxsw_sp_fib6_entry_offload_set(fib_entry);
+ break;
}
}
@@ -1983,13 +3096,12 @@ mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
switch (fib_entry->fib_node->fib->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
- fib_info_offload_dec(fib_entry->nh_group->key.fi);
+ mlxsw_sp_fib4_entry_offload_unset(fib_entry);
break;
case MLXSW_SP_L3_PROTO_IPV6:
- WARN_ON_ONCE(1);
+ mlxsw_sp_fib6_entry_offload_unset(fib_entry);
+ break;
}
-
- fib_entry->offloaded = false;
}
static void
@@ -1998,17 +3110,13 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
{
switch (op) {
case MLXSW_REG_RALUE_OP_WRITE_DELETE:
- if (!fib_entry->offloaded)
- return;
return mlxsw_sp_fib_entry_offload_unset(fib_entry);
case MLXSW_REG_RALUE_OP_WRITE_WRITE:
if (err)
return;
- if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
- !fib_entry->offloaded)
+ if (mlxsw_sp_fib_entry_should_offload(fib_entry))
mlxsw_sp_fib_entry_offload_set(fib_entry);
- else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
- fib_entry->offloaded)
+ else if (!mlxsw_sp_fib_entry_should_offload(fib_entry))
mlxsw_sp_fib_entry_offload_unset(fib_entry);
return;
default:
@@ -2016,13 +3124,37 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
}
}
-static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_reg_ralue_op op)
+static void
+mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
+ const struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
{
- char ralue_pl[MLXSW_REG_RALUE_LEN];
struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
- u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
+ enum mlxsw_reg_ralxx_protocol proto;
+ u32 *p_dip;
+
+ proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
+
+ switch (fib->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ p_dip = (u32 *) fib_entry->fib_node->key.addr;
+ mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
+ fib_entry->fib_node->key.prefix_len,
+ *p_dip);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
+ fib_entry->fib_node->key.prefix_len,
+ fib_entry->fib_node->key.addr);
+ break;
+ }
+}
+
+static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
enum mlxsw_reg_ralue_trap_action trap_action;
u16 trap_id = 0;
u32 adjacency_index = 0;
@@ -2041,24 +3173,19 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
}
- mlxsw_reg_ralue_pack4(ralue_pl,
- (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
- fib->vr->id, fib_entry->fib_node->key.prefix_len,
- *p_dip);
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
adjacency_index, ecmp_size);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
-static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_reg_ralue_op op)
+static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
{
struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
- struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
enum mlxsw_reg_ralue_trap_action trap_action;
char ralue_pl[MLXSW_REG_RALUE_LEN];
- u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
u16 trap_id = 0;
u16 rif_index = 0;
@@ -2070,42 +3197,53 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
}
- mlxsw_reg_ralue_pack4(ralue_pl,
- (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
- fib->vr->id, fib_entry->fib_node->key.prefix_len,
- *p_dip);
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
rif_index);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
-static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_reg_ralue_op op)
+static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
{
- struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
char ralue_pl[MLXSW_REG_RALUE_LEN];
- u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
- mlxsw_reg_ralue_pack4(ralue_pl,
- (enum mlxsw_reg_ralxx_protocol) fib->proto, op,
- fib->vr->id, fib_entry->fib_node->key.prefix_len,
- *p_dip);
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
-static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry,
- enum mlxsw_reg_ralue_op op)
+static int
+mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+
+ if (WARN_ON(!ipip_entry))
+ return -EINVAL;
+
+ ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
+ return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
+ fib_entry->decap.tunnel_index);
+}
+
+static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
{
switch (fib_entry->type) {
case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
- return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
+ return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
- return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
+ return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
- return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
+ return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
+ case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
+ return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
+ fib_entry, op);
}
return -EINVAL;
}
@@ -2114,16 +3252,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry,
enum mlxsw_reg_ralue_op op)
{
- int err = -EINVAL;
+ int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
- switch (fib_entry->fib_node->fib->proto) {
- case MLXSW_SP_L3_PROTO_IPV4:
- err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
- break;
- case MLXSW_SP_L3_PROTO_IPV6:
- return err;
- }
mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
+
return err;
}
@@ -2146,11 +3278,23 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
const struct fib_entry_notifier_info *fen_info,
struct mlxsw_sp_fib_entry *fib_entry)
{
+ union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
+ struct net_device *dev = fen_info->fi->fib_dev;
+ struct mlxsw_sp_ipip_entry *ipip_entry;
struct fib_info *fi = fen_info->fi;
switch (fen_info->type) {
- case RTN_BROADCAST: /* fall through */
case RTN_LOCAL:
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
+ MLXSW_SP_L3_PROTO_IPV4, dip);
+ if (ipip_entry) {
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
+ return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
+ fib_entry,
+ ipip_entry);
+ }
+ /* fall through */
+ case RTN_BROADCAST:
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
return 0;
case RTN_UNREACHABLE: /* fall through */
@@ -2163,82 +3307,87 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
return 0;
case RTN_UNICAST:
- if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
- fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
- else
+ if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+ else
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
return 0;
default:
return -EINVAL;
}
}
-static struct mlxsw_sp_fib_entry *
+static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node,
const struct fib_entry_notifier_info *fen_info)
{
+ struct mlxsw_sp_fib4_entry *fib4_entry;
struct mlxsw_sp_fib_entry *fib_entry;
int err;
- fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
- if (!fib_entry) {
- err = -ENOMEM;
- goto err_fib_entry_alloc;
- }
+ fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
+ if (!fib4_entry)
+ return ERR_PTR(-ENOMEM);
+ fib_entry = &fib4_entry->common;
err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
if (err)
goto err_fib4_entry_type_set;
- err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
+ err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
if (err)
- goto err_nexthop_group_get;
+ goto err_nexthop4_group_get;
- fib_entry->params.prio = fen_info->fi->fib_priority;
- fib_entry->params.tb_id = fen_info->tb_id;
- fib_entry->params.type = fen_info->type;
- fib_entry->params.tos = fen_info->tos;
+ fib4_entry->prio = fen_info->fi->fib_priority;
+ fib4_entry->tb_id = fen_info->tb_id;
+ fib4_entry->type = fen_info->type;
+ fib4_entry->tos = fen_info->tos;
fib_entry->fib_node = fib_node;
- return fib_entry;
+ return fib4_entry;
-err_nexthop_group_get:
+err_nexthop4_group_get:
err_fib4_entry_type_set:
- kfree(fib_entry);
-err_fib_entry_alloc:
+ kfree(fib4_entry);
return ERR_PTR(err);
}
static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry)
+ struct mlxsw_sp_fib4_entry *fib4_entry)
{
- mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
- kfree(fib_entry);
+ mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
+ kfree(fib4_entry);
}
-static struct mlxsw_sp_fib_node *
-mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
- const struct fib_entry_notifier_info *fen_info);
-
-static struct mlxsw_sp_fib_entry *
+static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
const struct fib_entry_notifier_info *fen_info)
{
- struct mlxsw_sp_fib_entry *fib_entry;
+ struct mlxsw_sp_fib4_entry *fib4_entry;
struct mlxsw_sp_fib_node *fib_node;
+ struct mlxsw_sp_fib *fib;
+ struct mlxsw_sp_vr *vr;
- fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
- if (IS_ERR(fib_node))
+ vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
+ if (!vr)
return NULL;
+ fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
- list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
- if (fib_entry->params.tb_id == fen_info->tb_id &&
- fib_entry->params.tos == fen_info->tos &&
- fib_entry->params.type == fen_info->type &&
- fib_entry->nh_group->key.fi == fen_info->fi) {
- return fib_entry;
+ fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
+ sizeof(fen_info->dst),
+ fen_info->dst_len);
+ if (!fib_node)
+ return NULL;
+
+ list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
+ if (fib4_entry->tb_id == fen_info->tb_id &&
+ fib4_entry->tos == fen_info->tos &&
+ fib4_entry->type == fen_info->type &&
+ mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
+ fen_info->fi) {
+ return fib4_entry;
}
}
@@ -2311,6 +3460,67 @@ mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
struct mlxsw_sp_fib_entry, list) == fib_entry;
}
+static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib *fib,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 }};
+ struct mlxsw_sp_lpm_tree *lpm_tree;
+ int err;
+
+ /* Since the tree is shared between all virtual routers we must
+ * make sure it contains all the required prefix lengths. This
+ * can be computed by either adding the new prefix length to the
+ * existing prefix usage of a bound tree, or by aggregating the
+ * prefix lengths across all virtual routers and adding the new
+ * one as well.
+ */
+ if (fib->lpm_tree)
+ mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
+ &fib->lpm_tree->prefix_usage);
+ else
+ mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
+ mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
+
+ lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
+ fib->proto);
+ if (IS_ERR(lpm_tree))
+ return PTR_ERR(lpm_tree);
+
+ if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id)
+ return 0;
+
+ err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
+ if (err)
+ return err;
+
+ return 0;
+}
+
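
The comment in mlxsw_sp_fib_lpm_tree_link() is the crux of the LPM handling here: a tree shared by several virtual routers must cover every prefix length any of them uses, so the required usage is either the bound tree's usage or the aggregate across all routers, plus the new length. Below is a minimal standalone sketch of that aggregation with a byte-array bitmap; the names and layout are illustrative, not the driver's:

/* Model of prefix-usage aggregation; compile with: cc -std=c99 lpm.c */
#include <stdio.h>

#define PREFIX_MAX 129 /* /0 .. /128 covers both IPv4 and IPv6 */

struct prefix_usage {
	unsigned char b[(PREFIX_MAX + 7) / 8];
};

static void prefix_usage_set(struct prefix_usage *pu, unsigned char len)
{
	pu->b[len / 8] |= 1u << (len % 8);
}

static void prefix_usage_union(struct prefix_usage *dst,
			       const struct prefix_usage *src)
{
	for (size_t i = 0; i < sizeof(dst->b); i++)
		dst->b[i] |= src->b[i];
}

int main(void)
{
	struct prefix_usage vr1 = { { 0 } }, vr2 = { { 0 } }, req = { { 0 } };

	prefix_usage_set(&vr1, 24);	/* first VR uses /24 */
	prefix_usage_set(&vr2, 32);	/* second VR uses /32 */

	/* Aggregate across VRs, then add the new prefix length. */
	prefix_usage_union(&req, &vr1);
	prefix_usage_union(&req, &vr2);
	prefix_usage_set(&req, 16);

	printf("/16 required: %d\n", !!(req.b[16 / 8] & (1u << (16 % 8))));
	return 0;
}

A tree satisfying the requested usage can then be fetched or allocated, and rebinding is skipped when the existing tree already matches, exactly as the id comparison above does.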
+static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib *fib)
+{
+ struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
+ struct mlxsw_sp_lpm_tree *lpm_tree;
+
+ /* Aggregate the prefix lengths across all virtual routers so that
+ * the LPM tree only contains prefix lengths that are in use.
+ */
+ mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
+ lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
+ fib->proto);
+ if (IS_ERR(lpm_tree))
+ goto err_tree_get;
+ mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
+
+err_tree_get:
+ if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage))
+ return;
+ mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
+ fib->lpm_tree = NULL;
+}
+
static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
{
unsigned char prefix_len = fib_node->key.prefix_len;
@@ -2333,8 +3543,6 @@ static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node,
struct mlxsw_sp_fib *fib)
{
- struct mlxsw_sp_prefix_usage req_prefix_usage;
- struct mlxsw_sp_lpm_tree *lpm_tree;
int err;
err = mlxsw_sp_fib_node_insert(fib, fib_node);
@@ -2342,33 +3550,15 @@ static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
return err;
fib_node->fib = fib;
- mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &fib->prefix_usage);
- mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
-
- if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) {
- err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib,
- &req_prefix_usage);
- if (err)
- goto err_tree_check;
- } else {
- lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
- fib->proto);
- if (IS_ERR(lpm_tree))
- return PTR_ERR(lpm_tree);
- fib->lpm_tree = lpm_tree;
- err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib);
- if (err)
- goto err_tree_bind;
- }
+ err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node);
+ if (err)
+ goto err_fib_lpm_tree_link;
mlxsw_sp_fib_node_prefix_inc(fib_node);
return 0;
-err_tree_bind:
- fib->lpm_tree = NULL;
- mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
-err_tree_check:
+err_fib_lpm_tree_link:
fib_node->fib = NULL;
mlxsw_sp_fib_node_remove(fib, fib_node);
return err;
@@ -2377,46 +3567,34 @@ err_tree_check:
static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node)
{
- struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
struct mlxsw_sp_fib *fib = fib_node->fib;
mlxsw_sp_fib_node_prefix_dec(fib_node);
-
- if (mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) {
- mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
- fib->lpm_tree = NULL;
- mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
- } else {
- mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, &fib->prefix_usage);
- }
-
+ mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib);
fib_node->fib = NULL;
mlxsw_sp_fib_node_remove(fib, fib_node);
}
static struct mlxsw_sp_fib_node *
-mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
- const struct fib_entry_notifier_info *fen_info)
+mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
+ size_t addr_len, unsigned char prefix_len,
+ enum mlxsw_sp_l3proto proto)
{
struct mlxsw_sp_fib_node *fib_node;
struct mlxsw_sp_fib *fib;
struct mlxsw_sp_vr *vr;
int err;
- vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id);
+ vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id);
if (IS_ERR(vr))
return ERR_CAST(vr);
- fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
+ fib = mlxsw_sp_vr_fib(vr, proto);
- fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
- sizeof(fen_info->dst),
- fen_info->dst_len);
+ fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
if (fib_node)
return fib_node;
- fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst,
- sizeof(fen_info->dst),
- fen_info->dst_len);
+ fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
if (!fib_node) {
err = -ENOMEM;
goto err_fib_node_create;
@@ -2435,8 +3613,8 @@ err_fib_node_create:
return ERR_PTR(err);
}
-static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_node *fib_node)
+static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node)
{
struct mlxsw_sp_vr *vr = fib_node->fib->vr;
@@ -2447,95 +3625,100 @@ static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_vr_put(vr);
}
-static struct mlxsw_sp_fib_entry *
+static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
- const struct mlxsw_sp_fib_entry_params *params)
+ const struct mlxsw_sp_fib4_entry *new4_entry)
{
- struct mlxsw_sp_fib_entry *fib_entry;
+ struct mlxsw_sp_fib4_entry *fib4_entry;
- list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
- if (fib_entry->params.tb_id > params->tb_id)
+ list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
+ if (fib4_entry->tb_id > new4_entry->tb_id)
continue;
- if (fib_entry->params.tb_id != params->tb_id)
+ if (fib4_entry->tb_id != new4_entry->tb_id)
break;
- if (fib_entry->params.tos > params->tos)
+ if (fib4_entry->tos > new4_entry->tos)
continue;
- if (fib_entry->params.prio >= params->prio ||
- fib_entry->params.tos < params->tos)
- return fib_entry;
+ if (fib4_entry->prio >= new4_entry->prio ||
+ fib4_entry->tos < new4_entry->tos)
+ return fib4_entry;
}
return NULL;
}
-static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
- struct mlxsw_sp_fib_entry *new_entry)
+static int
+mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
+ struct mlxsw_sp_fib4_entry *new4_entry)
{
struct mlxsw_sp_fib_node *fib_node;
- if (WARN_ON(!fib_entry))
+ if (WARN_ON(!fib4_entry))
return -EINVAL;
- fib_node = fib_entry->fib_node;
- list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
- if (fib_entry->params.tb_id != new_entry->params.tb_id ||
- fib_entry->params.tos != new_entry->params.tos ||
- fib_entry->params.prio != new_entry->params.prio)
+ fib_node = fib4_entry->common.fib_node;
+ list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
+ common.list) {
+ if (fib4_entry->tb_id != new4_entry->tb_id ||
+ fib4_entry->tos != new4_entry->tos ||
+ fib4_entry->prio != new4_entry->prio)
break;
}
- list_add_tail(&new_entry->list, &fib_entry->list);
+ list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
return 0;
}
static int
-mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
- struct mlxsw_sp_fib_entry *new_entry,
+mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
bool replace, bool append)
{
- struct mlxsw_sp_fib_entry *fib_entry;
+ struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
+ struct mlxsw_sp_fib4_entry *fib4_entry;
- fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);
+ fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
if (append)
- return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
- if (replace && WARN_ON(!fib_entry))
+ return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
+ if (replace && WARN_ON(!fib4_entry))
return -EINVAL;
/* Insert new entry before replaced one, so that we can later
* remove the second.
*/
- if (fib_entry) {
- list_add_tail(&new_entry->list, &fib_entry->list);
+ if (fib4_entry) {
+ list_add_tail(&new4_entry->common.list,
+ &fib4_entry->common.list);
} else {
- struct mlxsw_sp_fib_entry *last;
+ struct mlxsw_sp_fib4_entry *last;
- list_for_each_entry(last, &fib_node->entry_list, list) {
- if (new_entry->params.tb_id > last->params.tb_id)
+ list_for_each_entry(last, &fib_node->entry_list, common.list) {
+ if (new4_entry->tb_id > last->tb_id)
break;
- fib_entry = last;
+ fib4_entry = last;
}
- if (fib_entry)
- list_add(&new_entry->list, &fib_entry->list);
+ if (fib4_entry)
+ list_add(&new4_entry->common.list,
+ &fib4_entry->common.list);
else
- list_add(&new_entry->list, &fib_node->entry_list);
+ list_add(&new4_entry->common.list,
+ &fib_node->entry_list);
}
return 0;
}
static void
-mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
+mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
{
- list_del(&fib_entry->list);
+ list_del(&fib4_entry->common.list);
}
-static int
-mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_fib_node *fib_node,
- struct mlxsw_sp_fib_entry *fib_entry)
+static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
{
+ struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
+
if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
return 0;
@@ -2552,11 +3735,11 @@ mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
-static void
-mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_fib_node *fib_node,
- struct mlxsw_sp_fib_entry *fib_entry)
+static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
{
+ struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
+
if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
return;
@@ -2574,54 +3757,53 @@ mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
}
static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry,
+ struct mlxsw_sp_fib4_entry *fib4_entry,
bool replace, bool append)
{
- struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
int err;
- err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
- append);
+ err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
if (err)
return err;
- err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
+ err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
if (err)
- goto err_fib4_node_entry_add;
+ goto err_fib_node_entry_add;
return 0;
-err_fib4_node_entry_add:
- mlxsw_sp_fib4_node_list_remove(fib_entry);
+err_fib_node_entry_add:
+ mlxsw_sp_fib4_node_list_remove(fib4_entry);
return err;
}
static void
mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry)
+ struct mlxsw_sp_fib4_entry *fib4_entry)
{
- struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
+ mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
+ mlxsw_sp_fib4_node_list_remove(fib4_entry);
- mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
- mlxsw_sp_fib4_node_list_remove(fib_entry);
+ if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
+ mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
}
static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry,
+ struct mlxsw_sp_fib4_entry *fib4_entry,
bool replace)
{
- struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
- struct mlxsw_sp_fib_entry *replaced;
+ struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
+ struct mlxsw_sp_fib4_entry *replaced;
if (!replace)
return;
/* We inserted the new entry before replaced one */
- replaced = list_next_entry(fib_entry, list);
+ replaced = list_next_entry(fib4_entry, common.list);
mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
- mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
static int
@@ -2629,76 +3811,774 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
const struct fib_entry_notifier_info *fen_info,
bool replace, bool append)
{
- struct mlxsw_sp_fib_entry *fib_entry;
+ struct mlxsw_sp_fib4_entry *fib4_entry;
struct mlxsw_sp_fib_node *fib_node;
int err;
if (mlxsw_sp->router->aborted)
return 0;
- fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
+ fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
+ &fen_info->dst, sizeof(fen_info->dst),
+ fen_info->dst_len,
+ MLXSW_SP_L3_PROTO_IPV4);
if (IS_ERR(fib_node)) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
return PTR_ERR(fib_node);
}
- fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
- if (IS_ERR(fib_entry)) {
+ fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
+ if (IS_ERR(fib4_entry)) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
- err = PTR_ERR(fib_entry);
+ err = PTR_ERR(fib4_entry);
goto err_fib4_entry_create;
}
- err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
+ err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
append);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
goto err_fib4_node_entry_link;
}
- mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);
+ mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
return 0;
err_fib4_node_entry_link:
- mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
+ mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
err_fib4_entry_create:
- mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
return err;
}
static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
struct fib_entry_notifier_info *fen_info)
{
+ struct mlxsw_sp_fib4_entry *fib4_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+
+ if (mlxsw_sp->router->aborted)
+ return;
+
+ fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
+ if (WARN_ON(!fib4_entry))
+ return;
+ fib_node = fib4_entry->common.fib_node;
+
+ mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
+ mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+}
+
+static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
+{
+ /* Packets with a link-local destination IP arriving at the router
+ * are trapped to the CPU, so there is no need to program specific
+ * routes for them.
+ */
+ if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
+ return true;
+
+ /* Multicast routes aren't supported, so ignore them. Neighbour
+ * Discovery packets are specifically trapped.
+ */
+ if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
+ return true;
+
+ /* Cloned routes are irrelevant in the forwarding path. */
+ if (rt->rt6i_flags & RTF_CACHE)
+ return true;
+
+ return false;
+}
+
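
The three early returns above encode which IPv6 routes the device should never offload. For reference, the address classes involved are fixed prefixes: link-local is fe80::/10 and multicast is ff00::/8. A self-contained userspace check of those two tests (illustrative helpers, not the kernel's ipv6_addr_type()):

#include <stdio.h>
#include <arpa/inet.h>

static int is_link_local(const struct in6_addr *a)
{
	/* fe80::/10: first byte 0xfe, top two bits of second byte 10. */
	return a->s6_addr[0] == 0xfe && (a->s6_addr[1] & 0xc0) == 0x80;
}

static int is_multicast(const struct in6_addr *a)
{
	return a->s6_addr[0] == 0xff; /* ff00::/8 */
}

int main(void)
{
	struct in6_addr a;

	inet_pton(AF_INET6, "fe80::1", &a);
	printf("link-local: %d\n", is_link_local(&a)); /* 1 */
	inet_pton(AF_INET6, "ff02::1", &a);
	printf("multicast: %d\n", is_multicast(&a));   /* 1 */
	return 0;
}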
+static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
+ if (!mlxsw_sp_rt6)
+ return ERR_PTR(-ENOMEM);
+
+ /* In case of route replace, the replaced route is deleted with
+ * no notification. Take a reference to prevent accessing freed
+ * memory.
+ */
+ mlxsw_sp_rt6->rt = rt;
+ rt6_hold(rt);
+
+ return mlxsw_sp_rt6;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+{
+ rt6_release(rt);
+}
+#else
+static void mlxsw_sp_rt6_release(struct rt6_info *rt)
+{
+}
+#endif
+
+static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
+{
+ mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
+ kfree(mlxsw_sp_rt6);
+}
+
+static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
+{
+ /* RTF_CACHE routes are ignored */
+ return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
+}
+
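
The masked compare above accepts a route as multipath-capable only when RTF_GATEWAY is set and RTF_ADDRCONF is clear, in a single expression. A tiny demonstration of the idiom (flag values copied from the UAPI headers, but treat them as illustrative here):

#include <stdio.h>

#define RTF_GATEWAY	0x0002
#define RTF_ADDRCONF	0x040000

static int can_mp(unsigned int flags)
{
	/* True only if GATEWAY is set and ADDRCONF is not. */
	return (flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
}

int main(void)
{
	printf("%d %d %d\n",
	       can_mp(RTF_GATEWAY),			/* 1 */
	       can_mp(RTF_GATEWAY | RTF_ADDRCONF),	/* 0 */
	       can_mp(0));				/* 0 */
	return 0;
}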
+static struct rt6_info *
+mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
+ list)->rt;
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
+ const struct rt6_info *nrt, bool replace)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+
+ if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
+ return NULL;
+
+ list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+ struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+ /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
+ * virtual router.
+ */
+ if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
+ continue;
+ if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
+ break;
+ if (rt->rt6i_metric < nrt->rt6i_metric)
+ continue;
+ if (rt->rt6i_metric == nrt->rt6i_metric &&
+ mlxsw_sp_fib6_rt_can_mp(rt))
+ return fib6_entry;
+ if (rt->rt6i_metric > nrt->rt6i_metric)
+ break;
+ }
+
+ return NULL;
+}
+
+static struct mlxsw_sp_rt6 *
+mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
+ const struct rt6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
+ if (mlxsw_sp_rt6->rt == rt)
+ return mlxsw_sp_rt6;
+ }
+
+ return NULL;
+}
+
+static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
+ const struct rt6_info *rt,
+ enum mlxsw_sp_ipip_type *ret)
+{
+ return rt->dst.dev &&
+ mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
+}
+
+static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ struct mlxsw_sp_nexthop *nh,
+ const struct rt6_info *rt)
+{
+ struct mlxsw_sp_router *router = mlxsw_sp->router;
+ struct net_device *dev = rt->dst.dev;
+ enum mlxsw_sp_ipip_type ipipt;
+ struct mlxsw_sp_rif *rif;
+ int err;
+
+ if (mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, &ipipt) &&
+ router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
+ MLXSW_SP_L3_PROTO_IPV6)) {
+ nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
+ return mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev);
+ }
+
+ nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!rif)
+ return 0;
+ mlxsw_sp_nexthop_rif_init(nh, rif);
+
+ err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
+ if (err)
+ goto err_nexthop_neigh_init;
+
+ return 0;
+
+err_nexthop_neigh_init:
+ mlxsw_sp_nexthop_rif_fini(nh);
+ return err;
+}
+
+static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
+}
+
+static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp,
+ struct mlxsw_sp_nexthop *nh,
+ const struct rt6_info *rt)
+{
+ struct net_device *dev = rt->dst.dev;
+
+ nh->nh_grp = nh_grp;
+ memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
+
+ if (!dev)
+ return 0;
+ nh->ifindex = dev->ifindex;
+
+ return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
+}
+
+static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
+}
+
+static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
+ const struct rt6_info *rt)
+{
+ return rt->rt6i_flags & RTF_GATEWAY ||
+ mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
+}
+
+static struct mlxsw_sp_nexthop_group *
+mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ struct mlxsw_sp_nexthop *nh;
+ size_t alloc_size;
+ int i = 0;
+ int err;
+
+ alloc_size = sizeof(*nh_grp) +
+ fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
+ nh_grp = kzalloc(alloc_size, GFP_KERNEL);
+ if (!nh_grp)
+ return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&nh_grp->fib_list);
+#if IS_ENABLED(CONFIG_IPV6)
+ nh_grp->neigh_tbl = &nd_tbl;
+#endif
+ mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
+ struct mlxsw_sp_rt6, list);
+ nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
+ nh_grp->count = fib6_entry->nrt6;
+ for (i = 0; i < nh_grp->count; i++) {
+ struct rt6_info *rt = mlxsw_sp_rt6->rt;
+
+ nh = &nh_grp->nexthops[i];
+ err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
+ if (err)
+ goto err_nexthop6_init;
+ mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
+ }
+
+ err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
+ if (err)
+ goto err_nexthop_group_insert;
+
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ return nh_grp;
+
+err_nexthop_group_insert:
+err_nexthop6_init:
+ for (i--; i >= 0; i--) {
+ nh = &nh_grp->nexthops[i];
+ mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
+ }
+ kfree(nh_grp);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop_group *nh_grp)
+{
+ struct mlxsw_sp_nexthop *nh;
+ int i = nh_grp->count;
+
+ mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
+ for (i--; i >= 0; i--) {
+ nh = &nh_grp->nexthops[i];
+ mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
+ }
+ mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
+ WARN_ON(nh_grp->adj_index_valid);
+ kfree(nh_grp);
+}
+
+static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp;
+
+ nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
+ if (!nh_grp) {
+ nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
+ if (IS_ERR(nh_grp))
+ return PTR_ERR(nh_grp);
+ }
+
+ list_add_tail(&fib6_entry->common.nexthop_group_node,
+ &nh_grp->fib_list);
+ fib6_entry->common.nh_group = nh_grp;
+
+ return 0;
+}
+
+static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
+
+ list_del(&fib_entry->nexthop_group_node);
+ if (!list_empty(&nh_grp->fib_list))
+ return;
+ mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
+}
+
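
mlxsw_sp_nexthop6_group_get() and _put() above implement sharing without an explicit refcount: the group's fib_list membership is the reference, and the group dies when the list empties. A minimal model of that lookup-or-create / destroy-on-last-user pattern (all names invented for the sketch):

#include <stdio.h>
#include <stdlib.h>

struct group {
	int users;	/* stands in for fib_list membership */
};

static struct group *group_get(struct group **cache)
{
	if (!*cache) {		/* lookup miss: create */
		*cache = calloc(1, sizeof(**cache));
		if (!*cache)
			return NULL;
	}
	(*cache)->users++;
	return *cache;
}

static void group_put(struct group **cache)
{
	if (--(*cache)->users)
		return;
	free(*cache);		/* list empty: destroy, as above */
	*cache = NULL;
}

int main(void)
{
	struct group *cache = NULL;
	struct group *a = group_get(&cache);
	struct group *b = group_get(&cache);

	printf("shared: %d\n", a == b);	/* 1: both users share one group */
	group_put(&cache);
	group_put(&cache);		/* group freed here */
	return 0;
}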
+static int
+mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
+ int err;
+
+ fib6_entry->common.nh_group = NULL;
+ list_del(&fib6_entry->common.nexthop_group_node);
+
+ err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
+ if (err)
+ goto err_nexthop6_group_get;
+
+ /* If this entry is offloaded, then the adjacency index currently
+ * associated with it in the device's table is that of the old
+ * group. Start using the new one instead.
+ */
+ err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
+ if (err)
+ goto err_fib_node_entry_add;
+
+ if (list_empty(&old_nh_grp->fib_list))
+ mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
+
+ return 0;
+
+err_fib_node_entry_add:
+ mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
+err_nexthop6_group_get:
+ list_add_tail(&fib6_entry->common.nexthop_group_node,
+ &old_nh_grp->fib_list);
+ fib6_entry->common.nh_group = old_nh_grp;
+ return err;
+}
+
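
The update above is deliberately make-before-break: the entry joins the new group and is reprogrammed in hardware before the old group may be destroyed, and on failure the goto ladder restores the old membership. A compilable sketch of that ordering with stub helpers (none of these names exist in the driver):

#include <stdio.h>

struct group { int users; };
struct entry { struct group *grp; };

static struct group old_grp = { 1 }, new_grp;
static int hw_ok = 1;	/* set to 0 to exercise the rollback path */

static void detach(struct entry *e, struct group *g) { (void)e; g->users--; }
static void attach(struct entry *e, struct group *g) { g->users++; e->grp = g; }
static int group_get(struct entry *e) { attach(e, &new_grp); return 0; }
static void group_put(struct entry *e) { detach(e, e->grp); e->grp = NULL; }
static int hw_reprogram(struct entry *e) { (void)e; return hw_ok ? 0 : -1; }

static int group_update(struct entry *e)
{
	struct group *old = e->grp;
	int err;

	e->grp = NULL;
	detach(e, old);

	err = group_get(e);	/* make: join the new group first */
	if (err)
		goto err_get;

	err = hw_reprogram(e);	/* switch the adjacency index over */
	if (err)
		goto err_hw;

	if (!old->users)	/* break: only after the make succeeded */
		printf("old group destroyed\n");
	return 0;

err_hw:
	group_put(e);
err_get:
	attach(e, old);		/* roll back to the old group */
	return err;
}

int main(void)
{
	struct entry e = { &old_grp };

	return group_update(&e) ? 1 : 0;
}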
+static int
+mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ struct rt6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ int err;
+
+ mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
+ if (IS_ERR(mlxsw_sp_rt6))
+ return PTR_ERR(mlxsw_sp_rt6);
+
+ list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+ fib6_entry->nrt6++;
+
+ err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
+ if (err)
+ goto err_nexthop6_group_update;
+
+ return 0;
+
+err_nexthop6_group_update:
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ return err;
+}
+
+static void
+mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ struct rt6_info *rt)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+
+ mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
+ if (WARN_ON(!mlxsw_sp_rt6))
+ return;
+
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+}
+
+static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ const struct rt6_info *rt)
+{
+ /* Packets hitting RTF_REJECT routes need to be discarded by the
+ * stack. We can rely on their destination device not having a
+ * RIF (it's the loopback device) and can thus use action type
+ * local, which will cause them to be trapped with a lower
+ * priority than packets that need to be locally received.
+ */
+ if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+ else if (rt->rt6i_flags & RTF_REJECT)
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+ else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+ else
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+}
+
+static void
+mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
+
+ list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
+ list) {
+ fib6_entry->nrt6--;
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+ }
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node,
+ struct rt6_info *rt)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_entry *fib_entry;
+ struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
+ int err;
+
+ fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
+ if (!fib6_entry)
+ return ERR_PTR(-ENOMEM);
+ fib_entry = &fib6_entry->common;
+
+ mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
+ if (IS_ERR(mlxsw_sp_rt6)) {
+ err = PTR_ERR(mlxsw_sp_rt6);
+ goto err_rt6_create;
+ }
+
+ mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
+
+ INIT_LIST_HEAD(&fib6_entry->rt6_list);
+ list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+ fib6_entry->nrt6 = 1;
+ err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
+ if (err)
+ goto err_nexthop6_group_get;
+
+ fib_entry->fib_node = fib_node;
+
+ return fib6_entry;
+
+err_nexthop6_group_get:
+ list_del(&mlxsw_sp_rt6->list);
+ mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
+err_rt6_create:
+ kfree(fib6_entry);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
+ mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
+ WARN_ON(fib6_entry->nrt6);
+ kfree(fib6_entry);
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
+ const struct rt6_info *nrt, bool replace)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
+
+ list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+ struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+ if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
+ continue;
+ if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
+ break;
+ if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
+ if (mlxsw_sp_fib6_rt_can_mp(rt) ==
+ mlxsw_sp_fib6_rt_can_mp(nrt))
+ return fib6_entry;
+ if (mlxsw_sp_fib6_rt_can_mp(nrt))
+ fallback = fallback ?: fib6_entry;
+ }
+ if (rt->rt6i_metric > nrt->rt6i_metric)
+ return fallback ?: fib6_entry;
+ }
+
+ return fallback;
+}
+
+static int
+mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
+ bool replace)
+{
+ struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
+ struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+
+ fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
+
+ if (replace && WARN_ON(!fib6_entry))
+ return -EINVAL;
+
+ if (fib6_entry) {
+ list_add_tail(&new6_entry->common.list,
+ &fib6_entry->common.list);
+ } else {
+ struct mlxsw_sp_fib6_entry *last;
+
+ list_for_each_entry(last, &fib_node->entry_list, common.list) {
+ struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);
+
+ if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
+ break;
+ fib6_entry = last;
+ }
+
+ if (fib6_entry)
+ list_add(&new6_entry->common.list,
+ &fib6_entry->common.list);
+ else
+ list_add(&new6_entry->common.list,
+ &fib_node->entry_list);
+ }
+
+ return 0;
+}
+
+static void
+mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ list_del(&fib6_entry->common.list);
+}
+
+static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ bool replace)
+{
+ int err;
+
+ err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
+ if (err)
+ goto err_fib_node_entry_add;
+
+ return 0;
+
+err_fib_node_entry_add:
+ mlxsw_sp_fib6_node_list_remove(fib6_entry);
+ return err;
+}
+
+static void
+mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry)
+{
+ mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
+ mlxsw_sp_fib6_node_list_remove(fib6_entry);
+}
+
+static struct mlxsw_sp_fib6_entry *
+mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
+ const struct rt6_info *rt)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ struct mlxsw_sp_fib *fib;
+ struct mlxsw_sp_vr *vr;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
+ if (!vr)
+ return NULL;
+ fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
+
+ fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
+ sizeof(rt->rt6i_dst.addr),
+ rt->rt6i_dst.plen);
+ if (!fib_node)
+ return NULL;
+
+ list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
+ struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+
+ if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
+ rt->rt6i_metric == iter_rt->rt6i_metric &&
+ mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
+ return fib6_entry;
+ }
+
+ return NULL;
+}
+
+static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib6_entry *fib6_entry,
+ bool replace)
+{
+ struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
+ struct mlxsw_sp_fib6_entry *replaced;
+
+ if (!replace)
+ return;
+
+ replaced = list_next_entry(fib6_entry, common.list);
+
+ mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+}
+
+static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
+ struct rt6_info *rt, bool replace)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ int err;
+
+ if (mlxsw_sp->router->aborted)
+ return 0;
+
+ if (rt->rt6i_src.plen)
+ return -EINVAL;
+
+ if (mlxsw_sp_fib6_rt_should_ignore(rt))
+ return 0;
+
+ fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
+ &rt->rt6i_dst.addr,
+ sizeof(rt->rt6i_dst.addr),
+ rt->rt6i_dst.plen,
+ MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(fib_node))
+ return PTR_ERR(fib_node);
+
+ /* Before creating a new entry, try to append the route to an
+ * existing multipath entry.
+ */
+ fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
+ if (fib6_entry) {
+ err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
+ if (err)
+ goto err_fib6_entry_nexthop_add;
+ return 0;
+ }
+
+ fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
+ if (IS_ERR(fib6_entry)) {
+ err = PTR_ERR(fib6_entry);
+ goto err_fib6_entry_create;
+ }
+
+ err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
+ if (err)
+ goto err_fib6_node_entry_link;
+
+ mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
+
+ return 0;
+
+err_fib6_node_entry_link:
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+err_fib6_entry_create:
+err_fib6_entry_nexthop_add:
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ return err;
+}
+
+static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
+ struct rt6_info *rt)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node;
if (mlxsw_sp->router->aborted)
return;
- fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
- if (WARN_ON(!fib_entry))
+ if (mlxsw_sp_fib6_rt_should_ignore(rt))
return;
- fib_node = fib_entry->fib_node;
- mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
- mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
- mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
+ fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
+ if (WARN_ON(!fib6_entry))
+ return;
+
+ /* If the route is part of a multipath entry, but not the last
+ * one removed, then only shrink its nexthop group.
+ */
+ if (!list_is_singular(&fib6_entry->rt6_list)) {
+ mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
+ return;
+ }
+
+ fib_node = fib6_entry->common.fib_node;
+
+ mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
-static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
+static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_reg_ralxx_protocol proto,
+ u8 tree_id)
{
char ralta_pl[MLXSW_REG_RALTA_LEN];
char ralst_pl[MLXSW_REG_RALST_LEN];
int i, err;
- mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
- MLXSW_SP_LPM_TREE_MIN);
+ mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
if (err)
return err;
- mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
+ mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
if (err)
return err;
@@ -2708,20 +4588,14 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
char raltb_pl[MLXSW_REG_RALTB_LEN];
char ralue_pl[MLXSW_REG_RALUE_LEN];
- if (!mlxsw_sp_vr_is_used(vr))
- continue;
-
- mlxsw_reg_raltb_pack(raltb_pl, vr->id,
- MLXSW_REG_RALXX_PROTOCOL_IPV4,
- MLXSW_SP_LPM_TREE_MIN);
+ mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
raltb_pl);
if (err)
return err;
- mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
- MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0,
- 0);
+ mlxsw_reg_ralue_pack(ralue_pl, proto,
+ MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
ralue_pl);
@@ -2732,17 +4606,33 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
return 0;
}
+static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
+{
+ enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
+ int err;
+
+ err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
+ MLXSW_SP_LPM_TREE_MIN);
+ if (err)
+ return err;
+
+ proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
+ return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
+ MLXSW_SP_LPM_TREE_MIN + 1);
+}
+
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node)
{
- struct mlxsw_sp_fib_entry *fib_entry, *tmp;
+ struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
- list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
- bool do_break = &tmp->list == &fib_node->entry_list;
+ list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
+ common.list) {
+ bool do_break = &tmp->common.list == &fib_node->entry_list;
- mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
- mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
- mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
+ mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
+ mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
/* Break when entry list is empty and node was freed.
* Otherwise, we'll access freed memory in the next
* iteration.
@@ -2752,6 +4642,23 @@ static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
}
}
+static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
+
+ list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
+ common.list) {
+ bool do_break = &tmp->common.list == &fib_node->entry_list;
+
+ mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ if (do_break)
+ break;
+ }
+}
+
static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node)
{
@@ -2760,7 +4667,7 @@ static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
break;
case MLXSW_SP_L3_PROTO_IPV6:
- WARN_ON_ONCE(1);
+ mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
break;
}
}
@@ -2791,10 +4698,17 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
if (!mlxsw_sp_vr_is_used(vr))
continue;
mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
+
+ /* If the virtual router was only used for IPv4, then it is
+ * no longer in use.
+ */
+ if (!mlxsw_sp_vr_is_used(vr))
+ continue;
+ mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
}
}
-static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
+static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
{
int err;
@@ -2811,6 +4725,7 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
struct mlxsw_sp_fib_event_work {
struct work_struct work;
union {
+ struct fib6_entry_notifier_info fen6_info;
struct fib_entry_notifier_info fen_info;
struct fib_rule_notifier_info fr_info;
struct fib_nh_notifier_info fnh_info;
@@ -2819,7 +4734,7 @@ struct mlxsw_sp_fib_event_work {
unsigned long event;
};
-static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
+static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
struct mlxsw_sp_fib_event_work *fib_work =
container_of(work, struct mlxsw_sp_fib_event_work, work);
@@ -2839,7 +4754,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
replace, append);
if (err)
- mlxsw_sp_router_fib4_abort(mlxsw_sp);
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
fib_info_put(fib_work->fen_info.fi);
break;
case FIB_EVENT_ENTRY_DEL:
@@ -2850,13 +4765,13 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
case FIB_EVENT_RULE_DEL:
rule = fib_work->fr_info.rule;
if (!fib4_rule_default(rule) && !rule->l3mdev)
- mlxsw_sp_router_fib4_abort(mlxsw_sp);
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
fib_rule_put(rule);
break;
case FIB_EVENT_NH_ADD: /* fall through */
case FIB_EVENT_NH_DEL:
- mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
- fib_work->fnh_info.fib_nh);
+ mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
+ fib_work->fnh_info.fib_nh);
fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
break;
}
@@ -2864,6 +4779,87 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
kfree(fib_work);
}
+static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_fib_event_work *fib_work =
+ container_of(work, struct mlxsw_sp_fib_event_work, work);
+ struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
+ struct fib_rule *rule;
+ bool replace;
+ int err;
+
+ rtnl_lock();
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_ADD:
+ replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
+ err = mlxsw_sp_router_fib6_add(mlxsw_sp,
+ fib_work->fen6_info.rt, replace);
+ if (err)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+ break;
+ case FIB_EVENT_ENTRY_DEL:
+ mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
+ mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
+ break;
+ case FIB_EVENT_RULE_ADD: /* fall through */
+ case FIB_EVENT_RULE_DEL:
+ rule = fib_work->fr_info.rule;
+ if (!fib6_rule_default(rule) && !rule->l3mdev)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ fib_rule_put(rule);
+ break;
+ }
+ rtnl_unlock();
+ kfree(fib_work);
+}
+
+static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
+ struct fib_notifier_info *info)
+{
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_APPEND: /* fall through */
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_DEL:
+ memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info));
+ /* Take a reference on fib_info to prevent it from being
+ * freed while work is queued. Release it afterwards.
+ */
+ fib_info_hold(fib_work->fen_info.fi);
+ break;
+ case FIB_EVENT_RULE_ADD: /* fall through */
+ case FIB_EVENT_RULE_DEL:
+ memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
+ fib_rule_get(fib_work->fr_info.rule);
+ break;
+ case FIB_EVENT_NH_ADD: /* fall through */
+ case FIB_EVENT_NH_DEL:
+ memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info));
+ fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+ break;
+ }
+}
+
+static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
+ struct fib_notifier_info *info)
+{
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_DEL:
+ memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info));
+ rt6_hold(fib_work->fen6_info.rt);
+ break;
+ case FIB_EVENT_RULE_ADD: /* fall through */
+ case FIB_EVENT_RULE_DEL:
+ memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
+ fib_rule_get(fib_work->fr_info.rule);
+ break;
+ }
+}
+
/* Called with rcu_read_lock() */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
unsigned long event, void *ptr)
@@ -2879,31 +4875,18 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
if (WARN_ON(!fib_work))
return NOTIFY_BAD;
- INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
router = container_of(nb, struct mlxsw_sp_router, fib_nb);
fib_work->mlxsw_sp = router->mlxsw_sp;
fib_work->event = event;
- switch (event) {
- case FIB_EVENT_ENTRY_REPLACE: /* fall through */
- case FIB_EVENT_ENTRY_APPEND: /* fall through */
- case FIB_EVENT_ENTRY_ADD: /* fall through */
- case FIB_EVENT_ENTRY_DEL:
- memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
- /* Take referece on fib_info to prevent it from being
- * freed while work is queued. Release it afterwards.
- */
- fib_info_hold(fib_work->fen_info.fi);
+ switch (info->family) {
+ case AF_INET:
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
+ mlxsw_sp_router_fib4_event(fib_work, info);
break;
- case FIB_EVENT_RULE_ADD: /* fall through */
- case FIB_EVENT_RULE_DEL:
- memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info));
- fib_rule_get(fib_work->fr_info.rule);
- break;
- case FIB_EVENT_NH_ADD: /* fall through */
- case FIB_EVENT_NH_DEL:
- memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
- fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+ case AF_INET6:
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
+ mlxsw_sp_router_fib6_event(fib_work, info);
break;
}
@@ -2948,17 +4931,28 @@ static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
}
-static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif,
- const struct in_device *in_dev,
- unsigned long event)
+static bool
+mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
+ unsigned long event)
{
+ struct inet6_dev *inet6_dev;
+ bool addr_list_empty = true;
+ struct in_device *idev;
+
switch (event) {
case NETDEV_UP:
- if (!rif)
- return true;
- return false;
+ return rif == NULL;
case NETDEV_DOWN:
- if (rif && !in_dev->ifa_list &&
+ idev = __in_dev_get_rtnl(dev);
+ if (idev && idev->ifa_list)
+ addr_list_empty = false;
+
+ inet6_dev = __in6_dev_get(dev);
+ if (addr_list_empty && inet6_dev &&
+ !list_empty(&inet6_dev->addr_list))
+ addr_list_empty = false;
+
+ if (rif && addr_list_empty &&
!netif_is_l3_slave(rif->dev))
return true;
/* It is possible we already removed the RIF ourselves
@@ -2977,7 +4971,10 @@ mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
{
enum mlxsw_sp_fid_type type;
- /* RIF type is derived from the type of the underlying FID */
+ if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
+ return MLXSW_SP_RIF_TYPE_IPIP_LB;
+
+ /* Otherwise RIF type is derived from the type of the underlying FID. */
if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
type = MLXSW_SP_FID_TYPE_8021Q;
else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
@@ -3036,6 +5033,16 @@ u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
return rif->rif_index;
}
+u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
+{
+ return lb_rif->common.rif_index;
+}
+
+u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
+{
+ return lb_rif->ul_vr_id;
+}
+
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
{
return rif->dev->ifindex;
@@ -3047,9 +5054,9 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
{
u32 tb_id = l3mdev_fib_table(params->dev);
const struct mlxsw_sp_rif_ops *ops;
+ struct mlxsw_sp_fid *fid = NULL;
enum mlxsw_sp_rif_type type;
struct mlxsw_sp_rif *rif;
- struct mlxsw_sp_fid *fid;
struct mlxsw_sp_vr *vr;
u16 rif_index;
int err;
@@ -3073,12 +5080,14 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
rif->mlxsw_sp = mlxsw_sp;
rif->ops = ops;
- fid = ops->fid_get(rif);
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
- goto err_fid_get;
+ if (ops->fid_get) {
+ fid = ops->fid_get(rif);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ goto err_fid_get;
+ }
+ rif->fid = fid;
}
- rif->fid = fid;
if (ops->setup)
ops->setup(rif, params);
@@ -3087,22 +5096,15 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_configure;
- err = mlxsw_sp_rif_fdb_op(mlxsw_sp, params->dev->dev_addr,
- mlxsw_sp_fid_index(fid), true);
- if (err)
- goto err_rif_fdb_op;
-
mlxsw_sp_rif_counters_alloc(rif);
- mlxsw_sp_fid_rif_set(fid, rif);
mlxsw_sp->router->rifs[rif_index] = rif;
vr->rif_count++;
return rif;
-err_rif_fdb_op:
- ops->deconfigure(rif);
err_configure:
- mlxsw_sp_fid_put(fid);
+ if (fid)
+ mlxsw_sp_fid_put(fid);
err_fid_get:
kfree(rif);
err_rif_alloc:
@@ -3123,12 +5125,11 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
vr->rif_count--;
mlxsw_sp->router->rifs[rif->rif_index] = NULL;
- mlxsw_sp_fid_rif_set(fid, NULL);
mlxsw_sp_rif_counters_free(rif);
- mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->dev->dev_addr,
- mlxsw_sp_fid_index(fid), false);
ops->deconfigure(rif);
- mlxsw_sp_fid_put(fid);
+ if (fid)
+ /* Loopback RIFs are not associated with a FID. */
+ mlxsw_sp_fid_put(fid);
kfree(rif);
mlxsw_sp_vr_put(vr);
}
@@ -3356,7 +5357,7 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
goto out;
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
- if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event))
+ if (!mlxsw_sp_rif_should_config(rif, dev, event))
goto out;
err = __mlxsw_sp_inetaddr_event(dev, event);
@@ -3364,6 +5365,61 @@ out:
return notifier_from_errno(err);
}
+struct mlxsw_sp_inet6addr_event_work {
+ struct work_struct work;
+ struct net_device *dev;
+ unsigned long event;
+};
+
+static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
+ container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
+ struct net_device *dev = inet6addr_work->dev;
+ unsigned long event = inet6addr_work->event;
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_rif *rif;
+
+ rtnl_lock();
+ mlxsw_sp = mlxsw_sp_lower_get(dev);
+ if (!mlxsw_sp)
+ goto out;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!mlxsw_sp_rif_should_config(rif, dev, event))
+ goto out;
+
+ __mlxsw_sp_inetaddr_event(dev, event);
+out:
+ rtnl_unlock();
+ dev_put(dev);
+ kfree(inet6addr_work);
+}
+
+/* Called with rcu_read_lock() */
+int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
+ struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
+ struct net_device *dev = if6->idev->dev;
+
+ if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
+ return NOTIFY_DONE;
+
+ inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
+ if (!inet6addr_work)
+ return NOTIFY_BAD;
+
+ INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
+ inet6addr_work->dev = dev;
+ inet6addr_work->event = event;
+ dev_hold(dev);
+ mlxsw_core_schedule_work(&inet6addr_work->work);
+
+ return NOTIFY_DONE;
+}
+
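
mlxsw_sp_inet6addr_event() runs under rcu_read_lock(), so it only captures what the handler needs, pins the netdev, and defers the sleeping work (rtnl_lock(), device access) to process context. A userspace model of that capture-and-defer shape; the direct w->fn(w) call stands in for the workqueue, and every name is illustrative:

#include <stdio.h>
#include <stdlib.h>

struct dev { int refcnt; const char *name; };

struct addr_work {
	void (*fn)(struct addr_work *);
	struct dev *dev;
	unsigned long event;
};

static void dev_hold(struct dev *d) { d->refcnt++; }
static void dev_put(struct dev *d)  { d->refcnt--; }

static void addr_work_fn(struct addr_work *w)
{
	/* Process context: may sleep, take locks, touch hardware. */
	printf("event %lu on %s\n", w->event, w->dev->name);
	dev_put(w->dev);
	free(w);
}

static int notifier(struct dev *d, unsigned long event)
{
	/* Atomic context: no sleeping allocation, no blocking work. */
	struct addr_work *w = calloc(1, sizeof(*w));

	if (!w)
		return -1;
	w->fn = addr_work_fn;
	w->dev = d;
	w->event = event;
	dev_hold(d);	/* keep the device alive until the work runs */
	w->fn(w);	/* stands in for scheduling on a workqueue */
	return 0;
}

int main(void)
{
	struct dev d = { 1, "sw1p1" };

	return notifier(&d, 1 /* NETDEV_UP-like */) ? 1 : 0;
}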
static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
const char *mac, int mtu)
{
@@ -3501,8 +5557,8 @@ static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
rif_subport = mlxsw_sp_rif_subport_rif(rif);
mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
- rif->rif_index, rif->vr_id, rif->dev->mtu,
- rif->dev->dev_addr);
+ rif->rif_index, rif->vr_id, rif->dev->mtu);
+ mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
rif_subport->lag ? rif_subport->lag_id :
rif_subport->system_port,
@@ -3513,11 +5569,32 @@ static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
{
- return mlxsw_sp_rif_subport_op(rif, true);
+ int err;
+
+ err = mlxsw_sp_rif_subport_op(rif, true);
+ if (err)
+ return err;
+
+ err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(rif->fid), true);
+ if (err)
+ goto err_rif_fdb_op;
+
+ mlxsw_sp_fid_rif_set(rif->fid, rif);
+ return 0;
+
+err_rif_fdb_op:
+ mlxsw_sp_rif_subport_op(rif, false);
+ return err;
}
static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
{
+ struct mlxsw_sp_fid *fid = rif->fid;
+
+ mlxsw_sp_fid_rif_set(fid, NULL);
+ mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(fid), false);
mlxsw_sp_rif_subport_op(rif, false);
}
@@ -3544,7 +5621,8 @@ static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
char ritr_pl[MLXSW_REG_RITR_LEN];
mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
- rif->dev->mtu, rif->dev->dev_addr);
+ rif->dev->mtu);
+ mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
@@ -3565,25 +5643,48 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
if (err)
return err;
+ err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+ mlxsw_sp_router_port(mlxsw_sp), true);
+ if (err)
+ goto err_fid_mc_flood_set;
+
err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
mlxsw_sp_router_port(mlxsw_sp), true);
if (err)
goto err_fid_bc_flood_set;
+ err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(rif->fid), true);
+ if (err)
+ goto err_rif_fdb_op;
+
+ mlxsw_sp_fid_rif_set(rif->fid, rif);
return 0;
+err_rif_fdb_op:
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
+err_fid_mc_flood_set:
mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
return err;
}
static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
{
- struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ struct mlxsw_sp_fid *fid = rif->fid;
+ mlxsw_sp_fid_rif_set(fid, NULL);
+ mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(fid), false);
mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
mlxsw_sp_router_port(mlxsw_sp), false);
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
}
@@ -3614,25 +5715,48 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
if (err)
return err;
+ err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+ mlxsw_sp_router_port(mlxsw_sp), true);
+ if (err)
+ goto err_fid_mc_flood_set;
+
err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
mlxsw_sp_router_port(mlxsw_sp), true);
if (err)
goto err_fid_bc_flood_set;
+ err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(rif->fid), true);
+ if (err)
+ goto err_rif_fdb_op;
+
+ mlxsw_sp_fid_rif_set(rif->fid, rif);
return 0;
+err_rif_fdb_op:
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
+err_fid_mc_flood_set:
mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
return err;
}
static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
{
- struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
u16 fid_index = mlxsw_sp_fid_index(rif->fid);
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ struct mlxsw_sp_fid *fid = rif->fid;
+ mlxsw_sp_fid_rif_set(fid, NULL);
+ mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
+ mlxsw_sp_fid_index(fid), false);
mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
mlxsw_sp_router_port(mlxsw_sp), false);
+ mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
+ mlxsw_sp_router_port(mlxsw_sp), false);
mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
}
@@ -3650,10 +5774,104 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
.fid_get = mlxsw_sp_rif_fid_fid_get,
};
+static struct mlxsw_sp_rif_ipip_lb *
+mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
+{
+ return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
+}
+
+static void
+mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
+ const struct mlxsw_sp_rif_params *params)
+{
+ struct mlxsw_sp_rif_params_ipip_lb *params_lb;
+ struct mlxsw_sp_rif_ipip_lb *rif_lb;
+
+ params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
+ common);
+ rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
+ rif_lb->lb_config = params_lb->lb_config;
+}
+
+static int
+mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
+ struct mlxsw_sp_vr *ul_vr, bool enable)
+{
+ struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
+ struct mlxsw_sp_rif *rif = &lb_rif->common;
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ char ritr_pl[MLXSW_REG_RITR_LEN];
+ u32 saddr4;
+
+ switch (lb_cf.ul_protocol) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
+ mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
+ rif->rif_index, rif->vr_id, rif->dev->mtu);
+ mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
+ MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
+ ul_vr->id, saddr4, lb_cf.okey);
+ break;
+
+ case MLXSW_SP_L3_PROTO_IPV6:
+ return -EAFNOSUPPORT;
+ }
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
+}
+
+static int
+mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
+ u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ struct mlxsw_sp_vr *ul_vr;
+ int err;
+
+ ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id);
+ if (IS_ERR(ul_vr))
+ return PTR_ERR(ul_vr);
+
+ err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
+ if (err)
+ goto err_loopback_op;
+
+ lb_rif->ul_vr_id = ul_vr->id;
+ ++ul_vr->rif_count;
+ return 0;
+
+err_loopback_op:
+ mlxsw_sp_vr_put(ul_vr);
+ return err;
+}
+
+static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
+ struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
+ struct mlxsw_sp_vr *ul_vr;
+
+ ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
+ mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
+
+ --ul_vr->rif_count;
+ mlxsw_sp_vr_put(ul_vr);
+}
+
+static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
+ .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
+ .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
+ .setup = mlxsw_sp_rif_ipip_lb_setup,
+ .configure = mlxsw_sp_rif_ipip_lb_configure,
+ .deconfigure = mlxsw_sp_rif_ipip_lb_deconfigure,
+};
+
static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
[MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
[MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_ops,
[MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
+ [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp_rif_ipip_lb_ops,
};
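
Registering the loopback RIF type is just one more slot in the ops array above; all type-specific behaviour (optional setup, configure, deconfigure, optional fid_get) is reached through the table. A stripped-down model of the dispatch (enum values and ops invented for the sketch):

#include <stdio.h>

enum rif_type { RIF_SUBPORT, RIF_VLAN, RIF_FID, RIF_IPIP_LB, RIF_MAX };

struct rif_ops {
	const char *name;
	int (*configure)(void);
};

static int cfg_ok(void) { return 0; }

static const struct rif_ops subport_ops = { "subport", cfg_ok };
static const struct rif_ops vlan_ops    = { "vlan",    cfg_ok };
static const struct rif_ops fid_ops     = { "fid",     cfg_ok };
static const struct rif_ops ipip_lb_ops = { "ipip_lb", cfg_ok };

static const struct rif_ops *rif_ops_arr[RIF_MAX] = {
	[RIF_SUBPORT] = &subport_ops,
	[RIF_VLAN]    = &vlan_ops,
	[RIF_FID]     = &fid_ops,
	[RIF_IPIP_LB] = &ipip_lb_ops,	/* new type, one new slot */
};

int main(void)
{
	enum rif_type t = RIF_IPIP_LB;

	printf("%s -> %d\n", rif_ops_arr[t]->name,
	       rif_ops_arr[t]->configure());
	return 0;
}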
static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
@@ -3681,6 +5899,18 @@ static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
kfree(mlxsw_sp->router->rifs);
}
+static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
+{
+ mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
+ INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
+ return 0;
+}
+
+static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
+}
+
static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
struct mlxsw_sp_router *router;
@@ -3704,7 +5934,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
return -EIO;
max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
- mlxsw_reg_rgcr_pack(rgcr_pl, true);
+ mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
if (err)
@@ -3716,7 +5946,7 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
char rgcr_pl[MLXSW_REG_RGCR_LEN];
- mlxsw_reg_rgcr_pack(rgcr_pl, false);
+ mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}
@@ -3740,6 +5970,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
if (err)
goto err_rifs_init;
+ err = mlxsw_sp_ipips_init(mlxsw_sp);
+ if (err)
+ goto err_ipips_init;
+
err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
&mlxsw_sp_nexthop_ht_params);
if (err)
@@ -3781,6 +6015,8 @@ err_lpm_init:
err_nexthop_group_ht_init:
rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
err_nexthop_ht_init:
+ mlxsw_sp_ipips_fini(mlxsw_sp);
+err_ipips_init:
mlxsw_sp_rifs_fini(mlxsw_sp);
err_rifs_init:
__mlxsw_sp_router_fini(mlxsw_sp);
@@ -3797,6 +6033,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
mlxsw_sp_lpm_fini(mlxsw_sp);
rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
+ mlxsw_sp_ipips_fini(mlxsw_sp);
mlxsw_sp_rifs_fini(mlxsw_sp);
__mlxsw_sp_router_fini(mlxsw_sp);
kfree(mlxsw_sp->router);
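The setup callback above downcasts the generic struct mlxsw_sp_rif to its IPIP-loopback variant via mlxsw_sp_rif_ipip_lb_rif(), whose definition is not part of this hunk. The usual kernel idiom for such a downcast is container_of(); a minimal, self-contained userspace sketch of that idiom (illustrative struct names, not taken from the patch):

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct rif { int rif_index; };	/* stand-in for struct mlxsw_sp_rif */
	struct rif_ipip_lb {		/* stand-in for the loopback variant */
		struct rif common;	/* embedded base struct */
		int ul_vr_id;
	};

	int main(void)
	{
		struct rif_ipip_lb lb = { .common.rif_index = 7, .ul_vr_id = 3 };
		struct rif *rif = &lb.common;	/* generic handle */
		struct rif_ipip_lb *back;

		back = container_of(rif, struct rif_ipip_lb, common);
		printf("same object: %d\n", back == &lb);	/* prints 1 */
		return 0;
	}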
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index a3e8d2b..345fcc4f3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -36,15 +36,38 @@
#define _MLXSW_ROUTER_H_
#include "spectrum.h"
+#include "reg.h"
+
+enum mlxsw_sp_l3proto {
+ MLXSW_SP_L3_PROTO_IPV4,
+ MLXSW_SP_L3_PROTO_IPV6,
+};
+
+union mlxsw_sp_l3addr {
+ __be32 addr4;
+ struct in6_addr addr6;
+};
+
+struct mlxsw_sp_rif_ipip_lb;
+struct mlxsw_sp_rif_ipip_lb_config {
+ enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
+ u32 okey;
+ enum mlxsw_sp_l3proto ul_protocol; /* Underlay. */
+ union mlxsw_sp_l3addr saddr;
+};
enum mlxsw_sp_rif_counter_dir {
MLXSW_SP_RIF_COUNTER_INGRESS,
MLXSW_SP_RIF_COUNTER_EGRESS,
};
+struct mlxsw_sp_neigh_entry;
+
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
u16 rif_index);
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
+u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
+u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif,
@@ -56,5 +79,33 @@ void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif,
enum mlxsw_sp_rif_counter_dir dir);
+struct mlxsw_sp_neigh_entry *
+mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
+ struct mlxsw_sp_neigh_entry *neigh_entry);
+int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry);
+unsigned char *
+mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry);
+u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry);
+struct in6_addr *
+mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry);
+
+#define mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) \
+ for (neigh_entry = mlxsw_sp_rif_neigh_next(rif, NULL); neigh_entry; \
+ neigh_entry = mlxsw_sp_rif_neigh_next(rif, neigh_entry))
+int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ u64 *p_counter);
+void
+mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_neigh_entry *neigh_entry,
+ bool adding);
+bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry);
+union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto,
+ const struct net_device *ol_dev);
+union mlxsw_sp_l3addr
+mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
+ const struct net_device *ol_dev);
+__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev);
#endif /* _MLXSW_ROUTER_H_*/
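The mlxsw_sp_rif_neigh_for_each() macro added above is seeded with NULL and advances by repeatedly calling mlxsw_sp_rif_neigh_next(). A self-contained sketch of that NULL-seeded iterator idiom (generic names, not from the patch):

	#include <stdio.h>

	struct entry { int val; struct entry *next; };

	/* next(head, NULL) yields the first element, mirroring how
	 * mlxsw_sp_rif_neigh_next(rif, NULL) is used by the macro.
	 */
	static struct entry *entry_next(struct entry *head, struct entry *cur)
	{
		return cur ? cur->next : head;
	}

	#define entry_for_each(e, head) \
		for ((e) = entry_next((head), NULL); (e); \
		     (e) = entry_next((head), (e)))

	int main(void)
	{
		struct entry c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
		struct entry *e;

		entry_for_each(e, &a)
			printf("%d\n", e->val);	/* prints 1 2 3 */
		return 0;
	}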
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
index 74341fe..ab7a298 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
@@ -497,7 +497,7 @@ static void mlxsw_sib_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sib_ports_remove(mlxsw_sib);
}
-static struct mlxsw_config_profile mlxsw_sib_config_profile = {
+static const struct mlxsw_config_profile mlxsw_sib_config_profile = {
.used_max_system_port = 1,
.max_system_port = 48000,
.used_max_ib_mc = 1,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 3b0f724..f3c29bb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -1674,7 +1674,7 @@ static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sx_ports_remove(mlxsw_sx);
}
-static struct mlxsw_config_profile mlxsw_sx_config_profile = {
+static const struct mlxsw_config_profile mlxsw_sx_config_profile = {
.used_max_vepa_channels = 1,
.max_vepa_channels = 0,
.used_max_mid = 1,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 12b5ed58..f396a1f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -61,11 +61,33 @@ enum {
MLXSW_TRAP_ID_MTUERROR = 0x52,
MLXSW_TRAP_ID_TTLERROR = 0x53,
MLXSW_TRAP_ID_LBERROR = 0x54,
- MLXSW_TRAP_ID_OSPF = 0x55,
+ MLXSW_TRAP_ID_IPV4_OSPF = 0x55,
MLXSW_TRAP_ID_IP2ME = 0x5F,
+ MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60,
+ MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61,
+ MLXSW_TRAP_ID_IPV6_LINK_LOCAL_SRC = 0x62,
+ MLXSW_TRAP_ID_IPV6_ALL_NODES_LINK = 0x63,
+ MLXSW_TRAP_ID_IPV6_OSPF = 0x64,
+ MLXSW_TRAP_ID_IPV6_MLDV12_LISTENER_QUERY = 0x65,
+ MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_REPORT = 0x66,
+ MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_DONE = 0x67,
+ MLXSW_TRAP_ID_IPV6_MLDV2_LISTENER_REPORT = 0x68,
+ MLXSW_TRAP_ID_IPV6_DHCP = 0x69,
+ MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F,
MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
- MLXSW_TRAP_ID_BGP_IPV4 = 0x88,
+ MLXSW_TRAP_ID_IPV4_BGP = 0x88,
+ MLXSW_TRAP_ID_IPV6_BGP = 0x89,
+ MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A,
+ MLXSW_TRAP_ID_L3_IPV6_ROUTER_ADVERTISMENT = 0x8B,
+ MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_SOLICITATION = 0x8C,
+ MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISMENT = 0x8D,
+ MLXSW_TRAP_ID_L3_IPV6_REDIRECTION = 0x8E,
MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
+ MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91,
+ MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
+ MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1,
+ MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
+ MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
MLXSW_TRAP_ID_ACL0 = 0x1C0,
MLXSW_TRAP_ID_MAX = 0x1FF
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index c0d7d5e..2e4effa 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -161,7 +161,7 @@ static void moxart_mac_setup_desc_ring(struct net_device *ndev)
priv->rx_head = 0;
- /* reset the MAC controller TX/RX desciptor base address */
+ /* reset the MAC controller TX/RX descriptor base address */
writel(priv->tx_base, priv->base + REG_TXR_BASE_ADDRESS);
writel(priv->rx_base, priv->base + REG_RXR_BASE_ADDRESS);
}
@@ -269,9 +269,8 @@ rx_next:
priv->rx_head = rx_head;
}
- if (rx < budget) {
+ if (rx < budget)
napi_complete_done(napi, rx);
- }
priv->reg_imr |= RPKT_FINISH_M;
writel(priv->reg_imr, priv->base + REG_INTERRUPT_MASK);
@@ -289,8 +288,8 @@ static int moxart_tx_queue_space(struct net_device *ndev)
static void moxart_tx_finished(struct net_device *ndev)
{
struct moxart_mac_priv_t *priv = netdev_priv(ndev);
- unsigned tx_head = priv->tx_head;
- unsigned tx_tail = priv->tx_tail;
+ unsigned int tx_head = priv->tx_head;
+ unsigned int tx_tail = priv->tx_tail;
while (tx_tail != tx_head) {
dma_unmap_single(&ndev->dev, priv->tx_mapping[tx_tail],
@@ -312,7 +311,7 @@ static void moxart_tx_finished(struct net_device *ndev)
static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id)
{
- struct net_device *ndev = (struct net_device *) dev_id;
+ struct net_device *ndev = (struct net_device *)dev_id;
struct moxart_mac_priv_t *priv = netdev_priv(ndev);
unsigned int ists = readl(priv->base + REG_INTERRUPT_STATUS);
@@ -495,7 +494,7 @@ static int moxart_mac_probe(struct platform_device *pdev)
priv->tx_desc_base = dma_alloc_coherent(NULL, TX_REG_DESC_SIZE *
TX_DESC_NUM, &priv->tx_base,
GFP_DMA | GFP_KERNEL);
- if (priv->tx_desc_base == NULL) {
+ if (!priv->tx_desc_base) {
ret = -ENOMEM;
goto init_fail;
}
@@ -503,7 +502,7 @@ static int moxart_mac_probe(struct platform_device *pdev)
priv->rx_desc_base = dma_alloc_coherent(NULL, RX_REG_DESC_SIZE *
RX_DESC_NUM, &priv->rx_base,
GFP_DMA | GFP_KERNEL);
- if (priv->rx_desc_base == NULL) {
+ if (!priv->rx_desc_base) {
ret = -ENOMEM;
goto init_fail;
}
diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h
index 686b895..bee608b 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.h
+++ b/drivers/net/ethernet/moxa/moxart_ether.h
@@ -55,17 +55,17 @@
#define RX_DESC2_ADDRESS_VIRT 4
#define TX_DESC_NUM 64
-#define TX_DESC_NUM_MASK (TX_DESC_NUM-1)
+#define TX_DESC_NUM_MASK (TX_DESC_NUM - 1)
#define TX_NEXT(N) (((N) + 1) & (TX_DESC_NUM_MASK))
#define TX_BUF_SIZE 1600
-#define TX_BUF_SIZE_MAX (TX_DESC1_BUF_SIZE_MASK+1)
+#define TX_BUF_SIZE_MAX (TX_DESC1_BUF_SIZE_MASK + 1)
#define TX_WAKE_THRESHOLD 16
#define RX_DESC_NUM 64
-#define RX_DESC_NUM_MASK (RX_DESC_NUM-1)
+#define RX_DESC_NUM_MASK (RX_DESC_NUM - 1)
#define RX_NEXT(N) (((N) + 1) & (RX_DESC_NUM_MASK))
#define RX_BUF_SIZE 1600
-#define RX_BUF_SIZE_MAX (RX_DESC1_BUF_SIZE_MASK+1)
+#define RX_BUF_SIZE_MAX (RX_DESC1_BUF_SIZE_MASK + 1)
#define REG_INTERRUPT_STATUS 0
#define REG_INTERRUPT_MASK 4
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index fd2ec36..462eda9 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -42,8 +42,6 @@
* aggregated as a single large packet
* napi: This parameter used to enable/disable NAPI (polling Rx)
* Possible values '1' for enable and '0' for disable. Default is '1'
- * ufo: This parameter used to enable/disable UDP Fragmentation Offload(UFO)
- * Possible values '1' for enable and '0' for disable. Default is '0'
* vlan_tag_strip: This can be used to enable or disable vlan stripping.
* Possible values '1' for enable , '0' for disable.
* Default is '2' - which means disable in promisc mode
@@ -453,7 +451,6 @@ S2IO_PARM_INT(lro_max_pkts, 0xFFFF);
S2IO_PARM_INT(indicate_max_pkts, 0);
S2IO_PARM_INT(napi, 1);
-S2IO_PARM_INT(ufo, 0);
S2IO_PARM_INT(vlan_tag_strip, NO_STRIP_IN_PROMISC);
static unsigned int tx_fifo_len[MAX_TX_FIFOS] =
@@ -4128,32 +4125,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev)
}
frg_len = skb_headlen(skb);
- if (offload_type == SKB_GSO_UDP) {
- int ufo_size;
-
- ufo_size = s2io_udp_mss(skb);
- ufo_size &= ~7;
- txdp->Control_1 |= TXD_UFO_EN;
- txdp->Control_1 |= TXD_UFO_MSS(ufo_size);
- txdp->Control_1 |= TXD_BUFFER0_SIZE(8);
-#ifdef __BIG_ENDIAN
- /* both variants do cpu_to_be64(be32_to_cpu(...)) */
- fifo->ufo_in_band_v[put_off] =
- (__force u64)skb_shinfo(skb)->ip6_frag_id;
-#else
- fifo->ufo_in_band_v[put_off] =
- (__force u64)skb_shinfo(skb)->ip6_frag_id << 32;
-#endif
- txdp->Host_Control = (unsigned long)fifo->ufo_in_band_v;
- txdp->Buffer_Pointer = pci_map_single(sp->pdev,
- fifo->ufo_in_band_v,
- sizeof(u64),
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer))
- goto pci_map_failed;
- txdp++;
- }
-
txdp->Buffer_Pointer = pci_map_single(sp->pdev, skb->data,
frg_len, PCI_DMA_TODEVICE);
if (pci_dma_mapping_error(sp->pdev, txdp->Buffer_Pointer))
@@ -4161,8 +4132,6 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev)
txdp->Host_Control = (unsigned long)skb;
txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len);
- if (offload_type == SKB_GSO_UDP)
- txdp->Control_1 |= TXD_UFO_EN;
frg_cnt = skb_shinfo(skb)->nr_frags;
/* For fragmented SKB. */
@@ -4177,14 +4146,9 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev)
skb_frag_size(frag),
DMA_TO_DEVICE);
txdp->Control_1 = TXD_BUFFER0_SIZE(skb_frag_size(frag));
- if (offload_type == SKB_GSO_UDP)
- txdp->Control_1 |= TXD_UFO_EN;
}
txdp->Control_1 |= TXD_GATHER_CODE_LAST;
- if (offload_type == SKB_GSO_UDP)
- frg_cnt++; /* as Txd0 was used for inband header */
-
tx_fifo = mac_control->tx_FIFO_start[queue];
val64 = fifo->list_info[put_off].list_phy_addr;
writeq(val64, &tx_fifo->TxDL_Pointer);
@@ -7910,11 +7874,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
NETIF_F_RXCSUM | NETIF_F_LRO;
dev->features |= dev->hw_features |
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
- if (sp->device_type & XFRAME_II_DEVICE) {
- dev->hw_features |= NETIF_F_UFO;
- if (ufo)
- dev->features |= NETIF_F_UFO;
- }
if (sp->high_dma_flag == true)
dev->features |= NETIF_F_HIGHDMA;
dev->watchdog_timeo = WATCH_DOG_TIMEOUT;
@@ -8147,10 +8106,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n",
dev->name);
- if (ufo)
- DBG_PRINT(ERR_DBG,
- "%s: UDP Fragmentation Offload(UFO) enabled\n",
- dev->name);
/* Initialize device name */
snprintf(sp->name, sizeof(sp->name), "%s Neterion %s", dev->name,
sp->product_name);
diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig
index 0e331e2..ae0c46b 100644
--- a/drivers/net/ethernet/netronome/Kconfig
+++ b/drivers/net/ethernet/netronome/Kconfig
@@ -29,6 +29,7 @@ config NFP_APP_FLOWER
bool "NFP4000/NFP6000 TC Flower offload support"
depends on NFP
depends on NET_SWITCHDEV
+ default y
---help---
Enable driver support for TC Flower offload on NFP4000 and NFP6000.
Say Y, if you are planning to make use of TC Flower offload
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index b8e1358..96e579a 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -23,6 +23,7 @@ nfp-objs := \
nfp_net_ethtool.o \
nfp_net_main.o \
nfp_net_repr.o \
+ nfp_net_sriov.o \
nfp_netvf_main.o \
nfp_port.o \
bpf/main.o \
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 8e57fda..239dfbe 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1238,6 +1238,16 @@ static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
}
+static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
+}
+
+static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
+}
+
static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
@@ -1325,6 +1335,16 @@ static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
}
+static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
+}
+
+static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
+}
+
static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
@@ -1383,11 +1403,15 @@ static const instr_cb_t instr_cb[256] = {
[BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm,
[BPF_JMP | BPF_JGT | BPF_K] = jgt_imm,
[BPF_JMP | BPF_JGE | BPF_K] = jge_imm,
+ [BPF_JMP | BPF_JLT | BPF_K] = jlt_imm,
+ [BPF_JMP | BPF_JLE | BPF_K] = jle_imm,
[BPF_JMP | BPF_JSET | BPF_K] = jset_imm,
[BPF_JMP | BPF_JNE | BPF_K] = jne_imm,
[BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg,
[BPF_JMP | BPF_JGT | BPF_X] = jgt_reg,
[BPF_JMP | BPF_JGE | BPF_X] = jge_reg,
+ [BPF_JMP | BPF_JLT | BPF_X] = jlt_reg,
+ [BPF_JMP | BPF_JLE | BPF_X] = jle_reg,
[BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
[BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
[BPF_JMP | BPF_EXIT] = goto_out,
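The new jlt/jle handlers reuse the branch masks already used for jge/jle's counterparts (BR_BHS and BR_BLO) with only the final boolean argument flipped. That boolean is not named in this hunk, but the pairing works because, on unsigned operands, each new test is the complement of an existing one and also the swapped-operand form of the opposite test. A standalone check of those identities:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	static void check(uint64_t a, uint64_t b)
	{
		/* complements: JLT == !JGE, JLE == !JGT */
		assert((a < b) == !(a >= b));
		assert((a <= b) == !(a > b));
		/* operand swaps: JLT(a,b) == JGT(b,a), JLE(a,b) == JGE(b,a) */
		assert((a < b) == (b > a));
		assert((a <= b) == (b >= a));
	}

	int main(void)
	{
		uint64_t v[] = { 0, 1, 2, UINT64_MAX };
		int i, j;

		for (i = 0; i < 4; i++)
			for (j = 0; j < 4; j++)
				check(v[i], v[j]);
		printf("identities hold\n");
		return 0;
	}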
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index afbdf5f..be2cf10 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -84,7 +84,7 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
}
static int
-nfp_bpf_vnic_init(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
+nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
{
struct nfp_net_bpf_priv *priv;
int ret;
@@ -106,14 +106,14 @@ nfp_bpf_vnic_init(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
setup_timer(&priv->rx_filter_stats_timer,
nfp_net_filter_stats_timer, (unsigned long)nn);
- ret = nfp_app_nic_vnic_init(app, nn, id);
+ ret = nfp_app_nic_vnic_alloc(app, nn, id);
if (ret)
kfree(priv);
return ret;
}
-static void nfp_bpf_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
+static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
{
if (nn->dp.bpf_offload_xdp)
nfp_bpf_xdp_offload(app, nn, NULL);
@@ -121,23 +121,21 @@ static void nfp_bpf_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
}
static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev,
- u32 handle, __be16 proto, struct tc_to_netdev *tc)
+ enum tc_setup_type type, void *type_data)
{
+ struct tc_cls_bpf_offload *cls_bpf = type_data;
struct nfp_net *nn = netdev_priv(netdev);
- if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
- return -EOPNOTSUPP;
- if (proto != htons(ETH_P_ALL))
+ if (type != TC_SETUP_CLSBPF || !nfp_net_ebpf_capable(nn) ||
+ !is_classid_clsact_ingress(cls_bpf->common.classid) ||
+ cls_bpf->common.protocol != htons(ETH_P_ALL) ||
+ cls_bpf->common.chain_index)
return -EOPNOTSUPP;
- if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) {
- if (!nn->dp.bpf_offload_xdp)
- return nfp_net_bpf_offload(nn, tc->cls_bpf);
- else
- return -EBUSY;
- }
+ if (nn->dp.bpf_offload_xdp)
+ return -EBUSY;
- return -EINVAL;
+ return nfp_net_bpf_offload(nn, cls_bpf);
}
static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn)
@@ -151,8 +149,8 @@ const struct nfp_app_type app_bpf = {
.extra_cap = nfp_bpf_extra_cap,
- .vnic_init = nfp_bpf_vnic_init,
- .vnic_clean = nfp_bpf_vnic_clean,
+ .vnic_alloc = nfp_bpf_vnic_alloc,
+ .vnic_free = nfp_bpf_vnic_free,
.setup_tc = nfp_bpf_setup_tc,
.tc_busy = nfp_bpf_tc_busy,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 78d80a3..a88bb5b 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -115,14 +115,14 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
/* TC direct action */
if (cls_bpf->exts_integrated) {
- if (tc_no_actions(cls_bpf->exts))
+ if (!tcf_exts_has_actions(cls_bpf->exts))
return NN_ACT_DIRECT;
return -EOPNOTSUPP;
}
/* TC legacy mode */
- if (!tc_single_action(cls_bpf->exts))
+ if (!tcf_exts_has_one_action(cls_bpf->exts))
return -EOPNOTSUPP;
tcf_exts_to_list(cls_bpf->exts, &actions);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index d696ba4..5b783a9 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -79,28 +79,32 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
const struct bpf_verifier_env *env)
{
const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
+ u64 imm;
if (nfp_prog->act == NN_ACT_XDP)
return 0;
- if (reg0->type != CONST_IMM) {
- pr_info("unsupported exit state: %d, imm: %llx\n",
- reg0->type, reg0->imm);
+ if (!(reg0->type == SCALAR_VALUE && tnum_is_const(reg0->var_off))) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg0->var_off);
+ pr_info("unsupported exit state: %d, var_off: %s\n",
+ reg0->type, tn_buf);
return -EINVAL;
}
- if (nfp_prog->act != NN_ACT_DIRECT &&
- reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
+ imm = reg0->var_off.value;
+ if (nfp_prog->act != NN_ACT_DIRECT && imm != 0 && (imm & ~0U) != ~0U) {
pr_info("unsupported exit state: %d, imm: %llx\n",
- reg0->type, reg0->imm);
+ reg0->type, imm);
return -EINVAL;
}
- if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT &&
- reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN &&
- reg0->imm != TC_ACT_QUEUED) {
+ if (nfp_prog->act == NN_ACT_DIRECT && imm <= TC_ACT_REDIRECT &&
+ imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN &&
+ imm != TC_ACT_QUEUED) {
pr_info("unsupported exit state: %d, imm: %llx\n",
- reg0->type, reg0->imm);
+ reg0->type, imm);
return -EINVAL;
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index b0837b5..c3ca05d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -34,10 +34,12 @@
#include <linux/bitfield.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
+#include <linux/workqueue.h>
#include <net/dst_metadata.h>
#include "main.h"
#include "../nfpcore/nfp_cpp.h"
+#include "../nfp_net.h"
#include "../nfp_net_repr.h"
#include "./cmsg.h"
@@ -75,6 +77,39 @@ nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
return skb;
}
+struct sk_buff *
+nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports)
+{
+ struct nfp_flower_cmsg_mac_repr *msg;
+ struct sk_buff *skb;
+ unsigned int size;
+
+ size = sizeof(*msg) + num_ports * sizeof(msg->ports[0]);
+ skb = nfp_flower_cmsg_alloc(app, size, NFP_FLOWER_CMSG_TYPE_MAC_REPR);
+ if (!skb)
+ return NULL;
+
+ msg = nfp_flower_cmsg_get_data(skb);
+ memset(msg->reserved, 0, sizeof(msg->reserved));
+ msg->num_ports = num_ports;
+
+ return skb;
+}
+
+void
+nfp_flower_cmsg_mac_repr_add(struct sk_buff *skb, unsigned int idx,
+ unsigned int nbi, unsigned int nbi_port,
+ unsigned int phys_port)
+{
+ struct nfp_flower_cmsg_mac_repr *msg;
+
+ msg = nfp_flower_cmsg_get_data(skb);
+ msg->ports[idx].idx = idx;
+ msg->ports[idx].info = nbi & NFP_FLOWER_CMSG_MAC_REPR_NBI;
+ msg->ports[idx].nbi_port = nbi_port;
+ msg->ports[idx].phys_port = phys_port;
+}
+
int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok)
{
struct nfp_flower_cmsg_portmod *msg;
@@ -106,23 +141,33 @@ nfp_flower_cmsg_portmod_rx(struct nfp_app *app, struct sk_buff *skb)
msg = nfp_flower_cmsg_get_data(skb);
link = msg->info & NFP_FLOWER_CMSG_PORTMOD_INFO_LINK;
+ rtnl_lock();
rcu_read_lock();
netdev = nfp_app_repr_get(app, be32_to_cpu(msg->portnum));
+ rcu_read_unlock();
if (!netdev) {
nfp_flower_cmsg_warn(app, "ctrl msg for unknown port 0x%08x\n",
be32_to_cpu(msg->portnum));
- rcu_read_unlock();
+ rtnl_unlock();
return;
}
- if (link)
+ if (link) {
+ u16 mtu = be16_to_cpu(msg->mtu);
+
netif_carrier_on(netdev);
- else
+
+ /* An MTU of 0 from the firmware should be ignored */
+ if (mtu)
+ dev_set_mtu(netdev, mtu);
+ } else {
netif_carrier_off(netdev);
- rcu_read_unlock();
+ }
+ rtnl_unlock();
}
-void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb)
+static void
+nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
{
struct nfp_flower_cmsg_hdr *cmsg_hdr;
enum nfp_flower_cmsg_type_port type;
@@ -146,8 +191,30 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb)
default:
nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
type);
+ goto out;
}
+ dev_consume_skb_any(skb);
+ return;
out:
dev_kfree_skb_any(skb);
}
+
+void nfp_flower_cmsg_process_rx(struct work_struct *work)
+{
+ struct nfp_flower_priv *priv;
+ struct sk_buff *skb;
+
+ priv = container_of(work, struct nfp_flower_priv, cmsg_work);
+
+ while ((skb = skb_dequeue(&priv->cmsg_skbs)))
+ nfp_flower_cmsg_process_one_rx(priv->app, skb);
+}
+
+void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb)
+{
+ struct nfp_flower_priv *priv = app->priv;
+
+ skb_queue_tail(&priv->cmsg_skbs, skb);
+ schedule_work(&priv->cmsg_work);
+}
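nfp_flower_cmsg_rx() no longer processes control messages inline: it queues the skb and schedules a work item, so the portmod handler above can take rtnl_lock() (which may sleep and is therefore off limits in the receive path). A minimal userspace analogue of that queue-then-process hand-off (simplified and single-threaded; the real code uses skb_queue_tail()/skb_dequeue() and schedule_work()):

	#include <stdio.h>

	struct msg { int id; struct msg *next; };

	static struct msg *q_head;
	static struct msg **q_tail = &q_head;

	/* receive side: may not sleep, so only enqueue and kick the worker */
	static void cmsg_rx(struct msg *m)
	{
		m->next = NULL;
		*q_tail = m;
		q_tail = &m->next;
		/* schedule_work(&cmsg_work) would go here */
	}

	/* worker side: runs in process context, sleeping locks allowed */
	static void cmsg_process_rx(void)
	{
		struct msg *m;

		while ((m = q_head)) {
			q_head = m->next;
			if (!q_head)
				q_tail = &q_head;
			printf("handled msg %d\n", m->id);
		}
	}

	int main(void)
	{
		struct msg a = { 1, NULL }, b = { 2, NULL };

		cmsg_rx(&a);
		cmsg_rx(&b);
		cmsg_process_rx();
		return 0;
	}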
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index cf738de..a2ec603 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -247,12 +247,27 @@ struct nfp_flower_cmsg_hdr {
enum nfp_flower_cmsg_type_port {
NFP_FLOWER_CMSG_TYPE_FLOW_ADD = 0,
NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2,
+ NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7,
NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8,
NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15,
NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16,
NFP_FLOWER_CMSG_TYPE_MAX = 32,
};
+/* NFP_FLOWER_CMSG_TYPE_MAC_REPR */
+struct nfp_flower_cmsg_mac_repr {
+ u8 reserved[3];
+ u8 num_ports;
+ struct {
+ u8 idx;
+ u8 info;
+ u8 nbi_port;
+ u8 phys_port;
+ } ports[0];
+};
+
+#define NFP_FLOWER_CMSG_MAC_REPR_NBI GENMASK(1, 0)
+
/* NFP_FLOWER_CMSG_TYPE_PORT_MOD */
struct nfp_flower_cmsg_portmod {
__be32 portnum;
@@ -308,7 +323,14 @@ static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb)
return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN;
}
+struct sk_buff *
+nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports);
+void
+nfp_flower_cmsg_mac_repr_add(struct sk_buff *skb, unsigned int idx,
+ unsigned int nbi, unsigned int nbi_port,
+ unsigned int phys_port);
int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok);
+void nfp_flower_cmsg_process_rx(struct work_struct *work);
void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb);
struct sk_buff *
nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 6a65c8b..91fe036 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -127,6 +127,11 @@ nfp_flower_repr_netdev_stop(struct nfp_app *app, struct nfp_repr *repr)
static void nfp_flower_sriov_disable(struct nfp_app *app)
{
+ struct nfp_flower_priv *priv = app->priv;
+
+ if (!priv->nn)
+ return;
+
nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_VF);
}
@@ -159,12 +164,18 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
goto err_reprs_clean;
}
+ /* For now we only support 1 PF */
+ WARN_ON(repr_type == NFP_REPR_TYPE_PF && i);
+
port = nfp_port_alloc(app, port_type, reprs->reprs[i]);
if (repr_type == NFP_REPR_TYPE_PF) {
port->pf_id = i;
+ port->vnic = priv->nn->dp.ctrl_bar;
} else {
- port->pf_id = 0; /* For now we only support 1 PF */
+ port->pf_id = 0;
port->vf_id = i;
+ port->vnic =
+ app->pf->vf_cfg_mem + i * NFP_NET_CFG_BAR_SZ;
}
eth_hw_addr_random(reprs->reprs[i]);
@@ -197,32 +208,37 @@ err_reprs_clean:
static int nfp_flower_sriov_enable(struct nfp_app *app, int num_vfs)
{
+ struct nfp_flower_priv *priv = app->priv;
+
+ if (!priv->nn)
+ return 0;
+
return nfp_flower_spawn_vnic_reprs(app,
NFP_FLOWER_CMSG_PORT_VNIC_TYPE_VF,
NFP_REPR_TYPE_VF, num_vfs);
}
-static void nfp_flower_stop(struct nfp_app *app)
-{
- nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF);
- nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
-
-}
-
static int
nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
{
struct nfp_eth_table *eth_tbl = app->pf->eth_tbl;
struct nfp_reprs *reprs, *old_reprs;
+ struct sk_buff *ctrl_skb;
unsigned int i;
int err;
- reprs = nfp_reprs_alloc(eth_tbl->max_index + 1);
- if (!reprs)
+ ctrl_skb = nfp_flower_cmsg_mac_repr_start(app, eth_tbl->count);
+ if (!ctrl_skb)
return -ENOMEM;
+ reprs = nfp_reprs_alloc(eth_tbl->max_index + 1);
+ if (!reprs) {
+ err = -ENOMEM;
+ goto err_free_ctrl_skb;
+ }
+
for (i = 0; i < eth_tbl->count; i++) {
- int phys_port = eth_tbl->ports[i].index;
+ unsigned int phys_port = eth_tbl->ports[i].index;
struct nfp_port *port;
u32 cmsg_port_id;
@@ -255,6 +271,11 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
goto err_reprs_clean;
}
+ nfp_flower_cmsg_mac_repr_add(ctrl_skb, i,
+ eth_tbl->ports[i].nbi,
+ eth_tbl->ports[i].base,
+ phys_port);
+
nfp_info(app->cpp, "Phys Port %d Representor(%s) created\n",
phys_port, reprs->reprs[phys_port]->name);
}
@@ -265,37 +286,31 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
goto err_reprs_clean;
}
+ /* The MAC_REPR control message should be sent after the MAC
+ * representors are registered using nfp_app_reprs_set(). This is
+ * because the firmware may respond with control messages for the
+ * MAC representors, e.g. to provide the driver with information

+ * about their state, and without registration the driver will drop
+ * any such messages.
+ */
+ nfp_ctrl_tx(app->ctrl, ctrl_skb);
+
return 0;
err_reprs_clean:
nfp_reprs_clean_and_free(reprs);
+err_free_ctrl_skb:
+ kfree_skb(ctrl_skb);
return err;
}
-static int nfp_flower_start(struct nfp_app *app)
-{
- int err;
-
- err = nfp_flower_spawn_phy_reprs(app, app->priv);
- if (err)
- return err;
-
- return nfp_flower_spawn_vnic_reprs(app,
- NFP_FLOWER_CMSG_PORT_VNIC_TYPE_PF,
- NFP_REPR_TYPE_PF, 1);
-}
-
-static int nfp_flower_vnic_init(struct nfp_app *app, struct nfp_net *nn,
- unsigned int id)
+static int nfp_flower_vnic_alloc(struct nfp_app *app, struct nfp_net *nn,
+ unsigned int id)
{
- struct nfp_flower_priv *priv = app->priv;
-
if (id > 0) {
nfp_warn(app->cpp, "FlowerNIC doesn't support more than one data vNIC\n");
goto err_invalid_port;
}
- priv->nn = nn;
-
eth_hw_addr_random(nn->dp.netdev);
netif_keep_dst(nn->dp.netdev);
@@ -306,9 +321,59 @@ err_invalid_port:
return PTR_ERR_OR_ZERO(nn->port);
}
+static void nfp_flower_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
+{
+ struct nfp_flower_priv *priv = app->priv;
+
+ if (app->pf->num_vfs)
+ nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_VF);
+ nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF);
+ nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
+
+ priv->nn = NULL;
+}
+
+static int nfp_flower_vnic_init(struct nfp_app *app, struct nfp_net *nn)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ int err;
+
+ priv->nn = nn;
+
+ err = nfp_flower_spawn_phy_reprs(app, app->priv);
+ if (err)
+ goto err_clear_nn;
+
+ err = nfp_flower_spawn_vnic_reprs(app,
+ NFP_FLOWER_CMSG_PORT_VNIC_TYPE_PF,
+ NFP_REPR_TYPE_PF, 1);
+ if (err)
+ goto err_destroy_reprs_phy;
+
+ if (app->pf->num_vfs) {
+ err = nfp_flower_spawn_vnic_reprs(app,
+ NFP_FLOWER_CMSG_PORT_VNIC_TYPE_VF,
+ NFP_REPR_TYPE_VF,
+ app->pf->num_vfs);
+ if (err)
+ goto err_destroy_reprs_pf;
+ }
+
+ return 0;
+
+err_destroy_reprs_pf:
+ nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF);
+err_destroy_reprs_phy:
+ nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT);
+err_clear_nn:
+ priv->nn = NULL;
+ return err;
+}
+
static int nfp_flower_init(struct nfp_app *app)
{
const struct nfp_pf *pf = app->pf;
+ struct nfp_flower_priv *app_priv;
u64 version;
int err;
@@ -339,10 +404,15 @@ static int nfp_flower_init(struct nfp_app *app)
return -EINVAL;
}
- app->priv = vzalloc(sizeof(struct nfp_flower_priv));
- if (!app->priv)
+ app_priv = vzalloc(sizeof(struct nfp_flower_priv));
+ if (!app_priv)
return -ENOMEM;
+ app->priv = app_priv;
+ app_priv->app = app;
+ skb_queue_head_init(&app_priv->cmsg_skbs);
+ INIT_WORK(&app_priv->cmsg_work, nfp_flower_cmsg_process_rx);
+
err = nfp_flower_metadata_init(app);
if (err)
goto err_free_app_priv;
@@ -356,6 +426,11 @@ err_free_app_priv:
static void nfp_flower_clean(struct nfp_app *app)
{
+ struct nfp_flower_priv *app_priv = app->priv;
+
+ skb_queue_purge(&app_priv->cmsg_skbs);
+ flush_work(&app_priv->cmsg_work);
+
nfp_flower_metadata_cleanup(app);
vfree(app->priv);
app->priv = NULL;
@@ -371,14 +446,13 @@ const struct nfp_app_type app_flower = {
.init = nfp_flower_init,
.clean = nfp_flower_clean,
+ .vnic_alloc = nfp_flower_vnic_alloc,
.vnic_init = nfp_flower_vnic_init,
+ .vnic_clean = nfp_flower_vnic_clean,
.repr_open = nfp_flower_repr_netdev_open,
.repr_stop = nfp_flower_repr_netdev_stop,
- .start = nfp_flower_start,
- .stop = nfp_flower_stop,
-
.ctrl_msg_rx = nfp_flower_cmsg_rx,
.sriov_enable = nfp_flower_sriov_enable,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 9e64c04..c20dd00 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -38,8 +38,9 @@
#include <linux/hashtable.h>
#include <linux/time64.h>
#include <linux/types.h>
+#include <net/pkt_cls.h>
+#include <linux/workqueue.h>
-struct tc_to_netdev;
struct net_device;
struct nfp_app;
@@ -71,6 +72,7 @@ struct nfp_fl_stats_id {
/**
* struct nfp_flower_priv - Flower APP per-vNIC priv data
+ * @app: Back pointer to app
* @nn: Pointer to vNIC
* @mask_id_seed: Seed used for mask hash table
* @flower_version: HW version of flower
@@ -78,8 +80,11 @@ struct nfp_fl_stats_id {
* @mask_ids: List of free mask ids
* @mask_table: Hash table used to store masks
* @flow_table: Hash table used to store flower rules
+ * @cmsg_work: Workqueue for control message processing
+ * @cmsg_skbs: List of skbs for control message processing
*/
struct nfp_flower_priv {
+ struct nfp_app *app;
struct nfp_net *nn;
u32 mask_id_seed;
u64 flower_version;
@@ -87,6 +92,8 @@ struct nfp_flower_priv {
struct nfp_fl_mask_id mask_ids;
DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS);
DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS);
+ struct work_struct cmsg_work;
+ struct sk_buff_head cmsg_skbs;
};
struct nfp_fl_key_ls {
@@ -135,7 +142,7 @@ int nfp_flower_metadata_init(struct nfp_app *app);
void nfp_flower_metadata_cleanup(struct nfp_app *app);
int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
- u32 handle, __be16 proto, struct tc_to_netdev *tc);
+ enum tc_setup_type type, void *type_data);
int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
struct nfp_fl_key_ls *key_ls,
struct net_device *netdev,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 74a96d6..d396183 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -409,16 +409,15 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
}
int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
- u32 handle, __be16 proto, struct tc_to_netdev *tc)
+ enum tc_setup_type type, void *type_data)
{
- if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
- return -EOPNOTSUPP;
+ struct tc_cls_flower_offload *cls_flower = type_data;
- if (!eth_proto_is_802_3(proto))
+ if (type != TC_SETUP_CLSFLOWER ||
+ !is_classid_clsact_ingress(cls_flower->common.classid) ||
+ !eth_proto_is_802_3(cls_flower->common.protocol) ||
+ cls_flower->common.chain_index)
return -EOPNOTSUPP;
- if (tc->type != TC_SETUP_CLSFLOWER)
- return -EINVAL;
-
- return nfp_flower_repr_offload(app, netdev, tc->cls_flower);
+ return nfp_flower_repr_offload(app, netdev, cls_flower);
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index c704c02..82c2907 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -38,6 +38,7 @@
#include "nfpcore/nfp_nffw.h"
#include "nfp_app.h"
#include "nfp_main.h"
+#include "nfp_net.h"
#include "nfp_net_repr.h"
static const struct nfp_app_type *apps[] = {
@@ -48,6 +49,25 @@ static const struct nfp_app_type *apps[] = {
#endif
};
+struct nfp_app *nfp_app_from_netdev(struct net_device *netdev)
+{
+ if (nfp_netdev_is_nfp_net(netdev)) {
+ struct nfp_net *nn = netdev_priv(netdev);
+
+ return nn->app;
+ }
+
+ if (nfp_netdev_is_nfp_repr(netdev)) {
+ struct nfp_repr *repr = netdev_priv(netdev);
+
+ return repr->app;
+ }
+
+ WARN(1, "Unknown netdev type for nfp_app\n");
+
+ return NULL;
+}
+
const char *nfp_app_mip_name(struct nfp_app *app)
{
if (!app || !app->pf->mip)
@@ -105,7 +125,7 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id)
return ERR_PTR(-EINVAL);
}
- if (WARN_ON(!apps[i]->name || !apps[i]->vnic_init))
+ if (WARN_ON(!apps[i]->name || !apps[i]->vnic_alloc))
return ERR_PTR(-EINVAL);
app = kzalloc(sizeof(*app), GFP_KERNEL);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index 5d714e1..af640b5 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -42,7 +42,6 @@ struct bpf_prog;
struct net_device;
struct pci_dev;
struct sk_buff;
-struct tc_to_netdev;
struct sk_buff;
struct nfp_app;
struct nfp_cpp;
@@ -70,8 +69,10 @@ extern const struct nfp_app_type app_flower;
* @init: perform basic app checks and init
* @clean: clean app state
* @extra_cap: extra capabilities string
- * @vnic_init: init vNICs (assign port types, etc.)
- * @vnic_clean: clean up app's vNIC state
+ * @vnic_alloc: allocate vNICs (assign port types, etc.)
+ * @vnic_free: free up app's vNIC state
+ * @vnic_init: vNIC netdev was registered
+ * @vnic_clean: vNIC netdev about to be unregistered
* @repr_open: representor netdev open callback
* @repr_stop: representor netdev stop callback
* @start: start application logic
@@ -96,8 +97,10 @@ struct nfp_app_type {
const char *(*extra_cap)(struct nfp_app *app, struct nfp_net *nn);
- int (*vnic_init)(struct nfp_app *app, struct nfp_net *nn,
- unsigned int id);
+ int (*vnic_alloc)(struct nfp_app *app, struct nfp_net *nn,
+ unsigned int id);
+ void (*vnic_free)(struct nfp_app *app, struct nfp_net *nn);
+ int (*vnic_init)(struct nfp_app *app, struct nfp_net *nn);
void (*vnic_clean)(struct nfp_app *app, struct nfp_net *nn);
int (*repr_open)(struct nfp_app *app, struct nfp_repr *repr);
@@ -109,7 +112,7 @@ struct nfp_app_type {
void (*ctrl_msg_rx)(struct nfp_app *app, struct sk_buff *skb);
int (*setup_tc)(struct nfp_app *app, struct net_device *netdev,
- u32 handle, __be16 proto, struct tc_to_netdev *tc);
+ enum tc_setup_type type, void *type_data);
bool (*tc_busy)(struct nfp_app *app, struct nfp_net *nn);
int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn,
struct bpf_prog *prog);
@@ -158,10 +161,23 @@ static inline void nfp_app_clean(struct nfp_app *app)
app->type->clean(app);
}
-static inline int nfp_app_vnic_init(struct nfp_app *app, struct nfp_net *nn,
- unsigned int id)
+static inline int nfp_app_vnic_alloc(struct nfp_app *app, struct nfp_net *nn,
+ unsigned int id)
+{
+ return app->type->vnic_alloc(app, nn, id);
+}
+
+static inline void nfp_app_vnic_free(struct nfp_app *app, struct nfp_net *nn)
+{
+ if (app->type->vnic_free)
+ app->type->vnic_free(app, nn);
+}
+
+static inline int nfp_app_vnic_init(struct nfp_app *app, struct nfp_net *nn)
{
- return app->type->vnic_init(app, nn, id);
+ if (!app->type->vnic_init)
+ return 0;
+ return app->type->vnic_init(app, nn);
}
static inline void nfp_app_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
@@ -238,12 +254,11 @@ static inline bool nfp_app_tc_busy(struct nfp_app *app, struct nfp_net *nn)
static inline int nfp_app_setup_tc(struct nfp_app *app,
struct net_device *netdev,
- u32 handle, __be16 proto,
- struct tc_to_netdev *tc)
+ enum tc_setup_type type, void *type_data)
{
if (!app || !app->type->setup_tc)
return -EOPNOTSUPP;
- return app->type->setup_tc(app, netdev, handle, proto, tc);
+ return app->type->setup_tc(app, netdev, type, type_data);
}
static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
@@ -295,6 +310,8 @@ static inline struct net_device *nfp_app_repr_get(struct nfp_app *app, u32 id)
return app->type->repr_get(app, id);
}
+struct nfp_app *nfp_app_from_netdev(struct net_device *netdev);
+
struct nfp_reprs *
nfp_app_reprs_set(struct nfp_app *app, enum nfp_repr_type type,
struct nfp_reprs *reprs);
@@ -308,7 +325,7 @@ void nfp_app_free(struct nfp_app *app);
/* Callbacks shared between apps */
-int nfp_app_nic_vnic_init(struct nfp_app *app, struct nfp_net *nn,
- unsigned int id);
+int nfp_app_nic_vnic_alloc(struct nfp_app *app, struct nfp_net *nn,
+ unsigned int id);
#endif
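The vnic_init hook is split here: vnic_alloc/vnic_free bracket allocation, while the new vnic_init/vnic_clean run once the netdev is registered (per the kernel-doc above). Only vnic_alloc is mandatory, matching the WARN_ON(!apps[i]->vnic_alloc) check in nfp_app_alloc(). A sketch of the resulting call order (illustrative stand-ins, not from the patch):

	#include <stdio.h>

	static int  vnic_alloc(int id) { printf("alloc vNIC %d\n", id); return 0; }
	static int  vnic_init(void)    { printf("init: netdev registered\n"); return 0; }
	static void vnic_clean(void)   { printf("clean: about to unregister\n"); }
	static void vnic_free(void)    { printf("free\n"); }

	int main(void)
	{
		if (vnic_alloc(0))
			return 1;
		if (vnic_init()) {
			vnic_free();
			return 1;
		}
		/* netdev is live here */
		vnic_clean();
		vnic_free();
		return 0;
	}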
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c
index 4e37c81..2a2f2fb 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c
@@ -60,8 +60,8 @@ nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app,
return nn->port->type == NFP_PORT_INVALID;
}
-int nfp_app_nic_vnic_init(struct nfp_app *app, struct nfp_net *nn,
- unsigned int id)
+int nfp_app_nic_vnic_alloc(struct nfp_app *app, struct nfp_net *nn,
+ unsigned int id)
{
int err;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 3f199db..f055b17 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -172,6 +172,21 @@ static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs)
return nfp_pcie_sriov_enable(pdev, num_vfs);
}
+static const struct firmware *
+nfp_net_fw_request(struct pci_dev *pdev, struct nfp_pf *pf, const char *name)
+{
+ const struct firmware *fw = NULL;
+ int err;
+
+ err = request_firmware_direct(&fw, name, &pdev->dev);
+ nfp_info(pf->cpp, " %s: %s\n",
+ name, err ? "not found" : "found, loading...");
+ if (err)
+ return NULL;
+
+ return fw;
+}
+
/**
* nfp_net_fw_find() - Find the correct firmware image for netdev mode
* @pdev: PCI Device structure
@@ -182,13 +197,32 @@ static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs)
static const struct firmware *
nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf)
{
- const struct firmware *fw = NULL;
struct nfp_eth_table_port *port;
+ const struct firmware *fw;
const char *fw_model;
char fw_name[256];
- int spc, err = 0;
- int i, j;
+ const u8 *serial;
+ u16 interface;
+ int spc, i, j;
+ nfp_info(pf->cpp, "Looking for firmware file in order of priority:\n");
+
+ /* First try to find a firmware image specific for this device */
+ interface = nfp_cpp_interface(pf->cpp);
+ nfp_cpp_serial(pf->cpp, &serial);
+ sprintf(fw_name, "netronome/serial-%pMF-%02hhx-%02hhx.nffw",
+ serial, interface >> 8, interface & 0xff);
+ fw = nfp_net_fw_request(pdev, pf, fw_name);
+ if (fw)
+ return fw;
+
+ /* Then try the PCI name */
+ sprintf(fw_name, "netronome/pci-%s.nffw", pci_name(pdev));
+ fw = nfp_net_fw_request(pdev, pf, fw_name);
+ if (fw)
+ return fw;
+
+ /* Finally try the card type and media */
if (!pf->eth_tbl) {
dev_err(&pdev->dev, "Error: can't identify media config\n");
return NULL;
@@ -221,13 +255,7 @@ nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf)
if (spc <= 0)
return NULL;
- err = request_firmware(&fw, fw_name, &pdev->dev);
- if (err)
- return NULL;
-
- dev_info(&pdev->dev, "Loading FW image: %s\n", fw_name);
-
- return fw;
+ return nfp_net_fw_request(pdev, pf, fw_name);
}
/**
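With this change nfp_net_fw_find() probes firmware names from most to least specific: a per-device name built from the serial number and interface id, then the PCI address, then the card model and media configuration. Hypothetical examples of the probed paths (the %pMF rendering and the model-based name are assumptions, since the model format string is outside this hunk):

	netronome/serial-00-15-4d-13-51-0c-10-ff.nffw
	netronome/pci-0000:04:00.0.nffw
	netronome/nic_AMDA0081-0001_1x40.nffw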
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h
index 6922410..be0ee59 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h
@@ -73,6 +73,8 @@ struct nfp_rtsym_table;
* @mac_stats_mem: Pointer to mapped MAC stats area
* @vf_cfg_bar: Pointer to the CPP area for the VF configuration BAR
* @vf_cfg_mem: Pointer to mapped VF configuration area
+ * @vfcfg_tbl2_area: Pointer to the CPP area for the VF config table
+ * @vfcfg_tbl2: Pointer to mapped VF config table
* @irq_entries: Array of MSI-X entries for all vNICs
* @limit_vfs: Number of VFs supported by firmware (~0 for PCI limit)
* @num_vfs: Number of SR-IOV VFs enabled
@@ -107,6 +109,8 @@ struct nfp_pf {
u8 __iomem *mac_stats_mem;
struct nfp_cpp_area *vf_cfg_bar;
u8 __iomem *vf_cfg_mem;
+ struct nfp_cpp_area *vfcfg_tbl2_area;
+ u8 __iomem *vfcfg_tbl2;
struct msix_entry *irq_entries;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index b1fa77b..d51d823 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -573,7 +573,6 @@ struct nfp_net_dp {
* @tx_bar: Pointer to mapped TX queues
* @rx_bar: Pointer to mapped FL/RX queues
* @debugfs_dir: Device directory in debugfs
- * @ethtool_dump_flag: Ethtool dump flag
* @vnic_list: Entry on device vNIC list
* @pdev: Backpointer to PCI device
* @app: APP handle if available
@@ -640,7 +639,6 @@ struct nfp_net {
u8 __iomem *rx_bar;
struct dentry *debugfs_dir;
- u32 ethtool_dump_flag;
struct list_head vnic_list;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 66a09e4..1c0187f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -71,6 +71,7 @@
#include "nfp_app.h"
#include "nfp_net_ctrl.h"
#include "nfp_net.h"
+#include "nfp_net_sriov.h"
#include "nfp_port.h"
/**
@@ -990,7 +991,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
/* check for last gather fragment */
if (fidx == nr_frags - 1)
- dev_kfree_skb_any(skb);
+ dev_consume_skb_any(skb);
tx_ring->txbufs[idx].dma_addr = 0;
tx_ring->txbufs[idx].skb = NULL;
@@ -2659,6 +2660,7 @@ static int nfp_net_netdev_close(struct net_device *netdev)
/* Step 2: Tell NFP
*/
nfp_net_clear_config_and_disable(nn);
+ nfp_port_configure(netdev, false);
/* Step 3: Free resources
*/
@@ -2776,16 +2778,21 @@ static int nfp_net_netdev_open(struct net_device *netdev)
goto err_free_all;
/* Step 2: Configure the NFP
+ * - Ifup the physical interface if it exists
* - Enable rings from 0 to tx_rings/rx_rings - 1.
* - Write MAC address (in case it changed)
* - Set the MTU
* - Set the Freelist buffer size
* - Enable the FW
*/
- err = nfp_net_set_config_and_enable(nn);
+ err = nfp_port_configure(netdev, true);
if (err)
goto err_free_all;
+ err = nfp_net_set_config_and_enable(nn);
+ if (err)
+ goto err_port_disable;
+
/* Step 3: Enable for kernel
* - put some freelist descriptors on each RX ring
* - enable NAPI on each ring
@@ -2796,6 +2803,8 @@ static int nfp_net_netdev_open(struct net_device *netdev)
return 0;
+err_port_disable:
+ nfp_port_configure(netdev, false);
err_free_all:
nfp_net_close_free_all(nn);
return err;
@@ -3413,6 +3422,11 @@ const struct net_device_ops nfp_net_netdev_ops = {
.ndo_get_stats64 = nfp_net_stat64,
.ndo_vlan_rx_add_vid = nfp_net_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = nfp_net_vlan_rx_kill_vid,
+ .ndo_set_vf_mac = nfp_app_set_vf_mac,
+ .ndo_set_vf_vlan = nfp_app_set_vf_vlan,
+ .ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk,
+ .ndo_get_vf_config = nfp_app_get_vf_config,
+ .ndo_set_vf_link_state = nfp_app_set_vf_link_state,
.ndo_setup_tc = nfp_port_setup_tc,
.ndo_tx_timeout = nfp_net_tx_timeout,
.ndo_set_rx_mode = nfp_net_set_rx_mode,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index e5e94e0..b0a452b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -164,6 +164,7 @@
#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */
#define NFP_NET_CFG_UPDATE_MACADDR (0x1 << 11) /* MAC address change */
#define NFP_NET_CFG_UPDATE_MBOX (0x1 << 12) /* Mailbox update */
+#define NFP_NET_CFG_UPDATE_VF (0x1 << 13) /* VF settings change */
#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* An error occurred */
#define NFP_NET_CFG_TXRS_ENABLE 0x0008
#define NFP_NET_CFG_RXRS_ENABLE 0x0010
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
index 40217ece..cf81cf9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
@@ -125,7 +125,6 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
struct nfp_net_tx_ring *tx_ring;
struct nfp_net_tx_desc *txd;
int d_rd_p, d_wr_p, txd_cnt;
- struct sk_buff *skb;
struct nfp_net *nn;
int i;
@@ -158,13 +157,15 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
txd->vals[0], txd->vals[1],
txd->vals[2], txd->vals[3]);
- skb = READ_ONCE(tx_ring->txbufs[i].skb);
- if (skb) {
- if (tx_ring == r_vec->tx_ring)
+ if (tx_ring == r_vec->tx_ring) {
+ struct sk_buff *skb = READ_ONCE(tx_ring->txbufs[i].skb);
+
+ if (skb)
seq_printf(file, " skb->head=%p skb->data=%p",
skb->head, skb->data);
- else
- seq_printf(file, " frag=%p", skb);
+ } else {
+ seq_printf(file, " frag=%p",
+ READ_ONCE(tx_ring->txbufs[i].frag));
}
if (tx_ring->txbufs[i].dma_addr)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 6e31355..07969f0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -59,82 +59,129 @@ enum nfp_dump_diag {
NFP_DUMP_NSP_DIAG = 0,
};
-/* Support for stats. Returns netdev, driver, and device stats */
-enum { NETDEV_ET_STATS, NFP_NET_DRV_ET_STATS, NFP_NET_DEV_ET_STATS };
-struct _nfp_net_et_stats {
+struct nfp_et_stat {
char name[ETH_GSTRING_LEN];
- int type;
- int sz;
int off;
};
-#define NN_ET_NETDEV_STAT(m) NETDEV_ET_STATS, \
- FIELD_SIZEOF(struct net_device_stats, m), \
- offsetof(struct net_device_stats, m)
-/* For stats in the control BAR (other than Q stats) */
-#define NN_ET_DEV_STAT(m) NFP_NET_DEV_ET_STATS, \
- sizeof(u64), \
- (m)
-static const struct _nfp_net_et_stats nfp_net_et_stats[] = {
- /* netdev stats */
- {"rx_packets", NN_ET_NETDEV_STAT(rx_packets)},
- {"tx_packets", NN_ET_NETDEV_STAT(tx_packets)},
- {"rx_bytes", NN_ET_NETDEV_STAT(rx_bytes)},
- {"tx_bytes", NN_ET_NETDEV_STAT(tx_bytes)},
- {"rx_errors", NN_ET_NETDEV_STAT(rx_errors)},
- {"tx_errors", NN_ET_NETDEV_STAT(tx_errors)},
- {"rx_dropped", NN_ET_NETDEV_STAT(rx_dropped)},
- {"tx_dropped", NN_ET_NETDEV_STAT(tx_dropped)},
- {"multicast", NN_ET_NETDEV_STAT(multicast)},
- {"collisions", NN_ET_NETDEV_STAT(collisions)},
- {"rx_over_errors", NN_ET_NETDEV_STAT(rx_over_errors)},
- {"rx_crc_errors", NN_ET_NETDEV_STAT(rx_crc_errors)},
- {"rx_frame_errors", NN_ET_NETDEV_STAT(rx_frame_errors)},
- {"rx_fifo_errors", NN_ET_NETDEV_STAT(rx_fifo_errors)},
- {"rx_missed_errors", NN_ET_NETDEV_STAT(rx_missed_errors)},
- {"tx_aborted_errors", NN_ET_NETDEV_STAT(tx_aborted_errors)},
- {"tx_carrier_errors", NN_ET_NETDEV_STAT(tx_carrier_errors)},
- {"tx_fifo_errors", NN_ET_NETDEV_STAT(tx_fifo_errors)},
+static const struct nfp_et_stat nfp_net_et_stats[] = {
/* Stats from the device */
- {"dev_rx_discards", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_DISCARDS)},
- {"dev_rx_errors", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_ERRORS)},
- {"dev_rx_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_OCTETS)},
- {"dev_rx_uc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_UC_OCTETS)},
- {"dev_rx_mc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_MC_OCTETS)},
- {"dev_rx_bc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_BC_OCTETS)},
- {"dev_rx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_FRAMES)},
- {"dev_rx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_MC_FRAMES)},
- {"dev_rx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_BC_FRAMES)},
-
- {"dev_tx_discards", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_DISCARDS)},
- {"dev_tx_errors", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_ERRORS)},
- {"dev_tx_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_OCTETS)},
- {"dev_tx_uc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_UC_OCTETS)},
- {"dev_tx_mc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_OCTETS)},
- {"dev_tx_bc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_OCTETS)},
- {"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)},
- {"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)},
- {"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)},
-
- {"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)},
- {"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)},
+ { "dev_rx_discards", NFP_NET_CFG_STATS_RX_DISCARDS },
+ { "dev_rx_errors", NFP_NET_CFG_STATS_RX_ERRORS },
+ { "dev_rx_bytes", NFP_NET_CFG_STATS_RX_OCTETS },
+ { "dev_rx_uc_bytes", NFP_NET_CFG_STATS_RX_UC_OCTETS },
+ { "dev_rx_mc_bytes", NFP_NET_CFG_STATS_RX_MC_OCTETS },
+ { "dev_rx_bc_bytes", NFP_NET_CFG_STATS_RX_BC_OCTETS },
+ { "dev_rx_pkts", NFP_NET_CFG_STATS_RX_FRAMES },
+ { "dev_rx_mc_pkts", NFP_NET_CFG_STATS_RX_MC_FRAMES },
+ { "dev_rx_bc_pkts", NFP_NET_CFG_STATS_RX_BC_FRAMES },
+
+ { "dev_tx_discards", NFP_NET_CFG_STATS_TX_DISCARDS },
+ { "dev_tx_errors", NFP_NET_CFG_STATS_TX_ERRORS },
+ { "dev_tx_bytes", NFP_NET_CFG_STATS_TX_OCTETS },
+ { "dev_tx_uc_bytes", NFP_NET_CFG_STATS_TX_UC_OCTETS },
+ { "dev_tx_mc_bytes", NFP_NET_CFG_STATS_TX_MC_OCTETS },
+ { "dev_tx_bc_bytes", NFP_NET_CFG_STATS_TX_BC_OCTETS },
+ { "dev_tx_pkts", NFP_NET_CFG_STATS_TX_FRAMES },
+ { "dev_tx_mc_pkts", NFP_NET_CFG_STATS_TX_MC_FRAMES },
+ { "dev_tx_bc_pkts", NFP_NET_CFG_STATS_TX_BC_FRAMES },
+
+ { "bpf_pass_pkts", NFP_NET_CFG_STATS_APP0_FRAMES },
+ { "bpf_pass_bytes", NFP_NET_CFG_STATS_APP0_BYTES },
/* see comments in outro functions in nfp_bpf_jit.c to find out
* how different BPF modes use app-specific counters
*/
- {"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)},
- {"bpf_app1_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)},
- {"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)},
- {"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)},
- {"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)},
- {"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)},
+ { "bpf_app1_pkts", NFP_NET_CFG_STATS_APP1_FRAMES },
+ { "bpf_app1_bytes", NFP_NET_CFG_STATS_APP1_BYTES },
+ { "bpf_app2_pkts", NFP_NET_CFG_STATS_APP2_FRAMES },
+ { "bpf_app2_bytes", NFP_NET_CFG_STATS_APP2_BYTES },
+ { "bpf_app3_pkts", NFP_NET_CFG_STATS_APP3_FRAMES },
+ { "bpf_app3_bytes", NFP_NET_CFG_STATS_APP3_BYTES },
+};
+
+static const struct nfp_et_stat nfp_mac_et_stats[] = {
+ { "rx_octets", NFP_MAC_STATS_RX_IN_OCTETS, },
+ { "rx_frame_too_long_errors",
+ NFP_MAC_STATS_RX_FRAME_TOO_LONG_ERRORS, },
+ { "rx_range_length_errors", NFP_MAC_STATS_RX_RANGE_LENGTH_ERRORS, },
+ { "rx_vlan_reveive_ok", NFP_MAC_STATS_RX_VLAN_REVEIVE_OK, },
+ { "rx_errors", NFP_MAC_STATS_RX_IN_ERRORS, },
+ { "rx_broadcast_pkts", NFP_MAC_STATS_RX_IN_BROADCAST_PKTS, },
+ { "rx_drop_events", NFP_MAC_STATS_RX_DROP_EVENTS, },
+ { "rx_alignment_errors", NFP_MAC_STATS_RX_ALIGNMENT_ERRORS, },
+ { "rx_pause_mac_ctrl_frames",
+ NFP_MAC_STATS_RX_PAUSE_MAC_CTRL_FRAMES, },
+ { "rx_frames_received_ok", NFP_MAC_STATS_RX_FRAMES_RECEIVED_OK, },
+ { "rx_frame_check_sequence_errors",
+ NFP_MAC_STATS_RX_FRAME_CHECK_SEQUENCE_ERRORS, },
+ { "rx_unicast_pkts", NFP_MAC_STATS_RX_UNICAST_PKTS, },
+ { "rx_multicast_pkts", NFP_MAC_STATS_RX_MULTICAST_PKTS, },
+ { "rx_pkts", NFP_MAC_STATS_RX_PKTS, },
+ { "rx_undersize_pkts", NFP_MAC_STATS_RX_UNDERSIZE_PKTS, },
+ { "rx_pkts_64_octets", NFP_MAC_STATS_RX_PKTS_64_OCTETS, },
+ { "rx_pkts_65_to_127_octets",
+ NFP_MAC_STATS_RX_PKTS_65_TO_127_OCTETS, },
+ { "rx_pkts_128_to_255_octets",
+ NFP_MAC_STATS_RX_PKTS_128_TO_255_OCTETS, },
+ { "rx_pkts_256_to_511_octets",
+ NFP_MAC_STATS_RX_PKTS_256_TO_511_OCTETS, },
+ { "rx_pkts_512_to_1023_octets",
+ NFP_MAC_STATS_RX_PKTS_512_TO_1023_OCTETS, },
+ { "rx_pkts_1024_to_1518_octets",
+ NFP_MAC_STATS_RX_PKTS_1024_TO_1518_OCTETS, },
+ { "rx_pkts_1519_to_max_octets",
+ NFP_MAC_STATS_RX_PKTS_1519_TO_MAX_OCTETS, },
+ { "rx_jabbers", NFP_MAC_STATS_RX_JABBERS, },
+ { "rx_fragments", NFP_MAC_STATS_RX_FRAGMENTS, },
+ { "rx_oversize_pkts", NFP_MAC_STATS_RX_OVERSIZE_PKTS, },
+ { "rx_pause_frames_class0", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS0, },
+ { "rx_pause_frames_class1", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS1, },
+ { "rx_pause_frames_class2", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS2, },
+ { "rx_pause_frames_class3", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS3, },
+ { "rx_pause_frames_class4", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS4, },
+ { "rx_pause_frames_class5", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS5, },
+ { "rx_pause_frames_class6", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS6, },
+ { "rx_pause_frames_class7", NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS7, },
+ { "rx_mac_ctrl_frames_received",
+ NFP_MAC_STATS_RX_MAC_CTRL_FRAMES_RECEIVED, },
+ { "rx_mac_head_drop", NFP_MAC_STATS_RX_MAC_HEAD_DROP, },
+ { "tx_queue_drop", NFP_MAC_STATS_TX_QUEUE_DROP, },
+ { "tx_octets", NFP_MAC_STATS_TX_OUT_OCTETS, },
+ { "tx_vlan_transmitted_ok", NFP_MAC_STATS_TX_VLAN_TRANSMITTED_OK, },
+ { "tx_errors", NFP_MAC_STATS_TX_OUT_ERRORS, },
+ { "tx_broadcast_pkts", NFP_MAC_STATS_TX_BROADCAST_PKTS, },
+ { "tx_pause_mac_ctrl_frames",
+ NFP_MAC_STATS_TX_PAUSE_MAC_CTRL_FRAMES, },
+ { "tx_frames_transmitted_ok",
+ NFP_MAC_STATS_TX_FRAMES_TRANSMITTED_OK, },
+ { "tx_unicast_pkts", NFP_MAC_STATS_TX_UNICAST_PKTS, },
+ { "tx_multicast_pkts", NFP_MAC_STATS_TX_MULTICAST_PKTS, },
+ { "tx_pkts_64_octets", NFP_MAC_STATS_TX_PKTS_64_OCTETS, },
+ { "tx_pkts_65_to_127_octets",
+ NFP_MAC_STATS_TX_PKTS_65_TO_127_OCTETS, },
+ { "tx_pkts_128_to_255_octets",
+ NFP_MAC_STATS_TX_PKTS_128_TO_255_OCTETS, },
+ { "tx_pkts_256_to_511_octets",
+ NFP_MAC_STATS_TX_PKTS_256_TO_511_OCTETS, },
+ { "tx_pkts_512_to_1023_octets",
+ NFP_MAC_STATS_TX_PKTS_512_TO_1023_OCTETS, },
+ { "tx_pkts_1024_to_1518_octets",
+ NFP_MAC_STATS_TX_PKTS_1024_TO_1518_OCTETS, },
+ { "tx_pkts_1519_to_max_octets",
+ NFP_MAC_STATS_TX_PKTS_1519_TO_MAX_OCTETS, },
+ { "tx_pause_frames_class0", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS0, },
+ { "tx_pause_frames_class1", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS1, },
+ { "tx_pause_frames_class2", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS2, },
+ { "tx_pause_frames_class3", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS3, },
+ { "tx_pause_frames_class4", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS4, },
+ { "tx_pause_frames_class5", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS5, },
+ { "tx_pause_frames_class6", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS6, },
+ { "tx_pause_frames_class7", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS7, },
};
#define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
-#define NN_ET_RVEC_STATS_LEN (nn->dp.num_r_vecs * 3)
+#define NN_ET_SWITCH_STATS_LEN 9
#define NN_ET_RVEC_GATHER_STATS 7
-#define NN_ET_QUEUE_STATS_LEN ((nn->dp.num_tx_rings + nn->dp.num_rx_rings) * 2)
-#define NN_ET_STATS_LEN (NN_ET_GLOBAL_STATS_LEN + NN_ET_RVEC_GATHER_STATS + \
- NN_ET_RVEC_STATS_LEN + NN_ET_QUEUE_STATS_LEN)
static void nfp_net_get_nspinfo(struct nfp_app *app, char *version)
{
@@ -147,34 +194,53 @@ static void nfp_net_get_nspinfo(struct nfp_app *app, char *version)
if (IS_ERR(nsp))
return;
- snprintf(version, ETHTOOL_FWVERS_LEN, "sp:%hu.%hu",
+ snprintf(version, ETHTOOL_FWVERS_LEN, "%hu.%hu",
nfp_nsp_get_abi_ver_major(nsp),
nfp_nsp_get_abi_ver_minor(nsp));
nfp_nsp_close(nsp);
}
-static void nfp_net_get_drvinfo(struct net_device *netdev,
- struct ethtool_drvinfo *drvinfo)
+static void
+nfp_get_drvinfo(struct nfp_app *app, struct pci_dev *pdev,
+ const char *vnic_version, struct ethtool_drvinfo *drvinfo)
{
char nsp_version[ETHTOOL_FWVERS_LEN] = {};
- struct nfp_net *nn = netdev_priv(netdev);
- strlcpy(drvinfo->driver, nn->pdev->driver->name,
- sizeof(drvinfo->driver));
+ strlcpy(drvinfo->driver, pdev->driver->name, sizeof(drvinfo->driver));
strlcpy(drvinfo->version, nfp_driver_version, sizeof(drvinfo->version));
- nfp_net_get_nspinfo(nn->app, nsp_version);
+ nfp_net_get_nspinfo(app, nsp_version);
snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
- "%d.%d.%d.%d %s %s %s",
+ "%s %s %s %s", vnic_version, nsp_version,
+ nfp_app_mip_name(app), nfp_app_name(app));
+}
+
+static void
+nfp_net_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
+{
+ char vnic_version[ETHTOOL_FWVERS_LEN] = {};
+ struct nfp_net *nn = netdev_priv(netdev);
+
+ snprintf(vnic_version, sizeof(vnic_version), "%d.%d.%d.%d",
nn->fw_ver.resv, nn->fw_ver.class,
- nn->fw_ver.major, nn->fw_ver.minor, nsp_version,
- nfp_app_mip_name(nn->app), nfp_app_name(nn->app));
+ nn->fw_ver.major, nn->fw_ver.minor);
strlcpy(drvinfo->bus_info, pci_name(nn->pdev),
sizeof(drvinfo->bus_info));
- drvinfo->n_stats = NN_ET_STATS_LEN;
- drvinfo->regdump_len = NFP_NET_CFG_BAR_SZ;
+ nfp_get_drvinfo(nn->app, nn->pdev, vnic_version, drvinfo);
+}
+
+static void
+nfp_app_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
+{
+ struct nfp_app *app;
+
+ app = nfp_app_from_netdev(netdev);
+ if (!app)
+ return;
+
+ nfp_get_drvinfo(app, app->pdev, "*", drvinfo);
}
/**
@@ -346,132 +412,270 @@ static int nfp_net_set_ringparam(struct net_device *netdev,
return nfp_net_set_ring_size(nn, rxd_cnt, txd_cnt);
}
-static void nfp_net_get_strings(struct net_device *netdev,
- u32 stringset, u8 *data)
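+/* Format one ethtool string into @data and return a pointer to the next
+ * ETH_GSTRING_LEN-sized slot.
+ */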
+static __printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(data, ETH_GSTRING_LEN, fmt, args);
+ va_end(args);
+
+ return data + ETH_GSTRING_LEN;
+}
+
+static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev)
+{
+ struct nfp_net *nn = netdev_priv(netdev);
+
+ return NN_ET_RVEC_GATHER_STATS + nn->dp.num_r_vecs * 3;
+}
+
+static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
{
struct nfp_net *nn = netdev_priv(netdev);
- u8 *p = data;
int i;
- switch (stringset) {
- case ETH_SS_STATS:
- for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) {
- memcpy(p, nfp_net_et_stats[i].name, ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- }
- for (i = 0; i < nn->dp.num_r_vecs; i++) {
- sprintf(p, "rvec_%u_rx_pkts", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rvec_%u_tx_pkts", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rvec_%u_tx_busy", i);
- p += ETH_GSTRING_LEN;
- }
- strncpy(p, "hw_rx_csum_ok", ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- strncpy(p, "hw_rx_csum_inner_ok", ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- strncpy(p, "hw_rx_csum_err", ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- strncpy(p, "hw_tx_csum", ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- strncpy(p, "hw_tx_inner_csum", ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- strncpy(p, "tx_gather", ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- strncpy(p, "tx_lso", ETH_GSTRING_LEN);
- p += ETH_GSTRING_LEN;
- for (i = 0; i < nn->dp.num_tx_rings; i++) {
- sprintf(p, "txq_%u_pkts", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "txq_%u_bytes", i);
- p += ETH_GSTRING_LEN;
- }
- for (i = 0; i < nn->dp.num_rx_rings; i++) {
- sprintf(p, "rxq_%u_pkts", i);
- p += ETH_GSTRING_LEN;
- sprintf(p, "rxq_%u_bytes", i);
- p += ETH_GSTRING_LEN;
- }
- break;
+ for (i = 0; i < nn->dp.num_r_vecs; i++) {
+ data = nfp_pr_et(data, "rvec_%u_rx_pkts", i);
+ data = nfp_pr_et(data, "rvec_%u_tx_pkts", i);
+ data = nfp_pr_et(data, "rvec_%u_tx_busy", i);
}
+
+ data = nfp_pr_et(data, "hw_rx_csum_ok");
+ data = nfp_pr_et(data, "hw_rx_csum_inner_ok");
+ data = nfp_pr_et(data, "hw_rx_csum_err");
+ data = nfp_pr_et(data, "hw_tx_csum");
+ data = nfp_pr_et(data, "hw_tx_inner_csum");
+ data = nfp_pr_et(data, "tx_gather");
+ data = nfp_pr_et(data, "tx_lso");
+
+ return data;
}
-static void nfp_net_get_stats(struct net_device *netdev,
- struct ethtool_stats *stats, u64 *data)
+static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data)
{
u64 gathered_stats[NN_ET_RVEC_GATHER_STATS] = {};
struct nfp_net *nn = netdev_priv(netdev);
- struct rtnl_link_stats64 *netdev_stats;
- struct rtnl_link_stats64 temp = {};
u64 tmp[NN_ET_RVEC_GATHER_STATS];
- u8 __iomem *io_p;
- int i, j, k;
- u8 *p;
-
- netdev_stats = dev_get_stats(netdev, &temp);
-
- for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) {
- switch (nfp_net_et_stats[i].type) {
- case NETDEV_ET_STATS:
- p = (char *)netdev_stats + nfp_net_et_stats[i].off;
- data[i] = nfp_net_et_stats[i].sz == sizeof(u64) ?
- *(u64 *)p : *(u32 *)p;
- break;
-
- case NFP_NET_DEV_ET_STATS:
- io_p = nn->dp.ctrl_bar + nfp_net_et_stats[i].off;
- data[i] = readq(io_p);
- break;
- }
- }
- for (j = 0; j < nn->dp.num_r_vecs; j++) {
+ unsigned int i, j;
+
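+ /* Per-vector counters are read under u64_stats sync; retry if the
+ * writer updated them while we were reading.
+ */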
+ for (i = 0; i < nn->dp.num_r_vecs; i++) {
unsigned int start;
do {
- start = u64_stats_fetch_begin(&nn->r_vecs[j].rx_sync);
- data[i++] = nn->r_vecs[j].rx_pkts;
- tmp[0] = nn->r_vecs[j].hw_csum_rx_ok;
- tmp[1] = nn->r_vecs[j].hw_csum_rx_inner_ok;
- tmp[2] = nn->r_vecs[j].hw_csum_rx_error;
- } while (u64_stats_fetch_retry(&nn->r_vecs[j].rx_sync, start));
+ start = u64_stats_fetch_begin(&nn->r_vecs[i].rx_sync);
+ *data++ = nn->r_vecs[i].rx_pkts;
+ tmp[0] = nn->r_vecs[i].hw_csum_rx_ok;
+ tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok;
+ tmp[2] = nn->r_vecs[i].hw_csum_rx_error;
+ } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start));
do {
- start = u64_stats_fetch_begin(&nn->r_vecs[j].tx_sync);
- data[i++] = nn->r_vecs[j].tx_pkts;
- data[i++] = nn->r_vecs[j].tx_busy;
- tmp[3] = nn->r_vecs[j].hw_csum_tx;
- tmp[4] = nn->r_vecs[j].hw_csum_tx_inner;
- tmp[5] = nn->r_vecs[j].tx_gather;
- tmp[6] = nn->r_vecs[j].tx_lso;
- } while (u64_stats_fetch_retry(&nn->r_vecs[j].tx_sync, start));
-
- for (k = 0; k < NN_ET_RVEC_GATHER_STATS; k++)
- gathered_stats[k] += tmp[k];
+ start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync);
+ *data++ = nn->r_vecs[i].tx_pkts;
+ *data++ = nn->r_vecs[i].tx_busy;
+ tmp[3] = nn->r_vecs[i].hw_csum_tx;
+ tmp[4] = nn->r_vecs[i].hw_csum_tx_inner;
+ tmp[5] = nn->r_vecs[i].tx_gather;
+ tmp[6] = nn->r_vecs[i].tx_lso;
+ } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start));
+
+ for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++)
+ gathered_stats[j] += tmp[j];
}
+
for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++)
- data[i++] = gathered_stats[j];
- for (j = 0; j < nn->dp.num_tx_rings; j++) {
- io_p = nn->dp.ctrl_bar + NFP_NET_CFG_TXR_STATS(j);
- data[i++] = readq(io_p);
- io_p = nn->dp.ctrl_bar + NFP_NET_CFG_TXR_STATS(j) + 8;
- data[i++] = readq(io_p);
+ *data++ = gathered_stats[j];
+
+ return data;
+}
+
+static unsigned int
+nfp_vnic_get_hw_stats_count(unsigned int rx_rings, unsigned int tx_rings)
+{
+ return NN_ET_GLOBAL_STATS_LEN + (rx_rings + tx_rings) * 2;
+}
+
+static u8 *
+nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int rx_rings,
+ unsigned int tx_rings, bool repr)
+{
+ int swap_off, i;
+
+ BUILD_BUG_ON(NN_ET_GLOBAL_STATS_LEN < NN_ET_SWITCH_STATS_LEN * 2);
+ /* If repr is true, first add SWITCH_STATS_LEN and then subtract it,
+ * effectively swapping the RX and TX statistics (giving us the RX
+ * and TX counts from the perspective of the switch).
+ */
+ swap_off = repr * NN_ET_SWITCH_STATS_LEN;
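+ /* e.g. with NN_ET_SWITCH_STATS_LEN == 9, a repr prints the names of
+ * entries 9..17 first, followed by the names of entries 0..8.
+ */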
+
+ for (i = 0; i < NN_ET_SWITCH_STATS_LEN; i++)
+ data = nfp_pr_et(data, nfp_net_et_stats[i + swap_off].name);
+
+ for (i = NN_ET_SWITCH_STATS_LEN; i < NN_ET_SWITCH_STATS_LEN * 2; i++)
+ data = nfp_pr_et(data, nfp_net_et_stats[i - swap_off].name);
+
+ for (i = NN_ET_SWITCH_STATS_LEN * 2; i < NN_ET_GLOBAL_STATS_LEN; i++)
+ data = nfp_pr_et(data, nfp_net_et_stats[i].name);
+
+ for (i = 0; i < tx_rings; i++) {
+ data = nfp_pr_et(data, "txq_%u_pkts", i);
+ data = nfp_pr_et(data, "txq_%u_bytes", i);
+ }
+
+ for (i = 0; i < rx_rings; i++) {
+ data = nfp_pr_et(data, "rxq_%u_pkts", i);
+ data = nfp_pr_et(data, "rxq_%u_bytes", i);
+ }
+
+ return data;
+}
+
+static u64 *
+nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem,
+ unsigned int rx_rings, unsigned int tx_rings)
+{
+ unsigned int i;
+
+ for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++)
+ *data++ = readq(mem + nfp_net_et_stats[i].off);
+
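+ /* Each ring exposes two 64-bit counters: packets at the ring stats
+ * offset and bytes 8 bytes after it.
+ */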
+ for (i = 0; i < tx_rings; i++) {
+ *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i));
+ *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8);
+ }
+
+ for (i = 0; i < rx_rings; i++) {
+ *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i));
+ *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i) + 8);
}
- for (j = 0; j < nn->dp.num_rx_rings; j++) {
- io_p = nn->dp.ctrl_bar + NFP_NET_CFG_RXR_STATS(j);
- data[i++] = readq(io_p);
- io_p = nn->dp.ctrl_bar + NFP_NET_CFG_RXR_STATS(j) + 8;
- data[i++] = readq(io_p);
+
+ return data;
+}
+
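+/* MAC statistics are only reported for physical ports which have an ETH
+ * table entry and mapped MAC stats memory.
+ */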
+static unsigned int nfp_mac_get_stats_count(struct net_device *netdev)
+{
+ struct nfp_port *port;
+
+ port = nfp_port_from_netdev(netdev);
+ if (!__nfp_port_get_eth_port(port) || !port->eth_stats)
+ return 0;
+
+ return ARRAY_SIZE(nfp_mac_et_stats);
+}
+
+static u8 *nfp_mac_get_stats_strings(struct net_device *netdev, u8 *data)
+{
+ struct nfp_port *port;
+ unsigned int i;
+
+ port = nfp_port_from_netdev(netdev);
+ if (!__nfp_port_get_eth_port(port) || !port->eth_stats)
+ return data;
+
+ for (i = 0; i < ARRAY_SIZE(nfp_mac_et_stats); i++)
+ data = nfp_pr_et(data, "mac.%s", nfp_mac_et_stats[i].name);
+
+ return data;
+}
+
+static u64 *nfp_mac_get_stats(struct net_device *netdev, u64 *data)
+{
+ struct nfp_port *port;
+ unsigned int i;
+
+ port = nfp_port_from_netdev(netdev);
+ if (!__nfp_port_get_eth_port(port) || !port->eth_stats)
+ return data;
+
+ for (i = 0; i < ARRAY_SIZE(nfp_mac_et_stats); i++)
+ *data++ = readq(port->eth_stats + nfp_mac_et_stats[i].off);
+
+ return data;
+}
+
+static void nfp_net_get_strings(struct net_device *netdev,
+ u32 stringset, u8 *data)
+{
+ struct nfp_net *nn = netdev_priv(netdev);
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ data = nfp_vnic_get_sw_stats_strings(netdev, data);
+ data = nfp_vnic_get_hw_stats_strings(data, nn->dp.num_rx_rings,
+ nn->dp.num_tx_rings,
+ false);
+ data = nfp_mac_get_stats_strings(netdev, data);
+ break;
}
}
+static void
+nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct nfp_net *nn = netdev_priv(netdev);
+
+ data = nfp_vnic_get_sw_stats(netdev, data);
+ data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar,
+ nn->dp.num_rx_rings, nn->dp.num_tx_rings);
+ data = nfp_mac_get_stats(netdev, data);
+}
+
static int nfp_net_get_sset_count(struct net_device *netdev, int sset)
{
struct nfp_net *nn = netdev_priv(netdev);
switch (sset) {
case ETH_SS_STATS:
- return NN_ET_STATS_LEN;
+ return nfp_vnic_get_sw_stats_count(netdev) +
+ nfp_vnic_get_hw_stats_count(nn->dp.num_rx_rings,
+ nn->dp.num_tx_rings) +
+ nfp_mac_get_stats_count(netdev);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
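+/* Representor ethtool stats: PF/VF reprs expose the vNIC HW stats with
+ * RX/TX swapped to the switch side, physical port reprs the MAC stats.
+ */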
+static void nfp_port_get_strings(struct net_device *netdev,
+ u32 stringset, u8 *data)
+{
+ struct nfp_port *port = nfp_port_from_netdev(netdev);
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ if (nfp_port_is_vnic(port))
+ data = nfp_vnic_get_hw_stats_strings(data, 0, 0, true);
+ else
+ data = nfp_mac_get_stats_strings(netdev, data);
+ break;
+ }
+}
+
+static void
+nfp_port_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct nfp_port *port = nfp_port_from_netdev(netdev);
+
+ if (nfp_port_is_vnic(port))
+ data = nfp_vnic_get_hw_stats(data, port->vnic, 0, 0);
+ else
+ data = nfp_mac_get_stats(netdev, data);
+}
+
+static int nfp_port_get_sset_count(struct net_device *netdev, int sset)
+{
+ struct nfp_port *port = nfp_port_from_netdev(netdev);
+ unsigned int count;
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ if (nfp_port_is_vnic(port))
+ count = nfp_vnic_get_hw_stats_count(0, 0);
+ else
+ count = nfp_mac_get_stats_count(netdev);
+ return count;
default:
return -EOPNOTSUPP;
}
@@ -708,18 +912,18 @@ static int nfp_net_get_coalesce(struct net_device *netdev,
/* Other debug dumps
*/
static int
-nfp_dump_nsp_diag(struct nfp_net *nn, struct ethtool_dump *dump, void *buffer)
+nfp_dump_nsp_diag(struct nfp_app *app, struct ethtool_dump *dump, void *buffer)
{
struct nfp_resource *res;
int ret;
- if (!nn->app)
+ if (!app)
return -EOPNOTSUPP;
dump->version = 1;
dump->flag = NFP_DUMP_NSP_DIAG;
- res = nfp_resource_acquire(nn->app->cpp, NFP_RESOURCE_NSP_DIAG);
+ res = nfp_resource_acquire(app->cpp, NFP_RESOURCE_NSP_DIAG);
if (IS_ERR(res))
return PTR_ERR(res);
@@ -729,7 +933,7 @@ nfp_dump_nsp_diag(struct nfp_net *nn, struct ethtool_dump *dump, void *buffer)
goto exit_release;
}
- ret = nfp_cpp_read(nn->app->cpp, nfp_resource_cpp_id(res),
+ ret = nfp_cpp_read(app->cpp, nfp_resource_cpp_id(res),
nfp_resource_address(res),
buffer, dump->len);
if (ret != dump->len)
@@ -746,32 +950,30 @@ exit_release:
return ret;
}
-static int nfp_net_set_dump(struct net_device *netdev, struct ethtool_dump *val)
+static int nfp_app_set_dump(struct net_device *netdev, struct ethtool_dump *val)
{
- struct nfp_net *nn = netdev_priv(netdev);
+ struct nfp_app *app = nfp_app_from_netdev(netdev);
- if (!nn->app)
+ if (!app)
return -EOPNOTSUPP;
if (val->flag != NFP_DUMP_NSP_DIAG)
return -EINVAL;
- nn->ethtool_dump_flag = val->flag;
-
return 0;
}
static int
-nfp_net_get_dump_flag(struct net_device *netdev, struct ethtool_dump *dump)
+nfp_app_get_dump_flag(struct net_device *netdev, struct ethtool_dump *dump)
{
- return nfp_dump_nsp_diag(netdev_priv(netdev), dump, NULL);
+ return nfp_dump_nsp_diag(nfp_app_from_netdev(netdev), dump, NULL);
}
static int
-nfp_net_get_dump_data(struct net_device *netdev, struct ethtool_dump *dump,
+nfp_app_get_dump_data(struct net_device *netdev, struct ethtool_dump *dump,
void *buffer)
{
- return nfp_dump_nsp_diag(netdev_priv(netdev), dump, buffer);
+ return nfp_dump_nsp_diag(nfp_app_from_netdev(netdev), dump, buffer);
}
static int nfp_net_set_coalesce(struct net_device *netdev,
@@ -928,9 +1130,9 @@ static const struct ethtool_ops nfp_net_ethtool_ops = {
.set_rxfh = nfp_net_set_rxfh,
.get_regs_len = nfp_net_get_regs_len,
.get_regs = nfp_net_get_regs,
- .set_dump = nfp_net_set_dump,
- .get_dump_flag = nfp_net_get_dump_flag,
- .get_dump_data = nfp_net_get_dump_data,
+ .set_dump = nfp_app_set_dump,
+ .get_dump_flag = nfp_app_get_dump_flag,
+ .get_dump_data = nfp_app_get_dump_data,
.get_coalesce = nfp_net_get_coalesce,
.set_coalesce = nfp_net_set_coalesce,
.get_channels = nfp_net_get_channels,
@@ -939,6 +1141,17 @@ static const struct ethtool_ops nfp_net_ethtool_ops = {
.set_link_ksettings = nfp_net_set_link_ksettings,
};
+const struct ethtool_ops nfp_port_ethtool_ops = {
+ .get_drvinfo = nfp_app_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_strings = nfp_port_get_strings,
+ .get_ethtool_stats = nfp_port_get_stats,
+ .get_sset_count = nfp_port_get_sset_count,
+ .set_dump = nfp_app_set_dump,
+ .get_dump_flag = nfp_app_get_dump_flag,
+ .get_dump_data = nfp_app_get_dump_data,
+};
+
void nfp_net_set_ethtool_ops(struct net_device *netdev)
{
netdev->ethtool_ops = &nfp_net_ethtool_ops;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 34b9853..5abb9ba 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -57,6 +57,7 @@
#include "nfpcore/nfp6000_pcie.h"
#include "nfp_app.h"
#include "nfp_net_ctrl.h"
+#include "nfp_net_sriov.h"
#include "nfp_net.h"
#include "nfp_main.h"
#include "nfp_port.h"
@@ -160,6 +161,8 @@ nfp_net_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt,
static void nfp_net_pf_free_vnic(struct nfp_pf *pf, struct nfp_net *nn)
{
+ if (nfp_net_is_data_vnic(nn))
+ nfp_app_vnic_free(pf->app, nn);
nfp_port_free(nn->port);
list_del(&nn->vnic_list);
pf->num_vnics--;
@@ -204,7 +207,7 @@ nfp_net_pf_alloc_vnic(struct nfp_pf *pf, bool needs_netdev,
nn->stride_tx = stride;
if (needs_netdev) {
- err = nfp_app_vnic_init(pf->app, nn, id);
+ err = nfp_app_vnic_alloc(pf->app, nn, id);
if (err) {
nfp_net_free(nn);
return ERR_PTR(err);
@@ -242,8 +245,17 @@ nfp_net_pf_init_vnic(struct nfp_pf *pf, struct nfp_net *nn, unsigned int id)
nfp_net_info(nn);
+ if (nfp_net_is_data_vnic(nn)) {
+ err = nfp_app_vnic_init(pf->app, nn);
+ if (err)
+ goto err_devlink_port_clean;
+ }
+
return 0;
+err_devlink_port_clean:
+ if (nn->port)
+ nfp_devlink_port_unregister(nn->port);
err_dfs_clean:
nfp_net_debugfs_dir_clean(&nn->debugfs_dir);
nfp_net_clean(nn);
@@ -287,11 +299,12 @@ err_free_prev:
static void nfp_net_pf_clean_vnic(struct nfp_pf *pf, struct nfp_net *nn)
{
+ if (nfp_net_is_data_vnic(nn))
+ nfp_app_vnic_clean(pf->app, nn);
if (nn->port)
nfp_devlink_port_unregister(nn->port);
nfp_net_debugfs_dir_clean(&nn->debugfs_dir);
nfp_net_clean(nn);
- nfp_app_vnic_clean(pf->app, nn);
}
static int nfp_net_pf_alloc_irqs(struct nfp_pf *pf)
@@ -388,7 +401,7 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride)
NFP_PF_CSR_SLICE_SIZE,
&pf->ctrl_vnic_bar);
if (IS_ERR(ctrl_bar)) {
- nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n");
+ nfp_err(pf->cpp, "Failed to find ctrl vNIC memory symbol\n");
err = PTR_ERR(ctrl_bar);
goto err_app_clean;
}
@@ -456,10 +469,14 @@ static int nfp_net_pf_app_start(struct nfp_pf *pf)
{
int err;
- err = nfp_app_start(pf->app, pf->ctrl_vnic);
+ err = nfp_net_pf_app_start_ctrl(pf);
if (err)
return err;
+ err = nfp_app_start(pf->app, pf->ctrl_vnic);
+ if (err)
+ goto err_ctrl_stop;
+
if (pf->num_vfs) {
err = nfp_app_sriov_enable(pf->app, pf->num_vfs);
if (err)
@@ -470,6 +487,8 @@ static int nfp_net_pf_app_start(struct nfp_pf *pf)
err_app_stop:
nfp_app_stop(pf->app);
+err_ctrl_stop:
+ nfp_net_pf_app_stop_ctrl(pf);
return err;
}
@@ -478,10 +497,13 @@ static void nfp_net_pf_app_stop(struct nfp_pf *pf)
if (pf->num_vfs)
nfp_app_sriov_disable(pf->app);
nfp_app_stop(pf->app);
+ nfp_net_pf_app_stop_ctrl(pf);
}
static void nfp_net_pci_unmap_mem(struct nfp_pf *pf)
{
+ if (pf->vfcfg_tbl2_area)
+ nfp_cpp_area_release_free(pf->vfcfg_tbl2_area);
if (pf->vf_cfg_bar)
nfp_cpp_area_release_free(pf->vf_cfg_bar);
if (pf->mac_stats_bar)
@@ -497,7 +519,7 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf)
int err;
min_size = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE;
- mem = nfp_net_pf_map_rtsym(pf, "net.ctrl", "_pf%d_net_bar0",
+ mem = nfp_net_pf_map_rtsym(pf, "net.bar0", "_pf%d_net_bar0",
min_size, &pf->data_vnic_bar);
if (IS_ERR(mem)) {
nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n");
@@ -528,17 +550,32 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf)
pf->vf_cfg_mem = NULL;
}
+ min_size = NFP_NET_VF_CFG_SZ * pf->limit_vfs + NFP_NET_VF_CFG_MB_SZ;
+ pf->vfcfg_tbl2 = nfp_net_pf_map_rtsym(pf, "net.vfcfg_tbl2",
+ "_pf%d_net_vf_cfg2",
+ min_size, &pf->vfcfg_tbl2_area);
+ if (IS_ERR(pf->vfcfg_tbl2)) {
+ if (PTR_ERR(pf->vfcfg_tbl2) != -ENOENT) {
+ err = PTR_ERR(pf->vfcfg_tbl2);
+ goto err_unmap_vf_cfg;
+ }
+ pf->vfcfg_tbl2 = NULL;
+ }
+
mem = nfp_cpp_map_area(pf->cpp, "net.qc", 0, 0,
NFP_PCIE_QUEUE(0), NFP_QCP_QUEUE_AREA_SZ,
&pf->qc_area);
if (IS_ERR(mem)) {
nfp_err(pf->cpp, "Failed to map Queue Controller area.\n");
err = PTR_ERR(mem);
- goto err_unmap_vf_cfg;
+ goto err_unmap_vfcfg_tbl2;
}
return 0;
+err_unmap_vfcfg_tbl2:
+ if (pf->vfcfg_tbl2_area)
+ nfp_cpp_area_release_free(pf->vfcfg_tbl2_area);
err_unmap_vf_cfg:
if (pf->vf_cfg_bar)
nfp_cpp_area_release_free(pf->vf_cfg_bar);
@@ -552,7 +589,7 @@ err_unmap_ctrl:
static void nfp_net_pci_remove_finish(struct nfp_pf *pf)
{
- nfp_net_pf_app_stop_ctrl(pf);
+ nfp_net_pf_app_stop(pf);
/* stop app first, to avoid double free of ctrl vNIC's ddir */
nfp_net_debugfs_dir_clean(&pf->ddir);
@@ -683,7 +720,6 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
{
struct nfp_net_fw_version fw_ver;
u8 __iomem *ctrl_bar, *qc_bar;
- struct nfp_net *nn;
int stride;
int err;
@@ -698,7 +734,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
if (!pf->rtbl) {
nfp_err(pf->cpp, "No %s, giving up.\n",
pf->fw_loaded ? "symbol table" : "firmware found");
- return -EPROBE_DEFER;
+ return -EINVAL;
}
mutex_lock(&pf->lock);
@@ -760,7 +796,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
if (err)
goto err_free_vnics;
- err = nfp_net_pf_app_start_ctrl(pf);
+ err = nfp_net_pf_app_start(pf);
if (err)
goto err_free_irqs;
@@ -768,20 +804,12 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
if (err)
goto err_stop_app;
- err = nfp_net_pf_app_start(pf);
- if (err)
- goto err_clean_vnics;
-
mutex_unlock(&pf->lock);
return 0;
-err_clean_vnics:
- list_for_each_entry(nn, &pf->vnics, vnic_list)
- if (nfp_net_is_data_vnic(nn))
- nfp_net_pf_clean_vnic(pf, nn);
err_stop_app:
- nfp_net_pf_app_stop_ctrl(pf);
+ nfp_net_pf_app_stop(pf);
err_free_irqs:
nfp_net_pf_free_irqs(pf);
err_free_vnics:
@@ -805,8 +833,6 @@ void nfp_net_pci_remove(struct nfp_pf *pf)
if (list_empty(&pf->vnics))
goto out;
- nfp_net_pf_app_stop(pf);
-
list_for_each_entry(nn, &pf->vnics, vnic_list)
if (nfp_net_is_data_vnic(nn))
nfp_net_pf_clean_vnic(pf, nn);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 8ec5474..d540a9d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -43,6 +43,7 @@
#include "nfp_main.h"
#include "nfp_net_ctrl.h"
#include "nfp_net_repr.h"
+#include "nfp_net_sriov.h"
#include "nfp_port.h"
static void
@@ -78,12 +79,10 @@ void nfp_repr_inc_rx_stats(struct net_device *netdev, unsigned int len)
}
static void
-nfp_repr_phy_port_get_stats64(const struct nfp_app *app, u8 phy_port,
+nfp_repr_phy_port_get_stats64(struct nfp_port *port,
struct rtnl_link_stats64 *stats)
{
- u8 __iomem *mem;
-
- mem = app->pf->mac_stats_mem + phy_port * NFP_MAC_STATS_SIZE;
+ u8 __iomem *mem = port->eth_stats;
/* TX and RX stats are flipped as we are returning the stats as seen
* at the switch port corresponding to the phys port.
@@ -98,67 +97,38 @@ nfp_repr_phy_port_get_stats64(const struct nfp_app *app, u8 phy_port,
}
static void
-nfp_repr_vf_get_stats64(const struct nfp_app *app, u8 vf,
- struct rtnl_link_stats64 *stats)
+nfp_repr_vnic_get_stats64(struct nfp_port *port,
+ struct rtnl_link_stats64 *stats)
{
- u8 __iomem *mem;
-
- mem = app->pf->vf_cfg_mem + vf * NFP_NET_CFG_BAR_SZ;
-
/* TX and RX stats are flipped as we are returning the stats as seen
* at the switch port corresponding to the VF.
*/
- stats->tx_packets = readq(mem + NFP_NET_CFG_STATS_RX_FRAMES);
- stats->tx_bytes = readq(mem + NFP_NET_CFG_STATS_RX_OCTETS);
- stats->tx_dropped = readq(mem + NFP_NET_CFG_STATS_RX_DISCARDS);
-
- stats->rx_packets = readq(mem + NFP_NET_CFG_STATS_TX_FRAMES);
- stats->rx_bytes = readq(mem + NFP_NET_CFG_STATS_TX_OCTETS);
- stats->rx_dropped = readq(mem + NFP_NET_CFG_STATS_TX_DISCARDS);
-}
-
-static void
-nfp_repr_pf_get_stats64(const struct nfp_app *app, u8 pf,
- struct rtnl_link_stats64 *stats)
-{
- u8 __iomem *mem;
-
- if (pf)
- return;
+ stats->tx_packets = readq(port->vnic + NFP_NET_CFG_STATS_RX_FRAMES);
+ stats->tx_bytes = readq(port->vnic + NFP_NET_CFG_STATS_RX_OCTETS);
+ stats->tx_dropped = readq(port->vnic + NFP_NET_CFG_STATS_RX_DISCARDS);
- mem = nfp_cpp_area_iomem(app->pf->data_vnic_bar);
-
- stats->tx_packets = readq(mem + NFP_NET_CFG_STATS_RX_FRAMES);
- stats->tx_bytes = readq(mem + NFP_NET_CFG_STATS_RX_OCTETS);
- stats->tx_dropped = readq(mem + NFP_NET_CFG_STATS_RX_DISCARDS);
-
- stats->rx_packets = readq(mem + NFP_NET_CFG_STATS_TX_FRAMES);
- stats->rx_bytes = readq(mem + NFP_NET_CFG_STATS_TX_OCTETS);
- stats->rx_dropped = readq(mem + NFP_NET_CFG_STATS_TX_DISCARDS);
+ stats->rx_packets = readq(port->vnic + NFP_NET_CFG_STATS_TX_FRAMES);
+ stats->rx_bytes = readq(port->vnic + NFP_NET_CFG_STATS_TX_OCTETS);
+ stats->rx_dropped = readq(port->vnic + NFP_NET_CFG_STATS_TX_DISCARDS);
}
static void
nfp_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
{
struct nfp_repr *repr = netdev_priv(netdev);
- struct nfp_eth_table_port *eth_port;
- struct nfp_app *app = repr->app;
if (WARN_ON(!repr->port))
return;
switch (repr->port->type) {
case NFP_PORT_PHYS_PORT:
- eth_port = __nfp_port_get_eth_port(repr->port);
- if (!eth_port)
+ if (!__nfp_port_get_eth_port(repr->port))
break;
- nfp_repr_phy_port_get_stats64(app, eth_port->index, stats);
+ nfp_repr_phy_port_get_stats64(repr->port, stats);
break;
case NFP_PORT_PF_PORT:
- nfp_repr_pf_get_stats64(app, repr->port->pf_id, stats);
- break;
case NFP_PORT_VF_PORT:
- nfp_repr_vf_get_stats64(app, repr->port->vf_id, stats);
+ nfp_repr_vnic_get_stats64(repr->port, stats);
+ break;
default:
break;
}
@@ -239,15 +209,34 @@ static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
static int nfp_repr_stop(struct net_device *netdev)
{
struct nfp_repr *repr = netdev_priv(netdev);
+ int err;
+
+ err = nfp_app_repr_stop(repr->app, repr);
+ if (err)
+ return err;
- return nfp_app_repr_stop(repr->app, repr);
+ nfp_port_configure(netdev, false);
+ return 0;
}
static int nfp_repr_open(struct net_device *netdev)
{
struct nfp_repr *repr = netdev_priv(netdev);
+ int err;
+
+ err = nfp_port_configure(netdev, true);
+ if (err)
+ return err;
- return nfp_app_repr_open(repr->app, repr);
+ err = nfp_app_repr_open(repr->app, repr);
+ if (err)
+ goto err_port_disable;
+
+ return 0;
+
+err_port_disable:
+ nfp_port_configure(netdev, false);
+ return err;
}
const struct net_device_ops nfp_repr_netdev_ops = {
@@ -259,6 +248,11 @@ const struct net_device_ops nfp_repr_netdev_ops = {
.ndo_get_offload_stats = nfp_repr_get_offload_stats,
.ndo_get_phys_port_name = nfp_port_get_phys_port_name,
.ndo_setup_tc = nfp_port_setup_tc,
+ .ndo_set_vf_mac = nfp_app_set_vf_mac,
+ .ndo_set_vf_vlan = nfp_app_set_vf_vlan,
+ .ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk,
+ .ndo_get_vf_config = nfp_app_get_vf_config,
+ .ndo_set_vf_link_state = nfp_app_set_vf_link_state,
};
static void nfp_repr_clean(struct nfp_repr *repr)
@@ -301,6 +295,8 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
repr->dst->u.port_info.lower_dev = pf_netdev;
netdev->netdev_ops = &nfp_repr_netdev_ops;
+ netdev->ethtool_ops = &nfp_port_ethtool_ops;
+
SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops);
if (nfp_app_has_tc(app)) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
new file mode 100644
index 0000000..e6d2e06
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License
+ * Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/if_link.h>
+#include <linux/if_ether.h>
+
+#include "nfpcore/nfp_cpp.h"
+#include "nfp_app.h"
+#include "nfp_main.h"
+#include "nfp_net_ctrl.h"
+#include "nfp_net.h"
+#include "nfp_net_sriov.h"
+
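+/* Validate that the firmware advertises the requested capability and that
+ * the VF id is within the range of enabled VFs.
+ */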
+static int
+nfp_net_sriov_check(struct nfp_app *app, int vf, u16 cap, const char *msg)
+{
+ u16 cap_vf;
+
+ if (!app || !app->pf->vfcfg_tbl2)
+ return -EOPNOTSUPP;
+
+ cap_vf = readw(app->pf->vfcfg_tbl2 + NFP_NET_VF_CFG_MB_CAP);
+ if ((cap_vf & cap) != cap) {
+ nfp_warn(app->pf->cpp, "ndo_set_vf_%s not supported\n", msg);
+ return -EOPNOTSUPP;
+ }
+
+ if (vf < 0 || vf >= app->pf->num_vfs) {
+ nfp_warn(app->pf->cpp, "invalid VF id %d\n", vf);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+nfp_net_sriov_update(struct nfp_app *app, int vf, u16 update, const char *msg)
+{
+ struct nfp_net *nn;
+ int ret;
+
+ /* Write update info to mailbox in VF config symbol */
+ writeb(vf, app->pf->vfcfg_tbl2 + NFP_NET_VF_CFG_MB_VF_NUM);
+ writew(update, app->pf->vfcfg_tbl2 + NFP_NET_VF_CFG_MB_UPD);
+
+ nn = list_first_entry(&app->pf->vnics, struct nfp_net, vnic_list);
+ /* Signal VF reconfiguration */
+ ret = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_VF);
+ if (ret)
+ return ret;
+
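+ /* The mailbox return field holds a positive errno from the firmware */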
+ ret = readw(app->pf->vfcfg_tbl2 + NFP_NET_VF_CFG_MB_RET);
+ if (ret)
+ nfp_warn(app->pf->cpp,
+ "FW refused VF %s update with errno: %d\n", msg, ret);
+ return -ret;
+}
+
+int nfp_app_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
+{
+ struct nfp_app *app = nfp_app_from_netdev(netdev);
+ unsigned int vf_offset;
+ int err;
+
+ err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_MAC, "mac");
+ if (err)
+ return err;
+
+ if (is_multicast_ether_addr(mac)) {
+ nfp_warn(app->pf->cpp,
+ "invalid Ethernet address %pM for VF id %d\n",
+ mac, vf);
+ return -EINVAL;
+ }
+
+ /* Write MAC to VF entry in VF config symbol */
+ vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ;
+ writel(get_unaligned_be32(mac), app->pf->vfcfg_tbl2 + vf_offset);
+ writew(get_unaligned_be16(mac + 4),
+ app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_MAC_LO);
+
+ return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC");
+}
+
+int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
+ __be16 vlan_proto)
+{
+ struct nfp_app *app = nfp_app_from_netdev(netdev);
+ unsigned int vf_offset;
+ u16 vlan_tci;
+ int err;
+
+ err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN, "vlan");
+ if (err)
+ return err;
+
+ if (vlan_proto != htons(ETH_P_8021Q))
+ return -EOPNOTSUPP;
+
+ if (vlan > 4095 || qos > 7) {
+ nfp_warn(app->pf->cpp,
+ "invalid vlan id or qos for VF id %d\n", vf);
+ return -EINVAL;
+ }
+
+ /* Write VLAN tag to VF entry in VF config symbol */
+ vlan_tci = FIELD_PREP(NFP_NET_VF_CFG_VLAN_VID, vlan) |
+ FIELD_PREP(NFP_NET_VF_CFG_VLAN_QOS, qos);
+ vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ;
+ writew(vlan_tci, app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_VLAN);
+
+ return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_VLAN,
+ "vlan");
+}
+
+int nfp_app_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
+{
+ struct nfp_app *app = nfp_app_from_netdev(netdev);
+ unsigned int vf_offset;
+ u8 vf_ctrl;
+ int err;
+
+ err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_SPOOF,
+ "spoofchk");
+ if (err)
+ return err;
+
+ /* Write spoof check control bit to VF entry in VF config symbol */
+ vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ +
+ NFP_NET_VF_CFG_CTRL;
+ vf_ctrl = readb(app->pf->vfcfg_tbl2 + vf_offset);
+ vf_ctrl &= ~NFP_NET_VF_CFG_CTRL_SPOOF;
+ vf_ctrl |= FIELD_PREP(NFP_NET_VF_CFG_CTRL_SPOOF, enable);
+ writeb(vf_ctrl, app->pf->vfcfg_tbl2 + vf_offset);
+
+ return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_SPOOF,
+ "spoofchk");
+}
+
+int nfp_app_set_vf_link_state(struct net_device *netdev, int vf,
+ int link_state)
+{
+ struct nfp_app *app = nfp_app_from_netdev(netdev);
+ unsigned int vf_offset;
+ u8 vf_ctrl;
+ int err;
+
+ err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_LINK_STATE,
+ "link_state");
+ if (err)
+ return err;
+
+ switch (link_state) {
+ case IFLA_VF_LINK_STATE_AUTO:
+ case IFLA_VF_LINK_STATE_ENABLE:
+ case IFLA_VF_LINK_STATE_DISABLE:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Write link state to VF entry in VF config symbol */
+ vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ +
+ NFP_NET_VF_CFG_CTRL;
+ vf_ctrl = readb(app->pf->vfcfg_tbl2 + vf_offset);
+ vf_ctrl &= ~NFP_NET_VF_CFG_CTRL_LINK_STATE;
+ vf_ctrl |= FIELD_PREP(NFP_NET_VF_CFG_CTRL_LINK_STATE, link_state);
+ writeb(vf_ctrl, app->pf->vfcfg_tbl2 + vf_offset);
+
+ return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_LINK_STATE,
+ "link state");
+}
+
+int nfp_app_get_vf_config(struct net_device *netdev, int vf,
+ struct ifla_vf_info *ivi)
+{
+ struct nfp_app *app = nfp_app_from_netdev(netdev);
+ unsigned int vf_offset;
+ u16 vlan_tci;
+ u32 mac_hi;
+ u16 mac_lo;
+ u8 flags;
+ int err;
+
+ err = nfp_net_sriov_check(app, vf, 0, "");
+ if (err)
+ return err;
+
+ vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ;
+
+ mac_hi = readl(app->pf->vfcfg_tbl2 + vf_offset);
+ mac_lo = readw(app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_MAC_LO);
+
+ flags = readb(app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_CTRL);
+ vlan_tci = readw(app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_VLAN);
+
+ memset(ivi, 0, sizeof(*ivi));
+ ivi->vf = vf;
+
+ put_unaligned_be32(mac_hi, &ivi->mac[0]);
+ put_unaligned_be16(mac_lo, &ivi->mac[4]);
+
+ ivi->vlan = FIELD_GET(NFP_NET_VF_CFG_VLAN_VID, vlan_tci);
+ ivi->qos = FIELD_GET(NFP_NET_VF_CFG_VLAN_QOS, vlan_tci);
+
+ ivi->spoofchk = FIELD_GET(NFP_NET_VF_CFG_CTRL_SPOOF, flags);
+ ivi->linkstate = FIELD_GET(NFP_NET_VF_CFG_CTRL_LINK_STATE, flags);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
new file mode 100644
index 0000000..e9df9d1
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License
+ * Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _NFP_NET_SRIOV_H_
+#define _NFP_NET_SRIOV_H_
+
+/*
+ * SRIOV VF configuration.
+ * The configuration memory begins with a mailbox region for communication with
+ * the firmware followed by individual VF entries.
+ */
+#define NFP_NET_VF_CFG_SZ 16
+#define NFP_NET_VF_CFG_MB_SZ 16
+
+/* VF config mailbox */
+#define NFP_NET_VF_CFG_MB 0x0
+#define NFP_NET_VF_CFG_MB_CAP 0x0
+#define NFP_NET_VF_CFG_MB_CAP_MAC (0x1 << 0)
+#define NFP_NET_VF_CFG_MB_CAP_VLAN (0x1 << 1)
+#define NFP_NET_VF_CFG_MB_CAP_SPOOF (0x1 << 2)
+#define NFP_NET_VF_CFG_MB_CAP_LINK_STATE (0x1 << 3)
+#define NFP_NET_VF_CFG_MB_RET 0x2
+#define NFP_NET_VF_CFG_MB_UPD 0x4
+#define NFP_NET_VF_CFG_MB_UPD_MAC (0x1 << 0)
+#define NFP_NET_VF_CFG_MB_UPD_VLAN (0x1 << 1)
+#define NFP_NET_VF_CFG_MB_UPD_SPOOF (0x1 << 2)
+#define NFP_NET_VF_CFG_MB_UPD_LINK_STATE (0x1 << 3)
+#define NFP_NET_VF_CFG_MB_VF_NUM 0x7
+
+/* VF config entry
+ * MAC_LO is placed so that the MAC address can be read in a single
+ * 6 byte read by the NFP
+ */
+#define NFP_NET_VF_CFG_MAC 0x0
+#define NFP_NET_VF_CFG_MAC_HI 0x0
+#define NFP_NET_VF_CFG_MAC_LO 0x6
+#define NFP_NET_VF_CFG_CTRL 0x4
+#define NFP_NET_VF_CFG_CTRL_SPOOF 0x4
+#define NFP_NET_VF_CFG_CTRL_LINK_STATE 0x3
+#define NFP_NET_VF_CFG_LS_MODE_AUTO 0
+#define NFP_NET_VF_CFG_LS_MODE_ENABLE 1
+#define NFP_NET_VF_CFG_LS_MODE_DISABLE 2
+#define NFP_NET_VF_CFG_VLAN 0x8
+#define NFP_NET_VF_CFG_VLAN_QOS 0xe000
+#define NFP_NET_VF_CFG_VLAN_VID 0x0fff
+
+int nfp_app_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
+int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
+ __be16 vlan_proto);
+int nfp_app_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
+int nfp_app_set_vf_link_state(struct net_device *netdev, int vf,
+ int link_state);
+int nfp_app_get_vf_config(struct net_device *netdev, int vf,
+ struct ifla_vf_info *ivi);
+
+#endif /* _NFP_NET_SRIOV_H_ */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c
index e42644d..34a6e03 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c
@@ -88,19 +88,16 @@ const struct switchdev_ops nfp_port_switchdev_ops = {
.switchdev_port_attr_get = nfp_port_attr_get,
};
-int nfp_port_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *tc)
+int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+ void *type_data)
{
struct nfp_port *port;
- if (chain_index)
- return -EOPNOTSUPP;
-
port = nfp_port_from_netdev(netdev);
if (!port)
return -EOPNOTSUPP;
- return nfp_app_setup_tc(port->app, netdev, handle, proto, tc);
+ return nfp_app_setup_tc(port->app, netdev, type, type_data);
}
struct nfp_port *
@@ -181,6 +178,33 @@ nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
return 0;
}
+/**
+ * nfp_port_configure() - helper to set the interface configured bit
+ * @netdev: net_device instance
+ * @configed: Desired state
+ *
+ * Helper to set the ifup/ifdown state on the PHY only if there is a physical
+ * interface associated with the netdev.
+ *
+ * Return:
+ * 0 - configuration successful (or no change);
+ * -ERRNO - configuration failed.
+ */
+int nfp_port_configure(struct net_device *netdev, bool configed)
+{
+ struct nfp_eth_table_port *eth_port;
+ struct nfp_port *port;
+ int err;
+
+ port = nfp_port_from_netdev(netdev);
+ eth_port = __nfp_port_get_eth_port(port);
+ if (!eth_port)
+ return 0;
+
+ err = nfp_eth_set_configured(port->app->cpp, eth_port->index, configed);
+ return err < 0 && err != -EOPNOTSUPP ? err : 0;
+}
+
int nfp_port_init_phy_port(struct nfp_pf *pf, struct nfp_app *app,
struct nfp_port *port, unsigned int id)
{
@@ -201,6 +225,9 @@ int nfp_port_init_phy_port(struct nfp_pf *pf, struct nfp_app *app,
port->eth_port = &pf->eth_tbl->ports[id];
port->eth_id = pf->eth_tbl->ports[id].index;
+ if (pf->mac_stats_mem)
+ port->eth_stats =
+ pf->mac_stats_mem + port->eth_id * NFP_MAC_STATS_SIZE;
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index a33d22e..51dcb9c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -36,7 +36,6 @@
#include <net/devlink.h>
-struct tc_to_netdev;
struct net_device;
struct nfp_app;
struct nfp_pf;
@@ -77,8 +76,10 @@ enum nfp_port_flags {
* @dl_port: devlink port structure
* @eth_id: for %NFP_PORT_PHYS_PORT port ID in NFP enumeration scheme
* @eth_port: for %NFP_PORT_PHYS_PORT translated ETH Table port entry
+ * @eth_stats: for %NFP_PORT_PHYS_PORT MAC stats if available
* @pf_id: for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT ID of the PCI PF (0-3)
* @vf_id: for %NFP_PORT_VF_PORT ID of the PCI VF within @pf_id
+ * @vnic: for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT vNIC ctrl memory
* @port_list: entry on pf's list of ports
*/
struct nfp_port {
@@ -96,21 +97,29 @@ struct nfp_port {
struct {
unsigned int eth_id;
struct nfp_eth_table_port *eth_port;
+ u8 __iomem *eth_stats;
};
/* NFP_PORT_PF_PORT, NFP_PORT_VF_PORT */
struct {
unsigned int pf_id;
unsigned int vf_id;
+ u8 __iomem *vnic;
};
};
struct list_head port_list;
};
+extern const struct ethtool_ops nfp_port_ethtool_ops;
extern const struct switchdev_ops nfp_port_switchdev_ops;
-int nfp_port_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *tc);
+int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+ void *type_data);
+
+static inline bool nfp_port_is_vnic(const struct nfp_port *port)
+{
+ return port->type == NFP_PORT_PF_PORT || port->type == NFP_PORT_VF_PORT;
+}
struct nfp_port *nfp_port_from_netdev(struct net_device *netdev);
struct nfp_port *
@@ -120,6 +129,7 @@ struct nfp_eth_table_port *nfp_port_get_eth_port(struct nfp_port *port);
int
nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len);
+int nfp_port_configure(struct net_device *netdev, bool configed);
struct nfp_port *
nfp_port_alloc(struct nfp_app *app, enum nfp_port_type type,
@@ -144,31 +154,32 @@ void nfp_devlink_port_unregister(struct nfp_port *port);
#define NFP_MAC_STATS_SIZE 0x0200
#define NFP_MAC_STATS_RX_IN_OCTETS (NFP_MAC_STATS_BASE + 0x000)
+ /* unused 0x008 */
#define NFP_MAC_STATS_RX_FRAME_TOO_LONG_ERRORS (NFP_MAC_STATS_BASE + 0x010)
#define NFP_MAC_STATS_RX_RANGE_LENGTH_ERRORS (NFP_MAC_STATS_BASE + 0x018)
#define NFP_MAC_STATS_RX_VLAN_REVEIVE_OK (NFP_MAC_STATS_BASE + 0x020)
#define NFP_MAC_STATS_RX_IN_ERRORS (NFP_MAC_STATS_BASE + 0x028)
#define NFP_MAC_STATS_RX_IN_BROADCAST_PKTS (NFP_MAC_STATS_BASE + 0x030)
-#define NFP_MAC_STATS_RX_STATS_DROP_EVENTS (NFP_MAC_STATS_BASE + 0x038)
+#define NFP_MAC_STATS_RX_DROP_EVENTS (NFP_MAC_STATS_BASE + 0x038)
#define NFP_MAC_STATS_RX_ALIGNMENT_ERRORS (NFP_MAC_STATS_BASE + 0x040)
#define NFP_MAC_STATS_RX_PAUSE_MAC_CTRL_FRAMES (NFP_MAC_STATS_BASE + 0x048)
#define NFP_MAC_STATS_RX_FRAMES_RECEIVED_OK (NFP_MAC_STATS_BASE + 0x050)
#define NFP_MAC_STATS_RX_FRAME_CHECK_SEQUENCE_ERRORS (NFP_MAC_STATS_BASE + 0x058)
#define NFP_MAC_STATS_RX_UNICAST_PKTS (NFP_MAC_STATS_BASE + 0x060)
#define NFP_MAC_STATS_RX_MULTICAST_PKTS (NFP_MAC_STATS_BASE + 0x068)
-#define NFP_MAC_STATS_RX_STATS_PKTS (NFP_MAC_STATS_BASE + 0x070)
-#define NFP_MAC_STATS_RX_STATS_UNDERSIZE_PKTS (NFP_MAC_STATS_BASE + 0x078)
-#define NFP_MAC_STATS_RX_STATS_PKTS_64_OCTETS (NFP_MAC_STATS_BASE + 0x080)
-#define NFP_MAC_STATS_RX_STATS_PKTS_65_TO_127_OCTETS (NFP_MAC_STATS_BASE + 0x088)
-#define NFP_MAC_STATS_RX_STATS_PKTS_512_TO_1023_OCTETS (NFP_MAC_STATS_BASE + 0x090)
-#define NFP_MAC_STATS_RX_STATS_PKTS_1024_TO_1518_OCTETS (NFP_MAC_STATS_BASE + 0x098)
-#define NFP_MAC_STATS_RX_STATS_JABBERS (NFP_MAC_STATS_BASE + 0x0a0)
-#define NFP_MAC_STATS_RX_STATS_FRAGMENTS (NFP_MAC_STATS_BASE + 0x0a8)
+#define NFP_MAC_STATS_RX_PKTS (NFP_MAC_STATS_BASE + 0x070)
+#define NFP_MAC_STATS_RX_UNDERSIZE_PKTS (NFP_MAC_STATS_BASE + 0x078)
+#define NFP_MAC_STATS_RX_PKTS_64_OCTETS (NFP_MAC_STATS_BASE + 0x080)
+#define NFP_MAC_STATS_RX_PKTS_65_TO_127_OCTETS (NFP_MAC_STATS_BASE + 0x088)
+#define NFP_MAC_STATS_RX_PKTS_512_TO_1023_OCTETS (NFP_MAC_STATS_BASE + 0x090)
+#define NFP_MAC_STATS_RX_PKTS_1024_TO_1518_OCTETS (NFP_MAC_STATS_BASE + 0x098)
+#define NFP_MAC_STATS_RX_JABBERS (NFP_MAC_STATS_BASE + 0x0a0)
+#define NFP_MAC_STATS_RX_FRAGMENTS (NFP_MAC_STATS_BASE + 0x0a8)
#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS2 (NFP_MAC_STATS_BASE + 0x0b0)
#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS3 (NFP_MAC_STATS_BASE + 0x0b8)
-#define NFP_MAC_STATS_RX_STATS_PKTS_128_TO_255_OCTETS (NFP_MAC_STATS_BASE + 0x0c0)
-#define NFP_MAC_STATS_RX_STATS_PKTS_256_TO_511_OCTETS (NFP_MAC_STATS_BASE + 0x0c8)
-#define NFP_MAC_STATS_RX_STATS_PKTS_1519_TO_MAX_OCTETS (NFP_MAC_STATS_BASE + 0x0d0)
+#define NFP_MAC_STATS_RX_PKTS_128_TO_255_OCTETS (NFP_MAC_STATS_BASE + 0x0c0)
+#define NFP_MAC_STATS_RX_PKTS_256_TO_511_OCTETS (NFP_MAC_STATS_BASE + 0x0c8)
+#define NFP_MAC_STATS_RX_PKTS_1519_TO_MAX_OCTETS (NFP_MAC_STATS_BASE + 0x0d0)
#define NFP_MAC_STATS_RX_OVERSIZE_PKTS (NFP_MAC_STATS_BASE + 0x0d8)
#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS0 (NFP_MAC_STATS_BASE + 0x0e0)
#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS1 (NFP_MAC_STATS_BASE + 0x0e8)
@@ -178,9 +189,12 @@ void nfp_devlink_port_unregister(struct nfp_port *port);
#define NFP_MAC_STATS_RX_PAUSE_FRAMES_CLASS7 (NFP_MAC_STATS_BASE + 0x108)
#define NFP_MAC_STATS_RX_MAC_CTRL_FRAMES_RECEIVED (NFP_MAC_STATS_BASE + 0x110)
#define NFP_MAC_STATS_RX_MAC_HEAD_DROP (NFP_MAC_STATS_BASE + 0x118)
-
+ /* unused 0x120 */
+ /* unused 0x128 */
+ /* unused 0x130 */
#define NFP_MAC_STATS_TX_QUEUE_DROP (NFP_MAC_STATS_BASE + 0x138)
#define NFP_MAC_STATS_TX_OUT_OCTETS (NFP_MAC_STATS_BASE + 0x140)
+ /* unused 0x148 */
#define NFP_MAC_STATS_TX_VLAN_TRANSMITTED_OK (NFP_MAC_STATS_BASE + 0x150)
#define NFP_MAC_STATS_TX_OUT_ERRORS (NFP_MAC_STATS_BASE + 0x158)
#define NFP_MAC_STATS_TX_BROADCAST_PKTS (NFP_MAC_STATS_BASE + 0x160)
@@ -192,8 +206,16 @@ void nfp_devlink_port_unregister(struct nfp_port *port);
#define NFP_MAC_STATS_TX_UNICAST_PKTS (NFP_MAC_STATS_BASE + 0x190)
#define NFP_MAC_STATS_TX_MULTICAST_PKTS (NFP_MAC_STATS_BASE + 0x198)
#define NFP_MAC_STATS_TX_PKTS_65_TO_127_OCTETS (NFP_MAC_STATS_BASE + 0x1a0)
-#define NFP_MAC_STATS_TX_PKTS_127_TO_512_OCTETS (NFP_MAC_STATS_BASE + 0x1a8)
-#define NFP_MAC_STATS_TX_PKTS_128_TO_1518_OCTETS (NFP_MAC_STATS_BASE + 0x1b0)
-#define NFP_MAC_STATS_TX_PKTS_1518_TO_MAX_OCTETS (NFP_MAC_STATS_BASE + 0x1b8)
+#define NFP_MAC_STATS_TX_PKTS_128_TO_255_OCTETS (NFP_MAC_STATS_BASE + 0x1a8)
+#define NFP_MAC_STATS_TX_PKTS_1024_TO_1518_OCTETS (NFP_MAC_STATS_BASE + 0x1b0)
+#define NFP_MAC_STATS_TX_PKTS_1519_TO_MAX_OCTETS (NFP_MAC_STATS_BASE + 0x1b8)
+#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS0 (NFP_MAC_STATS_BASE + 0x1c0)
+#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS1 (NFP_MAC_STATS_BASE + 0x1c8)
+#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS4 (NFP_MAC_STATS_BASE + 0x1d0)
+#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS5 (NFP_MAC_STATS_BASE + 0x1d8)
+#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS2 (NFP_MAC_STATS_BASE + 0x1e0)
+#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS3 (NFP_MAC_STATS_BASE + 0x1e8)
+#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS6 (NFP_MAC_STATS_BASE + 0x1f0)
+#define NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS7 (NFP_MAC_STATS_BASE + 0x1f8)
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
index c2bc36e..f6f7c08 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c
@@ -391,7 +391,10 @@ int nfp_eth_config_commit_end(struct nfp_nsp *nsp)
* Enable or disable PHY module (this usually means setting the TX lanes
* disable bits).
*
- * Return: 0 or -ERRNO.
+ * Return:
+ * 0 - configuration successful;
+ * 1 - no changes were needed;
+ * -ERRNO - configuration failed.
*/
int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, bool enable)
{
@@ -427,7 +430,10 @@ int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, bool enable)
*
* Set the ifup/ifdown state on the PHY.
*
- * Return: 0 or -ERRNO.
+ * Return:
+ * 0 - configuration successful;
+ * 1 - no changes were needed;
+ * -ERRNO - configuration failed.
*/
int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed)
{
@@ -439,6 +445,14 @@ int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed)
if (IS_ERR(nsp))
return PTR_ERR(nsp);
+ /* Older ABI versions did support this feature; however, it has only
+ * been reliable since ABI 20.
+ */
+ if (nfp_nsp_get_abi_ver_minor(nsp) < 20) {
+ nfp_eth_config_cleanup_end(nsp);
+ return -EOPNOTSUPP;
+ }
+
entries = nfp_nsp_config_entries(nsp);
/* Check if we are already in requested state */
diff --git a/drivers/net/ethernet/netronome/nfp/nic/main.c b/drivers/net/ethernet/netronome/nfp/nic/main.c
index 5206842..d5b587f 100644
--- a/drivers/net/ethernet/netronome/nfp/nic/main.c
+++ b/drivers/net/ethernet/netronome/nfp/nic/main.c
@@ -49,10 +49,22 @@ static int nfp_nic_init(struct nfp_app *app)
return 0;
}
+static int nfp_nic_sriov_enable(struct nfp_app *app, int num_vfs)
+{
+ return 0;
+}
+
+static void nfp_nic_sriov_disable(struct nfp_app *app)
+{
+}
+
const struct nfp_app_type app_nic = {
.id = NFP_APP_CORE_NIC,
.name = "nic",
.init = nfp_nic_init,
- .vnic_init = nfp_app_nic_vnic_init,
+ .vnic_alloc = nfp_app_nic_vnic_alloc,
+
+ .sriov_enable = nfp_nic_sriov_enable,
+ .sriov_disable = nfp_nic_sriov_disable,
};
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index aa912f4..994a83a 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -5629,9 +5629,8 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
setup_timer(&np->oom_kick, nv_do_rx_refill, (unsigned long)dev);
setup_timer(&np->nic_poll, nv_do_nic_poll, (unsigned long)dev);
- init_timer_deferrable(&np->stats_poll);
- np->stats_poll.data = (unsigned long) dev;
- np->stats_poll.function = nv_do_stats_poll; /* timer handler */
+ setup_deferrable_timer(&np->stats_poll, nv_do_stats_poll,
+ (unsigned long)dev);
err = pci_enable_device(pci_dev);
if (err)
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 827de83..f2e8de6 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -2828,7 +2828,7 @@ netxen_show_bridged_mode(struct device *dev,
return sprintf(buf, "%d\n", bridged_mode);
}
-static struct device_attribute dev_attr_bridged_mode = {
+static const struct device_attribute dev_attr_bridged_mode = {
.attr = {.name = "bridged_mode", .mode = (S_IRUGO | S_IWUSR)},
.show = netxen_show_bridged_mode,
.store = netxen_store_bridged_mode,
@@ -2860,7 +2860,7 @@ netxen_show_diag_mode(struct device *dev,
!!(adapter->flags & NETXEN_NIC_DIAG_ENABLED));
}
-static struct device_attribute dev_attr_diag_mode = {
+static const struct device_attribute dev_attr_diag_mode = {
.attr = {.name = "diag_mode", .mode = (S_IRUGO | S_IWUSR)},
.show = netxen_show_diag_mode,
.store = netxen_store_diag_mode,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 6c87bed..58a689f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1684,6 +1684,8 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
"Load request was sent. Load code: 0x%x\n",
load_code);
+ qed_mcp_set_capabilities(p_hwfn, p_hwfn->p_main_ptt);
+
qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt);
p_hwfn->first_on_engine = (load_code ==
@@ -2472,6 +2474,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
u32 port_cfg_addr, link_temp, nvm_cfg_addr, device_capabilities;
u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, core_cfg;
+ struct qed_mcp_link_capabilities *p_caps;
struct qed_mcp_link_params *link;
/* Read global nvm_cfg address */
@@ -2534,6 +2537,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
/* Read default link configuration */
link = &p_hwfn->mcp_info->link_input;
+ p_caps = &p_hwfn->mcp_info->link_capabilities;
port_cfg_addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
offsetof(struct nvm_cfg1, port[MFW_PORT(p_hwfn)]);
link_temp = qed_rd(p_hwfn, p_ptt,
@@ -2588,10 +2592,45 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
NVM_CFG1_PORT_DRV_FLOW_CONTROL_TX);
link->loopback_mode = 0;
- DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
- "Read default link: Speed 0x%08x, Adv. Speed 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x\n",
- link->speed.forced_speed, link->speed.advertised_speeds,
- link->speed.autoneg, link->pause.autoneg);
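+ /* Default EEE configuration is only present when the MFW advertises
+ * EEE support.
+ */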
+ if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) {
+ link_temp = qed_rd(p_hwfn, p_ptt, port_cfg_addr +
+ offsetof(struct nvm_cfg1_port, ext_phy));
+ link_temp &= NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_MASK;
+ link_temp >>= NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_OFFSET;
+ p_caps->default_eee = QED_MCP_EEE_ENABLED;
+ link->eee.enable = true;
+ switch (link_temp) {
+ case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_DISABLED:
+ p_caps->default_eee = QED_MCP_EEE_DISABLED;
+ link->eee.enable = false;
+ break;
+ case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_BALANCED:
+ p_caps->eee_lpi_timer = EEE_TX_TIMER_USEC_BALANCED_TIME;
+ break;
+ case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_AGGRESSIVE:
+ p_caps->eee_lpi_timer =
+ EEE_TX_TIMER_USEC_AGGRESSIVE_TIME;
+ break;
+ case NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_LOW_LATENCY:
+ p_caps->eee_lpi_timer = EEE_TX_TIMER_USEC_LATENCY_TIME;
+ break;
+ }
+
+ link->eee.tx_lpi_timer = p_caps->eee_lpi_timer;
+ link->eee.tx_lpi_enable = link->eee.enable;
+ link->eee.adv_caps = QED_EEE_1G_ADV | QED_EEE_10G_ADV;
+ } else {
+ p_caps->default_eee = QED_MCP_EEE_UNSUPPORTED;
+ }
+
+ DP_VERBOSE(p_hwfn,
+ NETIF_MSG_LINK,
+ "Read default link: Speed 0x%08x, Adv. Speed 0x%08x, AN: 0x%02x, PAUSE AN: 0x%02x EEE: %02x [%08x usec]\n",
+ link->speed.forced_speed,
+ link->speed.advertised_speeds,
+ link->speed.autoneg,
+ link->pause.autoneg,
+ p_caps->default_eee, p_caps->eee_lpi_timer);
/* Read Multi-function information from shmem */
addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
@@ -2751,6 +2790,27 @@ static void qed_hw_info_port_num(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
qed_hw_info_port_num_ah(p_hwfn, p_ptt);
}
+static void qed_get_eee_caps(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ struct qed_mcp_link_capabilities *p_caps;
+ u32 eee_status;
+
+ p_caps = &p_hwfn->mcp_info->link_capabilities;
+ if (p_caps->default_eee == QED_MCP_EEE_UNSUPPORTED)
+ return;
+
+ p_caps->eee_speed_caps = 0;
+ eee_status = qed_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr +
+ offsetof(struct public_port, eee_status));
+ eee_status = (eee_status & EEE_SUPPORTED_SPEED_MASK) >>
+ EEE_SUPPORTED_SPEED_OFFSET;
+
+ if (eee_status & EEE_1G_SUPPORTED)
+ p_caps->eee_speed_caps |= QED_EEE_1G_ADV;
+ if (eee_status & EEE_10G_SUPPORTED)
+ p_caps->eee_speed_caps |= QED_EEE_10G_ADV;
+}
+
static int
qed_get_hw_info(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
@@ -2767,6 +2827,8 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn,
qed_hw_info_port_num(p_hwfn, p_ptt);
+ qed_mcp_get_capabilities(p_hwfn, p_ptt);
+
qed_hw_get_nvm_info(p_hwfn, p_ptt);
rc = qed_int_igu_read_cam(p_hwfn, p_ptt);
@@ -2785,6 +2847,8 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn,
p_hwfn->mcp_info->func_info.ovlan;
qed_mcp_cmd_port_init(p_hwfn, p_ptt);
+
+ qed_get_eee_caps(p_hwfn, p_ptt);
}
if (qed_mcp_is_init(p_hwfn)) {
@@ -3630,7 +3694,7 @@ static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
}
p_coal_timeset = p_eth_qzone;
- memset(p_coal_timeset, 0, eth_qzone_size);
+ memset(p_eth_qzone, 0, eth_qzone_size);
SET_FIELD(p_coal_timeset->value, COALESCING_TIMESET_TIMESET, timeset);
SET_FIELD(p_coal_timeset->value, COALESCING_TIMESET_VALID, 1);
qed_memcpy_to(p_hwfn, p_ptt, hw_addr, p_eth_qzone, eth_qzone_size);
@@ -3638,12 +3702,46 @@ static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
return 0;
}
-int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
- u16 coalesce, u16 qid, u16 sb_id)
+int qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle)
+{
+ struct qed_queue_cid *p_cid = p_handle;
+ struct qed_hwfn *p_hwfn;
+ struct qed_ptt *p_ptt;
+ int rc = 0;
+
+ p_hwfn = p_cid->p_owner;
+
+ if (IS_VF(p_hwfn->cdev))
+ return qed_vf_pf_set_coalesce(p_hwfn, rx_coal, tx_coal, p_cid);
+
+ p_ptt = qed_ptt_acquire(p_hwfn);
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (rx_coal) {
+ rc = qed_set_rxq_coalesce(p_hwfn, p_ptt, rx_coal, p_cid);
+ if (rc)
+ goto out;
+ p_hwfn->cdev->rx_coalesce_usecs = rx_coal;
+ }
+
+ if (tx_coal) {
+ rc = qed_set_txq_coalesce(p_hwfn, p_ptt, tx_coal, p_cid);
+ if (rc)
+ goto out;
+ p_hwfn->cdev->tx_coalesce_usecs = tx_coal;
+ }
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+ return rc;
+}
+
+int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u16 coalesce, struct qed_queue_cid *p_cid)
{
struct ustorm_eth_queue_zone eth_qzone;
u8 timeset, timer_res;
- u16 fw_qid = 0;
u32 address;
int rc;
@@ -3660,32 +3758,29 @@ int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
}
timeset = (u8)(coalesce >> timer_res);
- rc = qed_fw_l2_queue(p_hwfn, qid, &fw_qid);
- if (rc)
- return rc;
-
- rc = qed_int_set_timer_res(p_hwfn, p_ptt, timer_res, sb_id, false);
+ rc = qed_int_set_timer_res(p_hwfn, p_ptt, timer_res,
+ p_cid->sb_igu_id, false);
if (rc)
goto out;
- address = BAR0_MAP_REG_USDM_RAM + USTORM_ETH_QUEUE_ZONE_OFFSET(fw_qid);
+ address = BAR0_MAP_REG_USDM_RAM +
+ USTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id);
rc = qed_set_coalesce(p_hwfn, p_ptt, address, &eth_qzone,
sizeof(struct ustorm_eth_queue_zone), timeset);
if (rc)
goto out;
- p_hwfn->cdev->rx_coalesce_usecs = coalesce;
out:
return rc;
}
-int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
- u16 coalesce, u16 qid, u16 sb_id)
+int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u16 coalesce, struct qed_queue_cid *p_cid)
{
struct xstorm_eth_queue_zone eth_qzone;
u8 timeset, timer_res;
- u16 fw_qid = 0;
u32 address;
int rc;
@@ -3702,22 +3797,16 @@ int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
}
timeset = (u8)(coalesce >> timer_res);
- rc = qed_fw_l2_queue(p_hwfn, qid, &fw_qid);
- if (rc)
- return rc;
-
- rc = qed_int_set_timer_res(p_hwfn, p_ptt, timer_res, sb_id, true);
+ rc = qed_int_set_timer_res(p_hwfn, p_ptt, timer_res,
+ p_cid->sb_igu_id, true);
if (rc)
goto out;
- address = BAR0_MAP_REG_XSDM_RAM + XSTORM_ETH_QUEUE_ZONE_OFFSET(fw_qid);
+ address = BAR0_MAP_REG_XSDM_RAM +
+ XSTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id);
rc = qed_set_coalesce(p_hwfn, p_ptt, address, &eth_qzone,
sizeof(struct xstorm_eth_queue_zone), timeset);
- if (rc)
- goto out;
-
- p_hwfn->cdev->tx_coalesce_usecs = coalesce;
out:
return rc;
}
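Both setters above end with timeset = (u8)(coalesce >> timer_res): the requested microsecond value is quantized into an 8-bit timeset at a per-SB timer resolution. The resolution-selection code is elided from the hunks, so the ranges below are an assumption based on the 0-0x7f / 0x80-0xff / 0x100-0x1ff split named in the qed_dev_api.h comment; a standalone sketch:

#include <stdint.h>
#include <stdio.h>

static int quantize(uint16_t usec, uint8_t *timer_res, uint8_t *timeset)
{
	if (usec <= 0x7f)
		*timer_res = 0;		/* 1 usec steps */
	else if (usec <= 0xff)
		*timer_res = 1;		/* 2 usec steps */
	else if (usec <= 0x1ff)
		*timer_res = 2;		/* 4 usec steps */
	else
		return -1;		/* above 511 usec: rejected */

	*timeset = (uint8_t)(usec >> *timer_res);
	return 0;
}

int main(void)
{
	uint8_t res, set;

	if (!quantize(511, &res, &set))
		/* 511 -> timeset 127, effective 508: the ~3 usec error the
		 * API comment warns about for the highest values.
		 */
		printf("timer_res=%u timeset=%u effective=%u\n",
		       (unsigned)res, (unsigned)set, (unsigned)(set << res));
	return 0;
}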
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index 1f1df1b..defdda1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -443,38 +443,35 @@ int qed_final_cleanup(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, u16 id, bool is_vf);
/**
- * @brief qed_set_rxq_coalesce - Configure coalesce parameters for an Rx queue
- * The fact that we can configure coalescing to up to 511, but on varying
- * accuracy [the bigger the value the less accurate] up to a mistake of 3usec
- * for the highest values.
+ * @brief qed_get_queue_coalesce - Retrieve coalesce value for a given queue.
*
* @param p_hwfn
- * @param p_ptt
- * @param coalesce - Coalesce value in micro seconds.
- * @param qid - Queue index.
- * @param qid - SB Id
+ * @param p_coal - stores the coalesce value read from the hardware.
+ * @param p_handle
*
* @return int
- */
-int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
- u16 coalesce, u16 qid, u16 sb_id);
+ **/
+int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *coal, void *handle);
/**
- * @brief qed_set_txq_coalesce - Configure coalesce parameters for a Tx queue
- * While the API allows setting coalescing per-qid, all tx queues sharing a
- * SB should be in same range [i.e., either 0-0x7f, 0x80-0xff or 0x100-0x1ff]
- * otherwise configuration would break.
+ * @brief qed_set_queue_coalesce - Configure coalesce parameters for Rx and
+ * Tx queues. Coalescing can be configured up to 511 usec, but with
+ * decreasing accuracy [the bigger the value, the less accurate], up to an
+ * error of 3 usec for the highest values.
+ * While the API allows setting coalescing per-qid, all queues sharing an SB
+ * should be in the same range [i.e., either 0-0x7f, 0x80-0xff or
+ * 0x100-0x1ff], otherwise the configuration would break.
*
- * @param p_hwfn
- * @param p_ptt
- * @param coalesce - Coalesce value in micro seconds.
- * @param qid - Queue index.
- * @param qid - SB Id
+ * @param rx_coal - Rx coalesce value in microseconds.
+ * @param tx_coal - Tx coalesce value in microseconds.
+ * @param p_handle
*
* @return int
- */
-int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
- u16 coalesce, u16 qid, u16 sb_id);
+ **/
+int
+qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle);
+
const char *qed_hw_get_resc_name(enum qed_resources res_id);
#endif
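The same-range constraint in the comment above follows from the hardware model: qed_int_set_timer_res() programs the resolution per status block (p_cid->sb_igu_id), so every queue on that SB shares one granularity. A tiny sketch of how two requests conflict, under the same assumed range split as the previous sketch:

#include <stdio.h>

static int range_of(unsigned usec)
{
	return usec <= 0x7f ? 0 : usec <= 0xff ? 1 : usec <= 0x1ff ? 2 : -1;
}

int main(void)
{
	unsigned q0 = 100, q1 = 300;	/* two queues on one SB */

	if (range_of(q0) != range_of(q1))
		printf("conflict: q0 needs timer_res %d, q1 needs %d\n",
		       range_of(q0), range_of(q1));
	return 0;
}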
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 31fb0bf..3427fe70 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -10825,6 +10825,17 @@ struct eth_phy_cfg {
#define ETH_LOOPBACK_EXT (3)
#define ETH_LOOPBACK_MAC (4)
+ u32 eee_cfg;
+#define EEE_CFG_EEE_ENABLED BIT(0)
+#define EEE_CFG_TX_LPI BIT(1)
+#define EEE_CFG_ADV_SPEED_1G BIT(2)
+#define EEE_CFG_ADV_SPEED_10G BIT(3)
+#define EEE_TX_TIMER_USEC_MASK (0xfffffff0)
+#define EEE_TX_TIMER_USEC_OFFSET 4
+#define EEE_TX_TIMER_USEC_BALANCED_TIME (0xa00)
+#define EEE_TX_TIMER_USEC_AGGRESSIVE_TIME (0x100)
+#define EEE_TX_TIMER_USEC_LATENCY_TIME (0x6000)
+
u32 feature_config_flags;
#define ETH_EEE_MODE_ADV_LPI (1 << 0)
};
@@ -11242,6 +11253,25 @@ struct public_port {
u32 wol_pkt_len;
u32 wol_pkt_details;
struct dcb_dscp_map dcb_dscp_map;
+
+ u32 eee_status;
+#define EEE_ACTIVE_BIT BIT(0)
+#define EEE_LD_ADV_STATUS_MASK 0x000000f0
+#define EEE_LD_ADV_STATUS_OFFSET 4
+#define EEE_1G_ADV BIT(1)
+#define EEE_10G_ADV BIT(2)
+#define EEE_LP_ADV_STATUS_MASK 0x00000f00
+#define EEE_LP_ADV_STATUS_OFFSET 8
+#define EEE_SUPPORTED_SPEED_MASK 0x0000f000
+#define EEE_SUPPORTED_SPEED_OFFSET 12
+#define EEE_1G_SUPPORTED BIT(1)
+#define EEE_10G_SUPPORTED BIT(2)
+
+ u32 eee_remote;
+#define EEE_REMOTE_TW_TX_MASK 0x0000ffff
+#define EEE_REMOTE_TW_TX_OFFSET 0
+#define EEE_REMOTE_TW_RX_MASK 0xffff0000
+#define EEE_REMOTE_TW_RX_OFFSET 16
};
struct public_func {
@@ -11570,6 +11600,9 @@ struct public_drv_mb {
#define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL 0x002b0000
#define DRV_MSG_CODE_OS_WOL 0x002e0000
+#define DRV_MSG_CODE_FEATURE_SUPPORT 0x00300000
+#define DRV_MSG_CODE_GET_MFW_FEATURE_SUPPORT 0x00310000
+
#define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff
u32 drv_mb_param;
@@ -11653,6 +11686,10 @@ struct public_drv_mb {
#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT 8
#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_MASK 0x0000FF00
+#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_MASK 0x0000FFFF
+#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_OFFSET 0
+#define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE 0x00000002
+
u32 fw_mb_header;
#define FW_MSG_CODE_MASK 0xffff0000
#define FW_MSG_CODE_UNSUPPORTED 0x00000000
@@ -11696,6 +11733,9 @@ struct public_drv_mb {
#define FW_MB_PARAM_GET_PF_RDMA_IWARP 0x2
#define FW_MB_PARAM_GET_PF_RDMA_BOTH 0x3
+/* get MFW feature support response */
+#define FW_MB_PARAM_FEATURE_SUPPORT_EEE 0x00000002
+
#define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR (1 << 0)
u32 drv_pulse_mb;
@@ -11891,7 +11931,16 @@ struct nvm_cfg1_port {
#define NVM_CFG1_PORT_DRV_FLOW_CONTROL_TX 0x4
u32 phy_cfg;
u32 mgmt_traffic;
+
u32 ext_phy;
+ /* EEE power saving mode */
+#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_MASK 0x00FF0000
+#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_OFFSET 16
+#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_DISABLED 0x0
+#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_BALANCED 0x1
+#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_AGGRESSIVE 0x2
+#define NVM_CFG1_PORT_EEE_POWER_SAVING_MODE_LOW_LATENCY 0x3
+
u32 mba_cfg1;
u32 mba_cfg2;
u32 vf_cfg;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 0ba5ec8..0853389 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2047,6 +2047,106 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
return qed_spq_post(p_hwfn, p_ent, NULL);
}
+int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_queue_cid *p_cid, u16 *p_rx_coal)
+{
+ u32 coalesce, address, is_valid;
+ struct cau_sb_entry sb_entry;
+ u8 timer_res;
+ int rc;
+
+ rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
+ p_cid->sb_igu_id * sizeof(u64),
+ (u64)(uintptr_t)&sb_entry, 2, 0);
+ if (rc) {
+ DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
+ return rc;
+ }
+
+ timer_res = GET_FIELD(sb_entry.params, CAU_SB_ENTRY_TIMER_RES0);
+
+ address = BAR0_MAP_REG_USDM_RAM +
+ USTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id);
+ coalesce = qed_rd(p_hwfn, p_ptt, address);
+
+ is_valid = GET_FIELD(coalesce, COALESCING_TIMESET_VALID);
+ if (!is_valid)
+ return -EINVAL;
+
+ coalesce = GET_FIELD(coalesce, COALESCING_TIMESET_TIMESET);
+ *p_rx_coal = (u16)(coalesce << timer_res);
+
+ return 0;
+}
+
+int qed_get_txq_coalesce(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_queue_cid *p_cid, u16 *p_tx_coal)
+{
+ u32 coalesce, address, is_valid;
+ struct cau_sb_entry sb_entry;
+ u8 timer_res;
+ int rc;
+
+ rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
+ p_cid->sb_igu_id * sizeof(u64),
+ (u64)(uintptr_t)&sb_entry, 2, 0);
+ if (rc) {
+ DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
+ return rc;
+ }
+
+ timer_res = GET_FIELD(sb_entry.params, CAU_SB_ENTRY_TIMER_RES1);
+
+ address = BAR0_MAP_REG_XSDM_RAM +
+ XSTORM_ETH_QUEUE_ZONE_OFFSET(p_cid->abs.queue_id);
+ coalesce = qed_rd(p_hwfn, p_ptt, address);
+
+ is_valid = GET_FIELD(coalesce, COALESCING_TIMESET_VALID);
+ if (!is_valid)
+ return -EINVAL;
+
+ coalesce = GET_FIELD(coalesce, COALESCING_TIMESET_TIMESET);
+ *p_tx_coal = (u16)(coalesce << timer_res);
+
+ return 0;
+}
+
+int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *p_coal, void *handle)
+{
+ struct qed_queue_cid *p_cid = handle;
+ struct qed_ptt *p_ptt;
+ int rc = 0;
+
+ if (IS_VF(p_hwfn->cdev)) {
+ rc = qed_vf_pf_get_coalesce(p_hwfn, p_coal, p_cid);
+ if (rc)
+ DP_NOTICE(p_hwfn, "Unable to read queue coalescing\n");
+
+ return rc;
+ }
+
+ p_ptt = qed_ptt_acquire(p_hwfn);
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (p_cid->b_is_rx) {
+ rc = qed_get_rxq_coalesce(p_hwfn, p_ptt, p_cid, p_coal);
+ if (rc)
+ goto out;
+ } else {
+ rc = qed_get_txq_coalesce(p_hwfn, p_ptt, p_cid, p_coal);
+ if (rc)
+ goto out;
+ }
+
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
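qed_get_rxq_coalesce() and qed_get_txq_coalesce() reverse the quantization: they fetch the timer resolution from the SB's CAU entry, read the queue-zone word, check the valid flag, and shift the stored timeset back up. A standalone sketch of that reconstruction; the field widths here are assumptions, since the COALESCING_TIMESET_* layout is not part of this diff:

#include <stdint.h>
#include <stdio.h>

#define TIMESET_MASK	0x7f	/* assumed 7-bit timeset field */
#define VALID_BIT	0x80	/* assumed valid flag */

static int read_back(uint32_t zone_word, uint8_t timer_res, uint16_t *usec)
{
	if (!(zone_word & VALID_BIT))
		return -1;	/* mirrors the -EINVAL above */

	*usec = (uint16_t)((zone_word & TIMESET_MASK) << timer_res);
	return 0;
}

int main(void)
{
	uint16_t usec;

	if (!read_back(VALID_BIT | 125, 2, &usec))
		printf("coalesce = %u usec\n", (unsigned)usec);	/* 500 */
	return 0;
}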
static int qed_fill_eth_dev_info(struct qed_dev *cdev,
struct qed_dev_eth_info *info)
{
@@ -2696,6 +2796,20 @@ static int qed_ntuple_arfs_filter_config(struct qed_dev *cdev, void *cookie,
return rc;
}
+static int qed_get_coalesce(struct qed_dev *cdev, u16 *coal, void *handle)
+{
+ struct qed_queue_cid *p_cid = handle;
+ struct qed_hwfn *p_hwfn;
+ int rc;
+
+ p_hwfn = p_cid->p_owner;
+ rc = qed_get_queue_coalesce(p_hwfn, coal, handle);
+ if (rc)
+ DP_NOTICE(p_hwfn, "Unable to read queue coalescing\n");
+
+ return rc;
+}
+
static int qed_fp_cqe_completion(struct qed_dev *dev,
u8 rss_id, struct eth_slow_path_rx_cqe *cqe)
{
@@ -2739,6 +2853,7 @@ static const struct qed_eth_ops qed_eth_ops_pass = {
.tunn_config = &qed_tunn_configure,
.ntuple_filter_config = &qed_ntuple_arfs_filter_config,
.configure_arfs_searcher = &qed_configure_arfs_searcher,
+ .get_coalesce = &qed_get_coalesce,
};
const struct qed_eth_ops *qed_get_eth_ops(void)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index f8f09aa..cc1f248 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -400,4 +400,20 @@ qed_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn,
u8 qed_mcast_bin_from_mac(u8 *mac);
-#endif /* _QED_L2_H */
+int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u16 coalesce, struct qed_queue_cid *p_cid);
+
+int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u16 coalesce, struct qed_queue_cid *p_cid);
+
+int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_queue_cid *p_cid, u16 *p_hw_coal);
+
+int qed_get_txq_coalesce(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_queue_cid *p_cid, u16 *p_hw_coal);
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index b113996..2783288 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -954,9 +954,7 @@ static int qed_slowpath_start(struct qed_dev *cdev,
struct qed_tunnel_info tunn_info;
const u8 *data = NULL;
struct qed_hwfn *hwfn;
-#ifdef CONFIG_RFS_ACCEL
struct qed_ptt *p_ptt;
-#endif
int rc = -EINVAL;
if (qed_iov_wq_start(cdev))
@@ -972,7 +970,6 @@ static int qed_slowpath_start(struct qed_dev *cdev,
goto err;
}
-#ifdef CONFIG_RFS_ACCEL
if (cdev->num_hwfns == 1) {
p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
if (p_ptt) {
@@ -983,7 +980,6 @@ static int qed_slowpath_start(struct qed_dev *cdev,
goto err;
}
}
-#endif
}
cdev->rx_coalesce_usecs = QED_DEFAULT_RX_USECS;
@@ -1091,12 +1087,10 @@ err:
if (IS_PF(cdev))
release_firmware(cdev->firmware);
-#ifdef CONFIG_RFS_ACCEL
if (IS_PF(cdev) && (cdev->num_hwfns == 1) &&
QED_LEADING_HWFN(cdev)->p_arfs_ptt)
qed_ptt_release(QED_LEADING_HWFN(cdev),
QED_LEADING_HWFN(cdev)->p_arfs_ptt);
-#endif
qed_iov_wq_stop(cdev, false);
@@ -1111,11 +1105,9 @@ static int qed_slowpath_stop(struct qed_dev *cdev)
qed_ll2_dealloc_if(cdev);
if (IS_PF(cdev)) {
-#ifdef CONFIG_RFS_ACCEL
if (cdev->num_hwfns == 1)
qed_ptt_release(QED_LEADING_HWFN(cdev),
QED_LEADING_HWFN(cdev)->p_arfs_ptt);
-#endif
qed_free_stream_mem(cdev);
if (IS_QED_ETH_IF(cdev))
qed_sriov_disable(cdev, true);
@@ -1305,6 +1297,10 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params)
}
}
+ if (params->override_flags & QED_LINK_OVERRIDE_EEE_CONFIG)
+ memcpy(&link_params->eee, &params->eee,
+ sizeof(link_params->eee));
+
rc = qed_mcp_set_link(hwfn, ptt, params->link_up);
qed_ptt_release(hwfn, ptt);
@@ -1491,6 +1487,21 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
if (link.partner_adv_pause == QED_LINK_PARTNER_ASYMMETRIC_PAUSE ||
link.partner_adv_pause == QED_LINK_PARTNER_BOTH_PAUSE)
if_link->lp_caps |= QED_LM_Asym_Pause_BIT;
+
+ if (link_caps.default_eee == QED_MCP_EEE_UNSUPPORTED) {
+ if_link->eee_supported = false;
+ } else {
+ if_link->eee_supported = true;
+ if_link->eee_active = link.eee_active;
+ if_link->sup_caps = link_caps.eee_speed_caps;
+ /* MFW clears adv_caps on eee disable; use configured value */
+ if_link->eee.adv_caps = link.eee_adv_caps ? link.eee_adv_caps :
+ params.eee.adv_caps;
+ if_link->eee.lp_adv_caps = link.eee_lp_adv_caps;
+ if_link->eee.enable = params.eee.enable;
+ if_link->eee.tx_lpi_enable = params.eee.tx_lpi_enable;
+ if_link->eee.tx_lpi_timer = params.eee.tx_lpi_timer;
+ }
}
static void qed_get_current_link(struct qed_dev *cdev,
@@ -1557,36 +1568,10 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type,
return rc;
}
-static void qed_get_coalesce(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal)
-{
- *rx_coal = cdev->rx_coalesce_usecs;
- *tx_coal = cdev->tx_coalesce_usecs;
-}
-
static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
- u16 qid, u16 sb_id)
+ void *handle)
{
- struct qed_hwfn *hwfn;
- struct qed_ptt *ptt;
- int hwfn_index;
- int status = 0;
-
- hwfn_index = qid % cdev->num_hwfns;
- hwfn = &cdev->hwfns[hwfn_index];
- ptt = qed_ptt_acquire(hwfn);
- if (!ptt)
- return -EAGAIN;
-
- status = qed_set_rxq_coalesce(hwfn, ptt, rx_coal,
- qid / cdev->num_hwfns, sb_id);
- if (status)
- goto out;
- status = qed_set_txq_coalesce(hwfn, ptt, tx_coal,
- qid / cdev->num_hwfns, sb_id);
-out:
- qed_ptt_release(hwfn, ptt);
-
- return status;
+ return qed_set_queue_coalesce(rx_coal, tx_coal, handle);
}
static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
@@ -1735,7 +1720,6 @@ const struct qed_common_ops qed_common_ops_pass = {
.chain_alloc = &qed_chain_alloc,
.chain_free = &qed_chain_free,
.nvm_get_image = &qed_nvm_get_image,
- .get_coalesce = &qed_get_coalesce,
.set_coalesce = &qed_set_coalesce,
.set_led = &qed_set_led,
.update_drv_state = &qed_update_drv_state,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 3eb2416..376485d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1097,6 +1097,31 @@ static void qed_mcp_handle_transceiver_change(struct qed_hwfn *p_hwfn,
DP_NOTICE(p_hwfn, "Transceiver is unplugged.\n");
}
+static void qed_mcp_read_eee_config(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_mcp_link_state *p_link)
+{
+ u32 eee_status, val;
+
+ p_link->eee_adv_caps = 0;
+ p_link->eee_lp_adv_caps = 0;
+ eee_status = qed_rd(p_hwfn,
+ p_ptt,
+ p_hwfn->mcp_info->port_addr +
+ offsetof(struct public_port, eee_status));
+ p_link->eee_active = !!(eee_status & EEE_ACTIVE_BIT);
+ val = (eee_status & EEE_LD_ADV_STATUS_MASK) >> EEE_LD_ADV_STATUS_OFFSET;
+ if (val & EEE_1G_ADV)
+ p_link->eee_adv_caps |= QED_EEE_1G_ADV;
+ if (val & EEE_10G_ADV)
+ p_link->eee_adv_caps |= QED_EEE_10G_ADV;
+ val = (eee_status & EEE_LP_ADV_STATUS_MASK) >> EEE_LP_ADV_STATUS_OFFSET;
+ if (val & EEE_1G_ADV)
+ p_link->eee_lp_adv_caps |= QED_EEE_1G_ADV;
+ if (val & EEE_10G_ADV)
+ p_link->eee_lp_adv_caps |= QED_EEE_10G_ADV;
+}
+
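qed_mcp_read_eee_config() above unpacks one shmem word into three link-state facts: whether EEE is active, what the local device (LD) advertises, and what the link partner (LP) advertises. A standalone sketch of the same unpacking, with the constants copied from the qed_hsi.h hunk for illustration:

#include <stdint.h>
#include <stdio.h>

#define EEE_ACTIVE_BIT			(1u << 0)
#define EEE_LD_ADV_STATUS_MASK		0x000000f0
#define EEE_LD_ADV_STATUS_OFFSET	4
#define EEE_LP_ADV_STATUS_MASK		0x00000f00
#define EEE_LP_ADV_STATUS_OFFSET	8
#define EEE_1G_ADV			(1u << 1)
#define EEE_10G_ADV			(1u << 2)

int main(void)
{
	/* pretend MFW reports: EEE active, we advertise 1G+10G, partner 10G */
	uint32_t eee_status = EEE_ACTIVE_BIT |
		((EEE_1G_ADV | EEE_10G_ADV) << EEE_LD_ADV_STATUS_OFFSET) |
		(EEE_10G_ADV << EEE_LP_ADV_STATUS_OFFSET);
	uint32_t ld = (eee_status & EEE_LD_ADV_STATUS_MASK) >>
		      EEE_LD_ADV_STATUS_OFFSET;
	uint32_t lp = (eee_status & EEE_LP_ADV_STATUS_MASK) >>
		      EEE_LP_ADV_STATUS_OFFSET;

	printf("active=%d ld_1g=%d ld_10g=%d lp_10g=%d\n",
	       !!(eee_status & EEE_ACTIVE_BIT),
	       !!(ld & EEE_1G_ADV), !!(ld & EEE_10G_ADV),
	       !!(lp & EEE_10G_ADV));
	return 0;
}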
static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, bool b_reset)
{
@@ -1228,6 +1253,9 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
p_link->sfp_tx_fault = !!(status & LINK_STATUS_SFP_TX_FAULT);
+ if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE)
+ qed_mcp_read_eee_config(p_hwfn, p_ptt, p_link);
+
qed_link_update(p_hwfn);
out:
spin_unlock_bh(&p_hwfn->mcp_info->link_lock);
@@ -1251,6 +1279,19 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up)
phy_cfg.pause |= (params->pause.forced_tx) ? ETH_PAUSE_TX : 0;
phy_cfg.adv_speed = params->speed.advertised_speeds;
phy_cfg.loopback_mode = params->loopback_mode;
+ if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) {
+ if (params->eee.enable)
+ phy_cfg.eee_cfg |= EEE_CFG_EEE_ENABLED;
+ if (params->eee.tx_lpi_enable)
+ phy_cfg.eee_cfg |= EEE_CFG_TX_LPI;
+ if (params->eee.adv_caps & QED_EEE_1G_ADV)
+ phy_cfg.eee_cfg |= EEE_CFG_ADV_SPEED_1G;
+ if (params->eee.adv_caps & QED_EEE_10G_ADV)
+ phy_cfg.eee_cfg |= EEE_CFG_ADV_SPEED_10G;
+ phy_cfg.eee_cfg |= (params->eee.tx_lpi_timer <<
+ EEE_TX_TIMER_USEC_OFFSET) &
+ EEE_TX_TIMER_USEC_MASK;
+ }
p_hwfn->b_drv_link_init = b_up;
@@ -2822,3 +2863,28 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock,
p_unlock->resource = resource;
}
}
+
+int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ u32 mcp_resp;
+ int rc;
+
+ rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_GET_MFW_FEATURE_SUPPORT,
+ 0, &mcp_resp, &p_hwfn->mcp_info->capabilities);
+ if (!rc)
+ DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_PROBE),
+ "MFW supported features: %08x\n",
+ p_hwfn->mcp_info->capabilities);
+
+ return rc;
+}
+
+int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ u32 mcp_resp, mcp_param, features;
+
+ features = DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE;
+
+ return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_FEATURE_SUPPORT,
+ features, &mcp_resp, &mcp_param);
+}
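Together, qed_mcp_set_capabilities() and qed_mcp_get_capabilities() form a two-way handshake: the driver advertises its features during LOAD_REQ and records the MFW's features during early init, and something like EEE is then used only when both sides set its bit. A minimal mock of that negotiation; the mailbox is simulated here, not the real qed_mcp_cmd() path:

#include <stdint.h>
#include <stdio.h>

#define FEATURE_EEE 0x2	/* matches the ..._FEATURE_SUPPORT_..._EEE bits */

static uint32_t mfw_caps = FEATURE_EEE;	/* stand-in for the firmware side */

static uint32_t mbox_cmd(uint32_t drv_features)
{
	(void)drv_features;	/* a real MFW would record these */
	return mfw_caps;	/* ...and report its own mask back */
}

int main(void)
{
	uint32_t negotiated = mbox_cmd(FEATURE_EEE) & FEATURE_EEE;

	printf("EEE usable: %s\n", negotiated ? "yes" : "no");
	return 0;
}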
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index af03b36..c7ec239 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -53,15 +53,25 @@ struct qed_mcp_link_pause_params {
bool forced_tx;
};
+enum qed_mcp_eee_mode {
+ QED_MCP_EEE_DISABLED,
+ QED_MCP_EEE_ENABLED,
+ QED_MCP_EEE_UNSUPPORTED
+};
+
struct qed_mcp_link_params {
- struct qed_mcp_link_speed_params speed;
- struct qed_mcp_link_pause_params pause;
- u32 loopback_mode;
+ struct qed_mcp_link_speed_params speed;
+ struct qed_mcp_link_pause_params pause;
+ u32 loopback_mode;
+ struct qed_link_eee_params eee;
};
struct qed_mcp_link_capabilities {
u32 speed_capabilities;
bool default_speed_autoneg;
+ enum qed_mcp_eee_mode default_eee;
+ u32 eee_lpi_timer;
+ u8 eee_speed_caps;
};
struct qed_mcp_link_state {
@@ -102,6 +112,9 @@ struct qed_mcp_link_state {
u8 partner_adv_pause;
bool sfp_tx_fault;
+ bool eee_active;
+ u8 eee_adv_caps;
+ u8 eee_lp_adv_caps;
};
struct qed_mcp_function_info {
@@ -546,6 +559,9 @@ struct qed_mcp_info {
u8 *mfw_mb_shadow;
u16 mfw_mb_length;
u32 mcp_hist;
+
+ /* Capabilities negotiated with the MFW */
+ u32 capabilities;
};
struct qed_mcp_mb_params {
@@ -925,5 +941,20 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock,
struct qed_resc_unlock_params *p_unlock,
enum qed_resc_lock
resource, bool b_is_permanent);
+/**
+ * @brief Learn which features the MFW supports; to be done during early init.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
+ * @brief Inform the MFW of the set of features supported by the driver;
+ * should be done as part of the LOAD_REQ flow.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 2cfd3bd..3f40b1d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -3400,6 +3400,157 @@ static void qed_iov_vf_mbx_release(struct qed_hwfn *p_hwfn,
length, status);
}
+static void qed_iov_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_vf_info *p_vf)
+{
+ struct qed_iov_vf_mbx *mbx = &p_vf->vf_mbx;
+ struct pfvf_read_coal_resp_tlv *p_resp;
+ struct vfpf_read_coal_req_tlv *req;
+ u8 status = PFVF_STATUS_FAILURE;
+ struct qed_vf_queue *p_queue;
+ struct qed_queue_cid *p_cid;
+ u16 coal = 0, qid, i;
+ bool b_is_rx;
+ int rc = 0;
+
+ mbx->offset = (u8 *)mbx->reply_virt;
+ req = &mbx->req_virt->read_coal_req;
+
+ qid = req->qid;
+ b_is_rx = req->is_rx ? true : false;
+
+ if (b_is_rx) {
+ if (!qed_iov_validate_rxq(p_hwfn, p_vf, qid,
+ QED_IOV_VALIDATE_Q_ENABLE)) {
+ DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+ "VF[%d]: Invalid Rx queue_id = %d\n",
+ p_vf->abs_vf_id, qid);
+ goto send_resp;
+ }
+
+ p_cid = qed_iov_get_vf_rx_queue_cid(&p_vf->vf_queues[qid]);
+ rc = qed_get_rxq_coalesce(p_hwfn, p_ptt, p_cid, &coal);
+ if (rc)
+ goto send_resp;
+ } else {
+ if (!qed_iov_validate_txq(p_hwfn, p_vf, qid,
+ QED_IOV_VALIDATE_Q_ENABLE)) {
+ DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+ "VF[%d]: Invalid Tx queue_id = %d\n",
+ p_vf->abs_vf_id, qid);
+ goto send_resp;
+ }
+ for (i = 0; i < MAX_QUEUES_PER_QZONE; i++) {
+ p_queue = &p_vf->vf_queues[qid];
+ if ((!p_queue->cids[i].p_cid) ||
+ (!p_queue->cids[i].b_is_tx))
+ continue;
+
+ p_cid = p_queue->cids[i].p_cid;
+
+ rc = qed_get_txq_coalesce(p_hwfn, p_ptt, p_cid, &coal);
+ if (rc)
+ goto send_resp;
+ break;
+ }
+ }
+
+ status = PFVF_STATUS_SUCCESS;
+
+send_resp:
+ p_resp = qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_COALESCE_READ,
+ sizeof(*p_resp));
+ p_resp->coal = coal;
+
+ qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END,
+ sizeof(struct channel_list_end_tlv));
+
+ qed_iov_send_response(p_hwfn, p_ptt, p_vf, sizeof(*p_resp), status);
+}
+
+static void qed_iov_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_vf_info *vf)
+{
+ struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
+ struct vfpf_update_coalesce *req;
+ u8 status = PFVF_STATUS_FAILURE;
+ struct qed_queue_cid *p_cid;
+ u16 rx_coal, tx_coal;
+ int rc = 0, i;
+ u16 qid;
+
+ req = &mbx->req_virt->update_coalesce;
+
+ rx_coal = req->rx_coal;
+ tx_coal = req->tx_coal;
+ qid = req->qid;
+
+ if (!qed_iov_validate_rxq(p_hwfn, vf, qid,
+ QED_IOV_VALIDATE_Q_ENABLE) && rx_coal) {
+ DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+ "VF[%d]: Invalid Rx queue_id = %d\n",
+ vf->abs_vf_id, qid);
+ goto out;
+ }
+
+ if (!qed_iov_validate_txq(p_hwfn, vf, qid,
+ QED_IOV_VALIDATE_Q_ENABLE) && tx_coal) {
+ DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+ "VF[%d]: Invalid Tx queue_id = %d\n",
+ vf->abs_vf_id, qid);
+ goto out;
+ }
+
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_IOV,
+ "VF[%d]: Setting coalesce for VF rx_coal = %d, tx_coal = %d at queue = %d\n",
+ vf->abs_vf_id, rx_coal, tx_coal, qid);
+
+ if (rx_coal) {
+ p_cid = qed_iov_get_vf_rx_queue_cid(&vf->vf_queues[qid]);
+
+ rc = qed_set_rxq_coalesce(p_hwfn, p_ptt, rx_coal, p_cid);
+ if (rc) {
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_IOV,
+ "VF[%d]: Unable to set rx queue = %d coalesce\n",
+ vf->abs_vf_id, vf->vf_queues[qid].fw_rx_qid);
+ goto out;
+ }
+ vf->rx_coal = rx_coal;
+ }
+
+ if (tx_coal) {
+ struct qed_vf_queue *p_queue = &vf->vf_queues[qid];
+
+ for (i = 0; i < MAX_QUEUES_PER_QZONE; i++) {
+ if (!p_queue->cids[i].p_cid)
+ continue;
+
+ if (!p_queue->cids[i].b_is_tx)
+ continue;
+
+ rc = qed_set_txq_coalesce(p_hwfn, p_ptt, tx_coal,
+ p_queue->cids[i].p_cid);
+
+ if (rc) {
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_IOV,
+ "VF[%d]: Unable to set tx queue coalesce\n",
+ vf->abs_vf_id);
+ goto out;
+ }
+ }
+ vf->tx_coal = tx_coal;
+ }
+
+ status = PFVF_STATUS_SUCCESS;
+out:
+ qed_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_COALESCE_UPDATE,
+ sizeof(struct pfvf_def_resp_tlv), status);
+}
static int
qed_iov_vf_flr_poll_dorq(struct qed_hwfn *p_hwfn,
struct qed_vf_info *p_vf, struct qed_ptt *p_ptt)
@@ -3725,6 +3876,12 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
case CHANNEL_TLV_UPDATE_TUNN_PARAM:
qed_iov_vf_mbx_update_tunn_param(p_hwfn, p_ptt, p_vf);
break;
+ case CHANNEL_TLV_COALESCE_UPDATE:
+ qed_iov_vf_pf_set_coalesce(p_hwfn, p_ptt, p_vf);
+ break;
+ case CHANNEL_TLV_COALESCE_READ:
+ qed_iov_vf_pf_get_coalesce(p_hwfn, p_ptt, p_vf);
+ break;
}
} else if (qed_iov_tlv_supported(mbx->first_tlv.tl.type)) {
DP_VERBOSE(p_hwfn, QED_MSG_IOV,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
index c2e44bc..3955929 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
@@ -217,6 +217,9 @@ struct qed_vf_info {
u8 num_rxqs;
u8 num_txqs;
+ u16 rx_coal;
+ u16 tx_coal;
+
u8 num_sbs;
u8 num_mac_filters;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 1926d1e..91b5e9f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -1343,6 +1343,81 @@ exit:
return rc;
}
+int qed_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn,
+ u16 *p_coal, struct qed_queue_cid *p_cid)
+{
+ struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
+ struct pfvf_read_coal_resp_tlv *resp;
+ struct vfpf_read_coal_req_tlv *req;
+ int rc;
+
+ /* clear mailbox and prep header tlv */
+ req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_COALESCE_READ, sizeof(*req));
+ req->qid = p_cid->rel.queue_id;
+ req->is_rx = p_cid->b_is_rx ? 1 : 0;
+
+ qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END,
+ sizeof(struct channel_list_end_tlv));
+ resp = &p_iov->pf2vf_reply->read_coal_resp;
+
+ rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
+ if (rc)
+ goto exit;
+
+ if (resp->hdr.status != PFVF_STATUS_SUCCESS)
+ goto exit;
+
+ *p_coal = resp->coal;
+exit:
+ qed_vf_pf_req_end(p_hwfn, rc);
+
+ return rc;
+}
+
+int
+qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn,
+ u16 rx_coal, u16 tx_coal, struct qed_queue_cid *p_cid)
+{
+ struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
+ struct vfpf_update_coalesce *req;
+ struct pfvf_def_resp_tlv *resp;
+ int rc;
+
+ /* clear mailbox and prep header tlv */
+ req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_COALESCE_UPDATE, sizeof(*req));
+
+ req->rx_coal = rx_coal;
+ req->tx_coal = tx_coal;
+ req->qid = p_cid->rel.queue_id;
+
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_IOV,
+ "Setting coalesce rx_coal = %d, tx_coal = %d at queue = %d\n",
+ rx_coal, tx_coal, req->qid);
+
+ /* add list termination tlv */
+ qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END,
+ sizeof(struct channel_list_end_tlv));
+
+ resp = &p_iov->pf2vf_reply->default_resp;
+ rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
+ if (rc)
+ goto exit;
+
+ if (resp->hdr.status != PFVF_STATUS_SUCCESS)
+ goto exit;
+
+ if (rx_coal)
+ p_hwfn->cdev->rx_coalesce_usecs = rx_coal;
+
+ if (tx_coal)
+ p_hwfn->cdev->tx_coalesce_usecs = tx_coal;
+
+exit:
+ qed_vf_pf_req_end(p_hwfn, rc);
+ return rc;
+}
+
u16 qed_vf_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id)
{
struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h
index 34d9b88..97d44df 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h
@@ -497,6 +497,27 @@ struct tlv_buffer_size {
u8 tlv_buffer[TLV_BUFFER_SIZE];
};
+struct vfpf_update_coalesce {
+ struct vfpf_first_tlv first_tlv;
+ u16 rx_coal;
+ u16 tx_coal;
+ u16 qid;
+ u8 padding[2];
+};
+
+struct vfpf_read_coal_req_tlv {
+ struct vfpf_first_tlv first_tlv;
+ u16 qid;
+ u8 is_rx;
+ u8 padding[5];
+};
+
+struct pfvf_read_coal_resp_tlv {
+ struct pfvf_tlv hdr;
+ u16 coal;
+ u8 padding[6];
+};
+
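The explicit padding in the three new TLVs keeps every channel structure a multiple of 8 bytes, so requests and replies can be walked back to back. A compile-time sketch; the request/reply header layouts below are assumptions (they are not shown in this diff), and only the three payload structs mirror the hunk above:

#include <stdint.h>
#include <assert.h>

struct channel_tlv { uint16_t type; uint16_t length; };
struct vfpf_first_tlv {		/* assumed 16-byte request header */
	struct channel_tlv tl;
	uint32_t padding;
	uint64_t reply_address;
};
struct pfvf_tlv {		/* assumed 8-byte reply header */
	struct channel_tlv tl;
	uint8_t result;
	uint8_t padding[3];
};

struct vfpf_update_coalesce {
	struct vfpf_first_tlv first_tlv;
	uint16_t rx_coal, tx_coal, qid;
	uint8_t padding[2];
};
struct vfpf_read_coal_req_tlv {
	struct vfpf_first_tlv first_tlv;
	uint16_t qid;
	uint8_t is_rx;
	uint8_t padding[5];
};
struct pfvf_read_coal_resp_tlv {
	struct pfvf_tlv hdr;
	uint16_t coal;
	uint8_t padding[6];
};

static_assert(sizeof(struct vfpf_update_coalesce) % 8 == 0, "8-byte aligned");
static_assert(sizeof(struct vfpf_read_coal_req_tlv) % 8 == 0, "8-byte aligned");
static_assert(sizeof(struct pfvf_read_coal_resp_tlv) % 8 == 0, "8-byte aligned");

int main(void) { return 0; }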
union vfpf_tlvs {
struct vfpf_first_tlv first_tlv;
struct vfpf_acquire_tlv acquire;
@@ -509,7 +530,8 @@ union vfpf_tlvs {
struct vfpf_vport_update_tlv vport_update;
struct vfpf_ucast_filter_tlv ucast_filter;
struct vfpf_update_tunn_param_tlv tunn_param_update;
- struct channel_list_end_tlv list_end;
+ struct vfpf_update_coalesce update_coalesce;
+ struct vfpf_read_coal_req_tlv read_coal_req;
struct tlv_buffer_size tlv_buf_size;
};
@@ -519,6 +541,7 @@ union pfvf_tlvs {
struct tlv_buffer_size tlv_buf_size;
struct pfvf_start_queue_resp_tlv queue_start;
struct pfvf_update_tunn_param_tlv tunn_param_resp;
+ struct pfvf_read_coal_resp_tlv read_coal_resp;
};
enum qed_bulletin_bit {
@@ -624,8 +647,9 @@ enum {
CHANNEL_TLV_VPORT_UPDATE_ACCEPT_ANY_VLAN,
CHANNEL_TLV_VPORT_UPDATE_SGE_TPA,
CHANNEL_TLV_UPDATE_TUNN_PARAM,
- CHANNEL_TLV_RESERVED,
+ CHANNEL_TLV_COALESCE_UPDATE,
CHANNEL_TLV_QID,
+ CHANNEL_TLV_COALESCE_READ,
CHANNEL_TLV_MAX,
/* Required for iterating over vport-update tlvs.
@@ -677,6 +701,31 @@ struct qed_vf_iov {
bool b_doorbell_bar;
};
+/**
+ * @brief VF - Set Rx/Tx coalesce per VF's relative queue.
+ * A coalesce value of '0' leaves that direction unconfigured.
+ *
+ * @param p_hwfn
+ * @param rx_coal - coalesce value in microseconds for the Rx queue
+ * @param tx_coal - coalesce value in microseconds for the Tx queue
+ * @param p_cid - queue cid
+ *
+ **/
+int qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn,
+ u16 rx_coal,
+ u16 tx_coal, struct qed_queue_cid *p_cid);
+
+/**
+ * @brief VF - Get coalesce per VF's relative queue.
+ *
+ * @param p_hwfn
+ * @param p_coal - coalesce value in microseconds for VF queues.
+ * @param p_cid - queue cid
+ *
+ **/
+int qed_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn,
+ u16 *p_coal, struct qed_queue_cid *p_cid);
+
#ifdef CONFIG_QED_SRIOV
/**
* @brief Read the VF bulletin and act on it if needed
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 4dfb238..adb7005 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -160,6 +160,8 @@ struct qede_rdma_dev {
struct qede_ptp;
+#define QEDE_RFS_MAX_FLTR 256
+
struct qede_dev {
struct qed_dev *cdev;
struct net_device *ndev;
@@ -241,9 +243,7 @@ struct qede_dev {
u16 vxlan_dst_port;
u16 geneve_dst_port;
-#ifdef CONFIG_RFS_ACCEL
struct qede_arfs *arfs;
-#endif
bool wol_enabled;
struct qede_rdma_dev rdma_info;
@@ -447,16 +447,21 @@ struct qede_fastpath {
#ifdef CONFIG_RFS_ACCEL
int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
+#define QEDE_SP_ARFS_CONFIG 4
+#define QEDE_SP_TASK_POLL_DELAY (5 * HZ)
+#endif
+
void qede_process_arfs_filters(struct qede_dev *edev, bool free_fltr);
void qede_poll_for_freeing_arfs_filters(struct qede_dev *edev);
void qede_arfs_filter_op(void *dev, void *filter, u8 fw_rc);
void qede_free_arfs(struct qede_dev *edev);
int qede_alloc_arfs(struct qede_dev *edev);
-
-#define QEDE_SP_ARFS_CONFIG 4
-#define QEDE_SP_TASK_POLL_DELAY (5 * HZ)
-#define QEDE_RFS_MAX_FLTR 256
-#endif
+int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info);
+int qede_del_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info);
+int qede_get_cls_rule_entry(struct qede_dev *edev, struct ethtool_rxnfc *cmd);
+int qede_get_cls_rule_all(struct qede_dev *edev, struct ethtool_rxnfc *info,
+ u32 *rule_locs);
+int qede_get_arfs_filter_count(struct qede_dev *edev);
struct qede_reload_args {
void (*func)(struct qede_dev *edev, struct qede_reload_args *args);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 6a03d3e..dae7412 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -702,24 +702,62 @@ static u32 qede_get_link(struct net_device *dev)
static int qede_get_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
+ void *rx_handle = NULL, *tx_handle = NULL;
struct qede_dev *edev = netdev_priv(dev);
- u16 rxc, txc;
+ u16 rx_coal, tx_coal, i;
+ int rc = 0;
+ struct qede_fastpath *fp;
+
+ rx_coal = QED_DEFAULT_RX_USECS;
+ tx_coal = QED_DEFAULT_TX_USECS;
memset(coal, 0, sizeof(struct ethtool_coalesce));
- edev->ops->common->get_coalesce(edev->cdev, &rxc, &txc);
- coal->rx_coalesce_usecs = rxc;
- coal->tx_coalesce_usecs = txc;
+ __qede_lock(edev);
+ if (edev->state == QEDE_STATE_OPEN) {
+ for_each_queue(i) {
+ fp = &edev->fp_array[i];
- return 0;
+ if (fp->type & QEDE_FASTPATH_RX) {
+ rx_handle = fp->rxq->handle;
+ break;
+ }
+ }
+
+ rc = edev->ops->get_coalesce(edev->cdev, &rx_coal, rx_handle);
+ if (rc) {
+ DP_INFO(edev, "Read Rx coalesce error\n");
+ goto out;
+ }
+
+ for_each_queue(i) {
+ fp = &edev->fp_array[i];
+ if (fp->type & QEDE_FASTPATH_TX) {
+ tx_handle = fp->txq->handle;
+ break;
+ }
+ }
+
+ rc = edev->ops->get_coalesce(edev->cdev, &tx_coal, tx_handle);
+ if (rc)
+ DP_INFO(edev, "Read Tx coalesce error\n");
+ }
+
+out:
+ __qede_unlock(edev);
+
+ coal->rx_coalesce_usecs = rx_coal;
+ coal->tx_coalesce_usecs = tx_coal;
+
+ return rc;
}
static int qede_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *coal)
{
struct qede_dev *edev = netdev_priv(dev);
+ struct qede_fastpath *fp;
int i, rc = 0;
- u16 rxc, txc, sb_id;
+ u16 rxc, txc;
if (!netif_running(dev)) {
DP_INFO(edev, "Interface is down\n");
@@ -730,21 +768,36 @@ static int qede_set_coalesce(struct net_device *dev,
coal->tx_coalesce_usecs > QED_COALESCE_MAX) {
DP_INFO(edev,
"Can't support requested %s coalesce value [max supported value %d]\n",
- coal->rx_coalesce_usecs > QED_COALESCE_MAX ? "rx"
- : "tx",
- QED_COALESCE_MAX);
+ coal->rx_coalesce_usecs > QED_COALESCE_MAX ? "rx" :
+ "tx", QED_COALESCE_MAX);
return -EINVAL;
}
rxc = (u16)coal->rx_coalesce_usecs;
txc = (u16)coal->tx_coalesce_usecs;
for_each_queue(i) {
- sb_id = edev->fp_array[i].sb_info->igu_sb_id;
- rc = edev->ops->common->set_coalesce(edev->cdev, rxc, txc,
- (u16)i, sb_id);
- if (rc) {
- DP_INFO(edev, "Set coalesce error, rc = %d\n", rc);
- return rc;
+ fp = &edev->fp_array[i];
+
+ if (edev->fp_array[i].type & QEDE_FASTPATH_RX) {
+ rc = edev->ops->common->set_coalesce(edev->cdev,
+ rxc, 0,
+ fp->rxq->handle);
+ if (rc) {
+ DP_INFO(edev,
+ "Set RX coalesce error, rc = %d\n", rc);
+ return rc;
+ }
+ }
+
+ if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+ rc = edev->ops->common->set_coalesce(edev->cdev,
+ 0, txc,
+ fp->txq->handle);
+ if (rc) {
+ DP_INFO(edev,
+ "Set TX coalesce error, rc = %d\n", rc);
+ return rc;
+ }
}
}
@@ -1045,20 +1098,34 @@ static int qede_get_rss_flags(struct qede_dev *edev, struct ethtool_rxnfc *info)
}
static int qede_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
- u32 *rules __always_unused)
+ u32 *rule_locs)
{
struct qede_dev *edev = netdev_priv(dev);
+ int rc = 0;
switch (info->cmd) {
case ETHTOOL_GRXRINGS:
info->data = QEDE_RSS_COUNT(edev);
- return 0;
+ break;
case ETHTOOL_GRXFH:
- return qede_get_rss_flags(edev, info);
+ rc = qede_get_rss_flags(edev, info);
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ info->rule_cnt = qede_get_arfs_filter_count(edev);
+ info->data = QEDE_RFS_MAX_FLTR;
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ rc = qede_get_cls_rule_entry(edev, info);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ rc = qede_get_cls_rule_all(edev, info, rule_locs);
+ break;
default:
DP_ERR(edev, "Command parameters not supported\n");
- return -EOPNOTSUPP;
+ rc = -EOPNOTSUPP;
}
+
+ return rc;
}
static int qede_set_rss_flags(struct qede_dev *edev, struct ethtool_rxnfc *info)
@@ -1168,14 +1235,24 @@ static int qede_set_rss_flags(struct qede_dev *edev, struct ethtool_rxnfc *info)
static int qede_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
{
struct qede_dev *edev = netdev_priv(dev);
+ int rc;
switch (info->cmd) {
case ETHTOOL_SRXFH:
- return qede_set_rss_flags(edev, info);
+ rc = qede_set_rss_flags(edev, info);
+ break;
+ case ETHTOOL_SRXCLSRLINS:
+ rc = qede_add_cls_rule(edev, info);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ rc = qede_del_cls_rule(edev, info);
+ break;
default:
DP_INFO(edev, "Command parameters not supported\n");
- return -EOPNOTSUPP;
+ rc = -EOPNOTSUPP;
}
+
+ return rc;
}
static u32 qede_get_rxfh_indir_size(struct net_device *dev)
@@ -1607,6 +1684,87 @@ static int qede_get_tunable(struct net_device *dev,
return 0;
}
+static int qede_get_eee(struct net_device *dev, struct ethtool_eee *edata)
+{
+ struct qede_dev *edev = netdev_priv(dev);
+ struct qed_link_output current_link;
+
+ memset(&current_link, 0, sizeof(current_link));
+ edev->ops->common->get_link(edev->cdev, &current_link);
+
+ if (!current_link.eee_supported) {
+ DP_INFO(edev, "EEE is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (current_link.eee.adv_caps & QED_EEE_1G_ADV)
+ edata->advertised = ADVERTISED_1000baseT_Full;
+ if (current_link.eee.adv_caps & QED_EEE_10G_ADV)
+ edata->advertised |= ADVERTISED_10000baseT_Full;
+ if (current_link.sup_caps & QED_EEE_1G_ADV)
+ edata->supported = ADVERTISED_1000baseT_Full;
+ if (current_link.sup_caps & QED_EEE_10G_ADV)
+ edata->supported |= ADVERTISED_10000baseT_Full;
+ if (current_link.eee.lp_adv_caps & QED_EEE_1G_ADV)
+ edata->lp_advertised = ADVERTISED_1000baseT_Full;
+ if (current_link.eee.lp_adv_caps & QED_EEE_10G_ADV)
+ edata->lp_advertised |= ADVERTISED_10000baseT_Full;
+
+ edata->tx_lpi_timer = current_link.eee.tx_lpi_timer;
+ edata->eee_enabled = current_link.eee.enable;
+ edata->tx_lpi_enabled = current_link.eee.tx_lpi_enable;
+ edata->eee_active = current_link.eee_active;
+
+ return 0;
+}
+
+static int qede_set_eee(struct net_device *dev, struct ethtool_eee *edata)
+{
+ struct qede_dev *edev = netdev_priv(dev);
+ struct qed_link_output current_link;
+ struct qed_link_params params;
+
+ if (!edev->ops->common->can_link_change(edev->cdev)) {
+ DP_INFO(edev, "Link settings are not allowed to be changed\n");
+ return -EOPNOTSUPP;
+ }
+
+ memset(&current_link, 0, sizeof(current_link));
+ edev->ops->common->get_link(edev->cdev, &current_link);
+
+ if (!current_link.eee_supported) {
+ DP_INFO(edev, "EEE is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ memset(&params, 0, sizeof(params));
+ params.override_flags |= QED_LINK_OVERRIDE_EEE_CONFIG;
+
+ if (!(edata->advertised & (ADVERTISED_1000baseT_Full |
+ ADVERTISED_10000baseT_Full)) ||
+ ((edata->advertised & (ADVERTISED_1000baseT_Full |
+ ADVERTISED_10000baseT_Full)) !=
+ edata->advertised)) {
+ DP_VERBOSE(edev, QED_MSG_DEBUG,
+ "Invalid advertised capabilities %d\n",
+ edata->advertised);
+ return -EINVAL;
+ }
+
+ if (edata->advertised & ADVERTISED_1000baseT_Full)
+ params.eee.adv_caps = QED_EEE_1G_ADV;
+ if (edata->advertised & ADVERTISED_10000baseT_Full)
+ params.eee.adv_caps |= QED_EEE_10G_ADV;
+ params.eee.enable = edata->eee_enabled;
+ params.eee.tx_lpi_enable = edata->tx_lpi_enabled;
+ params.eee.tx_lpi_timer = edata->tx_lpi_timer;
+
+ params.link_up = true;
+ edev->ops->common->set_link(edev->cdev, &params);
+
+ return 0;
+}
+
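The advertisement check in qede_set_eee() accepts a request only if it advertises at least one of the two EEE-capable speeds and nothing outside that pair. A standalone sketch of that predicate, with stand-in values for the ethtool ADVERTISED_* bits:

#include <stdio.h>

#define ADV_1G	(1u << 5)	/* stand-in for ADVERTISED_1000baseT_Full */
#define ADV_10G	(1u << 12)	/* stand-in for ADVERTISED_10000baseT_Full */

static int adv_valid(unsigned adv)
{
	unsigned eee_mask = ADV_1G | ADV_10G;

	return (adv & eee_mask) && !(adv & ~eee_mask);
}

int main(void)
{
	printf("%d %d %d\n",
	       adv_valid(ADV_1G),		/* 1: ok */
	       adv_valid(0),			/* 0: nothing advertised */
	       adv_valid(ADV_10G | 0x1));	/* 0: non-EEE bit set */
	return 0;
}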
static const struct ethtool_ops qede_ethtool_ops = {
.get_link_ksettings = qede_get_link_ksettings,
.set_link_ksettings = qede_set_link_ksettings,
@@ -1640,6 +1798,9 @@ static const struct ethtool_ops qede_ethtool_ops = {
.get_channels = qede_get_channels,
.set_channels = qede_set_channels,
.self_test = qede_self_test,
+ .get_eee = qede_get_eee,
+ .set_eee = qede_set_eee,
.get_tunable = qede_get_tunable,
.set_tunable = qede_set_tunable,
};
@@ -1650,6 +1811,8 @@ static const struct ethtool_ops qede_vf_ethtool_ops = {
.get_msglevel = qede_get_msglevel,
.set_msglevel = qede_set_msglevel,
.get_link = qede_get_link,
+ .get_coalesce = qede_get_coalesce,
+ .set_coalesce = qede_set_coalesce,
.get_ringparam = qede_get_ringparam,
.set_ringparam = qede_set_ringparam,
.get_strings = qede_get_strings,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index f939db5..f79e36e 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -38,7 +38,6 @@
#include <linux/qed/qed_if.h>
#include "qede.h"
-#ifdef CONFIG_RFS_ACCEL
struct qede_arfs_tuple {
union {
__be32 src_ipv4;
@@ -76,10 +75,12 @@ struct qede_arfs_fltr_node {
u16 next_rxq_id;
bool filter_op;
bool used;
+ u8 fw_rc;
struct hlist_node node;
};
struct qede_arfs {
+#define QEDE_ARFS_BUCKET_HEAD(edev, idx) (&(edev)->arfs->arfs_hl_head[idx])
#define QEDE_ARFS_POLL_COUNT 100
#define QEDE_RFS_FLW_BITSHIFT (4)
#define QEDE_RFS_FLW_MASK ((1 << QEDE_RFS_FLW_BITSHIFT) - 1)
@@ -121,11 +122,56 @@ qede_free_arfs_filter(struct qede_dev *edev, struct qede_arfs_fltr_node *fltr)
kfree(fltr);
}
+static int
+qede_enqueue_fltr_and_config_searcher(struct qede_dev *edev,
+ struct qede_arfs_fltr_node *fltr,
+ u16 bucket_idx)
+{
+ fltr->mapping = dma_map_single(&edev->pdev->dev, fltr->data,
+ fltr->buf_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&edev->pdev->dev, fltr->mapping)) {
+ DP_NOTICE(edev, "Failed to map DMA memory for rule\n");
+ qede_free_arfs_filter(edev, fltr);
+ return -ENOMEM;
+ }
+
+ INIT_HLIST_NODE(&fltr->node);
+ hlist_add_head(&fltr->node,
+ QEDE_ARFS_BUCKET_HEAD(edev, bucket_idx));
+ edev->arfs->filter_count++;
+
+ if (edev->arfs->filter_count == 1 && !edev->arfs->enable) {
+ edev->ops->configure_arfs_searcher(edev->cdev, true);
+ edev->arfs->enable = true;
+ }
+
+ return 0;
+}
+
+static void
+qede_dequeue_fltr_and_config_searcher(struct qede_dev *edev,
+ struct qede_arfs_fltr_node *fltr)
+{
+ hlist_del(&fltr->node);
+ dma_unmap_single(&edev->pdev->dev, fltr->mapping,
+ fltr->buf_len, DMA_TO_DEVICE);
+
+ qede_free_arfs_filter(edev, fltr);
+ edev->arfs->filter_count--;
+
+ if (!edev->arfs->filter_count && edev->arfs->enable) {
+ edev->arfs->enable = false;
+ edev->ops->configure_arfs_searcher(edev->cdev, false);
+ }
+}
+
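qede_enqueue_fltr_and_config_searcher() and its dequeue counterpart bracket the hardware searcher: it is switched on when the first filter is inserted and off when the last one is removed, so classification only runs while rules exist. A toy model of that first-on/last-off pairing:

#include <stdbool.h>
#include <stdio.h>

static int filter_count;
static bool searcher_on;

static void add_filter(void)
{
	if (++filter_count == 1 && !searcher_on) {
		searcher_on = true;	/* configure_arfs_searcher(true) */
		printf("searcher enabled\n");
	}
}

static void del_filter(void)
{
	if (--filter_count == 0 && searcher_on) {
		searcher_on = false;	/* configure_arfs_searcher(false) */
		printf("searcher disabled\n");
	}
}

int main(void)
{
	add_filter();
	add_filter();
	del_filter();
	del_filter();	/* only this call disables the searcher */
	return 0;
}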
void qede_arfs_filter_op(void *dev, void *filter, u8 fw_rc)
{
struct qede_arfs_fltr_node *fltr = filter;
struct qede_dev *edev = dev;
+ fltr->fw_rc = fw_rc;
+
if (fw_rc) {
DP_NOTICE(edev,
"Failed arfs filter configuration fw_rc=%d, flow_id=%d, sw_id=%d, src_port=%d, dst_port=%d, rxq=%d\n",
@@ -185,18 +231,17 @@ void qede_process_arfs_filters(struct qede_dev *edev, bool free_fltr)
if ((!test_bit(QEDE_FLTR_VALID, &fltr->state) &&
!fltr->used) || free_fltr) {
- hlist_del(&fltr->node);
- dma_unmap_single(&edev->pdev->dev,
- fltr->mapping,
- fltr->buf_len, DMA_TO_DEVICE);
- qede_free_arfs_filter(edev, fltr);
- edev->arfs->filter_count--;
+ qede_dequeue_fltr_and_config_searcher(edev,
+ fltr);
} else {
- if ((rps_may_expire_flow(edev->ndev,
- fltr->rxq_id,
- fltr->flow_id,
- fltr->sw_id) || del) &&
- !free_fltr)
+ bool flow_exp = false;
+#ifdef CONFIG_RFS_ACCEL
+ flow_exp = rps_may_expire_flow(edev->ndev,
+ fltr->rxq_id,
+ fltr->flow_id,
+ fltr->sw_id);
+#endif
+ if ((flow_exp || del) && !free_fltr)
qede_configure_arfs_fltr(edev, fltr,
fltr->rxq_id,
false);
@@ -213,10 +258,12 @@ void qede_process_arfs_filters(struct qede_dev *edev, bool free_fltr)
edev->arfs->enable = false;
edev->ops->configure_arfs_searcher(edev->cdev, false);
}
+#ifdef CONFIG_RFS_ACCEL
} else {
set_bit(QEDE_SP_ARFS_CONFIG, &edev->sp_flags);
schedule_delayed_work(&edev->sp_task,
QEDE_SP_TASK_POLL_DELAY);
+#endif
}
spin_unlock_bh(&edev->arfs->arfs_list_lock);
@@ -258,25 +305,26 @@ int qede_alloc_arfs(struct qede_dev *edev)
spin_lock_init(&edev->arfs->arfs_list_lock);
for (i = 0; i <= QEDE_RFS_FLW_MASK; i++)
- INIT_HLIST_HEAD(&edev->arfs->arfs_hl_head[i]);
+ INIT_HLIST_HEAD(QEDE_ARFS_BUCKET_HEAD(edev, i));
- edev->ndev->rx_cpu_rmap = alloc_irq_cpu_rmap(QEDE_RSS_COUNT(edev));
- if (!edev->ndev->rx_cpu_rmap) {
+ edev->arfs->arfs_fltr_bmap = vzalloc(BITS_TO_LONGS(QEDE_RFS_MAX_FLTR) *
+ sizeof(long));
+ if (!edev->arfs->arfs_fltr_bmap) {
vfree(edev->arfs);
edev->arfs = NULL;
return -ENOMEM;
}
- edev->arfs->arfs_fltr_bmap = vzalloc(BITS_TO_LONGS(QEDE_RFS_MAX_FLTR) *
- sizeof(long));
- if (!edev->arfs->arfs_fltr_bmap) {
- free_irq_cpu_rmap(edev->ndev->rx_cpu_rmap);
- edev->ndev->rx_cpu_rmap = NULL;
+#ifdef CONFIG_RFS_ACCEL
+ edev->ndev->rx_cpu_rmap = alloc_irq_cpu_rmap(QEDE_RSS_COUNT(edev));
+ if (!edev->ndev->rx_cpu_rmap) {
+ vfree(edev->arfs->arfs_fltr_bmap);
+ edev->arfs->arfs_fltr_bmap = NULL;
vfree(edev->arfs);
edev->arfs = NULL;
return -ENOMEM;
}
-
+#endif
return 0;
}
@@ -285,16 +333,19 @@ void qede_free_arfs(struct qede_dev *edev)
if (!edev->arfs)
return;
+#ifdef CONFIG_RFS_ACCEL
if (edev->ndev->rx_cpu_rmap)
free_irq_cpu_rmap(edev->ndev->rx_cpu_rmap);
edev->ndev->rx_cpu_rmap = NULL;
+#endif
vfree(edev->arfs->arfs_fltr_bmap);
edev->arfs->arfs_fltr_bmap = NULL;
vfree(edev->arfs);
edev->arfs = NULL;
}
+#ifdef CONFIG_RFS_ACCEL
static bool qede_compare_ip_addr(struct qede_arfs_fltr_node *tpos,
const struct sk_buff *skb)
{
@@ -394,9 +445,8 @@ int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
spin_lock_bh(&edev->arfs->arfs_list_lock);
- n = qede_arfs_htbl_key_search(&edev->arfs->arfs_hl_head[tbl_idx],
+ n = qede_arfs_htbl_key_search(QEDE_ARFS_BUCKET_HEAD(edev, tbl_idx),
skb, ports[0], ports[1], ip_proto);
-
if (n) {
/* Filter match */
n->next_rxq_id = rxq_index;
@@ -448,23 +498,9 @@ int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
n->tuple.ip_proto = ip_proto;
memcpy(n->data + ETH_HLEN, skb->data, skb_headlen(skb));
- n->mapping = dma_map_single(&edev->pdev->dev, n->data,
- n->buf_len, DMA_TO_DEVICE);
- if (dma_mapping_error(&edev->pdev->dev, n->mapping)) {
- DP_NOTICE(edev, "Failed to map DMA memory for arfs\n");
- qede_free_arfs_filter(edev, n);
- rc = -ENOMEM;
+ rc = qede_enqueue_fltr_and_config_searcher(edev, n, tbl_idx);
+ if (rc)
goto ret_unlock;
- }
-
- INIT_HLIST_NODE(&n->node);
- hlist_add_head(&n->node, &edev->arfs->arfs_hl_head[tbl_idx]);
- edev->arfs->filter_count++;
-
- if (edev->arfs->filter_count == 1 && !edev->arfs->enable) {
- edev->ops->configure_arfs_searcher(edev->cdev, true);
- edev->arfs->enable = true;
- }
qede_configure_arfs_fltr(edev, n, n->rxq_id, true);
@@ -472,6 +508,7 @@ int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
set_bit(QEDE_SP_ARFS_CONFIG, &edev->sp_flags);
schedule_delayed_work(&edev->sp_task, 0);
+
return n->sw_id;
ret_unlock:
@@ -1263,3 +1300,371 @@ void qede_config_rx_mode(struct net_device *ndev)
out:
kfree(uc_macs);
}
+
+static struct qede_arfs_fltr_node *
+qede_get_arfs_fltr_by_loc(struct hlist_head *head, u32 location)
+{
+ struct qede_arfs_fltr_node *fltr;
+
+ hlist_for_each_entry(fltr, head, node)
+ if (location == fltr->sw_id)
+ return fltr;
+
+ return NULL;
+}
+
+static bool
+qede_compare_user_flow_ips(struct qede_arfs_fltr_node *tpos,
+ struct ethtool_rx_flow_spec *fsp,
+ __be16 proto)
+{
+ if (proto == htons(ETH_P_IP)) {
+ struct ethtool_tcpip4_spec *ip;
+
+ ip = &fsp->h_u.tcp_ip4_spec;
+
+ if (tpos->tuple.src_ipv4 == ip->ip4src &&
+ tpos->tuple.dst_ipv4 == ip->ip4dst)
+ return true;
+ else
+ return false;
+ } else {
+ struct ethtool_tcpip6_spec *ip6;
+ struct in6_addr *src;
+
+ ip6 = &fsp->h_u.tcp_ip6_spec;
+ src = &tpos->tuple.src_ipv6;
+
+ if (!memcmp(src, &ip6->ip6src, sizeof(struct in6_addr)) &&
+ !memcmp(&tpos->tuple.dst_ipv6, &ip6->ip6dst,
+ sizeof(struct in6_addr)))
+ return true;
+ else
+ return false;
+ }
+ return false;
+}
+
+int qede_get_cls_rule_all(struct qede_dev *edev, struct ethtool_rxnfc *info,
+ u32 *rule_locs)
+{
+ struct qede_arfs_fltr_node *fltr;
+ struct hlist_head *head;
+ int cnt = 0, rc = 0;
+
+ info->data = QEDE_RFS_MAX_FLTR;
+
+ __qede_lock(edev);
+
+ if (!edev->arfs) {
+ rc = -EPERM;
+ goto unlock;
+ }
+
+ head = QEDE_ARFS_BUCKET_HEAD(edev, 0);
+
+ hlist_for_each_entry(fltr, head, node) {
+ if (cnt == info->rule_cnt) {
+ rc = -EMSGSIZE;
+ goto unlock;
+ }
+
+ rule_locs[cnt] = fltr->sw_id;
+ cnt++;
+ }
+
+ info->rule_cnt = cnt;
+
+unlock:
+ __qede_unlock(edev);
+ return rc;
+}
+
+int qede_get_cls_rule_entry(struct qede_dev *edev, struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp = &cmd->fs;
+ struct qede_arfs_fltr_node *fltr = NULL;
+ int rc = 0;
+
+ cmd->data = QEDE_RFS_MAX_FLTR;
+
+ __qede_lock(edev);
+
+ if (!edev->arfs) {
+ rc = -EPERM;
+ goto unlock;
+ }
+
+ fltr = qede_get_arfs_fltr_by_loc(QEDE_ARFS_BUCKET_HEAD(edev, 0),
+ fsp->location);
+ if (!fltr) {
+ DP_NOTICE(edev, "Rule not found - location=0x%x\n",
+ fsp->location);
+ rc = -EINVAL;
+ goto unlock;
+ }
+
+ if (fltr->tuple.eth_proto == htons(ETH_P_IP)) {
+ if (fltr->tuple.ip_proto == IPPROTO_TCP)
+ fsp->flow_type = TCP_V4_FLOW;
+ else
+ fsp->flow_type = UDP_V4_FLOW;
+
+ fsp->h_u.tcp_ip4_spec.psrc = fltr->tuple.src_port;
+ fsp->h_u.tcp_ip4_spec.pdst = fltr->tuple.dst_port;
+ fsp->h_u.tcp_ip4_spec.ip4src = fltr->tuple.src_ipv4;
+ fsp->h_u.tcp_ip4_spec.ip4dst = fltr->tuple.dst_ipv4;
+ } else {
+ if (fltr->tuple.ip_proto == IPPROTO_TCP)
+ fsp->flow_type = TCP_V6_FLOW;
+ else
+ fsp->flow_type = UDP_V6_FLOW;
+ fsp->h_u.tcp_ip6_spec.psrc = fltr->tuple.src_port;
+ fsp->h_u.tcp_ip6_spec.pdst = fltr->tuple.dst_port;
+ memcpy(&fsp->h_u.tcp_ip6_spec.ip6src,
+ &fltr->tuple.src_ipv6, sizeof(struct in6_addr));
+ memcpy(&fsp->h_u.tcp_ip6_spec.ip6dst,
+ &fltr->tuple.dst_ipv6, sizeof(struct in6_addr));
+ }
+
+ fsp->ring_cookie = fltr->rxq_id;
+
+unlock:
+ __qede_unlock(edev);
+ return rc;
+}
+
+static int
+qede_validate_and_check_flow_exist(struct qede_dev *edev,
+ struct ethtool_rx_flow_spec *fsp,
+ int *min_hlen)
+{
+ __be16 src_port = 0x0, dst_port = 0x0;
+ struct qede_arfs_fltr_node *fltr;
+ struct hlist_node *temp;
+ struct hlist_head *head;
+ __be16 eth_proto;
+ u8 ip_proto;
+
+ if (fsp->location >= QEDE_RFS_MAX_FLTR ||
+ fsp->ring_cookie >= QEDE_RSS_COUNT(edev))
+ return -EINVAL;
+
+ if (fsp->flow_type == TCP_V4_FLOW) {
+ *min_hlen += sizeof(struct iphdr) +
+ sizeof(struct tcphdr);
+ eth_proto = htons(ETH_P_IP);
+ ip_proto = IPPROTO_TCP;
+ } else if (fsp->flow_type == UDP_V4_FLOW) {
+ *min_hlen += sizeof(struct iphdr) +
+ sizeof(struct udphdr);
+ eth_proto = htons(ETH_P_IP);
+ ip_proto = IPPROTO_UDP;
+ } else if (fsp->flow_type == TCP_V6_FLOW) {
+ *min_hlen += sizeof(struct ipv6hdr) +
+ sizeof(struct tcphdr);
+ eth_proto = htons(ETH_P_IPV6);
+ ip_proto = IPPROTO_TCP;
+ } else if (fsp->flow_type == UDP_V6_FLOW) {
+ *min_hlen += sizeof(struct ipv6hdr) +
+ sizeof(struct udphdr);
+ eth_proto = htons(ETH_P_IPV6);
+ ip_proto = IPPROTO_UDP;
+ } else {
+ DP_NOTICE(edev, "Unsupported flow type = 0x%x\n",
+ fsp->flow_type);
+ return -EPROTONOSUPPORT;
+ }
+
+ if (eth_proto == htons(ETH_P_IP)) {
+ src_port = fsp->h_u.tcp_ip4_spec.psrc;
+ dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+ } else {
+ src_port = fsp->h_u.tcp_ip6_spec.psrc;
+ dst_port = fsp->h_u.tcp_ip6_spec.pdst;
+ }
+
+ head = QEDE_ARFS_BUCKET_HEAD(edev, 0);
+ hlist_for_each_entry_safe(fltr, temp, head, node) {
+ if ((fltr->tuple.ip_proto == ip_proto &&
+ fltr->tuple.eth_proto == eth_proto &&
+ qede_compare_user_flow_ips(fltr, fsp, eth_proto) &&
+ fltr->tuple.src_port == src_port &&
+ fltr->tuple.dst_port == dst_port) ||
+ fltr->sw_id == fsp->location)
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static int
+qede_poll_arfs_filter_config(struct qede_dev *edev,
+ struct qede_arfs_fltr_node *fltr)
+{
+ int count = QEDE_ARFS_POLL_COUNT;
+
+ while (fltr->used && count) {
+ msleep(20);
+ count--;
+ }
+
+ if (count == 0 || fltr->fw_rc) {
+ qede_dequeue_fltr_and_config_searcher(edev, fltr);
+ return -EIO;
+ }
+
+ return fltr->fw_rc;
+}
+
+int qede_add_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info)
+{
+ struct ethtool_rx_flow_spec *fsp = &info->fs;
+ struct qede_arfs_fltr_node *n;
+ int min_hlen = ETH_HLEN, rc;
+ struct ethhdr *eth;
+ struct iphdr *ip;
+ __be16 *ports;
+
+ __qede_lock(edev);
+
+ if (!edev->arfs) {
+ rc = -EPERM;
+ goto unlock;
+ }
+
+ rc = qede_validate_and_check_flow_exist(edev, fsp, &min_hlen);
+ if (rc)
+ goto unlock;
+
+ n = kzalloc(sizeof(*n), GFP_KERNEL);
+ if (!n) {
+ rc = -ENOMEM;
+ goto unlock;
+ }
+
+ n->data = kzalloc(min_hlen, GFP_KERNEL);
+ if (!n->data) {
+ kfree(n);
+ rc = -ENOMEM;
+ goto unlock;
+ }
+
+ n->sw_id = fsp->location;
+ set_bit(n->sw_id, edev->arfs->arfs_fltr_bmap);
+ n->buf_len = min_hlen;
+ n->rxq_id = fsp->ring_cookie;
+ n->next_rxq_id = n->rxq_id;
+ eth = (struct ethhdr *)n->data;
+
+ if (info->fs.flow_type == TCP_V4_FLOW ||
+ info->fs.flow_type == UDP_V4_FLOW) {
+ ports = (__be16 *)(n->data + ETH_HLEN +
+ sizeof(struct iphdr));
+ eth->h_proto = htons(ETH_P_IP);
+ n->tuple.eth_proto = htons(ETH_P_IP);
+ n->tuple.src_ipv4 = info->fs.h_u.tcp_ip4_spec.ip4src;
+ n->tuple.dst_ipv4 = info->fs.h_u.tcp_ip4_spec.ip4dst;
+ n->tuple.src_port = info->fs.h_u.tcp_ip4_spec.psrc;
+ n->tuple.dst_port = info->fs.h_u.tcp_ip4_spec.pdst;
+ ports[0] = n->tuple.src_port;
+ ports[1] = n->tuple.dst_port;
+ ip = (struct iphdr *)(n->data + ETH_HLEN);
+ ip->saddr = info->fs.h_u.tcp_ip4_spec.ip4src;
+ ip->daddr = info->fs.h_u.tcp_ip4_spec.ip4dst;
+ ip->version = 0x4;
+ ip->ihl = 0x5;
+
+ if (info->fs.flow_type == TCP_V4_FLOW) {
+ n->tuple.ip_proto = IPPROTO_TCP;
+ ip->protocol = IPPROTO_TCP;
+ } else {
+ n->tuple.ip_proto = IPPROTO_UDP;
+ ip->protocol = IPPROTO_UDP;
+ }
+ ip->tot_len = cpu_to_be16(min_hlen - ETH_HLEN);
+ } else {
+ struct ipv6hdr *ip6;
+
+ ip6 = (struct ipv6hdr *)(n->data + ETH_HLEN);
+ ports = (__be16 *)(n->data + ETH_HLEN +
+ sizeof(struct ipv6hdr));
+ eth->h_proto = htons(ETH_P_IPV6);
+ n->tuple.eth_proto = htons(ETH_P_IPV6);
+ memcpy(&n->tuple.src_ipv6, &info->fs.h_u.tcp_ip6_spec.ip6src,
+ sizeof(struct in6_addr));
+ memcpy(&n->tuple.dst_ipv6, &info->fs.h_u.tcp_ip6_spec.ip6dst,
+ sizeof(struct in6_addr));
+ n->tuple.src_port = info->fs.h_u.tcp_ip6_spec.psrc;
+ n->tuple.dst_port = info->fs.h_u.tcp_ip6_spec.pdst;
+ ports[0] = n->tuple.src_port;
+ ports[1] = n->tuple.dst_port;
+ memcpy(&ip6->saddr, &n->tuple.src_ipv6,
+ sizeof(struct in6_addr));
+ memcpy(&ip6->daddr, &n->tuple.dst_ipv6,
+ sizeof(struct in6_addr));
+ ip6->version = 0x6;
+
+ if (info->fs.flow_type == TCP_V6_FLOW) {
+ n->tuple.ip_proto = IPPROTO_TCP;
+ ip6->nexthdr = NEXTHDR_TCP;
+ ip6->payload_len = cpu_to_be16(sizeof(struct tcphdr));
+ } else {
+ n->tuple.ip_proto = IPPROTO_UDP;
+ ip6->nexthdr = NEXTHDR_UDP;
+ ip6->payload_len = cpu_to_be16(sizeof(struct udphdr));
+ }
+ }
+
+ rc = qede_enqueue_fltr_and_config_searcher(edev, n, 0);
+ if (rc)
+ goto unlock;
+
+ qede_configure_arfs_fltr(edev, n, n->rxq_id, true);
+ rc = qede_poll_arfs_filter_config(edev, n);
+unlock:
+ __qede_unlock(edev);
+ return rc;
+}
+
+int qede_del_cls_rule(struct qede_dev *edev, struct ethtool_rxnfc *info)
+{
+ struct ethtool_rx_flow_spec *fsp = &info->fs;
+ struct qede_arfs_fltr_node *fltr = NULL;
+ int rc = -EPERM;
+
+ __qede_lock(edev);
+ if (!edev->arfs)
+ goto unlock;
+
+ fltr = qede_get_arfs_fltr_by_loc(QEDE_ARFS_BUCKET_HEAD(edev, 0),
+ fsp->location);
+ if (!fltr)
+ goto unlock;
+
+ qede_configure_arfs_fltr(edev, fltr, fltr->rxq_id, false);
+
+ rc = qede_poll_arfs_filter_config(edev, fltr);
+ if (rc == 0)
+ qede_dequeue_fltr_and_config_searcher(edev, fltr);
+
+unlock:
+ __qede_unlock(edev);
+ return rc;
+}
+
+int qede_get_arfs_filter_count(struct qede_dev *edev)
+{
+ int count = 0;
+
+ __qede_lock(edev);
+
+ if (!edev->arfs)
+ goto unlock;
+
+ count = edev->arfs->filter_count;
+
+unlock:
+ __qede_unlock(edev);
+ return count;
+}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 06ca13d..e5ee9f2 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -873,9 +873,7 @@ static void qede_update_pf_params(struct qed_dev *cdev)
*/
pf_params.eth_pf_params.num_vf_cons = 48;
-#ifdef CONFIG_RFS_ACCEL
pf_params.eth_pf_params.num_arfs_filters = QEDE_RFS_MAX_FLTR;
-#endif
qed_ops->common->update_pf_params(cdev, &pf_params);
}
@@ -1984,12 +1982,12 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
qede_vlan_mark_nonconfigured(edev);
edev->ops->fastpath_stop(edev->cdev);
-#ifdef CONFIG_RFS_ACCEL
+
if (!IS_VF(edev) && edev->dev_info.common.num_hwfns == 1) {
qede_poll_for_freeing_arfs_filters(edev);
qede_free_arfs(edev);
}
-#endif
+
/* Release the interrupts */
qede_sync_free_irqs(edev);
edev->ops->common->set_fp_int(edev->cdev, 0);
@@ -2041,13 +2039,12 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
if (rc)
goto err2;
-#ifdef CONFIG_RFS_ACCEL
if (!IS_VF(edev) && edev->dev_info.common.num_hwfns == 1) {
rc = qede_alloc_arfs(edev);
if (rc)
DP_NOTICE(edev, "aRFS memory allocation failed\n");
}
-#endif
+
qede_napi_add_enable(edev);
DP_INFO(edev, "Napi added and enabled\n");
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
index be41e4c..c48a0e2 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
@@ -592,13 +592,9 @@ qlcnic_receive_peg_ready(struct qlcnic_adapter *adapter)
} while (--retries);
- if (!retries) {
- dev_err(&adapter->pdev->dev, "Receive Peg initialization not "
- "complete, state: 0x%x.\n", val);
- return -EIO;
- }
-
- return 0;
+ dev_err(&adapter->pdev->dev, "Receive Peg initialization not complete, state: 0x%x.\n",
+ val);
+ return -EIO;
}
int
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
index 0844b7c..afa10a1 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
@@ -1285,7 +1285,7 @@ flash_temp:
int qlcnic_dump_fw(struct qlcnic_adapter *adapter)
{
struct qlcnic_fw_dump *fw_dump = &adapter->ahw->fw_dump;
- static const struct qlcnic_dump_operations *fw_dump_ops;
+ const struct qlcnic_dump_operations *fw_dump_ops;
struct qlcnic_83xx_dump_template_hdr *hdr_83xx;
u32 entry_offset, dump, no_entries, buf_offset = 0;
int i, k, ops_cnt, ops_index, dump_size = 0;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
index 73027a6..287d89d 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
@@ -1174,19 +1174,19 @@ static ssize_t qlcnic_83xx_sysfs_flash_write_handler(struct file *filp,
return size;
}
-static struct device_attribute dev_attr_bridged_mode = {
+static const struct device_attribute dev_attr_bridged_mode = {
.attr = {.name = "bridged_mode", .mode = (S_IRUGO | S_IWUSR)},
.show = qlcnic_show_bridged_mode,
.store = qlcnic_store_bridged_mode,
};
-static struct device_attribute dev_attr_diag_mode = {
+static const struct device_attribute dev_attr_diag_mode = {
.attr = {.name = "diag_mode", .mode = (S_IRUGO | S_IWUSR)},
.show = qlcnic_show_diag_mode,
.store = qlcnic_store_diag_mode,
};
-static struct device_attribute dev_attr_beacon = {
+static const struct device_attribute dev_attr_beacon = {
.attr = {.name = "beacon", .mode = (S_IRUGO | S_IWUSR)},
.show = qlcnic_show_beacon,
.store = qlcnic_store_beacon,
@@ -1248,7 +1248,7 @@ static const struct bin_attribute bin_attr_pm_config = {
.write = qlcnic_sysfs_write_pm_config,
};
-static struct bin_attribute bin_attr_flash = {
+static const struct bin_attribute bin_attr_flash = {
.attr = {.name = "flash", .mode = (S_IRUGO | S_IWUSR)},
.size = 0,
.read = qlcnic_83xx_sysfs_flash_read_handler,
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c
index e3223f2..fe2599b 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c
@@ -144,42 +144,23 @@ static int ql_get_serdes_regs(struct ql_adapter *qdev,
xaui_direct_valid = xaui_indirect_valid = 1;
/* The XAUI needs to be read out per port */
- if (qdev->func & 1) {
- /* We are NIC 2 */
- status = ql_read_other_func_serdes_reg(qdev,
- XG_SERDES_XAUI_HSS_PCS_START, &temp);
- if (status)
- temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
- if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
- XG_SERDES_ADDR_XAUI_PWR_DOWN)
- xaui_indirect_valid = 0;
+ status = ql_read_other_func_serdes_reg(qdev,
+ XG_SERDES_XAUI_HSS_PCS_START, &temp);
+ if (status)
+ temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
- status = ql_read_serdes_reg(qdev,
- XG_SERDES_XAUI_HSS_PCS_START, &temp);
- if (status)
- temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
+ if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
+ XG_SERDES_ADDR_XAUI_PWR_DOWN)
+ xaui_indirect_valid = 0;
- if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
- XG_SERDES_ADDR_XAUI_PWR_DOWN)
- xaui_direct_valid = 0;
- } else {
- /* We are NIC 1 */
- status = ql_read_other_func_serdes_reg(qdev,
- XG_SERDES_XAUI_HSS_PCS_START, &temp);
- if (status)
- temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
- if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
- XG_SERDES_ADDR_XAUI_PWR_DOWN)
- xaui_indirect_valid = 0;
+ status = ql_read_serdes_reg(qdev, XG_SERDES_XAUI_HSS_PCS_START, &temp);
- status = ql_read_serdes_reg(qdev,
- XG_SERDES_XAUI_HSS_PCS_START, &temp);
- if (status)
- temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
- if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
- XG_SERDES_ADDR_XAUI_PWR_DOWN)
- xaui_direct_valid = 0;
- }
+ if (status)
+ temp = XG_SERDES_ADDR_XAUI_PWR_DOWN;
+
+ if ((temp & XG_SERDES_ADDR_XAUI_PWR_DOWN) ==
+ XG_SERDES_ADDR_XAUI_PWR_DOWN)
+ xaui_direct_valid = 0;
/*
* XFI register is shared so only need to read one
diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig
index 877675a..f520071 100644
--- a/drivers/net/ethernet/qualcomm/Kconfig
+++ b/drivers/net/ethernet/qualcomm/Kconfig
@@ -59,4 +59,6 @@ config QCOM_EMAC
low power, Receive-Side Scaling (RSS), and IEEE 1588-2008
Precision Clock Synchronization Protocol.
+source "drivers/net/ethernet/qualcomm/rmnet/Kconfig"
+
endif # NET_VENDOR_QUALCOMM
diff --git a/drivers/net/ethernet/qualcomm/Makefile b/drivers/net/ethernet/qualcomm/Makefile
index 92fa7c4..1847350 100644
--- a/drivers/net/ethernet/qualcomm/Makefile
+++ b/drivers/net/ethernet/qualcomm/Makefile
@@ -9,3 +9,5 @@ obj-$(CONFIG_QCA7000_UART) += qcauart.o
qcauart-objs := qca_uart.o
obj-y += emac/
+
+obj-$(CONFIG_RMNET) += rmnet/
diff --git a/drivers/net/ethernet/qualcomm/rmnet/Kconfig b/drivers/net/ethernet/qualcomm/rmnet/Kconfig
new file mode 100644
index 0000000..6e2587a
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/Kconfig
@@ -0,0 +1,12 @@
+#
+# RMNET MAP driver
+#
+
+menuconfig RMNET
+ tristate "RmNet MAP driver"
+ default n
+ ---help---
+	  If you select this, you will enable the RMNET module, which is used
+	  for handling data in the multiplexing and aggregation protocol (MAP)
+	  format in the embedded data path. RMNET devices can be attached to
+	  any IP-mode physical device.
diff --git a/drivers/net/ethernet/qualcomm/rmnet/Makefile b/drivers/net/ethernet/qualcomm/rmnet/Makefile
new file mode 100644
index 0000000..01bddf2
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for the RMNET module
+#
+
+rmnet-y := rmnet_config.o
+rmnet-y += rmnet_vnd.o
+rmnet-y += rmnet_handlers.o
+rmnet-y += rmnet_map_data.o
+rmnet-y += rmnet_map_command.o
+obj-$(CONFIG_RMNET) += rmnet.o
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
new file mode 100644
index 0000000..98f2255
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -0,0 +1,356 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * RMNET configuration engine
+ *
+ */
+
+#include <net/sock.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netdevice.h>
+#include "rmnet_config.h"
+#include "rmnet_handlers.h"
+#include "rmnet_vnd.h"
+#include "rmnet_private.h"
+
+/* Locking scheme -
+ * The shared resource which needs to be protected is realdev->rx_handler_data.
+ * For the writer path, this is protected by rtnl_lock(). The writer paths are
+ * rmnet_newlink(), rmnet_dellink() and rmnet_force_unassociate_device(). These
+ * paths are already called with rtnl_lock() acquired, and an ASSERT_RTNL()
+ * verifies that. Dereferences here must therefore use rtnl_dereference().
+ * Dev list writing needs to happen with rtnl_lock() acquired for
+ * netdev_master_upper_dev_link().
+ * For the reader path, real_dev->rx_handler_data is dereferenced in the
+ * TX / RX paths, where rcu_read_lock() is sufficient: it is already held in
+ * __dev_queue_xmit() and netif_receive_skb_internal(), so readers use
+ * rcu_dereference_rtnl() to get the relevant information. For dev list
+ * reading, we again acquire rcu_read_lock() in rmnet_dellink() for
+ * netdev_master_upper_dev_get_rcu().
+ * We also use unregister_netdevice_many() to free all rmnet devices in
+ * rmnet_force_unassociate_device(), so we don't drop rtnl_lock() and can
+ * free them in the same context.
+ */
+
+/* Local Definitions and Declarations */
+
+struct rmnet_walk_data {
+ struct net_device *real_dev;
+ struct list_head *head;
+ struct rmnet_port *port;
+};
+
+static int rmnet_is_real_dev_registered(const struct net_device *real_dev)
+{
+ rx_handler_func_t *rx_handler;
+
+ rx_handler = rcu_dereference(real_dev->rx_handler);
+ return (rx_handler == rmnet_rx_handler);
+}
+
+/* Needs rtnl lock */
+static struct rmnet_port*
+rmnet_get_port_rtnl(const struct net_device *real_dev)
+{
+ return rtnl_dereference(real_dev->rx_handler_data);
+}
+
+static struct rmnet_endpoint*
+rmnet_get_endpoint(struct net_device *dev, int config_id)
+{
+ struct rmnet_endpoint *ep;
+ struct rmnet_port *port;
+
+ if (!rmnet_is_real_dev_registered(dev)) {
+ ep = rmnet_vnd_get_endpoint(dev);
+ } else {
+ port = rmnet_get_port_rtnl(dev);
+
+ ep = &port->muxed_ep[config_id];
+ }
+
+ return ep;
+}
+
+static int rmnet_unregister_real_device(struct net_device *real_dev,
+ struct rmnet_port *port)
+{
+ if (port->nr_rmnet_devs)
+ return -EINVAL;
+
+	netdev_rx_handler_unregister(real_dev);
+
+	/* netdev_rx_handler_unregister() synchronizes RCU readers, so the
+	 * handler's private data can be freed safely afterwards.
+	 */
+	kfree(port);
+
+ /* release reference on real_dev */
+ dev_put(real_dev);
+
+ netdev_dbg(real_dev, "Removed from rmnet\n");
+ return 0;
+}
+
+static int rmnet_register_real_device(struct net_device *real_dev)
+{
+ struct rmnet_port *port;
+ int rc;
+
+ ASSERT_RTNL();
+
+ if (rmnet_is_real_dev_registered(real_dev))
+ return 0;
+
+ port = kzalloc(sizeof(*port), GFP_ATOMIC);
+ if (!port)
+ return -ENOMEM;
+
+ port->dev = real_dev;
+ rc = netdev_rx_handler_register(real_dev, rmnet_rx_handler, port);
+ if (rc) {
+ kfree(port);
+ return -EBUSY;
+ }
+
+ /* hold on to real dev for MAP data */
+ dev_hold(real_dev);
+
+ netdev_dbg(real_dev, "registered with rmnet\n");
+ return 0;
+}
+
+static void rmnet_set_endpoint_config(struct net_device *dev,
+ u8 mux_id, u8 rmnet_mode,
+ struct net_device *egress_dev)
+{
+ struct rmnet_endpoint *ep;
+
+ netdev_dbg(dev, "id %d mode %d dev %s\n",
+ mux_id, rmnet_mode, egress_dev->name);
+
+ ep = rmnet_get_endpoint(dev, mux_id);
+	/* This config is overwritten on every set, so it's OK not to
+	 * clear it on a device delete.
+	 */
+ memset(ep, 0, sizeof(struct rmnet_endpoint));
+ ep->rmnet_mode = rmnet_mode;
+ ep->egress_dev = egress_dev;
+ ep->mux_id = mux_id;
+}
+
+static int rmnet_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ int ingress_format = RMNET_INGRESS_FORMAT_DEMUXING |
+ RMNET_INGRESS_FORMAT_DEAGGREGATION |
+ RMNET_INGRESS_FORMAT_MAP;
+ int egress_format = RMNET_EGRESS_FORMAT_MUXING |
+ RMNET_EGRESS_FORMAT_MAP;
+ struct net_device *real_dev;
+ int mode = RMNET_EPMODE_VND;
+ struct rmnet_port *port;
+ int err = 0;
+ u16 mux_id;
+
+ real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
+ if (!real_dev || !dev)
+ return -ENODEV;
+
+ if (!data[IFLA_VLAN_ID])
+ return -EINVAL;
+
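+	/* rmnet defines no netlink attributes of its own; the mux ID is
+	 * carried in the reused IFLA_VLAN_ID attribute and range-checked
+	 * in rmnet_rtnl_validate().
+	 */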
+ mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+
+ err = rmnet_register_real_device(real_dev);
+ if (err)
+ goto err0;
+
+ port = rmnet_get_port_rtnl(real_dev);
+ err = rmnet_vnd_newlink(mux_id, dev, port, real_dev);
+ if (err)
+ goto err1;
+
+ err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL);
+ if (err)
+ goto err2;
+
+ netdev_dbg(dev, "data format [ingress 0x%08X] [egress 0x%08X]\n",
+ ingress_format, egress_format);
+ port->egress_data_format = egress_format;
+ port->ingress_data_format = ingress_format;
+
+ rmnet_set_endpoint_config(real_dev, mux_id, mode, dev);
+ rmnet_set_endpoint_config(dev, mux_id, mode, real_dev);
+ return 0;
+
+err2:
+ rmnet_vnd_dellink(mux_id, port);
+err1:
+ rmnet_unregister_real_device(real_dev, port);
+err0:
+ return err;
+}
+
+static void rmnet_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net_device *real_dev;
+ struct rmnet_port *port;
+ u8 mux_id;
+
+ rcu_read_lock();
+ real_dev = netdev_master_upper_dev_get_rcu(dev);
+ rcu_read_unlock();
+
+ if (!real_dev || !rmnet_is_real_dev_registered(real_dev))
+ return;
+
+ port = rmnet_get_port_rtnl(real_dev);
+
+ mux_id = rmnet_vnd_get_mux(dev);
+ rmnet_vnd_dellink(mux_id, port);
+ netdev_upper_dev_unlink(dev, real_dev);
+ rmnet_unregister_real_device(real_dev, port);
+
+ unregister_netdevice_queue(dev, head);
+}
+
+static int rmnet_dev_walk_unreg(struct net_device *rmnet_dev, void *data)
+{
+ struct rmnet_walk_data *d = data;
+ u8 mux_id;
+
+ mux_id = rmnet_vnd_get_mux(rmnet_dev);
+
+ rmnet_vnd_dellink(mux_id, d->port);
+ netdev_upper_dev_unlink(rmnet_dev, d->real_dev);
+ unregister_netdevice_queue(rmnet_dev, d->head);
+
+ return 0;
+}
+
+static void rmnet_force_unassociate_device(struct net_device *dev)
+{
+ struct net_device *real_dev = dev;
+ struct rmnet_walk_data d;
+ struct rmnet_port *port;
+ LIST_HEAD(list);
+
+ if (!rmnet_is_real_dev_registered(real_dev))
+ return;
+
+ ASSERT_RTNL();
+
+ d.real_dev = real_dev;
+ d.head = &list;
+
+ port = rmnet_get_port_rtnl(dev);
+ d.port = port;
+
+ rcu_read_lock();
+ netdev_walk_all_lower_dev_rcu(real_dev, rmnet_dev_walk_unreg, &d);
+ rcu_read_unlock();
+ unregister_netdevice_many(&list);
+
+ rmnet_unregister_real_device(real_dev, port);
+}
+
+static int rmnet_config_notify_cb(struct notifier_block *nb,
+ unsigned long event, void *data)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(data);
+
+ if (!dev)
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ netdev_dbg(dev, "Kernel unregister\n");
+ rmnet_force_unassociate_device(dev);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block rmnet_dev_notifier __read_mostly = {
+ .notifier_call = rmnet_config_notify_cb,
+};
+
+static int rmnet_rtnl_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ u16 mux_id;
+
+ if (!data || !data[IFLA_VLAN_ID])
+ return -EINVAL;
+
+ mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
+ if (mux_id > (RMNET_MAX_LOGICAL_EP - 1))
+ return -ERANGE;
+
+ return 0;
+}
+
+static size_t rmnet_get_size(const struct net_device *dev)
+{
+ return nla_total_size(2); /* IFLA_VLAN_ID */
+}
+
+struct rtnl_link_ops rmnet_link_ops __read_mostly = {
+ .kind = "rmnet",
+ .maxtype = __IFLA_VLAN_MAX,
+ .priv_size = sizeof(struct rmnet_priv),
+ .setup = rmnet_vnd_setup,
+ .validate = rmnet_rtnl_validate,
+ .newlink = rmnet_newlink,
+ .dellink = rmnet_dellink,
+ .get_size = rmnet_get_size,
+};
+
+/* Needs either rcu_read_lock() or rtnl lock */
+struct rmnet_port *rmnet_get_port(struct net_device *real_dev)
+{
+ if (rmnet_is_real_dev_registered(real_dev))
+ return rcu_dereference_rtnl(real_dev->rx_handler_data);
+ else
+ return NULL;
+}
+
+/* Startup/Shutdown */
+
+static int __init rmnet_init(void)
+{
+ int rc;
+
+ rc = register_netdevice_notifier(&rmnet_dev_notifier);
+ if (rc != 0)
+ return rc;
+
+ rc = rtnl_link_register(&rmnet_link_ops);
+ if (rc != 0) {
+ unregister_netdevice_notifier(&rmnet_dev_notifier);
+ return rc;
+ }
+ return rc;
+}
+
+static void __exit rmnet_exit(void)
+{
+ unregister_netdevice_notifier(&rmnet_dev_notifier);
+ rtnl_link_unregister(&rmnet_link_ops);
+}
+
+module_init(rmnet_init)
+module_exit(rmnet_exit)
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
new file mode 100644
index 0000000..dde4e9f
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -0,0 +1,55 @@
+/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * RMNET Data configuration engine
+ *
+ */
+
+#include <linux/skbuff.h>
+
+#ifndef _RMNET_CONFIG_H_
+#define _RMNET_CONFIG_H_
+
+#define RMNET_MAX_LOGICAL_EP 255
+
+/* Information about the next device to deliver the packet to.
+ * Exact usage of this parameter depends on the rmnet_mode.
+ */
+struct rmnet_endpoint {
+ u8 rmnet_mode;
+ u8 mux_id;
+ struct net_device *egress_dev;
+};
+
+/* One instance of this structure is instantiated for each real_dev associated
+ * with rmnet.
+ */
+struct rmnet_port {
+ struct net_device *dev;
+ struct rmnet_endpoint local_ep;
+ struct rmnet_endpoint muxed_ep[RMNET_MAX_LOGICAL_EP];
+ u32 ingress_data_format;
+ u32 egress_data_format;
+ struct net_device *rmnet_devices[RMNET_MAX_LOGICAL_EP];
+ u8 nr_rmnet_devs;
+};
+
+extern struct rtnl_link_ops rmnet_link_ops;
+
+struct rmnet_priv {
+ struct rmnet_endpoint local_ep;
+ u8 mux_id;
+ struct net_device *real_dev;
+};
+
+struct rmnet_port *rmnet_get_port(struct net_device *real_dev);
+
+#endif /* _RMNET_CONFIG_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
new file mode 100644
index 0000000..540c762
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -0,0 +1,271 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * RMNET Data ingress/egress handler
+ *
+ */
+
+#include <linux/netdevice.h>
+#include <linux/netdev_features.h>
+#include "rmnet_private.h"
+#include "rmnet_config.h"
+#include "rmnet_vnd.h"
+#include "rmnet_map.h"
+#include "rmnet_handlers.h"
+
+#define RMNET_IP_VERSION_4 0x40
+#define RMNET_IP_VERSION_6 0x60
+
+/* Helper Functions */
+
+static void rmnet_set_skb_proto(struct sk_buff *skb)
+{
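+	/* Raw IP mode: there is no Ethernet header, so classify the frame
+	 * by the IP version nibble of its first byte.
+	 */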
+ switch (skb->data[0] & 0xF0) {
+ case RMNET_IP_VERSION_4:
+ skb->protocol = htons(ETH_P_IP);
+ break;
+ case RMNET_IP_VERSION_6:
+ skb->protocol = htons(ETH_P_IPV6);
+ break;
+ default:
+ skb->protocol = htons(ETH_P_MAP);
+ break;
+ }
+}
+
+/* Generic handler */
+
+static rx_handler_result_t
+rmnet_bridge_handler(struct sk_buff *skb, struct rmnet_endpoint *ep)
+{
+ if (!ep->egress_dev)
+ kfree_skb(skb);
+ else
+ rmnet_egress_handler(skb, ep);
+
+ return RX_HANDLER_CONSUMED;
+}
+
+static rx_handler_result_t
+rmnet_deliver_skb(struct sk_buff *skb, struct rmnet_endpoint *ep)
+{
+ switch (ep->rmnet_mode) {
+ case RMNET_EPMODE_NONE:
+ return RX_HANDLER_PASS;
+
+ case RMNET_EPMODE_BRIDGE:
+ return rmnet_bridge_handler(skb, ep);
+
+ case RMNET_EPMODE_VND:
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
+ rmnet_vnd_rx_fixup(skb, skb->dev);
+
+ skb->pkt_type = PACKET_HOST;
+ skb_set_mac_header(skb, 0);
+ netif_receive_skb(skb);
+ return RX_HANDLER_CONSUMED;
+
+ default:
+ kfree_skb(skb);
+ return RX_HANDLER_CONSUMED;
+ }
+}
+
+static rx_handler_result_t
+rmnet_ingress_deliver_packet(struct sk_buff *skb,
+ struct rmnet_port *port)
+{
+ if (!port) {
+ kfree_skb(skb);
+ return RX_HANDLER_CONSUMED;
+ }
+
+ skb->dev = port->local_ep.egress_dev;
+
+ return rmnet_deliver_skb(skb, &port->local_ep);
+}
+
+/* MAP handler */
+
+static rx_handler_result_t
+__rmnet_map_ingress_handler(struct sk_buff *skb,
+ struct rmnet_port *port)
+{
+ struct rmnet_endpoint *ep;
+ u8 mux_id;
+ u16 len;
+
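+	/* The CD bit marks a MAP command frame rather than data; hand it
+	 * to the command handler if commands are enabled, otherwise drop it.
+	 */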
+ if (RMNET_MAP_GET_CD_BIT(skb)) {
+ if (port->ingress_data_format
+ & RMNET_INGRESS_FORMAT_MAP_COMMANDS)
+ return rmnet_map_command(skb, port);
+
+ kfree_skb(skb);
+ return RX_HANDLER_CONSUMED;
+ }
+
+ mux_id = RMNET_MAP_GET_MUX_ID(skb);
+ len = RMNET_MAP_GET_LENGTH(skb) - RMNET_MAP_GET_PAD(skb);
+
+ if (mux_id >= RMNET_MAX_LOGICAL_EP) {
+ kfree_skb(skb);
+ return RX_HANDLER_CONSUMED;
+ }
+
+ ep = &port->muxed_ep[mux_id];
+
+ if (port->ingress_data_format & RMNET_INGRESS_FORMAT_DEMUXING)
+ skb->dev = ep->egress_dev;
+
+ /* Subtract MAP header */
+ skb_pull(skb, sizeof(struct rmnet_map_header));
+ skb_trim(skb, len);
+ rmnet_set_skb_proto(skb);
+ return rmnet_deliver_skb(skb, ep);
+}
+
+static rx_handler_result_t
+rmnet_map_ingress_handler(struct sk_buff *skb,
+ struct rmnet_port *port)
+{
+ struct sk_buff *skbn;
+ int rc;
+
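+	/* In deaggregation mode the skb holds an aggregate of MAP frames;
+	 * split it and run each frame through the per-packet handler, then
+	 * consume the source skb.
+	 */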
+ if (port->ingress_data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) {
+ while ((skbn = rmnet_map_deaggregate(skb)) != NULL)
+ __rmnet_map_ingress_handler(skbn, port);
+
+ consume_skb(skb);
+ rc = RX_HANDLER_CONSUMED;
+ } else {
+ rc = __rmnet_map_ingress_handler(skb, port);
+ }
+
+ return rc;
+}
+
+static int rmnet_map_egress_handler(struct sk_buff *skb,
+ struct rmnet_port *port,
+ struct rmnet_endpoint *ep,
+ struct net_device *orig_dev)
+{
+ int required_headroom, additional_header_len;
+ struct rmnet_map_header *map_header;
+
+ additional_header_len = 0;
+ required_headroom = sizeof(struct rmnet_map_header);
+
+ if (skb_headroom(skb) < required_headroom) {
+ if (pskb_expand_head(skb, required_headroom, 0, GFP_KERNEL))
+ return RMNET_MAP_CONSUMED;
+ }
+
+ map_header = rmnet_map_add_map_header(skb, additional_header_len, 0);
+ if (!map_header)
+ return RMNET_MAP_CONSUMED;
+
+ if (port->egress_data_format & RMNET_EGRESS_FORMAT_MUXING) {
+ if (ep->mux_id == 0xff)
+ map_header->mux_id = 0;
+ else
+ map_header->mux_id = ep->mux_id;
+ }
+
+ skb->protocol = htons(ETH_P_MAP);
+
+ return RMNET_MAP_SUCCESS;
+}
+
+/* Ingress / Egress Entry Points */
+
+/* Processes the packet as per the ingress data format of the receiving
+ * device. The logical endpoint is determined from packet inspection, and the
+ * packet is then sent to the egress device listed in the logical endpoint
+ * configuration.
+ */
+rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb)
+{
+ struct rmnet_port *port;
+ struct sk_buff *skb = *pskb;
+ struct net_device *dev;
+ int rc;
+
+ if (!skb)
+ return RX_HANDLER_CONSUMED;
+
+ dev = skb->dev;
+ port = rmnet_get_port(dev);
+
+ if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) {
+ rc = rmnet_map_ingress_handler(skb, port);
+ } else {
+ switch (ntohs(skb->protocol)) {
+ case ETH_P_MAP:
+ if (port->local_ep.rmnet_mode ==
+ RMNET_EPMODE_BRIDGE) {
+ rc = rmnet_ingress_deliver_packet(skb, port);
+ } else {
+ kfree_skb(skb);
+ rc = RX_HANDLER_CONSUMED;
+ }
+ break;
+
+ case ETH_P_IP:
+ case ETH_P_IPV6:
+ rc = rmnet_ingress_deliver_packet(skb, port);
+ break;
+
+ default:
+ rc = RX_HANDLER_PASS;
+ }
+ }
+
+ return rc;
+}
+
+/* Modifies the packet as per the logical endpoint configuration and the
+ * egress data format of the egress device configured in the logical
+ * endpoint, then transmits the packet on that device.
+ */
+void rmnet_egress_handler(struct sk_buff *skb,
+ struct rmnet_endpoint *ep)
+{
+ struct net_device *orig_dev;
+ struct rmnet_port *port;
+
+ orig_dev = skb->dev;
+ skb->dev = ep->egress_dev;
+
+ port = rmnet_get_port(skb->dev);
+ if (!port) {
+ kfree_skb(skb);
+ return;
+ }
+
+ if (port->egress_data_format & RMNET_EGRESS_FORMAT_MAP) {
+ switch (rmnet_map_egress_handler(skb, port, ep, orig_dev)) {
+ case RMNET_MAP_CONSUMED:
+ return;
+
+ case RMNET_MAP_SUCCESS:
+ break;
+
+ default:
+ kfree_skb(skb);
+ return;
+ }
+ }
+
+ if (ep->rmnet_mode == RMNET_EPMODE_VND)
+ rmnet_vnd_tx_fixup(skb, orig_dev);
+
+ dev_queue_xmit(skb);
+}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
new file mode 100644
index 0000000..f2638cf
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
@@ -0,0 +1,26 @@
+/* Copyright (c) 2013, 2016-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * RMNET Data ingress/egress handler
+ *
+ */
+
+#ifndef _RMNET_HANDLERS_H_
+#define _RMNET_HANDLERS_H_
+
+#include "rmnet_config.h"
+
+void rmnet_egress_handler(struct sk_buff *skb,
+ struct rmnet_endpoint *ep);
+
+rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb);
+
+#endif /* _RMNET_HANDLERS_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
new file mode 100644
index 0000000..ce2302c
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -0,0 +1,86 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _RMNET_MAP_H_
+#define _RMNET_MAP_H_
+
+struct rmnet_map_control_command {
+ u8 command_name;
+ u8 cmd_type:2;
+ u8 reserved:6;
+ u16 reserved2;
+ u32 transaction_id;
+ union {
+ struct {
+ u16 ip_family:2;
+ u16 reserved:14;
+ u16 flow_control_seq_num;
+ u32 qos_id;
+ } flow_control;
+ u8 data[0];
+ };
+} __aligned(1);
+
+enum rmnet_map_results {
+ RMNET_MAP_SUCCESS,
+ RMNET_MAP_CONSUMED,
+ RMNET_MAP_GENERAL_FAILURE,
+ RMNET_MAP_NOT_ENABLED,
+ RMNET_MAP_FAILED_AGGREGATION,
+ RMNET_MAP_FAILED_MUX
+};
+
+enum rmnet_map_commands {
+ RMNET_MAP_COMMAND_NONE,
+ RMNET_MAP_COMMAND_FLOW_DISABLE,
+ RMNET_MAP_COMMAND_FLOW_ENABLE,
+ /* These should always be the last 2 elements */
+ RMNET_MAP_COMMAND_UNKNOWN,
+ RMNET_MAP_COMMAND_ENUM_LENGTH
+};
+
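+/* MAP frame header, prepended to every packet sent on the real device.
+ * The layout mirrors the wire format; pkt_len is big endian and excludes
+ * the header itself.
+ */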
+struct rmnet_map_header {
+ u8 pad_len:6;
+ u8 reserved_bit:1;
+ u8 cd_bit:1;
+ u8 mux_id;
+ u16 pkt_len;
+} __aligned(1);
+
+#define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \
+ (Y)->data)->mux_id)
+#define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \
+ (Y)->data)->cd_bit)
+#define RMNET_MAP_GET_PAD(Y) (((struct rmnet_map_header *) \
+ (Y)->data)->pad_len)
+#define RMNET_MAP_GET_CMD_START(Y) ((struct rmnet_map_control_command *) \
+ ((Y)->data + \
+ sizeof(struct rmnet_map_header)))
+#define RMNET_MAP_GET_LENGTH(Y) (ntohs(((struct rmnet_map_header *) \
+ (Y)->data)->pkt_len))
+
+#define RMNET_MAP_COMMAND_REQUEST 0
+#define RMNET_MAP_COMMAND_ACK 1
+#define RMNET_MAP_COMMAND_UNSUPPORTED 2
+#define RMNET_MAP_COMMAND_INVALID 3
+
+#define RMNET_MAP_NO_PAD_BYTES 0
+#define RMNET_MAP_ADD_PAD_BYTES 1
+
+u8 rmnet_map_demultiplex(struct sk_buff *skb);
+struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb);
+struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
+ int hdrlen, int pad);
+rx_handler_result_t rmnet_map_command(struct sk_buff *skb,
+ struct rmnet_port *port);
+
+#endif /* _RMNET_MAP_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
new file mode 100644
index 0000000..d1ea5e2
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -0,0 +1,106 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/netdevice.h>
+#include "rmnet_config.h"
+#include "rmnet_map.h"
+#include "rmnet_private.h"
+#include "rmnet_vnd.h"
+
+static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
+ struct rmnet_port *rdinfo,
+ int enable)
+{
+ struct rmnet_map_control_command *cmd;
+ struct rmnet_endpoint *ep;
+ struct net_device *vnd;
+ u16 ip_family;
+ u16 fc_seq;
+ u32 qos_id;
+ u8 mux_id;
+ int r;
+
+ mux_id = RMNET_MAP_GET_MUX_ID(skb);
+ cmd = RMNET_MAP_GET_CMD_START(skb);
+
+ if (mux_id >= RMNET_MAX_LOGICAL_EP) {
+ kfree_skb(skb);
+ return RX_HANDLER_CONSUMED;
+ }
+
+ ep = &rdinfo->muxed_ep[mux_id];
+ vnd = ep->egress_dev;
+
+ ip_family = cmd->flow_control.ip_family;
+ fc_seq = ntohs(cmd->flow_control.flow_control_seq_num);
+ qos_id = ntohl(cmd->flow_control.qos_id);
+
+	/* Ignore the IP family and pass the sequence number for both the v4
+	 * and v6 sequences; user space does not support creating dedicated
+	 * flows for the two protocols.
+	 */
+ r = rmnet_vnd_do_flow_control(vnd, enable);
+ if (r) {
+ kfree_skb(skb);
+ return RMNET_MAP_COMMAND_UNSUPPORTED;
+ } else {
+ return RMNET_MAP_COMMAND_ACK;
+ }
+}
+
+static void rmnet_map_send_ack(struct sk_buff *skb,
+ unsigned char type)
+{
+ struct rmnet_map_control_command *cmd;
+ int xmit_status;
+
+ skb->protocol = htons(ETH_P_MAP);
+
+ cmd = RMNET_MAP_GET_CMD_START(skb);
+ cmd->cmd_type = type & 0x03;
+
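+	/* Reuse the command skb as the ACK and hand it directly to the
+	 * driver's ndo_start_xmit(), bypassing the qdisc layer.
+	 */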
+ netif_tx_lock(skb->dev);
+ xmit_status = skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev);
+ netif_tx_unlock(skb->dev);
+}
+
+/* Process a MAP command frame and send an N/ACK message as appropriate. The
+ * command name is decoded here and the appropriate handler is called.
+ */
+rx_handler_result_t rmnet_map_command(struct sk_buff *skb,
+ struct rmnet_port *port)
+{
+ struct rmnet_map_control_command *cmd;
+ unsigned char command_name;
+ unsigned char rc = 0;
+
+ cmd = RMNET_MAP_GET_CMD_START(skb);
+ command_name = cmd->command_name;
+
+ switch (command_name) {
+ case RMNET_MAP_COMMAND_FLOW_ENABLE:
+ rc = rmnet_map_do_flow_control(skb, port, 1);
+ break;
+
+ case RMNET_MAP_COMMAND_FLOW_DISABLE:
+ rc = rmnet_map_do_flow_control(skb, port, 0);
+ break;
+
+ default:
+ rc = RMNET_MAP_COMMAND_UNSUPPORTED;
+ kfree_skb(skb);
+ break;
+ }
+ if (rc == RMNET_MAP_COMMAND_ACK)
+ rmnet_map_send_ack(skb, rc);
+ return RX_HANDLER_CONSUMED;
+}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
new file mode 100644
index 0000000..557c9bf
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -0,0 +1,104 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * RMNET Data MAP protocol
+ *
+ */
+
+#include <linux/netdevice.h>
+#include "rmnet_config.h"
+#include "rmnet_map.h"
+#include "rmnet_private.h"
+
+#define RMNET_MAP_DEAGGR_SPACING 64
+#define RMNET_MAP_DEAGGR_HEADROOM (RMNET_MAP_DEAGGR_SPACING / 2)
+
+/* Adds a MAP header to the front of skb->data.
+ * Padding is calculated and set appropriately in the MAP header. The mux ID
+ * is initialized to 0.
+ */
+struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
+ int hdrlen, int pad)
+{
+ struct rmnet_map_header *map_header;
+ u32 padding, map_datalen;
+ u8 *padbytes;
+
+ if (skb_headroom(skb) < sizeof(struct rmnet_map_header))
+ return NULL;
+
+ map_datalen = skb->len - hdrlen;
+ map_header = (struct rmnet_map_header *)
+ skb_push(skb, sizeof(struct rmnet_map_header));
+ memset(map_header, 0, sizeof(struct rmnet_map_header));
+
+ if (pad == RMNET_MAP_NO_PAD_BYTES) {
+ map_header->pkt_len = htons(map_datalen);
+ return map_header;
+ }
+
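+	/* MAP payloads are padded to a 4-byte boundary; the pad length is
+	 * recorded in the 6-bit pad_len field so the receiver can trim it.
+	 */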
+ padding = ALIGN(map_datalen, 4) - map_datalen;
+
+ if (padding == 0)
+ goto done;
+
+ if (skb_tailroom(skb) < padding)
+ return NULL;
+
+ padbytes = (u8 *)skb_put(skb, padding);
+ memset(padbytes, 0, padding);
+
+done:
+ map_header->pkt_len = htons(map_datalen + padding);
+ map_header->pad_len = padding & 0x3F;
+
+ return map_header;
+}
+
+/* Deaggregates a single packet
+ * A whole new buffer is allocated for each portion of an aggregated frame.
+ * Caller should keep calling deaggregate() on the source skb until NULL is
+ * returned, indicating that there are no more packets to deaggregate. Caller
+ * is responsible for freeing the original skb.
+ */
+struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb)
+{
+ struct rmnet_map_header *maph;
+ struct sk_buff *skbn;
+ u32 packet_len;
+
+ if (skb->len == 0)
+ return NULL;
+
+ maph = (struct rmnet_map_header *)skb->data;
+ packet_len = ntohs(maph->pkt_len) + sizeof(struct rmnet_map_header);
+
+ if (((int)skb->len - (int)packet_len) < 0)
+ return NULL;
+
+ skbn = alloc_skb(packet_len + RMNET_MAP_DEAGGR_SPACING, GFP_ATOMIC);
+ if (!skbn)
+ return NULL;
+
+ skbn->dev = skb->dev;
+ skb_reserve(skbn, RMNET_MAP_DEAGGR_HEADROOM);
+ skb_put(skbn, packet_len);
+ memcpy(skbn->data, skb->data, packet_len);
+ skb_pull(skb, packet_len);
+
+	/* Some hardware can send us empty frames. Catch them; free only the
+	 * new buffer, since the caller owns and frees the source skb.
+	 */
+	if (ntohs(maph->pkt_len) == 0) {
+		kfree_skb(skbn);
+		return NULL;
+	}
+
+ return skbn;
+}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
new file mode 100644
index 0000000..7967198
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
@@ -0,0 +1,44 @@
+/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _RMNET_PRIVATE_H_
+#define _RMNET_PRIVATE_H_
+
+#define RMNET_MAX_PACKET_SIZE 16384
+#define RMNET_DFLT_PACKET_SIZE 1500
+#define RMNET_NEEDED_HEADROOM 16
+#define RMNET_TX_QUEUE_LEN 1000
+
+/* Constants */
+#define RMNET_EGRESS_FORMAT__RESERVED__ BIT(0)
+#define RMNET_EGRESS_FORMAT_MAP BIT(1)
+#define RMNET_EGRESS_FORMAT_AGGREGATION BIT(2)
+#define RMNET_EGRESS_FORMAT_MUXING BIT(3)
+#define RMNET_EGRESS_FORMAT_MAP_CKSUMV3 BIT(4)
+#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4 BIT(5)
+
+#define RMNET_INGRESS_FIX_ETHERNET BIT(0)
+#define RMNET_INGRESS_FORMAT_MAP BIT(1)
+#define RMNET_INGRESS_FORMAT_DEAGGREGATION BIT(2)
+#define RMNET_INGRESS_FORMAT_DEMUXING BIT(3)
+#define RMNET_INGRESS_FORMAT_MAP_COMMANDS BIT(4)
+#define RMNET_INGRESS_FORMAT_MAP_CKSUMV3 BIT(5)
+#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4 BIT(6)
+
+/* Pass the frame up the stack with no modifications to skb->dev */
+#define RMNET_EPMODE_NONE (0)
+/* Replace skb->dev to a virtual rmnet device and pass up the stack */
+#define RMNET_EPMODE_VND (1)
+/* Pass the frame directly to another device with dev_queue_xmit() */
+#define RMNET_EPMODE_BRIDGE (2)
+
+#endif /* _RMNET_PRIVATE_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
new file mode 100644
index 0000000..7f90d55
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -0,0 +1,174 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ *
+ * RMNET Data virtual network driver
+ *
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <net/pkt_sched.h>
+#include "rmnet_config.h"
+#include "rmnet_handlers.h"
+#include "rmnet_private.h"
+#include "rmnet_map.h"
+#include "rmnet_vnd.h"
+
+/* RX/TX Fixup */
+
+void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev)
+{
+ dev->stats.rx_packets++;
+ dev->stats.rx_bytes += skb->len;
+}
+
+void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev)
+{
+ dev->stats.tx_packets++;
+ dev->stats.tx_bytes += skb->len;
+}
+
+/* Network Device Operations */
+
+static netdev_tx_t rmnet_vnd_start_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct rmnet_priv *priv;
+
+ priv = netdev_priv(dev);
+ if (priv->local_ep.egress_dev) {
+ rmnet_egress_handler(skb, &priv->local_ep);
+ } else {
+ dev->stats.tx_dropped++;
+ kfree_skb(skb);
+ }
+ return NETDEV_TX_OK;
+}
+
+static int rmnet_vnd_change_mtu(struct net_device *rmnet_dev, int new_mtu)
+{
+ if (new_mtu < 0 || new_mtu > RMNET_MAX_PACKET_SIZE)
+ return -EINVAL;
+
+ rmnet_dev->mtu = new_mtu;
+ return 0;
+}
+
+static int rmnet_vnd_get_iflink(const struct net_device *dev)
+{
+ struct rmnet_priv *priv = netdev_priv(dev);
+
+ return priv->real_dev->ifindex;
+}
+
+static const struct net_device_ops rmnet_vnd_ops = {
+ .ndo_start_xmit = rmnet_vnd_start_xmit,
+ .ndo_change_mtu = rmnet_vnd_change_mtu,
+ .ndo_get_iflink = rmnet_vnd_get_iflink,
+};
+
+/* Called by kernel whenever a new rmnet<n> device is created. Sets MTU,
+ * flags, ARP type, needed headroom, etc.
+ */
+void rmnet_vnd_setup(struct net_device *rmnet_dev)
+{
+ rmnet_dev->netdev_ops = &rmnet_vnd_ops;
+ rmnet_dev->mtu = RMNET_DFLT_PACKET_SIZE;
+ rmnet_dev->needed_headroom = RMNET_NEEDED_HEADROOM;
+ random_ether_addr(rmnet_dev->dev_addr);
+ rmnet_dev->tx_queue_len = RMNET_TX_QUEUE_LEN;
+
+ /* Raw IP mode */
+ rmnet_dev->header_ops = NULL; /* No header */
+ rmnet_dev->type = ARPHRD_RAWIP;
+ rmnet_dev->hard_header_len = 0;
+ rmnet_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
+
+ rmnet_dev->needs_free_netdev = true;
+}
+
+/* Exposed API */
+
+int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
+ struct rmnet_port *port,
+ struct net_device *real_dev)
+{
+ struct rmnet_priv *priv;
+ int rc;
+
+ if (port->rmnet_devices[id])
+ return -EINVAL;
+
+ rc = register_netdevice(rmnet_dev);
+ if (!rc) {
+ port->rmnet_devices[id] = rmnet_dev;
+ port->nr_rmnet_devs++;
+
+ rmnet_dev->rtnl_link_ops = &rmnet_link_ops;
+
+ priv = netdev_priv(rmnet_dev);
+ priv->mux_id = id;
+ priv->real_dev = real_dev;
+
+ netdev_dbg(rmnet_dev, "rmnet dev created\n");
+ }
+
+ return rc;
+}
+
+int rmnet_vnd_dellink(u8 id, struct rmnet_port *port)
+{
+ if (id >= RMNET_MAX_LOGICAL_EP || !port->rmnet_devices[id])
+ return -EINVAL;
+
+ port->rmnet_devices[id] = NULL;
+ port->nr_rmnet_devs--;
+ return 0;
+}
+
+u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev)
+{
+ struct rmnet_priv *priv;
+
+ priv = netdev_priv(rmnet_dev);
+ return priv->mux_id;
+}
+
+/* Gets the logical endpoint configuration for a RmNet virtual network device
+ * node. Caller should confirm that the device is a RmNet VND before calling.
+ */
+struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *rmnet_dev)
+{
+ struct rmnet_priv *priv;
+
+ if (!rmnet_dev)
+ return NULL;
+
+ priv = netdev_priv(rmnet_dev);
+
+ return &priv->local_ep;
+}
+
+int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable)
+{
+ netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable);
+	/* Although we expect similar numbers of enable and disable
+	 * commands, optimize for disable; it is more latency-sensitive
+	 * than enable.
+	 */
+ if (unlikely(enable))
+ netif_wake_queue(rmnet_dev);
+ else
+ netif_stop_queue(rmnet_dev);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
new file mode 100644
index 0000000..8a4042f
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
@@ -0,0 +1,29 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * RMNET Data Virtual Network Device APIs
+ *
+ */
+
+#ifndef _RMNET_VND_H_
+#define _RMNET_VND_H_
+
+int rmnet_vnd_do_flow_control(struct net_device *dev, int enable);
+struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *dev);
+int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
+ struct rmnet_port *port,
+ struct net_device *real_dev);
+int rmnet_vnd_dellink(u8 id, struct rmnet_port *port);
+void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev);
+void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev);
+u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev);
+void rmnet_vnd_setup(struct net_device *dev);
+#endif /* _RMNET_VND_H_ */
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index 0525bd6..96a27b0 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -991,6 +991,7 @@ struct ravb_private {
struct net_device *ndev;
struct platform_device *pdev;
void __iomem *addr;
+ struct clk *clk;
struct mdiobb_ctrl mdiobb;
u32 num_rx_ring[NUM_RX_QUEUE];
u32 num_tx_ring[NUM_TX_QUEUE];
@@ -1033,6 +1034,7 @@ struct ravb_private {
unsigned no_avb_link:1;
unsigned avb_link_active_low:1;
+ unsigned wol_enabled:1;
};
static inline u32 ravb_read(struct net_device *ndev, enum ravb_reg reg)
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 5931e85..fdf30bf 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -680,6 +680,9 @@ static void ravb_emac_interrupt_unlocked(struct net_device *ndev)
ecsr = ravb_read(ndev, ECSR);
ravb_write(ndev, ecsr, ECSR); /* clear interrupt */
+
+ if (ecsr & ECSR_MPD)
+ pm_wakeup_event(&priv->pdev->dev, 0);
if (ecsr & ECSR_ICD)
ndev->stats.tx_carrier_errors++;
if (ecsr & ECSR_LCHNG) {
@@ -1330,6 +1333,33 @@ static int ravb_get_ts_info(struct net_device *ndev,
return 0;
}
+static void ravb_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+
+ wol->supported = 0;
+ wol->wolopts = 0;
+
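+	/* Magic-packet WoL requires the module clock to be kept running
+	 * across suspend, so it is only advertised when a clock reference
+	 * was obtained at probe time.
+	 */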
+ if (priv->clk) {
+ wol->supported = WAKE_MAGIC;
+ wol->wolopts = priv->wol_enabled ? WAKE_MAGIC : 0;
+ }
+}
+
+static int ravb_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+
+ if (!priv->clk || wol->wolopts & ~WAKE_MAGIC)
+ return -EOPNOTSUPP;
+
+ priv->wol_enabled = !!(wol->wolopts & WAKE_MAGIC);
+
+ device_set_wakeup_enable(&priv->pdev->dev, priv->wol_enabled);
+
+ return 0;
+}
+
static const struct ethtool_ops ravb_ethtool_ops = {
.nway_reset = ravb_nway_reset,
.get_msglevel = ravb_get_msglevel,
@@ -1343,6 +1373,8 @@ static const struct ethtool_ops ravb_ethtool_ops = {
.get_ts_info = ravb_get_ts_info,
.get_link_ksettings = ravb_get_link_ksettings,
.set_link_ksettings = ravb_set_link_ksettings,
+ .get_wol = ravb_get_wol,
+ .set_wol = ravb_set_wol,
};
static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler,
@@ -2041,6 +2073,11 @@ static int ravb_probe(struct platform_device *pdev)
priv->chip_id = chip_id;
+	/* Get clock; if not found that's OK, but Wake-on-LAN is unavailable */
+ priv->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(priv->clk))
+ priv->clk = NULL;
+
/* Set function */
ndev->netdev_ops = &ravb_netdev_ops;
ndev->ethtool_ops = &ravb_ethtool_ops;
@@ -2107,6 +2144,9 @@ static int ravb_probe(struct platform_device *pdev)
if (error)
goto out_napi_del;
+ if (priv->clk)
+ device_set_wakeup_capable(&pdev->dev, 1);
+
/* Print device information */
netdev_info(ndev, "Base address at %#x, %pM, IRQ %d.\n",
(u32)ndev->base_addr, ndev->dev_addr, ndev->irq);
@@ -2160,15 +2200,66 @@ static int ravb_remove(struct platform_device *pdev)
return 0;
}
+static int ravb_wol_setup(struct net_device *ndev)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+
+ /* Disable interrupts by clearing the interrupt masks. */
+ ravb_write(ndev, 0, RIC0);
+ ravb_write(ndev, 0, RIC2);
+ ravb_write(ndev, 0, TIC);
+
+ /* Only allow ECI interrupts */
+ synchronize_irq(priv->emac_irq);
+ napi_disable(&priv->napi[RAVB_NC]);
+ napi_disable(&priv->napi[RAVB_BE]);
+ ravb_write(ndev, ECSIPR_MPDIP, ECSIPR);
+
+ /* Enable MagicPacket */
+ ravb_modify(ndev, ECMR, ECMR_MPDE, ECMR_MPDE);
+
+	/* Increase the clock usage count so the device won't be suspended */
+ clk_enable(priv->clk);
+
+ return enable_irq_wake(priv->emac_irq);
+}
+
+static int ravb_wol_restore(struct net_device *ndev)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ int ret;
+
+ napi_enable(&priv->napi[RAVB_NC]);
+ napi_enable(&priv->napi[RAVB_BE]);
+
+ /* Disable MagicPacket */
+ ravb_modify(ndev, ECMR, ECMR_MPDE, 0);
+
+ ret = ravb_close(ndev);
+ if (ret < 0)
+ return ret;
+
+ /* Restore clock usage count */
+ clk_disable(priv->clk);
+
+ return disable_irq_wake(priv->emac_irq);
+}
+
static int __maybe_unused ravb_suspend(struct device *dev)
{
struct net_device *ndev = dev_get_drvdata(dev);
- int ret = 0;
+ struct ravb_private *priv = netdev_priv(ndev);
+ int ret;
- if (netif_running(ndev)) {
- netif_device_detach(ndev);
+ if (!netif_running(ndev))
+ return 0;
+
+ netif_device_detach(ndev);
+
+ if (priv->wol_enabled)
+ ret = ravb_wol_setup(ndev);
+ else
ret = ravb_close(ndev);
- }
return ret;
}
@@ -2179,6 +2270,33 @@ static int __maybe_unused ravb_resume(struct device *dev)
struct ravb_private *priv = netdev_priv(ndev);
int ret = 0;
+ if (priv->wol_enabled) {
+		/* Reduce the usecount of the clock to zero and then
+		 * restore it to its original value. This forces the
+		 * clock to be re-enabled, which is a workaround for the
+		 * renesas-cpg-mssr driver not enabling clocks when
+		 * resuming from PSCI suspend/resume.
+		 *
+		 * Without this workaround the driver fails to communicate
+		 * with the hardware if WoL was enabled when the system
+		 * entered PSCI suspend. This is because, with WoL enabled,
+		 * we explicitly keep the clock from being turned off when
+		 * suspending, but in PSCI sleep power is cut, so the clock
+		 * is disabled anyhow; the clock driver is not aware of this,
+		 * so the clock is not turned back on when resuming.
+		 *
+		 * TODO: once renesas-cpg-mssr suspend/resume is working,
+		 * this clock dance should be removed.
+		 */
+ clk_disable(priv->clk);
+ clk_disable(priv->clk);
+ clk_enable(priv->clk);
+ clk_enable(priv->clk);
+
+ /* Set reset mode to rearm the WoL logic */
+ ravb_write(ndev, CCC_OPC_RESET, CCC);
+ }
+
+	/* All registers have been reset to default values.
+	 * Restore all registers which were set up at probe time and
+	 * reopen the device if it was running before the system suspended.
@@ -2202,6 +2320,11 @@ static int __maybe_unused ravb_resume(struct device *dev)
ravb_write(ndev, priv->desc_bat_dma, DBAT);
if (netif_running(ndev)) {
+ if (priv->wol_enabled) {
+ ret = ravb_wol_restore(ndev);
+ if (ret)
+ return ret;
+ }
ret = ravb_open(ndev);
if (ret < 0)
return ret;
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index d2dc0a8..d2e88a3 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3402,7 +3402,7 @@ static const struct dev_pm_ops sh_eth_dev_pm_ops = {
#define SH_ETH_PM_OPS NULL
#endif
-static struct platform_device_id sh_eth_id_table[] = {
+static const struct platform_device_id sh_eth_id_table[] = {
{ "sh7619-ether", (kernel_ulong_t)&sh7619_data },
{ "sh771x-ether", (kernel_ulong_t)&sh771x_data },
{ "sh7724-ether", (kernel_ulong_t)&sh7724_data },
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index b1e5c07..fc8f8bd 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -34,6 +34,7 @@
#include <net/netevent.h>
#include <net/arp.h>
#include <net/fib_rules.h>
+#include <net/fib_notifier.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <generated/utsrelease.h>
@@ -2191,6 +2192,10 @@ static int rocker_router_fib_event(struct notifier_block *nb,
{
struct rocker *rocker = container_of(nb, struct rocker, fib_nb);
struct rocker_fib_event_work *fib_work;
+ struct fib_notifier_info *info = ptr;
+
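+	/* rocker offloads only IPv4 routes; ignore FIB notifications for
+	 * other address families.
+	 */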
+ if (info->family != AF_INET)
+ return NOTIFY_DONE;
fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
if (WARN_ON(!fib_work))
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 600e30e..0653b70 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -1177,7 +1177,7 @@ static int ofdpa_group_l2_fan_out(struct ofdpa_port *ofdpa_port,
entry->group_id = group_id;
entry->group_count = group_count;
- entry->group_ids = kcalloc(flags, group_count, sizeof(u32));
+ entry->group_ids = kcalloc(group_count, sizeof(u32), GFP_KERNEL);
if (!entry->group_ids) {
kfree(entry);
return -ENOMEM;
@@ -1456,7 +1456,7 @@ static int ofdpa_port_vlan_flood_group(struct ofdpa_port *ofdpa_port,
int err = 0;
int i;
- group_ids = kcalloc(flags, port_count, sizeof(u32));
+ group_ids = kcalloc(port_count, sizeof(u32), GFP_KERNEL);
if (!group_ids)
return -ENOMEM;
@@ -2761,7 +2761,7 @@ static int ofdpa_fib4_add(struct rocker *rocker,
fen_info->tb_id, 0);
if (err)
return err;
- fib_info_offload_inc(fen_info->fi);
+ fen_info->fi->fib_nh->nh_flags |= RTNH_F_OFFLOAD;
return 0;
}
@@ -2776,7 +2776,7 @@ static int ofdpa_fib4_del(struct rocker *rocker,
ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker);
if (!ofdpa_port)
return 0;
- fib_info_offload_dec(fen_info->fi);
+ fen_info->fi->fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
return ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst),
fen_info->dst_len, fen_info->fi,
fen_info->tb_id, OFDPA_OP_FLAG_REMOVE);
@@ -2803,7 +2803,7 @@ static void ofdpa_fib4_abort(struct rocker *rocker)
rocker);
if (!ofdpa_port)
continue;
- fib_info_offload_dec(flow_entry->fi);
+ flow_entry->fi->fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE,
flow_entry);
}
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index fcea937..d407adf 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -32,8 +32,8 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
struct net_device *net_dev);
netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
-int efx_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *tc);
+int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
+ void *type_data);
unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
extern unsigned int efx_piobuf_size;
extern bool efx_separate_tx_channels;
diff --git a/drivers/net/ethernet/sfc/falcon/efx.h b/drivers/net/ethernet/sfc/falcon/efx.h
index e5a7a40..4f3bb30 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.h
+++ b/drivers/net/ethernet/sfc/falcon/efx.h
@@ -32,8 +32,8 @@ netdev_tx_t ef4_hard_start_xmit(struct sk_buff *skb,
struct net_device *net_dev);
netdev_tx_t ef4_enqueue_skb(struct ef4_tx_queue *tx_queue, struct sk_buff *skb);
void ef4_xmit_done(struct ef4_tx_queue *tx_queue, unsigned int index);
-int ef4_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *tc);
+int ef4_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
+ void *type_data);
unsigned int ef4_tx_max_skb_descs(struct ef4_nic *efx);
extern bool ef4_separate_tx_channels;
diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c
index f1520a4..6a75f41 100644
--- a/drivers/net/ethernet/sfc/falcon/tx.c
+++ b/drivers/net/ethernet/sfc/falcon/tx.c
@@ -425,24 +425,25 @@ void ef4_init_tx_queue_core_txq(struct ef4_tx_queue *tx_queue)
efx->n_tx_channels : 0));
}
-int ef4_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *ntc)
+int ef4_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
+ void *type_data)
{
struct ef4_nic *efx = netdev_priv(net_dev);
+ struct tc_mqprio_qopt *mqprio = type_data;
struct ef4_channel *channel;
struct ef4_tx_queue *tx_queue;
unsigned tc, num_tc;
int rc;
- if (ntc->type != TC_SETUP_MQPRIO)
- return -EINVAL;
+ if (type != TC_SETUP_MQPRIO)
+ return -EOPNOTSUPP;
- num_tc = ntc->mqprio->num_tc;
+ num_tc = mqprio->num_tc;
if (ef4_nic_rev(efx) < EF4_REV_FALCON_B0 || num_tc > EF4_MAX_TX_TC)
return -EINVAL;
- ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
if (num_tc == net_dev->num_tc)
return 0;
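
The same mechanical conversion recurs across every driver in this series, so a stand-alone model may help (plain C, not kernel code; the enum values and the two-field struct are simplified stand-ins for the real definitions): the driver receives an enum naming the offload plus an opaque type_data pointer, casts the pointer once the type is known, and returns -EOPNOTSUPP, rather than -EINVAL, for offload types it does not implement.

	#include <errno.h>
	#include <stdio.h>

	enum tc_setup_type { TC_SETUP_MQPRIO, TC_SETUP_CLSU32 };

	struct tc_mqprio_qopt {
		unsigned char num_tc;
		unsigned char hw;
	};

	static int setup_tc(enum tc_setup_type type, void *type_data)
	{
		struct tc_mqprio_qopt *mqprio = type_data;

		if (type != TC_SETUP_MQPRIO)
			return -EOPNOTSUPP;	/* unknown offload, not a bad argument */

		mqprio->hw = 1;			/* accept the hardware offload */
		return 0;
	}

	int main(void)
	{
		struct tc_mqprio_qopt q = { .num_tc = 4, .hw = 0 };

		printf("mqprio: %d, unsupported type: %d\n",
		       setup_tc(TC_SETUP_MQPRIO, &q), setup_tc(TC_SETUP_CLSU32, &q));
		return 0;
	}
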
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index 990a63d7..c7407d1 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -746,59 +746,171 @@ static const char *efx_mcdi_phy_test_name(struct efx_nic *efx,
return NULL;
}
-#define SFP_PAGE_SIZE 128
-#define SFP_NUM_PAGES 2
-static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx,
- struct ethtool_eeprom *ee, u8 *data)
+#define SFP_PAGE_SIZE 128
+#define SFF_DIAG_TYPE_OFFSET 92
+#define SFF_DIAG_ADDR_CHANGE BIT(2)
+#define SFF_8079_NUM_PAGES 2
+#define SFF_8472_NUM_PAGES 4
+#define SFF_8436_NUM_PAGES 5
+#define SFF_DMT_LEVEL_OFFSET 94
+
+/**
+ * efx_mcdi_phy_get_module_eeprom_page() - Get a single page of module eeprom
+ * @efx: NIC context
+ * @page: EEPROM page number
+ * @data: Destination data pointer
+ * @offset: Offset within the page to begin copying into @data
+ * @space: Space available in data
+ *
+ * Return:
+ * >=0 - amount of data copied
+ * <0 - error
+ */
+static int efx_mcdi_phy_get_module_eeprom_page(struct efx_nic *efx,
+ unsigned int page,
+ u8 *data, ssize_t offset,
+ ssize_t space)
{
MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX);
MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN);
size_t outlen;
- int rc;
unsigned int payload_len;
- unsigned int space_remaining = ee->len;
- unsigned int page;
- unsigned int page_off;
unsigned int to_copy;
- u8 *user_data = data;
+ int rc;
- BUILD_BUG_ON(SFP_PAGE_SIZE * SFP_NUM_PAGES != ETH_MODULE_SFF_8079_LEN);
+ if (offset > SFP_PAGE_SIZE)
+ return -EINVAL;
- page_off = ee->offset % SFP_PAGE_SIZE;
- page = ee->offset / SFP_PAGE_SIZE;
+ to_copy = min(space, SFP_PAGE_SIZE - offset);
- while (space_remaining && (page < SFP_NUM_PAGES)) {
- MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page);
+ MCDI_SET_DWORD(inbuf, GET_PHY_MEDIA_INFO_IN_PAGE, page);
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_PHY_MEDIA_INFO,
+ inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf),
+ &outlen);
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_MEDIA_INFO,
- inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf),
- &outlen);
- if (rc)
- return rc;
+ if (rc)
+ return rc;
+
+ if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST +
+ SFP_PAGE_SIZE))
+ return -EIO;
+
+ payload_len = MCDI_DWORD(outbuf, GET_PHY_MEDIA_INFO_OUT_DATALEN);
+ if (payload_len != SFP_PAGE_SIZE)
+ return -EIO;
- if (outlen < (MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST +
- SFP_PAGE_SIZE))
- return -EIO;
+ memcpy(data, MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + offset,
+ to_copy);
- payload_len = MCDI_DWORD(outbuf,
- GET_PHY_MEDIA_INFO_OUT_DATALEN);
- if (payload_len != SFP_PAGE_SIZE)
- return -EIO;
+ return to_copy;
+}
- /* Copy as much as we can into data */
- payload_len -= page_off;
- to_copy = (space_remaining < payload_len) ?
- space_remaining : payload_len;
+static int efx_mcdi_phy_get_module_eeprom_byte(struct efx_nic *efx,
+ unsigned int page,
+ u8 byte)
+{
+ int rc;
+ u8 data;
- memcpy(user_data,
- MCDI_PTR(outbuf, GET_PHY_MEDIA_INFO_OUT_DATA) + page_off,
- to_copy);
+ rc = efx_mcdi_phy_get_module_eeprom_page(efx, page, &data, byte, 1);
+ if (rc == 1)
+ return data;
+
+ return rc;
+}
+
+static int efx_mcdi_phy_diag_type(struct efx_nic *efx)
+{
+ /* Page zero of the EEPROM includes the diagnostic type at byte 92. */
+ return efx_mcdi_phy_get_module_eeprom_byte(efx, 0,
+ SFF_DIAG_TYPE_OFFSET);
+}
- space_remaining -= to_copy;
- user_data += to_copy;
- page_off = 0;
- page++;
+static int efx_mcdi_phy_sff_8472_level(struct efx_nic *efx)
+{
+ /* Page zero of the EEPROM includes the DMT level at byte 94. */
+ return efx_mcdi_phy_get_module_eeprom_byte(efx, 0,
+ SFF_DMT_LEVEL_OFFSET);
+}
+
+static u32 efx_mcdi_phy_module_type(struct efx_nic *efx)
+{
+ struct efx_mcdi_phy_data *phy_data = efx->phy_data;
+
+ if (phy_data->media != MC_CMD_MEDIA_QSFP_PLUS)
+ return phy_data->media;
+
+ /* A QSFP+ NIC may actually have an SFP+ module attached.
+ * The ID is page 0, byte 0.
+ */
+ switch (efx_mcdi_phy_get_module_eeprom_byte(efx, 0, 0)) {
+ case 0x3:
+ return MC_CMD_MEDIA_SFP_PLUS;
+ case 0xc:
+ case 0xd:
+ return MC_CMD_MEDIA_QSFP_PLUS;
+ default:
+ return 0;
+ }
+}
+
+static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx,
+ struct ethtool_eeprom *ee, u8 *data)
+{
+ int rc;
+ ssize_t space_remaining = ee->len;
+ unsigned int page_off;
+ bool ignore_missing;
+ int num_pages;
+ int page;
+
+ switch (efx_mcdi_phy_module_type(efx)) {
+ case MC_CMD_MEDIA_SFP_PLUS:
+ num_pages = efx_mcdi_phy_sff_8472_level(efx) > 0 ?
+ SFF_8472_NUM_PAGES : SFF_8079_NUM_PAGES;
+ page = 0;
+ ignore_missing = false;
+ break;
+ case MC_CMD_MEDIA_QSFP_PLUS:
+ num_pages = SFF_8436_NUM_PAGES;
+ page = -1; /* We obtain the lower page by asking for -1. */
+ ignore_missing = true; /* Ignore missing pages after page 0. */
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ page_off = ee->offset % SFP_PAGE_SIZE;
+ page += ee->offset / SFP_PAGE_SIZE;
+
+ while (space_remaining && (page < num_pages)) {
+ rc = efx_mcdi_phy_get_module_eeprom_page(efx, page,
+ data, page_off,
+ space_remaining);
+
+ if (rc > 0) {
+ space_remaining -= rc;
+ data += rc;
+ page_off = 0;
+ page++;
+ } else if (rc == 0) {
+ space_remaining = 0;
+ } else if (ignore_missing && (page > 0)) {
+ int intended_size = SFP_PAGE_SIZE - page_off;
+
+ space_remaining -= intended_size;
+ if (space_remaining < 0) {
+ space_remaining = 0;
+ } else {
+ memset(data, 0, intended_size);
+ data += intended_size;
+ page_off = 0;
+ page++;
+ rc = 0;
+ }
+ } else {
+ return rc;
+ }
}
return 0;
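
As a quick illustration of the page arithmetic above, here is a minimal user-space sketch (assuming only the SFP_PAGE_SIZE value from this patch): an ethtool byte offset is split into a page number and an in-page offset, and for QSFP+ the counting starts at the lower page, which the firmware addresses as -1.

	#include <stdio.h>

	#define SFP_PAGE_SIZE 128

	int main(void)
	{
		const unsigned int offsets[] = { 0, 100, 200, 400 };
		unsigned int i;

		for (i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
			int page = -1;	/* QSFP+: the lower page is addressed as -1 */
			unsigned int page_off = offsets[i] % SFP_PAGE_SIZE;

			page += offsets[i] / SFP_PAGE_SIZE;
			printf("ethtool offset %3u -> page %d, in-page offset %u\n",
			       offsets[i], page, page_off);
		}
		return 0;
	}
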
@@ -807,16 +919,42 @@ static int efx_mcdi_phy_get_module_eeprom(struct efx_nic *efx,
static int efx_mcdi_phy_get_module_info(struct efx_nic *efx,
struct ethtool_modinfo *modinfo)
{
- struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ int sff_8472_level;
+ int diag_type;
- switch (phy_cfg->media) {
+ switch (efx_mcdi_phy_module_type(efx)) {
case MC_CMD_MEDIA_SFP_PLUS:
- modinfo->type = ETH_MODULE_SFF_8079;
- modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
- return 0;
+ sff_8472_level = efx_mcdi_phy_sff_8472_level(efx);
+
+ /* If we can't read the diagnostics level we have none. */
+ if (sff_8472_level < 0)
+ return -EOPNOTSUPP;
+
+ /* Check if this module requires the (unsupported) address
+ * change operation.
+ */
+ diag_type = efx_mcdi_phy_diag_type(efx);
+
+ if ((sff_8472_level == 0) ||
+ (diag_type & SFF_DIAG_ADDR_CHANGE)) {
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ }
+ break;
+
+ case MC_CMD_MEDIA_QSFP_PLUS:
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
+ break;
+
default:
return -EOPNOTSUPP;
}
+
+ return 0;
}
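
The decision above compresses to a small predicate; this stand-alone sketch models it (illustrative only, using the byte-92/byte-94 semantics of SFF-8472 relied on by this patch): the larger SFF-8472 map is reported only when a diagnostics level is readable and non-zero and the module does not need the unsupported address-change operation.

	#include <stdio.h>

	#define SFF_DIAG_ADDR_CHANGE (1 << 2)	/* bit 2 of byte 92 */

	static const char *pick_modinfo(int sff_8472_level, int diag_type)
	{
		if (sff_8472_level < 0)
			return "EOPNOTSUPP";	/* diagnostics level unreadable */
		if (sff_8472_level == 0 || (diag_type & SFF_DIAG_ADDR_CHANGE))
			return "SFF-8079, 256 bytes";
		return "SFF-8472, 512 bytes";
	}

	int main(void)
	{
		printf("%s\n", pick_modinfo(1, 0x00));			/* full access */
		printf("%s\n", pick_modinfo(1, SFF_DIAG_ADDR_CHANGE));	/* needs paging */
		printf("%s\n", pick_modinfo(0, 0x00));			/* no diagnostics */
		return 0;
	}
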
static const struct efx_phy_operations efx_mcdi_phy_ops = {
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 02d41eb..32bf1fe 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -653,24 +653,25 @@ void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue)
efx->n_tx_channels : 0));
}
-int efx_setup_tc(struct net_device *net_dev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *ntc)
+int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
+ void *type_data)
{
struct efx_nic *efx = netdev_priv(net_dev);
+ struct tc_mqprio_qopt *mqprio = type_data;
struct efx_channel *channel;
struct efx_tx_queue *tx_queue;
unsigned tc, num_tc;
int rc;
- if (ntc->type != TC_SETUP_MQPRIO)
- return -EINVAL;
+ if (type != TC_SETUP_MQPRIO)
+ return -EOPNOTSUPP;
- num_tc = ntc->mqprio->num_tc;
+ num_tc = mqprio->num_tc;
if (num_tc > EFX_MAX_TX_TC)
return -EINVAL;
- ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
if (num_tc == net_dev->num_tc)
return 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 85c0e41..9703576 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -45,6 +45,15 @@ config DWMAC_GENERIC
platform specific code to function or is using platform
data for setup.
+config DWMAC_ANARION
+ tristate "Adaptrum Anarion GMAC support"
+ default ARC
+ depends on OF && (ARC || COMPILE_TEST)
+ help
+ Support for Adaptrum Anarion GMAC Ethernet controller.
+
+ This selects the Anarion SoC glue layer support for the stmmac driver.
+
config DWMAC_IPQ806X
tristate "QCA IPQ806x DWMAC support"
default ARCH_QCOM
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index fd4937a..238307f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -7,6 +7,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \
# Ordering matters. Generic driver must be last.
obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o
+obj-$(CONFIG_DWMAC_ANARION) += dwmac-anarion.o
obj-$(CONFIG_DWMAC_IPQ806X) += dwmac-ipq806x.o
obj-$(CONFIG_DWMAC_LPC18XX) += dwmac-lpc18xx.o
obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o dwmac-meson8b.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
new file mode 100644
index 0000000..85ce80c
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
@@ -0,0 +1,152 @@
+/*
+ * Adaptrum Anarion DWMAC glue layer
+ *
+ * Copyright (C) 2017, Adaptrum, Inc.
+ * (Written by Alexandru Gagniuc <alex.g at adaptrum.com> for Adaptrum, Inc.)
+ * Licensed under the GPLv2 or (at your option) any later version.
+ */
+
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include <linux/stmmac.h>
+
+#include "stmmac.h"
+#include "stmmac_platform.h"
+
+#define GMAC_RESET_CONTROL_REG 0
+#define GMAC_SW_CONFIG_REG 4
+#define GMAC_CONFIG_INTF_SEL_MASK (0x7 << 0)
+#define GMAC_CONFIG_INTF_RGMII (0x1 << 0)
+
+struct anarion_gmac {
+ uintptr_t ctl_block;
+ uint32_t phy_intf_sel;
+};
+
+static uint32_t gmac_read_reg(struct anarion_gmac *gmac, uint8_t reg)
+{
+ return readl((void *)(gmac->ctl_block + reg));
+}
+
+static void gmac_write_reg(struct anarion_gmac *gmac, uint8_t reg, uint32_t val)
+{
+ writel(val, (void *)(gmac->ctl_block + reg));
+}
+
+static int anarion_gmac_init(struct platform_device *pdev, void *priv)
+{
+ uint32_t sw_config;
+ struct anarion_gmac *gmac = priv;
+
+ /* Reset logic, configure interface mode, then release reset. SIMPLE! */
+ gmac_write_reg(gmac, GMAC_RESET_CONTROL_REG, 1);
+
+ sw_config = gmac_read_reg(gmac, GMAC_SW_CONFIG_REG);
+ sw_config &= ~GMAC_CONFIG_INTF_SEL_MASK;
+ sw_config |= (gmac->phy_intf_sel & GMAC_CONFIG_INTF_SEL_MASK);
+ gmac_write_reg(gmac, GMAC_SW_CONFIG_REG, sw_config);
+
+ gmac_write_reg(gmac, GMAC_RESET_CONTROL_REG, 0);
+
+ return 0;
+}
+
+static void anarion_gmac_exit(struct platform_device *pdev, void *priv)
+{
+ struct anarion_gmac *gmac = priv;
+
+ gmac_write_reg(gmac, GMAC_RESET_CONTROL_REG, 1);
+}
+
+static struct anarion_gmac *anarion_config_dt(struct platform_device *pdev)
+{
+ int phy_mode;
+ struct resource *res;
+ void __iomem *ctl_block;
+ struct anarion_gmac *gmac;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ ctl_block = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(ctl_block)) {
+ dev_err(&pdev->dev, "Cannot get reset region (%ld)!\n",
+ PTR_ERR(ctl_block));
+ return ERR_CAST(ctl_block);
+ }
+
+ gmac = devm_kzalloc(&pdev->dev, sizeof(*gmac), GFP_KERNEL);
+ if (!gmac)
+ return ERR_PTR(-ENOMEM);
+
+ gmac->ctl_block = (uintptr_t)ctl_block;
+
+ phy_mode = of_get_phy_mode(pdev->dev.of_node);
+ switch (phy_mode) {
+ case PHY_INTERFACE_MODE_RGMII: /* Fall through */
+ case PHY_INTERFACE_MODE_RGMII_ID: /* Fall through */
+ case PHY_INTERFACE_MODE_RGMII_RXID: /* Fall through */
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+ gmac->phy_intf_sel = GMAC_CONFIG_INTF_RGMII;
+ break;
+ default:
+ dev_err(&pdev->dev, "Unsupported phy-mode (%d)\n",
+ phy_mode);
+ return ERR_PTR(-ENOTSUPP);
+ }
+
+ return gmac;
+}
+
+static int anarion_dwmac_probe(struct platform_device *pdev)
+{
+ int ret;
+ struct anarion_gmac *gmac;
+ struct plat_stmmacenet_data *plat_dat;
+ struct stmmac_resources stmmac_res;
+
+ ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+ if (ret)
+ return ret;
+
+ gmac = anarion_config_dt(pdev);
+ if (IS_ERR(gmac))
+ return PTR_ERR(gmac);
+
+ plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+ if (IS_ERR(plat_dat))
+ return PTR_ERR(plat_dat);
+
+ plat_dat->init = anarion_gmac_init;
+ plat_dat->exit = anarion_gmac_exit;
+ anarion_gmac_init(pdev, gmac);
+ plat_dat->bsp_priv = gmac;
+
+ ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+ if (ret) {
+ stmmac_remove_config_dt(pdev, plat_dat);
+ return ret;
+ }
+
+ return 0;
+}
+
+static const struct of_device_id anarion_dwmac_match[] = {
+ { .compatible = "adaptrum,anarion-gmac" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, anarion_dwmac_match);
+
+static struct platform_driver anarion_dwmac_driver = {
+ .probe = anarion_dwmac_probe,
+ .remove = stmmac_pltfr_remove,
+ .driver = {
+ .name = "anarion-dwmac",
+ .pm = &stmmac_pltfr_pm_ops,
+ .of_match_table = anarion_dwmac_match,
+ },
+};
+module_platform_driver(anarion_dwmac_driver);
+
+MODULE_DESCRIPTION("Adaptrum Anarion DWMAC specific glue layer");
+MODULE_AUTHOR("Alexandru Gagniuc <mr.nuke.me@gmail.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 9685555..4404650b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -89,7 +89,7 @@ static int meson8b_init_clk(struct meson8b_dwmac *dwmac)
char clk_name[32];
const char *clk_div_parents[1];
const char *mux_parent_names[MUX_CLK_NUM_PARENTS];
- static struct clk_div_table clk_25m_div_table[] = {
+ static const struct clk_div_table clk_25m_div_table[] = {
{ .val = 0, .div = 5 },
{ .val = 1, .div = 10 },
{ /* sentinel */ },
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index f0df519..99823f5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -41,6 +41,7 @@ struct rk_gmac_ops {
void (*set_to_rmii)(struct rk_priv_data *bsp_priv);
void (*set_rgmii_speed)(struct rk_priv_data *bsp_priv, int speed);
void (*set_rmii_speed)(struct rk_priv_data *bsp_priv, int speed);
+ void (*integrated_phy_powerup)(struct rk_priv_data *bsp_priv);
};
struct rk_priv_data {
@@ -52,6 +53,7 @@ struct rk_priv_data {
bool clk_enabled;
bool clock_input;
+ bool integrated_phy;
struct clk *clk_mac;
struct clk *gmac_clkin;
@@ -61,6 +63,9 @@ struct rk_priv_data {
struct clk *clk_mac_refout;
struct clk *aclk_mac;
struct clk *pclk_mac;
+ struct clk *clk_phy;
+
+ struct reset_control *phy_reset;
int tx_delay;
int rx_delay;
@@ -81,6 +86,8 @@ struct rk_priv_data {
#define RK3228_GRF_MAC_CON0 0x0900
#define RK3228_GRF_MAC_CON1 0x0904
+#define RK3228_GRF_CON_MUX 0x50
+
/* RK3228_GRF_MAC_CON0 */
#define RK3228_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 7)
#define RK3228_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0)
@@ -106,6 +113,9 @@ struct rk_priv_data {
#define RK3228_GMAC_RXCLK_DLY_ENABLE GRF_BIT(1)
#define RK3228_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(1)
+/* RK3228_GRF_CON_MUX */
+#define RK3228_GRF_CON_MUX_GMAC_INTEGRATED_PHY GRF_BIT(15)
+
static void rk3228_set_to_rgmii(struct rk_priv_data *bsp_priv,
int tx_delay, int rx_delay)
{
@@ -186,11 +196,18 @@ static void rk3228_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
dev_err(dev, "unknown speed value for RMII! speed=%d", speed);
}
+static void rk3228_integrated_phy_powerup(struct rk_priv_data *priv)
+{
+ regmap_write(priv->grf, RK3228_GRF_CON_MUX,
+ RK3228_GRF_CON_MUX_GMAC_INTEGRATED_PHY);
+}
+
static const struct rk_gmac_ops rk3228_ops = {
.set_to_rgmii = rk3228_set_to_rgmii,
.set_to_rmii = rk3228_set_to_rmii,
.set_rgmii_speed = rk3228_set_rgmii_speed,
.set_rmii_speed = rk3228_set_rmii_speed,
+ .integrated_phy_powerup = rk3228_integrated_phy_powerup,
};
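
The regmap_write() calls above never read the register first. They rely on the Rockchip GRF "hiword mask" convention: the upper 16 bits of the written word are a write-enable mask for the corresponding lower 16 bits, so single bits can be set or cleared without a read-modify-write cycle. A stand-alone model, assuming the HIWORD_UPDATE-style encoding that dwmac-rk.c builds GRF_BIT()/GRF_CLR_BIT() from:

	#include <stdint.h>
	#include <stdio.h>

	#define HIWORD_UPDATE(val, mask, shift) \
		(((uint32_t)(val) << (shift)) | ((uint32_t)(mask) << ((shift) + 16)))
	#define GRF_BIT(nr)	HIWORD_UPDATE(1, 1, nr)
	#define GRF_CLR_BIT(nr)	HIWORD_UPDATE(0, 1, nr)

	/* Emulate a GRF register: only bits whose write-enable bit (same
	 * position in the high half) is set in the written word change. */
	static uint32_t grf_write(uint32_t reg, uint32_t word)
	{
		uint32_t mask = word >> 16;

		return (reg & ~mask) | (word & mask);
	}

	int main(void)
	{
		uint32_t reg = 0;

		reg = grf_write(reg, GRF_BIT(15));	/* set bit 15 only */
		printf("after set:   0x%08x\n", (unsigned int)reg);	/* 0x00008000 */
		reg = grf_write(reg, GRF_CLR_BIT(15));	/* clear bit 15 only */
		printf("after clear: 0x%08x\n", (unsigned int)reg);	/* 0x00000000 */
		return 0;
	}
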
#define RK3288_GRF_SOC_CON1 0x0248
@@ -306,6 +323,8 @@ static const struct rk_gmac_ops rk3288_ops = {
#define RK3328_GRF_MAC_CON0 0x0900
#define RK3328_GRF_MAC_CON1 0x0904
+#define RK3328_GRF_MAC_CON2 0x0908
+#define RK3328_GRF_MACPHY_CON1 0xb04
/* RK3328_GRF_MAC_CON0 */
#define RK3328_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 7)
@@ -332,6 +351,9 @@ static const struct rk_gmac_ops rk3288_ops = {
#define RK3328_GMAC_RXCLK_DLY_ENABLE GRF_BIT(1)
#define RK3328_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(0)
+/* RK3328_GRF_MACPHY_CON1 */
+#define RK3328_MACPHY_RMII_MODE GRF_BIT(9)
+
static void rk3328_set_to_rgmii(struct rk_priv_data *bsp_priv,
int tx_delay, int rx_delay)
{
@@ -356,18 +378,19 @@ static void rk3328_set_to_rgmii(struct rk_priv_data *bsp_priv,
static void rk3328_set_to_rmii(struct rk_priv_data *bsp_priv)
{
struct device *dev = &bsp_priv->pdev->dev;
+ unsigned int reg;
if (IS_ERR(bsp_priv->grf)) {
dev_err(dev, "Missing rockchip,grf property\n");
return;
}
- regmap_write(bsp_priv->grf, RK3328_GRF_MAC_CON1,
+ reg = bsp_priv->integrated_phy ? RK3328_GRF_MAC_CON2 :
+ RK3328_GRF_MAC_CON1;
+
+ regmap_write(bsp_priv->grf, reg,
RK3328_GMAC_PHY_INTF_SEL_RMII |
RK3328_GMAC_RMII_MODE);
-
- /* set MAC to RMII mode */
- regmap_write(bsp_priv->grf, RK3328_GRF_MAC_CON1, GRF_BIT(11));
}
static void rk3328_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
@@ -395,29 +418,40 @@ static void rk3328_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
static void rk3328_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
{
struct device *dev = &bsp_priv->pdev->dev;
+ unsigned int reg;
if (IS_ERR(bsp_priv->grf)) {
dev_err(dev, "Missing rockchip,grf property\n");
return;
}
+ reg = bsp_priv->integrated_phy ? RK3328_GRF_MAC_CON2 :
+ RK3328_GRF_MAC_CON1;
+
if (speed == 10)
- regmap_write(bsp_priv->grf, RK3328_GRF_MAC_CON1,
+ regmap_write(bsp_priv->grf, reg,
RK3328_GMAC_RMII_CLK_2_5M |
RK3328_GMAC_SPEED_10M);
else if (speed == 100)
- regmap_write(bsp_priv->grf, RK3328_GRF_MAC_CON1,
+ regmap_write(bsp_priv->grf, reg,
RK3328_GMAC_RMII_CLK_25M |
RK3328_GMAC_SPEED_100M);
else
dev_err(dev, "unknown speed value for RMII! speed=%d", speed);
}
+static void rk3328_integrated_phy_powerup(struct rk_priv_data *priv)
+{
+ regmap_write(priv->grf, RK3328_GRF_MACPHY_CON1,
+ RK3328_MACPHY_RMII_MODE);
+}
+
static const struct rk_gmac_ops rk3328_ops = {
.set_to_rgmii = rk3328_set_to_rgmii,
.set_to_rmii = rk3328_set_to_rmii,
.set_rgmii_speed = rk3328_set_rgmii_speed,
.set_rmii_speed = rk3328_set_rmii_speed,
+ .integrated_phy_powerup = rk3328_integrated_phy_powerup,
};
#define RK3366_GRF_SOC_CON6 0x0418
@@ -753,9 +787,107 @@ static const struct rk_gmac_ops rk3399_ops = {
.set_rmii_speed = rk3399_set_rmii_speed,
};
-static int gmac_clk_init(struct rk_priv_data *bsp_priv)
+#define RV1108_GRF_GMAC_CON0 0x0900
+
+/* RV1108_GRF_GMAC_CON0 */
+#define RV1108_GMAC_PHY_INTF_SEL_RMII (GRF_CLR_BIT(4) | GRF_CLR_BIT(5) | \
+ GRF_BIT(6))
+#define RV1108_GMAC_FLOW_CTRL GRF_BIT(3)
+#define RV1108_GMAC_FLOW_CTRL_CLR GRF_CLR_BIT(3)
+#define RV1108_GMAC_SPEED_10M GRF_CLR_BIT(2)
+#define RV1108_GMAC_SPEED_100M GRF_BIT(2)
+#define RV1108_GMAC_RMII_CLK_25M GRF_BIT(7)
+#define RV1108_GMAC_RMII_CLK_2_5M GRF_CLR_BIT(7)
+
+static void rv1108_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+ struct device *dev = &bsp_priv->pdev->dev;
+
+ if (IS_ERR(bsp_priv->grf)) {
+ dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+ return;
+ }
+
+ regmap_write(bsp_priv->grf, RV1108_GRF_GMAC_CON0,
+ RV1108_GMAC_PHY_INTF_SEL_RMII);
+}
+
+static void rv1108_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+ struct device *dev = &bsp_priv->pdev->dev;
+
+ if (IS_ERR(bsp_priv->grf)) {
+ dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+ return;
+ }
+
+ if (speed == 10) {
+ regmap_write(bsp_priv->grf, RV1108_GRF_GMAC_CON0,
+ RV1108_GMAC_RMII_CLK_2_5M |
+ RV1108_GMAC_SPEED_10M);
+ } else if (speed == 100) {
+ regmap_write(bsp_priv->grf, RV1108_GRF_GMAC_CON0,
+ RV1108_GMAC_RMII_CLK_25M |
+ RV1108_GMAC_SPEED_100M);
+ } else {
+ dev_err(dev, "unknown speed value for RMII! speed=%d", speed);
+ }
+}
+
+static const struct rk_gmac_ops rv1108_ops = {
+ .set_to_rmii = rv1108_set_to_rmii,
+ .set_rmii_speed = rv1108_set_rmii_speed,
+};
+
+#define RK_GRF_MACPHY_CON0 0xb00
+#define RK_GRF_MACPHY_CON1 0xb04
+#define RK_GRF_MACPHY_CON2 0xb08
+#define RK_GRF_MACPHY_CON3 0xb0c
+
+#define RK_MACPHY_ENABLE GRF_BIT(0)
+#define RK_MACPHY_DISABLE GRF_CLR_BIT(0)
+#define RK_MACPHY_CFG_CLK_50M GRF_BIT(14)
+#define RK_GMAC2PHY_RMII_MODE (GRF_BIT(6) | GRF_CLR_BIT(7))
+#define RK_GRF_CON2_MACPHY_ID HIWORD_UPDATE(0x1234, 0xffff, 0)
+#define RK_GRF_CON3_MACPHY_ID HIWORD_UPDATE(0x35, 0x3f, 0)
+
+static void rk_gmac_integrated_phy_powerup(struct rk_priv_data *priv)
+{
+ if (priv->ops->integrated_phy_powerup)
+ priv->ops->integrated_phy_powerup(priv);
+
+ regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_CFG_CLK_50M);
+ regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_GMAC2PHY_RMII_MODE);
+
+ regmap_write(priv->grf, RK_GRF_MACPHY_CON2, RK_GRF_CON2_MACPHY_ID);
+ regmap_write(priv->grf, RK_GRF_MACPHY_CON3, RK_GRF_CON3_MACPHY_ID);
+
+ if (priv->phy_reset) {
+ /* PHY needs to be disabled before trying to reset it */
+ regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_DISABLE);
+ reset_control_assert(priv->phy_reset);
+ usleep_range(10, 20);
+ reset_control_deassert(priv->phy_reset);
+ usleep_range(10, 20);
+ regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_ENABLE);
+ msleep(30);
+ }
+}
+
+static void rk_gmac_integrated_phy_powerdown(struct rk_priv_data *priv)
+{
+ regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_DISABLE);
+ if (priv->phy_reset)
+ reset_control_assert(priv->phy_reset);
+}
+
+static int rk_gmac_clk_init(struct plat_stmmacenet_data *plat)
{
+ struct rk_priv_data *bsp_priv = plat->bsp_priv;
struct device *dev = &bsp_priv->pdev->dev;
+ int ret;
bsp_priv->clk_enabled = false;
@@ -806,6 +938,16 @@ static int gmac_clk_init(struct rk_priv_data *bsp_priv)
clk_set_rate(bsp_priv->clk_mac, 50000000);
}
+ if (plat->phy_node && bsp_priv->integrated_phy) {
+ bsp_priv->clk_phy = of_clk_get(plat->phy_node, 0);
+ if (IS_ERR(bsp_priv->clk_phy)) {
+ ret = PTR_ERR(bsp_priv->clk_phy);
+ dev_err(dev, "Cannot get PHY clock: %d\n", ret);
+ return -EINVAL;
+ }
+ clk_set_rate(bsp_priv->clk_phy, 50000000);
+ }
+
return 0;
}
@@ -829,6 +971,9 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
bsp_priv->clk_mac_refout);
}
+ if (!IS_ERR(bsp_priv->clk_phy))
+ clk_prepare_enable(bsp_priv->clk_phy);
+
if (!IS_ERR(bsp_priv->aclk_mac))
clk_prepare_enable(bsp_priv->aclk_mac);
@@ -861,6 +1006,9 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
bsp_priv->clk_mac_refout);
}
+ if (!IS_ERR(bsp_priv->clk_phy))
+ clk_disable_unprepare(bsp_priv->clk_phy);
+
if (!IS_ERR(bsp_priv->aclk_mac))
clk_disable_unprepare(bsp_priv->aclk_mac);
@@ -905,6 +1053,7 @@ static int phy_power_on(struct rk_priv_data *bsp_priv, bool enable)
}
static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
+ struct plat_stmmacenet_data *plat,
const struct rk_gmac_ops *ops)
{
struct rk_priv_data *bsp_priv;
@@ -967,9 +1116,22 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev,
bsp_priv->grf = syscon_regmap_lookup_by_phandle(dev->of_node,
"rockchip,grf");
- bsp_priv->pdev = pdev;
- gmac_clk_init(bsp_priv);
+ if (plat->phy_node) {
+ bsp_priv->integrated_phy = of_property_read_bool(plat->phy_node,
+ "phy-is-integrated");
+ if (bsp_priv->integrated_phy) {
+ bsp_priv->phy_reset = of_reset_control_get(plat->phy_node, NULL);
+ if (IS_ERR(bsp_priv->phy_reset)) {
+ dev_err(&pdev->dev, "No PHY reset control found.\n");
+ bsp_priv->phy_reset = NULL;
+ }
+ }
+ }
+ dev_info(dev, "integrated PHY? (%s).\n",
+ bsp_priv->integrated_phy ? "yes" : "no");
+
+ bsp_priv->pdev = pdev;
return bsp_priv;
}
@@ -1017,6 +1179,9 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
pm_runtime_enable(dev);
pm_runtime_get_sync(dev);
+ if (bsp_priv->integrated_phy)
+ rk_gmac_integrated_phy_powerup(bsp_priv);
+
return 0;
}
@@ -1024,6 +1189,9 @@ static void rk_gmac_powerdown(struct rk_priv_data *gmac)
{
struct device *dev = &gmac->pdev->dev;
+ if (gmac->integrated_phy)
+ rk_gmac_integrated_phy_powerdown(gmac);
+
pm_runtime_put_sync(dev);
pm_runtime_disable(dev);
@@ -1075,12 +1243,16 @@ static int rk_gmac_probe(struct platform_device *pdev)
plat_dat->has_gmac = true;
plat_dat->fix_mac_speed = rk_fix_speed;
- plat_dat->bsp_priv = rk_gmac_setup(pdev, data);
+ plat_dat->bsp_priv = rk_gmac_setup(pdev, plat_dat, data);
if (IS_ERR(plat_dat->bsp_priv)) {
ret = PTR_ERR(plat_dat->bsp_priv);
goto err_remove_config_dt;
}
+ ret = rk_gmac_clk_init(plat_dat);
+ if (ret)
+ goto err_remove_config_dt;
+
ret = rk_gmac_powerup(plat_dat->bsp_priv);
if (ret)
goto err_remove_config_dt;
@@ -1147,6 +1319,7 @@ static const struct of_device_id rk_gmac_dwmac_match[] = {
{ .compatible = "rockchip,rk3366-gmac", .data = &rk3366_ops },
{ .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops },
{ .compatible = "rockchip,rk3399-gmac", .data = &rk3399_ops },
+ { .compatible = "rockchip,rv1108-gmac", .data = &rv1108_ops },
{ }
};
MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 72ec711..f5f37bfa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -248,9 +248,6 @@ int stmmac_mdio_register(struct net_device *ndev)
found = 0;
for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
struct phy_device *phydev = mdiobus_get_phy(new_bus, addr);
- int act = 0;
- char irq_num[4];
- char *irq_str;
if (!phydev)
continue;
@@ -273,19 +270,6 @@ int stmmac_mdio_register(struct net_device *ndev)
if (priv->plat->phy_addr == -1)
priv->plat->phy_addr = addr;
- act = (priv->plat->phy_addr == addr);
- switch (phydev->irq) {
- case PHY_POLL:
- irq_str = "POLL";
- break;
- case PHY_IGNORE_INTERRUPT:
- irq_str = "IGNORE";
- break;
- default:
- sprintf(irq_num, "%d", phydev->irq);
- irq_str = irq_num;
- break;
- }
phy_attached_info(phydev);
found = 1;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index d71bd80..e471a90 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -152,7 +152,7 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
}
/* structure describing a PTP hardware clock */
-static struct ptp_clock_info stmmac_ptp_clock_ops = {
+static const struct ptp_clock_info stmmac_ptp_clock_ops = {
.owner = THIS_MODULE,
.name = "stmmac_ptp_clock",
.max_adj = 62500000,
diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index 8603e39..5b56c24 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -248,7 +248,7 @@ static struct net_device *vsw_alloc_netdev(u8 hwaddr[],
dev->ethtool_ops = &vsw_ethtool_ops;
dev->watchdog_timeo = VSW_TX_TIMEOUT;
- dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG;
+ dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG;
dev->features = dev->hw_features;
/* MTU range: 68 - 65535 */
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 4bb04aa..6a4e8e1 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -9221,8 +9221,7 @@ static int niu_get_of_props(struct niu *np)
phy_type = of_get_property(dp, "phy-type", &prop_len);
if (!phy_type) {
- netdev_err(dev, "%s: OF node lacks phy-type property\n",
- dp->full_name);
+ netdev_err(dev, "%pOF: OF node lacks phy-type property\n", dp);
return -EINVAL;
}
@@ -9232,26 +9231,25 @@ static int niu_get_of_props(struct niu *np)
strcpy(np->vpd.phy_type, phy_type);
if (niu_phy_type_prop_decode(np, np->vpd.phy_type)) {
- netdev_err(dev, "%s: Illegal phy string [%s]\n",
- dp->full_name, np->vpd.phy_type);
+ netdev_err(dev, "%pOF: Illegal phy string [%s]\n",
+ dp, np->vpd.phy_type);
return -EINVAL;
}
mac_addr = of_get_property(dp, "local-mac-address", &prop_len);
if (!mac_addr) {
- netdev_err(dev, "%s: OF node lacks local-mac-address property\n",
- dp->full_name);
+ netdev_err(dev, "%pOF: OF node lacks local-mac-address property\n",
+ dp);
return -EINVAL;
}
if (prop_len != dev->addr_len) {
- netdev_err(dev, "%s: OF MAC address prop len (%d) is wrong\n",
- dp->full_name, prop_len);
+ netdev_err(dev, "%pOF: OF MAC address prop len (%d) is wrong\n",
+ dp, prop_len);
}
memcpy(dev->dev_addr, mac_addr, dev->addr_len);
if (!is_valid_ether_addr(&dev->dev_addr[0])) {
- netdev_err(dev, "%s: OF MAC address is invalid\n",
- dp->full_name);
- netdev_err(dev, "%s: [ %pM ]\n", dp->full_name, dev->dev_addr);
+ netdev_err(dev, "%pOF: OF MAC address is invalid\n", dp);
+ netdev_err(dev, "%pOF: [ %pM ]\n", dp, dev->dev_addr);
return -EINVAL;
}
@@ -10027,8 +10025,8 @@ static int niu_of_probe(struct platform_device *op)
reg = of_get_property(op->dev.of_node, "reg", NULL);
if (!reg) {
- dev_err(&op->dev, "%s: No 'reg' property, aborting\n",
- op->dev.of_node->full_name);
+ dev_err(&op->dev, "%pOF: No 'reg' property, aborting\n",
+ op->dev.of_node);
return -ENODEV;
}
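
These hunks, like the cpsw, davinci_emac, and ll_temac changes further down, swap explicit full_name dereferences for the %pOF vsprintf extension, which prints the full device-tree path of a struct device_node. Besides shortening the call sites, this removes each driver's direct dependency on the full_name field, leaving the OF core free to change how that path is stored.
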
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 75b167e..0b95105 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -312,7 +312,7 @@ static struct vnet *vnet_new(const u64 *local_mac,
dev->watchdog_timeo = VNET_TX_TIMEOUT;
dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE |
- NETIF_F_IP_CSUM | NETIF_F_SG;
+ NETIF_F_HW_CSUM | NETIF_F_SG;
dev->features = dev->hw_features;
/* MTU range: 68 - 65535 */
diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c
index 9e86833..ecf456c 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.c
+++ b/drivers/net/ethernet/sun/sunvnet_common.c
@@ -303,7 +303,7 @@ static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
return skb;
}
-static inline void vnet_fullcsum(struct sk_buff *skb)
+static inline void vnet_fullcsum_ipv4(struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
int offset = skb_transport_offset(skb);
@@ -335,6 +335,40 @@ static inline void vnet_fullcsum(struct sk_buff *skb)
}
}
+#if IS_ENABLED(CONFIG_IPV6)
+static inline void vnet_fullcsum_ipv6(struct sk_buff *skb)
+{
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int offset = skb_transport_offset(skb);
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return;
+ if (ip6h->nexthdr != IPPROTO_TCP &&
+ ip6h->nexthdr != IPPROTO_UDP)
+ return;
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_level = 1;
+ skb->csum = 0;
+ if (ip6h->nexthdr == IPPROTO_TCP) {
+ struct tcphdr *ptcp = tcp_hdr(skb);
+
+ ptcp->check = 0;
+ skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ ptcp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ skb->len - offset, IPPROTO_TCP,
+ skb->csum);
+ } else if (ip6h->nexthdr == IPPROTO_UDP) {
+ struct udphdr *pudp = udp_hdr(skb);
+
+ pudp->check = 0;
+ skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ pudp->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ skb->len - offset, IPPROTO_UDP,
+ skb->csum);
+ }
+}
+#endif
+
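
Both new helpers end in csum_ipv6_magic(), which folds the IPv6 pseudo-header (addresses, payload length, next header) into the running sum over the transport payload. A simplified stand-alone model of that ones'-complement arithmetic (it sums big-endian 16-bit words directly, whereas the kernel helpers work on native-endian words and take the pseudo-header separately):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Sum 16-bit big-endian words into a 32-bit accumulator (RFC 1071). */
	static uint32_t csum_add(const uint8_t *buf, size_t len, uint32_t sum)
	{
		while (len > 1) {
			sum += ((uint32_t)buf[0] << 8) | buf[1];
			buf += 2;
			len -= 2;
		}
		if (len)
			sum += (uint32_t)buf[0] << 8;	/* odd byte padded with zero */
		return sum;
	}

	/* Fold carries and take the ones' complement, like csum_fold(). */
	static uint16_t csum_fold(uint32_t sum)
	{
		while (sum >> 16)
			sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)~sum;
	}

	int main(void)
	{
		const uint8_t payload[] = { 0xde, 0xad, 0xbe, 0xef, 0x01 };

		/* Real code would first sum the pseudo-header, then the payload. */
		printf("checksum = 0x%04x\n",
		       (unsigned int)csum_fold(csum_add(payload, sizeof(payload), 0)));
		return 0;
	}
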
static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
{
struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
@@ -394,9 +428,14 @@ static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc)
struct iphdr *iph = ip_hdr(skb);
int ihl = iph->ihl * 4;
- skb_reset_transport_header(skb);
skb_set_transport_header(skb, ihl);
- vnet_fullcsum(skb);
+ vnet_fullcsum_ipv4(skb);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ skb_set_transport_header(skb,
+ sizeof(struct ipv6hdr));
+ vnet_fullcsum_ipv6(skb);
+#endif
}
}
if (dext->flags & VNET_PKT_HCK_IPV4_HDRCKSUM_OK) {
@@ -1115,24 +1154,47 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies)
if (skb->ip_summed == CHECKSUM_PARTIAL)
start = skb_checksum_start_offset(skb);
if (start) {
- struct iphdr *iph = ip_hdr(nskb);
int offset = start + nskb->csum_offset;
+ /* copy the headers, no csum here */
if (skb_copy_bits(skb, 0, nskb->data, start)) {
dev_kfree_skb(nskb);
dev_kfree_skb(skb);
return NULL;
}
+
+ /* copy the rest, with csum calculation */
*(__sum16 *)(skb->data + offset) = 0;
csum = skb_copy_and_csum_bits(skb, start,
nskb->data + start,
skb->len - start, 0);
- if (iph->protocol == IPPROTO_TCP ||
- iph->protocol == IPPROTO_UDP) {
- csum = csum_tcpudp_magic(iph->saddr, iph->daddr,
- skb->len - start,
- iph->protocol, csum);
+
+ /* add in the header checksums */
+ if (skb->protocol == htons(ETH_P_IP)) {
+ struct iphdr *iph = ip_hdr(nskb);
+
+ if (iph->protocol == IPPROTO_TCP ||
+ iph->protocol == IPPROTO_UDP) {
+ csum = csum_tcpudp_magic(iph->saddr,
+ iph->daddr,
+ skb->len - start,
+ iph->protocol,
+ csum);
+ }
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ip6h = ipv6_hdr(nskb);
+
+ if (ip6h->nexthdr == IPPROTO_TCP ||
+ ip6h->nexthdr == IPPROTO_UDP) {
+ csum = csum_ipv6_magic(&ip6h->saddr,
+ &ip6h->daddr,
+ skb->len - start,
+ ip6h->nexthdr,
+ csum);
+ }
}
+
+ /* save the final result */
*(__sum16 *)(nskb->data + offset) = csum;
nskb->ip_summed = CHECKSUM_NONE;
@@ -1318,8 +1380,14 @@ int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!skb))
goto out_dropped;
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- vnet_fullcsum(skb);
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ if (skb->protocol == htons(ETH_P_IP))
+ vnet_fullcsum_ipv4(skb);
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ vnet_fullcsum_ipv6(skb);
+#endif
+ }
dr = &port->vio.drings[VIO_DRIVER_TX_RING];
i = skb_get_queue_mapping(skb);
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
index 3b91257..e1b55b8 100644
--- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
+++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
@@ -17,6 +17,7 @@
#include <linux/netdevice.h>
#include <linux/tcp.h>
+#include <linux/interrupt.h>
#include "dwc-xlgmac.h"
#include "dwc-xlgmac-reg.h"
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index badd0a8..db8a4bc 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1321,8 +1321,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
phy = of_phy_connect(priv->ndev, slave->data->phy_node,
&cpsw_adjust_link, 0, slave->data->phy_if);
if (!phy) {
- dev_err(priv->dev, "phy \"%s\" not found on slave %d\n",
- slave->data->phy_node->full_name,
+ dev_err(priv->dev, "phy \"%pOF\" not found on slave %d\n",
+ slave->data->phy_node,
slave->slave_num);
return;
}
@@ -2670,8 +2670,8 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
parp = of_get_property(slave_node, "phy_id", &lenp);
if (slave_data->phy_node) {
dev_dbg(&pdev->dev,
- "slave[%d] using phy-handle=\"%s\"\n",
- i, slave_data->phy_node->full_name);
+ "slave[%d] using phy-handle=\"%pOF\"\n",
+ i, slave_data->phy_node);
} else if (of_phy_is_fixed_link(slave_node)) {
/* In the case of a fixed PHY, the DT node associated
* to the PHY is the Ethernet MAC DT node.
@@ -2827,7 +2827,7 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv)
#define CPSW_QUIRK_IRQ BIT(0)
-static struct platform_device_id cpsw_devtype[] = {
+static const struct platform_device_id cpsw_devtype[] = {
{
/* keep it for existing compatibles */
.name = "cpsw",
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index c2121d2..e7b76f6 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -298,7 +298,7 @@ static long cpts_overflow_check(struct ptp_clock_info *ptp)
return (long)delay;
}
-static struct ptp_clock_info cpts_info = {
+static const struct ptp_clock_info cpts_info = {
.owner = THIS_MODULE,
.name = "CPTS timer",
.max_adj = 1000000,
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 64d5527..4bb5618 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1480,8 +1480,8 @@ static int emac_dev_open(struct net_device *ndev)
phydev = of_phy_connect(ndev, priv->phy_node,
&emac_adjust_link, 0, 0);
if (!phydev) {
- dev_err(emac_dev, "could not connect to phy %s\n",
- priv->phy_node->full_name);
+ dev_err(emac_dev, "could not connect to phy %pOF\n",
+ priv->phy_node);
ret = -ENODEV;
goto err;
}
diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index 33df340..3c33f45 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -159,8 +159,10 @@ static int davinci_mdio_reset(struct mii_bus *bus)
/* dump hardware version info */
ver = __raw_readl(&data->regs->version);
- dev_info(data->dev, "davinci mdio revision %d.%d\n",
- (ver >> 8) & 0xff, ver & 0xff);
+ dev_info(data->dev,
+ "davinci mdio revision %d.%d, bus freq %ld\n",
+ (ver >> 8) & 0xff, ver & 0xff,
+ data->pdata.bus_freq);
if (data->skip_scan)
goto done;
@@ -198,8 +200,10 @@ static inline int wait_for_user_access(struct davinci_mdio_data *data)
return 0;
reg = __raw_readl(&regs->control);
- if ((reg & CONTROL_IDLE) == 0)
+ if ((reg & CONTROL_IDLE) == 0) {
+ usleep_range(100, 200);
continue;
+ }
/*
* An emac soft_reset may have clobbered the mdio controller's
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 9d52c3a..eb96a69 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1877,20 +1877,21 @@ static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb,
return 0;
}
-static int netcp_setup_tc(struct net_device *dev, u32 handle, u32 chain_index,
- __be16 proto, struct tc_to_netdev *tc)
+static int netcp_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
{
+ struct tc_mqprio_qopt *mqprio = type_data;
u8 num_tc;
int i;
/* setup tc must be called under rtnl lock */
ASSERT_RTNL();
- if (tc->type != TC_SETUP_MQPRIO)
- return -EINVAL;
+ if (type != TC_SETUP_MQPRIO)
+ return -EOPNOTSUPP;
- tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
- num_tc = tc->mqprio->num_tc;
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ num_tc = mqprio->num_tc;
/* Sanity-check the number of traffic classes requested */
if ((dev->real_num_tx_queues <= 1) ||
diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index aec9538..c00102b 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c
@@ -873,7 +873,7 @@ static int ptp_mpipe_enable(struct ptp_clock_info *ptp,
return -EOPNOTSUPP;
}
-static struct ptp_clock_info ptp_mpipe_caps = {
+static const struct ptp_clock_info ptp_mpipe_caps = {
.owner = THIS_MODULE,
.name = "mPIPE clock",
.max_adj = 999999999,
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index d73da8a..60abc92 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1089,7 +1089,7 @@ static int temac_of_probe(struct platform_device *op)
lp->phy_node = of_parse_phandle(op->dev.of_node, "phy-handle", 0);
if (lp->phy_node)
- dev_dbg(lp->dev, "using PHY node %s (%p)\n", np->full_name, np);
+ dev_dbg(lp->dev, "using PHY node %pOF (%p)\n", np, np);
/* Add the device attributes */
rc = sysfs_create_group(&lp->dev->kobj, &temac_attr_group);
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index af27f7d..5ef6263 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -389,7 +389,7 @@ struct axidma_bd {
* @dma_err_tasklet: Tasklet structure to process Axi DMA errors
* @tx_irq: Axidma TX IRQ number
* @rx_irq: Axidma RX IRQ number
- * @phy_type: Phy type to identify between MII/GMII/RGMII/SGMII/1000 Base-X
+ * @phy_mode: Phy type to identify between MII/GMII/RGMII/SGMII/1000 Base-X
* @options: AxiEthernet option word
* @last_link: Phy link state in which the PHY was negotiated earlier
* @features: Stores the extended features supported by the axienet hw
@@ -432,7 +432,7 @@ struct axienet_local {
int tx_irq;
int rx_irq;
- u32 phy_type;
+ phy_interface_t phy_mode;
u32 options; /* Current options word */
u32 last_link;
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 33c595f..e74e1e8 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -531,11 +531,11 @@ static void axienet_adjust_link(struct net_device *ndev)
link_state = phy->speed | (phy->duplex << 1) | phy->link;
if (lp->last_link != link_state) {
if ((phy->speed == SPEED_10) || (phy->speed == SPEED_100)) {
- if (lp->phy_type == XAE_PHY_TYPE_1000BASE_X)
+ if (lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX)
setspeed = 0;
} else {
if ((phy->speed == SPEED_1000) &&
- (lp->phy_type == XAE_PHY_TYPE_MII))
+ (lp->phy_mode == PHY_INTERFACE_MODE_MII))
setspeed = 0;
}
@@ -935,15 +935,8 @@ static int axienet_open(struct net_device *ndev)
return ret;
if (lp->phy_node) {
- if (lp->phy_type == XAE_PHY_TYPE_GMII) {
- phydev = of_phy_connect(lp->ndev, lp->phy_node,
- axienet_adjust_link, 0,
- PHY_INTERFACE_MODE_GMII);
- } else if (lp->phy_type == XAE_PHY_TYPE_RGMII_2_0) {
- phydev = of_phy_connect(lp->ndev, lp->phy_node,
- axienet_adjust_link, 0,
- PHY_INTERFACE_MODE_RGMII_ID);
- }
+ phydev = of_phy_connect(lp->ndev, lp->phy_node,
+ axienet_adjust_link, 0, lp->phy_mode);
if (!phydev)
dev_err(lp->dev, "of_phy_connect() failed\n");
@@ -1539,7 +1532,38 @@ static int axienet_probe(struct platform_device *pdev)
* the device-tree and accordingly set flags.
*/
of_property_read_u32(pdev->dev.of_node, "xlnx,rxmem", &lp->rxmem);
- of_property_read_u32(pdev->dev.of_node, "xlnx,phy-type", &lp->phy_type);
+
+ /* Start with the proprietary, and broken phy_type */
+ ret = of_property_read_u32(pdev->dev.of_node, "xlnx,phy-type", &value);
+ if (!ret) {
+ netdev_warn(ndev, "Please upgrade your device tree binary blob to use phy-mode\n");
+ switch (value) {
+ case XAE_PHY_TYPE_MII:
+ lp->phy_mode = PHY_INTERFACE_MODE_MII;
+ break;
+ case XAE_PHY_TYPE_GMII:
+ lp->phy_mode = PHY_INTERFACE_MODE_GMII;
+ break;
+ case XAE_PHY_TYPE_RGMII_2_0:
+ lp->phy_mode = PHY_INTERFACE_MODE_RGMII_ID;
+ break;
+ case XAE_PHY_TYPE_SGMII:
+ lp->phy_mode = PHY_INTERFACE_MODE_SGMII;
+ break;
+ case XAE_PHY_TYPE_1000BASE_X:
+ lp->phy_mode = PHY_INTERFACE_MODE_1000BASEX;
+ break;
+ default:
+ ret = -EINVAL;
+ goto free_netdev;
+ }
+ } else {
+ lp->phy_mode = of_get_phy_mode(pdev->dev.of_node);
+ if (lp->phy_mode < 0) {
+ ret = -EINVAL;
+ goto free_netdev;
+ }
+ }
/* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0);
diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c
index f718832..fd5288f 100644
--- a/drivers/net/ethernet/xircom/xirc2ps_cs.c
+++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c
@@ -1781,7 +1781,7 @@ static int __init setup_xirc2ps_cs(char *str)
*/
int ints[10] = { -1 };
- str = get_options(str, 9, ints);
+ str = get_options(str, ARRAY_SIZE(ints), ints);
#define MAYBE_SET(X,Y) if (ints[0] >= Y && ints[Y] != -1) { X = ints[Y]; }
MAYBE_SET(if_port, 3);
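
A small stand-alone illustration of why the bound is now derived from the array itself (ARRAY_SIZE() here mirrors the kernel macro); get_options() treats its second argument as the size of the array, with slot 0 reserved for the count of parsed values:

	#include <stdio.h>

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	int main(void)
	{
		int ints[10] = { -1 };

		/* Passing a hard-coded 9 silently wasted the last usable slot;
		 * ARRAY_SIZE(ints) keeps the bound in sync with the declaration. */
		printf("bound passed to the parser: %zu\n", ARRAY_SIZE(ints));
		return 0;
	}
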
diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c
index f4a816c..61fceee 100644
--- a/drivers/net/fddi/defxx.c
+++ b/drivers/net/fddi/defxx.c
@@ -3767,7 +3767,7 @@ static void dfx_pci_unregister(struct pci_dev *pdev)
#endif /* CONFIG_PCI */
#ifdef CONFIG_EISA
-static struct eisa_device_id dfx_eisa_table[] = {
+static const struct eisa_device_id dfx_eisa_table[] = {
{ "DEC3001", DEFEA_PROD_ID_1 },
{ "DEC3002", DEFEA_PROD_ID_2 },
{ "DEC3003", DEFEA_PROD_ID_3 },
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 2bbda71..f640407 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -715,6 +715,7 @@ free_dst:
static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
struct net_device *dev,
+ struct geneve_sock *gs4,
struct flowi4 *fl4,
const struct ip_tunnel_info *info)
{
@@ -724,7 +725,7 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
struct rtable *rt = NULL;
__u8 tos;
- if (!rcu_dereference(geneve->sock4))
+ if (!gs4)
return ERR_PTR(-EIO);
memset(fl4, 0, sizeof(*fl4));
@@ -764,6 +765,7 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
struct net_device *dev,
+ struct geneve_sock *gs6,
struct flowi6 *fl6,
const struct ip_tunnel_info *info)
{
@@ -771,10 +773,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
struct geneve_dev *geneve = netdev_priv(dev);
struct dst_entry *dst = NULL;
struct dst_cache *dst_cache;
- struct geneve_sock *gs6;
__u8 prio;
- gs6 = rcu_dereference(geneve->sock6);
if (!gs6)
return ERR_PTR(-EIO);
@@ -827,7 +827,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 df;
int err;
- rt = geneve_get_v4_rt(skb, dev, &fl4, info);
+ rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
if (IS_ERR(rt))
return PTR_ERR(rt);
@@ -866,7 +866,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;
- dst = geneve_get_v6_dst(skb, dev, &fl6, info);
+ dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
if (IS_ERR(dst))
return PTR_ERR(dst);
@@ -951,8 +951,9 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
if (ip_tunnel_info_af(info) == AF_INET) {
struct rtable *rt;
struct flowi4 fl4;
+ struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
- rt = geneve_get_v4_rt(skb, dev, &fl4, info);
+ rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
if (IS_ERR(rt))
return PTR_ERR(rt);
@@ -962,8 +963,9 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
} else if (ip_tunnel_info_af(info) == AF_INET6) {
struct dst_entry *dst;
struct flowi6 fl6;
+ struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
- dst = geneve_get_v6_dst(skb, dev, &fl6, info);
+ dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
if (IS_ERR(dst))
return PTR_ERR(dst);
@@ -1014,16 +1016,22 @@ static struct device_type geneve_type = {
* supply the listening GENEVE udp ports. Callers are expected
* to implement the ndo_udp_tunnel_add.
*/
-static void geneve_push_rx_ports(struct net_device *dev)
+static void geneve_offload_rx_ports(struct net_device *dev, bool push)
{
struct net *net = dev_net(dev);
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_sock *gs;
rcu_read_lock();
- list_for_each_entry_rcu(gs, &gn->sock_list, list)
- udp_tunnel_push_rx_port(dev, gs->sock,
- UDP_TUNNEL_TYPE_GENEVE);
+ list_for_each_entry_rcu(gs, &gn->sock_list, list) {
+ if (push) {
+ udp_tunnel_push_rx_port(dev, gs->sock,
+ UDP_TUNNEL_TYPE_GENEVE);
+ } else {
+ udp_tunnel_drop_rx_port(dev, gs->sock,
+ UDP_TUNNEL_TYPE_GENEVE);
+ }
+ }
rcu_read_unlock();
}
@@ -1078,21 +1086,33 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
if (tb[IFLA_ADDRESS]) {
- if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
+ "Provided link layer address is not Ethernet");
return -EINVAL;
+ }
- if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
+ "Provided Ethernet address is not unicast");
return -EADDRNOTAVAIL;
+ }
}
- if (!data)
+ if (!data) {
+ NL_SET_ERR_MSG(extack,
+ "Not enough attributes provided to perform the operation");
return -EINVAL;
+ }
if (data[IFLA_GENEVE_ID]) {
__u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
- if (vni >= GENEVE_N_VID)
+ if (vni >= GENEVE_N_VID) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID],
+ "Geneve ID must be lower than 16777216");
return -ERANGE;
+ }
}
return 0;
@@ -1140,7 +1160,17 @@ static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
return true;
}
+static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
+ struct ip_tunnel_info *b)
+{
+ if (ip_tunnel_info_af(a) == AF_INET)
+ return a->key.u.ipv4.dst == b->key.u.ipv4.dst;
+ else
+ return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst);
+}
+
static int geneve_configure(struct net *net, struct net_device *dev,
+ struct netlink_ext_ack *extack,
const struct ip_tunnel_info *info,
bool metadata, bool ipv6_rx_csum)
{
@@ -1149,8 +1179,11 @@ static int geneve_configure(struct net *net, struct net_device *dev,
bool tun_collect_md, tun_on_same_port;
int err, encap_len;
- if (metadata && !is_tnl_info_zero(info))
+ if (metadata && !is_tnl_info_zero(info)) {
+ NL_SET_ERR_MSG(extack,
+ "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
return -EINVAL;
+ }
geneve->net = net;
geneve->dev = dev;
@@ -1171,11 +1204,17 @@ static int geneve_configure(struct net *net, struct net_device *dev,
dev->needed_headroom = encap_len + ETH_HLEN;
if (metadata) {
- if (tun_on_same_port)
+ if (tun_on_same_port) {
+ NL_SET_ERR_MSG(extack,
+ "There can be only one externally controlled device on a destination port");
return -EPERM;
+ }
} else {
- if (tun_collect_md)
+ if (tun_collect_md) {
+ NL_SET_ERR_MSG(extack,
+ "There already exists an externally controlled device on this destination port");
return -EPERM;
+ }
}
dst_cache_reset(&geneve->info.dst_cache);
@@ -1197,47 +1236,62 @@ static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
info->key.tp_dst = htons(dst_port);
}
-static int geneve_newlink(struct net *net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
- struct netlink_ext_ack *extack)
+static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack,
+ struct ip_tunnel_info *info, bool *metadata,
+ bool *use_udp6_rx_checksums, bool changelink)
{
- bool use_udp6_rx_checksums = false;
- struct ip_tunnel_info info;
- bool metadata = false;
+ int attrtype;
- init_tnl_info(&info, GENEVE_UDP_PORT);
-
- if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6])
+ if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot specify both IPv4 and IPv6 Remote addresses");
return -EINVAL;
+ }
if (data[IFLA_GENEVE_REMOTE]) {
- info.key.u.ipv4.dst =
+ if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) {
+ attrtype = IFLA_GENEVE_REMOTE;
+ goto change_notsup;
+ }
+
+ info->key.u.ipv4.dst =
nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
- if (IN_MULTICAST(ntohl(info.key.u.ipv4.dst))) {
- netdev_dbg(dev, "multicast remote is unsupported\n");
+ if (IN_MULTICAST(ntohl(info->key.u.ipv4.dst))) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
+ "Remote IPv4 address cannot be Multicast");
return -EINVAL;
}
}
if (data[IFLA_GENEVE_REMOTE6]) {
#if IS_ENABLED(CONFIG_IPV6)
- info.mode = IP_TUNNEL_INFO_IPV6;
- info.key.u.ipv6.dst =
+ if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
+ attrtype = IFLA_GENEVE_REMOTE6;
+ goto change_notsup;
+ }
+
+ info->mode = IP_TUNNEL_INFO_IPV6;
+ info->key.u.ipv6.dst =
nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
- if (ipv6_addr_type(&info.key.u.ipv6.dst) &
+ if (ipv6_addr_type(&info->key.u.ipv6.dst) &
IPV6_ADDR_LINKLOCAL) {
- netdev_dbg(dev, "link-local remote is unsupported\n");
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
+ "Remote IPv6 address cannot be link-local");
return -EINVAL;
}
- if (ipv6_addr_is_multicast(&info.key.u.ipv6.dst)) {
- netdev_dbg(dev, "multicast remote is unsupported\n");
+ if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
+ "Remote IPv6 address cannot be Multicast");
return -EINVAL;
}
- info.key.tun_flags |= TUNNEL_CSUM;
- use_udp6_rx_checksums = true;
+ info->key.tun_flags |= TUNNEL_CSUM;
+ *use_udp6_rx_checksums = true;
#else
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
+ "IPv6 support not enabled in the kernel");
return -EPFNOSUPPORT;
#endif
}
@@ -1245,46 +1299,187 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
if (data[IFLA_GENEVE_ID]) {
__u32 vni;
__u8 tvni[3];
+ __be64 tunid;
vni = nla_get_u32(data[IFLA_GENEVE_ID]);
tvni[0] = (vni & 0x00ff0000) >> 16;
tvni[1] = (vni & 0x0000ff00) >> 8;
tvni[2] = vni & 0x000000ff;
- info.key.tun_id = vni_to_tunnel_id(tvni);
+ tunid = vni_to_tunnel_id(tvni);
+ if (changelink && (tunid != info->key.tun_id)) {
+ attrtype = IFLA_GENEVE_ID;
+ goto change_notsup;
+ }
+ info->key.tun_id = tunid;
}
+
if (data[IFLA_GENEVE_TTL])
- info.key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
+ info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
if (data[IFLA_GENEVE_TOS])
- info.key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
+ info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
if (data[IFLA_GENEVE_LABEL]) {
- info.key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
+ info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
IPV6_FLOWLABEL_MASK;
- if (info.key.label && (!(info.mode & IP_TUNNEL_INFO_IPV6)))
+ if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL],
+ "Label attribute only applies for IPv6 Geneve devices");
return -EINVAL;
+ }
+ }
+
+ if (data[IFLA_GENEVE_PORT]) {
+ if (changelink) {
+ attrtype = IFLA_GENEVE_PORT;
+ goto change_notsup;
+ }
+ info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
+ }
+
+ if (data[IFLA_GENEVE_COLLECT_METADATA]) {
+ if (changelink) {
+ attrtype = IFLA_GENEVE_COLLECT_METADATA;
+ goto change_notsup;
+ }
+ *metadata = true;
+ }
+
+ if (data[IFLA_GENEVE_UDP_CSUM]) {
+ if (changelink) {
+ attrtype = IFLA_GENEVE_UDP_CSUM;
+ goto change_notsup;
+ }
+ if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
+ info->key.tun_flags |= TUNNEL_CSUM;
+ }
+
+ if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
+ if (changelink) {
+ attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX;
+ goto change_notsup;
+ }
+ if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
+ info->key.tun_flags &= ~TUNNEL_CSUM;
+ }
+
+ if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
+ if (changelink) {
+ attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX;
+ goto change_notsup;
+ }
+ if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
+ *use_udp6_rx_checksums = false;
}
- if (data[IFLA_GENEVE_PORT])
- info.key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
+ return 0;
+change_notsup:
+ NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
+ "Changing VNI, Port, endpoint IP address family, external, and UDP checksum attributes is not supported");
+ return -EOPNOTSUPP;
+}
- if (data[IFLA_GENEVE_COLLECT_METADATA])
- metadata = true;
+static int geneve_newlink(struct net *net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ bool use_udp6_rx_checksums = false;
+ struct ip_tunnel_info info;
+ bool metadata = false;
+ int err;
- if (data[IFLA_GENEVE_UDP_CSUM] &&
- nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
- info.key.tun_flags |= TUNNEL_CSUM;
+ init_tnl_info(&info, GENEVE_UDP_PORT);
+ err = geneve_nl2info(tb, data, extack, &info, &metadata,
+ &use_udp6_rx_checksums, false);
+ if (err)
+ return err;
- if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] &&
- nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
- info.key.tun_flags &= ~TUNNEL_CSUM;
+ return geneve_configure(net, dev, extack, &info, metadata,
+ use_udp6_rx_checksums);
+}
- if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] &&
- nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
- use_udp6_rx_checksums = false;
+/* Quiesces the geneve device data path for both TX and RX.
+ *
+ * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
+ * So, if we set that socket to NULL under RCU and wait for synchronize_net()
+ * to complete, the existing in-flight packets drain and the transmit data
+ * path is quiesced. All future packets are dropped until the data path is
+ * unquiesced.
+ *
+ * On receive geneve dereferences the geneve_sock stashed in the socket. So,
+ * if we set that to NULL under RCU and wait for synchronize_net() to
+ * complete, the receive data path is quiesced as well.
+ */
+static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4,
+ struct geneve_sock **gs6)
+{
+ *gs4 = rtnl_dereference(geneve->sock4);
+ rcu_assign_pointer(geneve->sock4, NULL);
+ if (*gs4)
+ rcu_assign_sk_user_data((*gs4)->sock->sk, NULL);
+#if IS_ENABLED(CONFIG_IPV6)
+ *gs6 = rtnl_dereference(geneve->sock6);
+ rcu_assign_pointer(geneve->sock6, NULL);
+ if (*gs6)
+ rcu_assign_sk_user_data((*gs6)->sock->sk, NULL);
+#else
+ *gs6 = NULL;
+#endif
+ synchronize_net();
+}
+
+/* Resumes the geneve device data path for both TX and RX. */
+static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4,
+ struct geneve_sock __maybe_unused *gs6)
+{
+ rcu_assign_pointer(geneve->sock4, gs4);
+ if (gs4)
+ rcu_assign_sk_user_data(gs4->sock->sk, gs4);
+#if IS_ENABLED(CONFIG_IPV6)
+ rcu_assign_pointer(geneve->sock6, gs6);
+ if (gs6)
+ rcu_assign_sk_user_data(gs6->sock->sk, gs6);
+#endif
+ synchronize_net();
+}
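
Conceptually this pair implements the usual RCU unpublish-then-wait idiom. A rough userspace analogy, as a minimal sketch only (invented names; C11 atomics stand in for RCU pointer publication and a spin-wait on a reader count stands in for synchronize_net()):

#include <stdatomic.h>
#include <stdio.h>

struct sock_state { int port; };

static _Atomic(struct sock_state *) active_sock;
static atomic_int readers_in_flight;

static void tx_path(void)               /* plays the role of geneve_xmit() */
{
        atomic_fetch_add(&readers_in_flight, 1);
        struct sock_state *s = atomic_load(&active_sock);

        if (s)
                printf("tx via port %d\n", s->port);
        else
                printf("tx dropped: quiesced\n");
        atomic_fetch_sub(&readers_in_flight, 1);
}

static struct sock_state *quiesce(void)
{
        struct sock_state *old = atomic_exchange(&active_sock, NULL);

        while (atomic_load(&readers_in_flight) > 0)
                ;                       /* stand-in for synchronize_net() */
        return old;
}

static void unquiesce(struct sock_state *s)
{
        atomic_store(&active_sock, s);
}

int main(void)
{
        struct sock_state s = { .port = 6081 };

        unquiesce(&s);
        tx_path();                      /* transmits */
        struct sock_state *old = quiesce();
        tx_path();                      /* dropped while quiesced */
        unquiesce(old);
        tx_path();                      /* transmits again */
        return 0;
}

The real driver gets the wait for free from RCU grace periods, so readers never spin and the writer blocks only in synchronize_net().
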
+
+static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct geneve_dev *geneve = netdev_priv(dev);
+ struct geneve_sock *gs4, *gs6;
+ struct ip_tunnel_info info;
+ bool metadata;
+ bool use_udp6_rx_checksums;
+ int err;
+
+ /* If the geneve device is configured for metadata (or externally
+ * controlled, for example, OVS), then nothing can be changed.
+ */
+ if (geneve->collect_md)
+ return -EOPNOTSUPP;
+
+ /* Start with the existing info. */
+ memcpy(&info, &geneve->info, sizeof(info));
+ metadata = geneve->collect_md;
+ use_udp6_rx_checksums = geneve->use_udp6_rx_checksums;
+ err = geneve_nl2info(tb, data, extack, &info, &metadata,
+ &use_udp6_rx_checksums, true);
+ if (err)
+ return err;
- return geneve_configure(net, dev, &info, metadata, use_udp6_rx_checksums);
+ if (!geneve_dst_addr_equal(&geneve->info, &info))
+ dst_cache_reset(&info.dst_cache);
+
+ geneve_quiesce(geneve, &gs4, &gs6);
+ geneve->info = info;
+ geneve->collect_md = metadata;
+ geneve->use_udp6_rx_checksums = use_udp6_rx_checksums;
+ geneve_unquiesce(geneve, gs4, gs6);
+
+ return 0;
}
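
The update itself is a quiesce/swap/unquiesce sequence: with both sockets unpublished and synchronize_net() complete, no data-path reader can observe a half-written geneve->info, so the old and new configurations are the only states ever visible.
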
static void geneve_dellink(struct net_device *dev, struct list_head *head)
@@ -1375,6 +1570,7 @@ static struct rtnl_link_ops geneve_link_ops __read_mostly = {
.setup = geneve_setup,
.validate = geneve_validate,
.newlink = geneve_newlink,
+ .changelink = geneve_changelink,
.dellink = geneve_dellink,
.get_size = geneve_get_size,
.fill_info = geneve_fill_info,
@@ -1396,7 +1592,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
return dev;
init_tnl_info(&info, dst_port);
- err = geneve_configure(net, dev, &info, true, true);
+ err = geneve_configure(net, dev, NULL, &info, true, true);
if (err) {
free_netdev(dev);
return ERR_PTR(err);
@@ -1426,8 +1622,14 @@ static int geneve_netdevice_event(struct notifier_block *unused,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
- geneve_push_rx_ports(dev);
+ if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
+ event == NETDEV_UDP_TUNNEL_DROP_INFO) {
+ geneve_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
+ } else if (event == NETDEV_UNREGISTER) {
+ geneve_offload_rx_ports(dev, false);
+ } else if (event == NETDEV_REGISTER) {
+ geneve_offload_rx_ports(dev, true);
+ }
return NOTIFY_DONE;
}
diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c
index 92b13b3..e178383 100644
--- a/drivers/net/hamradio/baycom_par.c
+++ b/drivers/net/hamradio/baycom_par.c
@@ -386,7 +386,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
/* --------------------------------------------------------------------- */
-static struct hdlcdrv_ops par96_ops = {
+static const struct hdlcdrv_ops par96_ops = {
.drvname = bc_drvname,
.drvinfo = bc_drvinfo,
.open = par96_open,
diff --git a/drivers/net/hamradio/baycom_ser_fdx.c b/drivers/net/hamradio/baycom_ser_fdx.c
index d9a646a..190f66c 100644
--- a/drivers/net/hamradio/baycom_ser_fdx.c
+++ b/drivers/net/hamradio/baycom_ser_fdx.c
@@ -508,7 +508,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
/* --------------------------------------------------------------------- */
-static struct hdlcdrv_ops ser12_ops = {
+static const struct hdlcdrv_ops ser12_ops = {
.drvname = bc_drvname,
.drvinfo = bc_drvinfo,
.open = ser12_open,
diff --git a/drivers/net/hamradio/baycom_ser_hdx.c b/drivers/net/hamradio/baycom_ser_hdx.c
index f1c8a9f..3c823c6 100644
--- a/drivers/net/hamradio/baycom_ser_hdx.c
+++ b/drivers/net/hamradio/baycom_ser_hdx.c
@@ -542,7 +542,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
/* --------------------------------------------------------------------- */
-static struct hdlcdrv_ops ser12_ops = {
+static const struct hdlcdrv_ops ser12_ops = {
.drvname = bc_drvname,
.drvinfo = bc_drvinfo,
.open = ser12_open,
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index dec6b76..cde4120 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -581,7 +581,7 @@ static int __init setup_adapter(int card_base, int type, int n)
priv->param.dma = -1;
INIT_WORK(&priv->rx_work, rx_bh);
dev->ml_priv = priv;
- sprintf(dev->name, "dmascc%i", 2 * n + i);
+ snprintf(dev->name, sizeof(dev->name), "dmascc%i", 2 * n + i);
dev->base_addr = card_base;
dev->irq = irq;
dev->netdev_ops = &scc_netdev_ops;
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 12cc64b..ec546da 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -147,8 +147,9 @@ struct hv_netvsc_packet {
struct netvsc_device_info {
unsigned char mac_adr[ETH_ALEN];
int ring_size;
- u32 max_num_vrss_chns;
u32 num_chn;
+ u32 send_sections;
+ u32 recv_sections;
};
enum rndis_device_state {
@@ -183,13 +184,16 @@ struct rndis_device {
/* Interface */
struct rndis_message;
struct netvsc_device;
-int netvsc_device_add(struct hv_device *device,
- const struct netvsc_device_info *info);
+struct net_device_context;
+
+struct netvsc_device *netvsc_device_add(struct hv_device *device,
+ const struct netvsc_device_info *info);
+int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx);
void netvsc_device_remove(struct hv_device *device);
-int netvsc_send(struct hv_device *device,
+int netvsc_send(struct net_device_context *ndc,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
- struct hv_page_buffer **page_buffer,
+ struct hv_page_buffer *page_buffer,
struct sk_buff *skb);
void netvsc_linkstatus_callback(struct hv_device *device_obj,
struct rndis_message *resp);
@@ -200,22 +204,24 @@ int netvsc_recv_callback(struct net_device *net,
const struct ndis_pkt_8021q_info *vlan);
void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget);
+bool rndis_filter_opened(const struct netvsc_device *nvdev);
int rndis_filter_open(struct netvsc_device *nvdev);
int rndis_filter_close(struct netvsc_device *nvdev);
-int rndis_filter_device_add(struct hv_device *dev,
- struct netvsc_device_info *info);
+struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+ struct netvsc_device_info *info);
void rndis_filter_update(struct netvsc_device *nvdev);
void rndis_filter_device_remove(struct hv_device *dev,
struct netvsc_device *nvdev);
int rndis_filter_set_rss_param(struct rndis_device *rdev,
- const u8 *key, int num_queue);
+ const u8 *key);
int rndis_filter_receive(struct net_device *ndev,
struct netvsc_device *net_dev,
struct hv_device *dev,
struct vmbus_channel *channel,
void *data, u32 buflen);
-int rndis_filter_set_device_mac(struct net_device *ndev, char *mac);
+int rndis_filter_set_device_mac(struct netvsc_device *ndev,
+ const char *mac);
void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
@@ -630,12 +636,12 @@ struct nvsp_message {
#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024 * 15) /* 15MB */
#define NETVSC_INVALID_INDEX -1
+#define NETVSC_SEND_SECTION_SIZE 6144
+#define NETVSC_RECV_SECTION_SIZE 1728
#define NETVSC_RECEIVE_BUFFER_ID 0xcafe
#define NETVSC_SEND_BUFFER_ID 0
-#define NETVSC_PACKET_SIZE 4096
-
#define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */
#define VRSS_CHANNEL_MAX 64
#define VRSS_CHANNEL_DEFAULT 8
@@ -654,13 +660,10 @@ struct recv_comp_data {
u32 status;
};
-/* Netvsc Receive Slots Max */
-#define NETVSC_RECVSLOT_MAX (NETVSC_RECEIVE_BUFFER_SIZE / ETH_DATA_LEN + 1)
-
struct multi_recv_comp {
- void *buf; /* queued receive completions */
- u32 first; /* first data entry */
- u32 next; /* next entry for writing */
+ struct recv_comp_data *slots;
+ u32 first; /* first data entry */
+ u32 next; /* next entry for writing */
};
struct netvsc_stats {
@@ -677,6 +680,17 @@ struct netvsc_ethtool_stats {
unsigned long tx_no_space;
unsigned long tx_too_big;
unsigned long tx_busy;
+ unsigned long tx_send_full;
+ unsigned long rx_comp_busy;
+};
+
+struct netvsc_vf_pcpu_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+ struct u64_stats_sync syncp;
+ u32 tx_dropped;
};
struct netvsc_reconfig {
@@ -706,24 +720,27 @@ struct net_device_context {
u32 tx_send_table[VRSS_SEND_TAB_SIZE];
/* Ethtool settings */
+ bool udp4_l4_hash;
+ bool udp6_l4_hash;
u8 duplex;
u32 speed;
struct netvsc_ethtool_stats eth_stats;
/* State to manage the associated VF interface. */
struct net_device __rcu *vf_netdev;
+ struct netvsc_vf_pcpu_stats __percpu *vf_stats;
+ struct delayed_work vf_takeover;
/* 1: allocated, serial number is valid. 0: not allocated */
u32 vf_alloc;
/* Serial number of the VF to team with */
u32 vf_serial;
-
- bool datapath; /* 0 - synthetic, 1 - VF nic */
};
/* Per channel data */
struct netvsc_channel {
struct vmbus_channel *channel;
+ struct netvsc_device *net_device;
const struct vmpacket_descriptor *desc;
struct napi_struct napi;
struct multi_send_data msd;
@@ -743,14 +760,13 @@ struct netvsc_device {
/* Receive buffer allocated by us but managed by NetVSP */
void *recv_buf;
- u32 recv_buf_size;
u32 recv_buf_gpadl_handle;
u32 recv_section_cnt;
- struct nvsp_1_receive_buffer_section *recv_section;
+ u32 recv_section_size;
+ u32 recv_completion_cnt;
/* Send buffer allocated by us */
void *send_buf;
- u32 send_buf_size;
u32 send_buf_gpadl_handle;
u32 send_section_cnt;
u32 send_section_size;
@@ -775,8 +791,6 @@ struct netvsc_device {
u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
u32 pkt_align; /* alignment bytes, e.g. 8 */
- atomic_t num_outstanding_recvs;
-
atomic_t open_cnt;
struct netvsc_channel chan_table[VRSS_CHANNEL_MAX];
@@ -784,18 +798,6 @@ struct netvsc_device {
struct rcu_head rcu;
};
-static inline struct netvsc_device *
-net_device_to_netvsc_device(struct net_device *ndev)
-{
- return ((struct net_device_context *)netdev_priv(ndev))->nvdev;
-}
-
-static inline struct netvsc_device *
-hv_device_to_netvsc_device(struct hv_device *device)
-{
- return net_device_to_netvsc_device(hv_get_drvdata(device));
-}
-
/* NdisInitialize message */
struct rndis_initialize_request {
u32 req_id;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index d18c332..0062b80 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -29,6 +29,9 @@
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
+#include <linux/prefetch.h>
+
#include <asm/sync_bitops.h>
#include "hyperv_net.h"
@@ -41,7 +44,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct hv_device *dev = net_device_ctx->device_ctx;
- struct netvsc_device *nv_dev = net_device_ctx->nvdev;
+ struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
memset(init_pkt, 0, sizeof(struct nvsp_message));
@@ -57,8 +60,6 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
sizeof(struct nvsp_message),
(unsigned long)init_pkt,
VM_PKT_DATA_INBAND, 0);
-
- net_device_ctx->datapath = vf;
}
static struct netvsc_device *alloc_net_device(void)
@@ -69,14 +70,15 @@ static struct netvsc_device *alloc_net_device(void)
if (!net_device)
return NULL;
- net_device->chan_table[0].mrc.buf
- = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
-
init_waitqueue_head(&net_device->wait_drain);
net_device->destroy = false;
atomic_set(&net_device->open_cnt, 0);
net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
+
+ net_device->recv_section_size = NETVSC_RECV_SECTION_SIZE;
+ net_device->send_section_size = NETVSC_SEND_SECTION_SIZE;
+
init_completion(&net_device->channel_init_wait);
init_waitqueue_head(&net_device->subchan_open);
@@ -90,7 +92,7 @@ static void free_netvsc_device(struct rcu_head *head)
int i;
for (i = 0; i < VRSS_CHANNEL_MAX; i++)
- vfree(nvdev->chan_table[i].mrc.buf);
+ vfree(nvdev->chan_table[i].mrc.slots);
kfree(nvdev);
}
@@ -104,7 +106,8 @@ static void netvsc_destroy_buf(struct hv_device *device)
{
struct nvsp_message *revoke_packet;
struct net_device *ndev = hv_get_drvdata(device);
- struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
+ struct net_device_context *ndc = netdev_priv(ndev);
+ struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
int ret;
/*
@@ -144,6 +147,7 @@ static void netvsc_destroy_buf(struct hv_device *device)
"revoke receive buffer to netvsp\n");
return;
}
+ net_device->recv_section_cnt = 0;
}
/* Teardown the gpadl on the vsp end */
@@ -168,19 +172,13 @@ static void netvsc_destroy_buf(struct hv_device *device)
net_device->recv_buf = NULL;
}
- if (net_device->recv_section) {
- net_device->recv_section_cnt = 0;
- kfree(net_device->recv_section);
- net_device->recv_section = NULL;
- }
-
/* Deal with the send buffer we may have set up.
* If we got a send section size, it means we received a
* NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
* NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
* to send a revoke msg here
*/
- if (net_device->send_section_size) {
+ if (net_device->send_section_cnt) {
/* Send the revoke receive buffer */
revoke_packet = &net_device->revoke_packet;
memset(revoke_packet, 0, sizeof(struct nvsp_message));
@@ -212,6 +210,7 @@ static void netvsc_destroy_buf(struct hv_device *device)
"revoke send buffer to netvsp\n");
return;
}
+ net_device->send_section_cnt = 0;
}
/* Teardown the gpadl on the vsp end */
if (net_device->send_buf_gpadl_handle) {
@@ -236,25 +235,40 @@ static void netvsc_destroy_buf(struct hv_device *device)
kfree(net_device->send_section_map);
}
+int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
+{
+ struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
+ int node = cpu_to_node(nvchan->channel->target_cpu);
+ size_t size;
+
+ size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
+ nvchan->mrc.slots = vzalloc_node(size, node);
+ if (!nvchan->mrc.slots)
+ nvchan->mrc.slots = vzalloc(size);
+
+ return nvchan->mrc.slots ? 0 : -ENOMEM;
+}
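
Allocation first tries the NUMA node owning the channel's target CPU and only then falls back to any node, so a memory-constrained node costs placement quality rather than failing the completion ring outright; the ring size (recv_completion_cnt) is filled in once the host reports its section count in netvsc_init_buf().
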
+
static int netvsc_init_buf(struct hv_device *device,
- struct netvsc_device *net_device)
+ struct netvsc_device *net_device,
+ const struct netvsc_device_info *device_info)
{
- int ret = 0;
+ struct nvsp_1_message_send_receive_buffer_complete *resp;
+ struct net_device *ndev = hv_get_drvdata(device);
struct nvsp_message *init_packet;
- struct net_device *ndev;
+ unsigned int buf_size;
size_t map_words;
- int node;
-
- ndev = hv_get_drvdata(device);
+ int ret = 0;
- node = cpu_to_node(device->channel->target_cpu);
- net_device->recv_buf = vzalloc_node(net_device->recv_buf_size, node);
- if (!net_device->recv_buf)
- net_device->recv_buf = vzalloc(net_device->recv_buf_size);
+ /* Get receive buffer area. */
+ buf_size = device_info->recv_sections * net_device->recv_section_size;
+ buf_size = roundup(buf_size, PAGE_SIZE);
+ net_device->recv_buf = vzalloc(buf_size);
if (!net_device->recv_buf) {
- netdev_err(ndev, "unable to allocate receive "
- "buffer of size %d\n", net_device->recv_buf_size);
+ netdev_err(ndev,
+ "unable to allocate receive buffer of size %u\n",
+ buf_size);
ret = -ENOMEM;
goto cleanup;
}
@@ -265,7 +279,7 @@ static int netvsc_init_buf(struct hv_device *device,
* than the channel to establish the gpadl handle.
*/
ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
- net_device->recv_buf_size,
+ buf_size,
&net_device->recv_buf_gpadl_handle);
if (ret != 0) {
netdev_err(ndev,
@@ -297,49 +311,45 @@ static int netvsc_init_buf(struct hv_device *device,
wait_for_completion(&net_device->channel_init_wait);
/* Check the response */
- if (init_packet->msg.v1_msg.
- send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
- netdev_err(ndev, "Unable to complete receive buffer "
- "initialization with NetVsp - status %d\n",
- init_packet->msg.v1_msg.
- send_recv_buf_complete.status);
+ resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
+ if (resp->status != NVSP_STAT_SUCCESS) {
+ netdev_err(ndev,
+ "Unable to complete receive buffer initialization with NetVsp - status %d\n",
+ resp->status);
ret = -EINVAL;
goto cleanup;
}
/* Parse the response */
+ netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
+ resp->num_sections, resp->sections[0].sub_alloc_size,
+ resp->sections[0].num_sub_allocs);
- net_device->recv_section_cnt = init_packet->msg.
- v1_msg.send_recv_buf_complete.num_sections;
-
- net_device->recv_section = kmemdup(
- init_packet->msg.v1_msg.send_recv_buf_complete.sections,
- net_device->recv_section_cnt *
- sizeof(struct nvsp_1_receive_buffer_section),
- GFP_KERNEL);
- if (net_device->recv_section == NULL) {
+ /* There should only be one section for the entire receive buffer */
+ if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
ret = -EINVAL;
goto cleanup;
}
- /*
- * For 1st release, there should only be 1 section that represents the
- * entire receive buffer
- */
- if (net_device->recv_section_cnt != 1 ||
- net_device->recv_section->offset != 0) {
- ret = -EINVAL;
+ net_device->recv_section_size = resp->sections[0].sub_alloc_size;
+ net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;
+
+ /* Setup receive completion ring */
+ net_device->recv_completion_cnt
+ = round_up(net_device->recv_section_cnt + 1,
+ PAGE_SIZE / sizeof(u64));
+ ret = netvsc_alloc_recv_comp_ring(net_device, 0);
+ if (ret)
goto cleanup;
- }
- /* Now setup the send buffer.
- */
- net_device->send_buf = vzalloc_node(net_device->send_buf_size, node);
- if (!net_device->send_buf)
- net_device->send_buf = vzalloc(net_device->send_buf_size);
+ /* Now setup the send buffer. */
+ buf_size = device_info->send_sections * net_device->send_section_size;
+ buf_size = round_up(buf_size, PAGE_SIZE);
+
+ net_device->send_buf = vzalloc(buf_size);
if (!net_device->send_buf) {
- netdev_err(ndev, "unable to allocate send "
- "buffer of size %d\n", net_device->send_buf_size);
+ netdev_err(ndev, "unable to allocate send buffer of size %u\n",
+ buf_size);
ret = -ENOMEM;
goto cleanup;
}
@@ -349,7 +359,7 @@ static int netvsc_init_buf(struct hv_device *device,
* than the channel to establish the gpadl handle.
*/
ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
- net_device->send_buf_size,
+ buf_size,
&net_device->send_buf_gpadl_handle);
if (ret != 0) {
netdev_err(ndev,
@@ -394,10 +404,8 @@ static int netvsc_init_buf(struct hv_device *device,
net_device->send_section_size = init_packet->msg.
v1_msg.send_send_buf_complete.section_size;
- /* Section count is simply the size divided by the section size.
- */
- net_device->send_section_cnt =
- net_device->send_buf_size / net_device->send_section_size;
+ /* Section count is simply the size divided by the section size. */
+ net_device->send_section_cnt = buf_size / net_device->send_section_size;
netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
net_device->send_section_size, net_device->send_section_cnt);
@@ -475,7 +483,8 @@ static int negotiate_nvsp_ver(struct hv_device *device,
}
static int netvsc_connect_vsp(struct hv_device *device,
- struct netvsc_device *net_device)
+ struct netvsc_device *net_device,
+ const struct netvsc_device_info *device_info)
{
const u32 ver_list[] = {
NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
@@ -525,14 +534,8 @@ static int netvsc_connect_vsp(struct hv_device *device,
if (ret != 0)
goto cleanup;
- /* Post the big receive buffer to NetVSP */
- if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
- net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
- else
- net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
- net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
- ret = netvsc_init_buf(device, net_device);
+ ret = netvsc_init_buf(device, net_device, device_info);
cleanup:
return ret;
@@ -550,7 +553,8 @@ void netvsc_device_remove(struct hv_device *device)
{
struct net_device *ndev = hv_get_drvdata(device);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct netvsc_device *net_device = net_device_ctx->nvdev;
+ struct netvsc_device *net_device
+ = rtnl_dereference(net_device_ctx->nvdev);
int i;
netvsc_disconnect_vsp(device);
@@ -693,7 +697,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
u32 pend_size,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
- struct hv_page_buffer **pb,
+ struct hv_page_buffer *pb,
struct sk_buff *skb)
{
char *start = net_device->send_buf;
@@ -714,9 +718,9 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
}
for (i = 0; i < page_count; i++) {
- char *src = phys_to_virt((*pb)[i].pfn << PAGE_SHIFT);
- u32 offset = (*pb)[i].offset;
- u32 len = (*pb)[i].len;
+ char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT);
+ u32 offset = pb[i].offset;
+ u32 len = pb[i].len;
memcpy(dest, (src + offset), len);
msg_size += len;
@@ -735,36 +739,32 @@ static inline int netvsc_send_pkt(
struct hv_device *device,
struct hv_netvsc_packet *packet,
struct netvsc_device *net_device,
- struct hv_page_buffer **pb,
+ struct hv_page_buffer *pb,
struct sk_buff *skb)
{
struct nvsp_message nvmsg;
- struct netvsc_channel *nvchan
- = &net_device->chan_table[packet->q_idx];
+ struct nvsp_1_message_send_rndis_packet * const rpkt =
+ &nvmsg.msg.v1_msg.send_rndis_pkt;
+ struct netvsc_channel * const nvchan =
+ &net_device->chan_table[packet->q_idx];
struct vmbus_channel *out_channel = nvchan->channel;
struct net_device *ndev = hv_get_drvdata(device);
struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
u64 req_id;
int ret;
- struct hv_page_buffer *pgbuf;
u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);
nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
- if (skb != NULL) {
- /* 0 is RMC_DATA; */
- nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0;
- } else {
- /* 1 is RMC_CONTROL; */
- nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1;
- }
+ if (skb)
+ rpkt->channel_type = 0; /* 0 is RMC_DATA */
+ else
+ rpkt->channel_type = 1; /* 1 is RMC_CONTROL */
- nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
- packet->send_buf_index;
+ rpkt->send_buf_section_index = packet->send_buf_index;
if (packet->send_buf_index == NETVSC_INVALID_INDEX)
- nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
+ rpkt->send_buf_section_size = 0;
else
- nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size =
- packet->total_data_buflen;
+ rpkt->send_buf_section_size = packet->total_data_buflen;
req_id = (ulong)skb;
@@ -772,21 +772,18 @@ static inline int netvsc_send_pkt(
return -ENODEV;
if (packet->page_buf_cnt) {
- pgbuf = packet->cp_partial ? (*pb) +
- packet->rmsg_pgcnt : (*pb);
- ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
- pgbuf,
- packet->page_buf_cnt,
- &nvmsg,
- sizeof(struct nvsp_message),
- req_id,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (packet->cp_partial)
+ pb += packet->rmsg_pgcnt;
+
+ ret = vmbus_sendpacket_pagebuffer(out_channel,
+ pb, packet->page_buf_cnt,
+ &nvmsg, sizeof(nvmsg),
+ req_id);
} else {
- ret = vmbus_sendpacket_ctl(out_channel, &nvmsg,
- sizeof(struct nvsp_message),
- req_id,
- VM_PKT_DATA_INBAND,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ ret = vmbus_sendpacket(out_channel,
+ &nvmsg, sizeof(nvmsg),
+ req_id, VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
}
if (ret == 0) {
@@ -801,8 +798,10 @@ static inline int netvsc_send_pkt(
ret = -ENOSPC;
}
} else {
- netdev_err(ndev, "Unable to send packet %p ret %d\n",
- packet, ret);
+ netdev_err(ndev,
+ "Unable to send packet pages %u len %u, ret %d\n",
+ packet->page_buf_cnt, packet->total_data_buflen,
+ ret);
}
return ret;
@@ -820,13 +819,16 @@ static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
msdp->count = 0;
}
-int netvsc_send(struct hv_device *device,
+/* RCU already held by caller */
+int netvsc_send(struct net_device_context *ndev_ctx,
struct hv_netvsc_packet *packet,
struct rndis_message *rndis_msg,
- struct hv_page_buffer **pb,
+ struct hv_page_buffer *pb,
struct sk_buff *skb)
{
- struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
+ struct netvsc_device *net_device
+ = rcu_dereference_bh(ndev_ctx->nvdev);
+ struct hv_device *device = ndev_ctx->device_ctx;
int ret = 0;
struct netvsc_channel *nvchan;
u32 pktlen = packet->total_data_buflen, msd_len = 0;
@@ -838,7 +840,7 @@ int netvsc_send(struct hv_device *device,
bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
/* If device is rescinded, return error and packet will get dropped. */
- if (unlikely(net_device->destroy))
+ if (unlikely(!net_device || net_device->destroy))
return -ENODEV;
/* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
@@ -878,7 +880,9 @@ int netvsc_send(struct hv_device *device,
} else if (pktlen + net_device->pkt_align <
net_device->send_section_size) {
section_index = netvsc_get_next_send_section(net_device);
- if (section_index != NETVSC_INVALID_INDEX) {
+ if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
+ ++ndev_ctx->eth_stats.tx_send_full;
+ } else {
move_pkt_msd(&msd_send, &msd_skb, msdp);
msd_len = 0;
}
@@ -943,130 +947,99 @@ send_now:
return ret;
}
-static int netvsc_send_recv_completion(struct vmbus_channel *channel,
- u64 transaction_id, u32 status)
+/* Send pending recv completions */
+static int send_recv_completions(struct net_device *ndev,
+ struct netvsc_device *nvdev,
+ struct netvsc_channel *nvchan)
{
- struct nvsp_message recvcompMessage;
+ struct multi_recv_comp *mrc = &nvchan->mrc;
+ struct recv_comp_msg {
+ struct nvsp_message_header hdr;
+ u32 status;
+ } __packed;
+ struct recv_comp_msg msg = {
+ .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
+ };
int ret;
- recvcompMessage.hdr.msg_type =
- NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
-
- recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
-
- /* Send the completion */
- ret = vmbus_sendpacket(channel, &recvcompMessage,
- sizeof(struct nvsp_message_header) + sizeof(u32),
- transaction_id, VM_PKT_COMP, 0);
-
- return ret;
-}
-
-static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
- u32 *filled, u32 *avail)
-{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- u32 first = mrc->first;
- u32 next = mrc->next;
-
- *filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next :
- next - first;
+ while (mrc->first != mrc->next) {
+ const struct recv_comp_data *rcd
+ = mrc->slots + mrc->first;
- *avail = NETVSC_RECVSLOT_MAX - *filled - 1;
-}
+ msg.status = rcd->status;
+ ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
+ rcd->tid, VM_PKT_COMP, 0);
+ if (unlikely(ret)) {
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
-/* Read the first filled slot, no change to index */
-static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device
- *nvdev, u16 q_idx)
-{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- u32 filled, avail;
+ ++ndev_ctx->eth_stats.rx_comp_busy;
+ return ret;
+ }
- if (unlikely(!mrc->buf))
- return NULL;
+ if (++mrc->first == nvdev->recv_completion_cnt)
+ mrc->first = 0;
+ }
- count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
- if (!filled)
- return NULL;
+ /* receive completion ring has been emptied */
+ if (unlikely(nvdev->destroy))
+ wake_up(&nvdev->wait_drain);
- return mrc->buf + mrc->first * sizeof(struct recv_comp_data);
+ return 0;
}
-/* Put the first filled slot back to available pool */
-static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx)
+/* Count how many receive completions are outstanding */
+static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
+ const struct multi_recv_comp *mrc,
+ u32 *filled, u32 *avail)
{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- int num_recv;
-
- mrc->first = (mrc->first + 1) % NETVSC_RECVSLOT_MAX;
+ u32 count = nvdev->recv_completion_cnt;
- num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs);
+ if (mrc->next >= mrc->first)
+ *filled = mrc->next - mrc->first;
+ else
+ *filled = (count - mrc->first) + mrc->next;
- if (nvdev->destroy && num_recv == 0)
- wake_up(&nvdev->wait_drain);
+ *avail = count - *filled - 1;
}
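
This is the classic one-slot-spare circular buffer: first == next unambiguously means empty, and usable capacity is count - 1. A standalone sketch of the same occupancy arithmetic (illustrative C, not driver code):

#include <assert.h>

/* Occupancy of a ring with `count` slots, one kept spare. */
static unsigned int ring_filled(unsigned int first, unsigned int next,
                                unsigned int count)
{
        return (next >= first) ? next - first : (count - first) + next;
}

int main(void)
{
        unsigned int count = 8;

        assert(ring_filled(0, 0, count) == 0);          /* empty */
        assert(ring_filled(2, 5, count) == 3);          /* no wrap */
        assert(ring_filled(6, 1, count) == 3);          /* wrapped */
        assert(ring_filled(3, 2, count) == count - 1);  /* full, one spare */
        return 0;
}
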
-/* Check and send pending recv completions */
-static void netvsc_chk_recv_comp(struct netvsc_device *nvdev,
- struct vmbus_channel *channel, u16 q_idx)
+/* Add a receive completion to the ring to send to the host. */
+static void enq_receive_complete(struct net_device *ndev,
+ struct netvsc_device *nvdev, u16 q_idx,
+ u64 tid, u32 status)
{
+ struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
+ struct multi_recv_comp *mrc = &nvchan->mrc;
struct recv_comp_data *rcd;
- int ret;
-
- while (true) {
- rcd = read_recv_comp_slot(nvdev, q_idx);
- if (!rcd)
- break;
+ u32 filled, avail;
- ret = netvsc_send_recv_completion(channel, rcd->tid,
- rcd->status);
- if (ret)
- break;
+ recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
- put_recv_comp_slot(nvdev, q_idx);
+ if (unlikely(filled > NAPI_POLL_WEIGHT)) {
+ send_recv_completions(ndev, nvdev, nvchan);
+ recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
}
-}
-
-#define NETVSC_RCD_WATERMARK 80
-
-/* Get next available slot */
-static inline struct recv_comp_data *get_recv_comp_slot(
- struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx)
-{
- struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
- u32 filled, avail, next;
- struct recv_comp_data *rcd;
- if (unlikely(!nvdev->recv_section))
- return NULL;
-
- if (unlikely(!mrc->buf))
- return NULL;
-
- if (atomic_read(&nvdev->num_outstanding_recvs) >
- nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100)
- netvsc_chk_recv_comp(nvdev, channel, q_idx);
-
- count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
- if (!avail)
- return NULL;
-
- next = mrc->next;
- rcd = mrc->buf + next * sizeof(struct recv_comp_data);
- mrc->next = (next + 1) % NETVSC_RECVSLOT_MAX;
+ if (unlikely(!avail)) {
+ netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
+ q_idx, tid);
+ return;
+ }
- atomic_inc(&nvdev->num_outstanding_recvs);
+ rcd = mrc->slots + mrc->next;
+ rcd->tid = tid;
+ rcd->status = status;
- return rcd;
+ if (++mrc->next == nvdev->recv_completion_cnt)
+ mrc->next = 0;
}
static int netvsc_receive(struct net_device *ndev,
- struct netvsc_device *net_device,
- struct net_device_context *net_device_ctx,
- struct hv_device *device,
- struct vmbus_channel *channel,
- const struct vmpacket_descriptor *desc,
- struct nvsp_message *nvsp)
+ struct netvsc_device *net_device,
+ struct net_device_context *net_device_ctx,
+ struct hv_device *device,
+ struct vmbus_channel *channel,
+ const struct vmpacket_descriptor *desc,
+ struct nvsp_message *nvsp)
{
const struct vmtransfer_page_packet_header *vmxferpage_packet
= container_of(desc, const struct vmtransfer_page_packet_header, d);
@@ -1075,7 +1048,6 @@ static int netvsc_receive(struct net_device *ndev,
u32 status = NVSP_STAT_SUCCESS;
int i;
int count = 0;
- int ret;
/* Make sure this is a valid nvsp packet */
if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
@@ -1106,25 +1078,9 @@ static int netvsc_receive(struct net_device *ndev,
channel, data, buflen);
}
- if (net_device->chan_table[q_idx].mrc.buf) {
- struct recv_comp_data *rcd;
+ enq_receive_complete(ndev, net_device, q_idx,
+ vmxferpage_packet->d.trans_id, status);
- rcd = get_recv_comp_slot(net_device, channel, q_idx);
- if (rcd) {
- rcd->tid = vmxferpage_packet->d.trans_id;
- rcd->status = status;
- } else {
- netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
- q_idx, vmxferpage_packet->d.trans_id);
- }
- } else {
- ret = netvsc_send_recv_completion(channel,
- vmxferpage_packet->d.trans_id,
- status);
- if (ret)
- netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
- q_idx, vmxferpage_packet->d.trans_id, ret);
- }
return count;
}
@@ -1220,11 +1176,10 @@ int netvsc_poll(struct napi_struct *napi, int budget)
{
struct netvsc_channel *nvchan
= container_of(napi, struct netvsc_channel, napi);
+ struct netvsc_device *net_device = nvchan->net_device;
struct vmbus_channel *channel = nvchan->channel;
struct hv_device *device = netvsc_channel_to_device(channel);
- u16 q_idx = channel->offermsg.offer.sub_channel_index;
struct net_device *ndev = hv_get_drvdata(device);
- struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
int work_done = 0;
/* If starting a new interval */
@@ -1237,17 +1192,19 @@ int netvsc_poll(struct napi_struct *napi, int budget)
nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
}
- /* If receive ring was exhausted
- * and not doing busy poll
+ /* If send of pending receive completions succeeded
+ * and did not exhaust NAPI budget this time
+ * and not doing busy poll
* then re-enable host interrupts
- * and reschedule if ring is not empty.
+ * and reschedule if ring is not empty.
*/
- if (work_done < budget &&
+ if (send_recv_completions(ndev, net_device, nvchan) == 0 &&
+ work_done < budget &&
napi_complete_done(napi, work_done) &&
- hv_end_read(&channel->inbound) != 0)
+ hv_end_read(&channel->inbound)) {
+ hv_begin_read(&channel->inbound);
napi_reschedule(napi);
-
- netvsc_chk_recv_comp(net_device, channel, q_idx);
+ }
/* Driver may overshoot since multiple packets per descriptor */
return min(work_done, budget);
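
Two conditions are folded into this completion test. If posting receive completions fails (the host-bound ring is full, counted as rx_comp_busy), napi_complete_done() is never reached and the channel stays scheduled, so send_recv_completions() is retried on the next poll. And if hv_end_read() reports data that arrived after the last drain, the channel interrupt is masked again via hv_begin_read() before napi_reschedule(), mirroring what netvsc_channel_cb() does on a fresh interrupt.
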
@@ -1259,10 +1216,15 @@ int netvsc_poll(struct napi_struct *napi, int budget)
void netvsc_channel_cb(void *context)
{
struct netvsc_channel *nvchan = context;
+ struct vmbus_channel *channel = nvchan->channel;
+ struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+ /* preload first vmpacket descriptor */
+ prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
if (napi_schedule_prep(&nvchan->napi)) {
/* disable interrupts from host */
- hv_begin_read(&nvchan->channel->inbound);
+ hv_begin_read(rbi);
__napi_schedule(&nvchan->napi);
}
@@ -1272,8 +1234,8 @@ void netvsc_channel_cb(void *context)
* netvsc_device_add - Callback when the device belonging to this
* driver is added
*/
-int netvsc_device_add(struct hv_device *device,
- const struct netvsc_device_info *device_info)
+struct netvsc_device *netvsc_device_add(struct hv_device *device,
+ const struct netvsc_device_info *device_info)
{
int i, ret = 0;
int ring_size = device_info->ring_size;
@@ -1283,7 +1245,7 @@ int netvsc_device_add(struct hv_device *device,
net_device = alloc_net_device();
if (!net_device)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
net_device->ring_size = ring_size;
@@ -1303,6 +1265,7 @@ int netvsc_device_add(struct hv_device *device,
struct netvsc_channel *nvchan = &net_device->chan_table[i];
nvchan->channel = device->channel;
+ nvchan->net_device = net_device;
u64_stats_init(&nvchan->tx_stats.syncp);
u64_stats_init(&nvchan->rx_stats.syncp);
}
@@ -1334,17 +1297,18 @@ int netvsc_device_add(struct hv_device *device,
rcu_assign_pointer(net_device_ctx->nvdev, net_device);
/* Connect with the NetVsp */
- ret = netvsc_connect_vsp(device, net_device);
+ ret = netvsc_connect_vsp(device, net_device, device_info);
if (ret != 0) {
netdev_err(ndev,
"unable to connect to NetVSP - %d\n", ret);
goto close;
}
- return ret;
+ return net_device;
close:
- netif_napi_del(&net_device->chan_table[0].napi);
+ RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
+ napi_disable(&net_device->chan_table[0].napi);
/* Now, we can close the channel safely */
vmbus_close(device->channel);
@@ -1352,6 +1316,5 @@ close:
cleanup:
free_netvsc_device(&net_device->rcu);
- return ret;
-
+ return ERR_PTR(ret);
}
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index d91cbc6..165ba4b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -33,6 +33,9 @@
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/slab.h>
+#include <linux/rtnetlink.h>
+#include <linux/netpoll.h>
+
#include <net/arp.h>
#include <net/route.h>
#include <net/sock.h>
@@ -42,8 +45,14 @@
#include "hyperv_net.h"
-#define RING_SIZE_MIN 64
+#define RING_SIZE_MIN 64
+#define NETVSC_MIN_TX_SECTIONS 10
+#define NETVSC_DEFAULT_TX 192 /* ~1M */
+#define NETVSC_MIN_RX_SECTIONS 10 /* ~64K */
+#define NETVSC_DEFAULT_RX 2048 /* ~4M */
+
#define LINKCHANGE_INT (2 * HZ)
+#define VF_TAKEOVER_INT (HZ / 10)
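
Given the section sizes in hyperv_net.h (6144-byte send sections, 1728-byte receive sections), the defaults above work out to 192 * 6144 = 1,179,648 bytes (about 1.1 MB) of send buffer and 2048 * 1728 = 3,538,944 bytes (about 3.4 MB) of receive buffer before netvsc_init_buf() rounds each up to PAGE_SIZE, which is where the ~1M and ~4M annotations (roughly) come from.
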
static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
@@ -69,7 +78,8 @@ static void netvsc_set_multicast_list(struct net_device *net)
static int netvsc_open(struct net_device *net)
{
struct net_device_context *ndev_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = ndev_ctx->nvdev;
+ struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndev_ctx->nvdev);
struct rndis_device *rdev;
int ret = 0;
@@ -85,22 +95,40 @@ static int netvsc_open(struct net_device *net)
netif_tx_wake_all_queues(net);
rdev = nvdev->extension;
- if (!rdev->link_state && !ndev_ctx->datapath)
+
+ if (!rdev->link_state)
netif_carrier_on(net);
- return ret;
+ if (vf_netdev) {
+ /* Setting synthetic device up transparently sets
+ * slave as up. If open fails, then slave will be
+ * still be offline (and not used).
+ */
+ ret = dev_open(vf_netdev);
+ if (ret)
+ netdev_warn(net,
+ "unable to open slave: %s: %d\n",
+ vf_netdev->name, ret);
+ }
+ return 0;
}
static int netvsc_close(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
+ struct net_device *vf_netdev
+ = rtnl_dereference(net_device_ctx->vf_netdev);
struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
- int ret;
+ int ret = 0;
u32 aread, i, msec = 10, retry = 0, retry_max = 20;
struct vmbus_channel *chn;
netif_tx_disable(net);
+ /* No need to close rndis filter if it is removed already */
+ if (!nvdev)
+ goto out;
+
ret = rndis_filter_close(nvdev);
if (ret != 0) {
netdev_err(net, "unable to close device (ret %d).\n", ret);
@@ -139,11 +167,15 @@ static int netvsc_close(struct net_device *net)
ret = -ETIMEDOUT;
}
+out:
+ if (vf_netdev)
+ dev_close(vf_netdev);
+
return ret;
}
static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
- int pkt_type)
+ int pkt_type)
{
struct rndis_packet *rndis_pkt;
struct rndis_per_packet_info *ppi;
@@ -163,10 +195,12 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
return ppi;
}
-/* Azure hosts don't support non-TCP port numbers in hashing yet. We compute
- * hash for non-TCP traffic with only IP numbers.
+/* Azure hosts don't support non-TCP port numbers in hashing for fragmented
+ * packets. We can use ethtool to change the UDP hash level when necessary.
*/
-static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk)
+static inline u32 netvsc_get_hash(
+ struct sk_buff *skb,
+ const struct net_device_context *ndc)
{
struct flow_keys flow;
u32 hash;
@@ -177,7 +211,11 @@ static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk)
if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
return 0;
- if (flow.basic.ip_proto == IPPROTO_TCP) {
+ if (flow.basic.ip_proto == IPPROTO_TCP ||
+ (flow.basic.ip_proto == IPPROTO_UDP &&
+ ((flow.basic.n_proto == htons(ETH_P_IP) && ndc->udp4_l4_hash) ||
+ (flow.basic.n_proto == htons(ETH_P_IPV6) &&
+ ndc->udp6_l4_hash)))) {
return skb_get_hash(skb);
} else {
if (flow.basic.n_proto == htons(ETH_P_IP))
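
The udp4_l4_hash/udp6_l4_hash toggles consulted here are driven through ethtool's flow-hash interface (the read side is visible in netvsc_get_rxnfc() further down); for example, a command along the lines of "ethtool -N eth0 rx-flow-hash udp4 sdfn" would enable 4-tuple hashing for UDP over IPv4, while "... udp4 sd" would hash on addresses only (spelling illustrative; see ethtool(8)).
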
@@ -200,7 +238,7 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev,
struct sock *sk = skb->sk;
int q_idx;
- q_idx = ndc->tx_send_table[netvsc_get_hash(skb, sk) &
+ q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) &
(VRSS_SEND_TAB_SIZE - 1)];
/* If queue index changed record the new value */
@@ -222,13 +260,11 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev,
*
* TODO support XPS - but get_xps_queue not exported
*/
-static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
- void *accel_priv, select_queue_fallback_t fallback)
+static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb)
{
- unsigned int num_tx_queues = ndev->real_num_tx_queues;
int q_idx = sk_tx_queue_get(skb->sk);
- if (q_idx < 0 || skb->ooo_okay) {
+ if (q_idx < 0 || skb->ooo_okay || q_idx >= ndev->real_num_tx_queues) {
/* If forwarding a packet, we use the recorded queue when
* available for better cache locality.
*/
@@ -238,14 +274,35 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
q_idx = netvsc_get_tx_queue(ndev, skb, q_idx);
}
- while (unlikely(q_idx >= num_tx_queues))
- q_idx -= num_tx_queues;
-
return q_idx;
}
+static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct net_device_context *ndc = netdev_priv(ndev);
+ struct net_device *vf_netdev;
+ u16 txq;
+
+ rcu_read_lock();
+ vf_netdev = rcu_dereference(ndc->vf_netdev);
+ if (vf_netdev) {
+ txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
+ qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
+ } else {
+ txq = netvsc_pick_tx(ndev, skb);
+ }
+ rcu_read_unlock();
+
+ while (unlikely(txq >= ndev->real_num_tx_queues))
+ txq -= ndev->real_num_tx_queues;
+
+ return txq;
+}
+
static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
- struct hv_page_buffer *pb)
+ struct hv_page_buffer *pb)
{
int j = 0;
@@ -280,9 +337,8 @@ static u32 fill_pg_buf(struct page *page, u32 offset, u32 len,
static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
struct hv_netvsc_packet *packet,
- struct hv_page_buffer **page_buf)
+ struct hv_page_buffer *pb)
{
- struct hv_page_buffer *pb = *page_buf;
u32 slots_used = 0;
char *data = skb->data;
int frags = skb_shinfo(skb)->nr_frags;
@@ -293,10 +349,9 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
* 2. skb linear data
* 3. skb fragment data
*/
- if (hdr != NULL)
- slots_used += fill_pg_buf(virt_to_page(hdr),
- offset_in_page(hdr),
- len, &pb[slots_used]);
+ slots_used += fill_pg_buf(virt_to_page(hdr),
+ offset_in_page(hdr),
+ len, &pb[slots_used]);
packet->rmsg_size = len;
packet->rmsg_pgcnt = slots_used;
@@ -359,13 +414,40 @@ static u32 net_checksum_info(struct sk_buff *skb)
if (ip6->nexthdr == IPPROTO_TCP)
return TRANSPORT_INFO_IPV6_TCP;
- else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
+ else if (ip6->nexthdr == IPPROTO_UDP)
return TRANSPORT_INFO_IPV6_UDP;
}
return TRANSPORT_INFO_NOT_IP;
}
+/* Send skb on the slave VF device. */
+static int netvsc_vf_xmit(struct net_device *net, struct net_device *vf_netdev,
+ struct sk_buff *skb)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(net);
+ unsigned int len = skb->len;
+ int rc;
+
+ skb->dev = vf_netdev;
+ skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping;
+
+ rc = dev_queue_xmit(skb);
+ if (likely(rc == NET_XMIT_SUCCESS || rc == NET_XMIT_CN)) {
+ struct netvsc_vf_pcpu_stats *pcpu_stats
+ = this_cpu_ptr(ndev_ctx->vf_stats);
+
+ u64_stats_update_begin(&pcpu_stats->syncp);
+ pcpu_stats->tx_packets++;
+ pcpu_stats->tx_bytes += len;
+ u64_stats_update_end(&pcpu_stats->syncp);
+ } else {
+ this_cpu_inc(ndev_ctx->vf_stats->tx_dropped);
+ }
+
+ return rc;
+}
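
NET_XMIT_CN is counted as a successful transmit here because it signals congestion rather than a definite drop of this particular skb; any other nonzero return bumps the per-cpu tx_dropped counter instead.
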
+
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
@@ -374,11 +456,19 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
unsigned int num_data_pgs;
struct rndis_message *rndis_msg;
struct rndis_packet *rndis_pkt;
+ struct net_device *vf_netdev;
u32 rndis_msg_size;
struct rndis_per_packet_info *ppi;
u32 hash;
- struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
- struct hv_page_buffer *pb = page_buf;
+ struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT];
+
+ /* If a VF is present and up, redirect packets to it.
+ * We are already called with rcu_read_lock_bh.
+ */
+ vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev);
+ if (vf_netdev && netif_running(vf_netdev) &&
+ !netpoll_tx_running(net))
+ return netvsc_vf_xmit(net, vf_netdev, skb);
/* We will need at most two pages to describe the rndis
* header. We can only transmit MAX_PAGE_BUFFER_COUNT number
@@ -448,9 +538,9 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
rndis_msg_size += NDIS_VLAN_PPI_SIZE;
ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
- IEEE_8021Q_INFO);
- vlan = (struct ndis_pkt_8021q_info *)((void *)ppi +
- ppi->ppi_offset);
+ IEEE_8021Q_INFO);
+
+ vlan = (void *)ppi + ppi->ppi_offset;
vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
VLAN_PRIO_SHIFT;
@@ -463,8 +553,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
TCP_LARGESEND_PKTINFO);
- lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
- ppi->ppi_offset);
+ lso_info = (void *)ppi + ppi->ppi_offset;
lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
if (skb->protocol == htons(ETH_P_IP)) {
@@ -524,12 +613,12 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
rndis_msg->msg_len += rndis_msg_size;
packet->total_data_buflen = rndis_msg->msg_len;
packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
- skb, packet, &pb);
+ skb, packet, pb);
/* timestamp packet in software */
skb_tx_timestamp(skb);
- ret = netvsc_send(net_device_ctx->device_ctx, packet,
- rndis_msg, &pb, skb);
+
+ ret = netvsc_send(net_device_ctx, packet, rndis_msg, pb, skb);
if (likely(ret == 0))
return NETDEV_TX_OK;
@@ -551,6 +640,7 @@ no_memory:
++net_device_ctx->eth_stats.tx_no_memory;
goto drop;
}
+
/*
* netvsc_linkstatus_callback - Link up/down notification
*/
@@ -574,8 +664,8 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
u32 speed;
- speed = *(u32 *)((void *)indicate + indicate->
- status_buf_offset) / 10000;
+ speed = *(u32 *)((void *)indicate
+ + indicate->status_buf_offset) / 10000;
ndev_ctx->speed = speed;
return;
}
@@ -658,29 +748,18 @@ int netvsc_recv_callback(struct net_device *net,
struct netvsc_device *net_device;
u16 q_idx = channel->offermsg.offer.sub_channel_index;
struct netvsc_channel *nvchan;
- struct net_device *vf_netdev;
struct sk_buff *skb;
struct netvsc_stats *rx_stats;
if (net->reg_state != NETREG_REGISTERED)
return NVSP_STAT_FAIL;
- /*
- * If necessary, inject this packet into the VF interface.
- * On Hyper-V, multicast and brodcast packets are only delivered
- * to the synthetic interface (after subjecting these to
- * policy filters on the host). Deliver these via the VF
- * interface in the guest.
- */
rcu_read_lock();
net_device = rcu_dereference(net_device_ctx->nvdev);
if (unlikely(!net_device))
goto drop;
nvchan = &net_device->chan_table[q_idx];
- vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
- if (vf_netdev && (vf_netdev->flags & IFF_UP))
- net = vf_netdev;
/* Allocate a skb - TODO direct I/O to pages? */
skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
@@ -692,8 +771,7 @@ drop:
return NVSP_STAT_FAIL;
}
- if (net != vf_netdev)
- skb_record_rx_queue(skb, q_idx);
+ skb_record_rx_queue(skb, q_idx);
/*
* Even if injecting the packet, record the statistics
@@ -736,48 +814,22 @@ static void netvsc_get_channels(struct net_device *net,
}
}
-static int netvsc_set_queues(struct net_device *net, struct hv_device *dev,
- u32 num_chn)
-{
- struct netvsc_device_info device_info;
- int ret;
-
- memset(&device_info, 0, sizeof(device_info));
- device_info.num_chn = num_chn;
- device_info.ring_size = ring_size;
- device_info.max_num_vrss_chns = num_chn;
-
- ret = rndis_filter_device_add(dev, &device_info);
- if (ret)
- return ret;
-
- ret = netif_set_real_num_tx_queues(net, num_chn);
- if (ret)
- return ret;
-
- ret = netif_set_real_num_rx_queues(net, num_chn);
-
- return ret;
-}
-
static int netvsc_set_channels(struct net_device *net,
struct ethtool_channels *channels)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_device *dev = net_device_ctx->device_ctx;
struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
- unsigned int count = channels->combined_count;
- bool was_running;
- int ret;
+ unsigned int orig, count = channels->combined_count;
+ struct netvsc_device_info device_info;
+ bool was_opened;
+ int ret = 0;
/* We do not support separate count for rx, tx, or other */
if (count == 0 ||
channels->rx_count || channels->tx_count || channels->other_count)
return -EINVAL;
- if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX)
- return -EINVAL;
-
if (!nvdev || nvdev->destroy)
return -ENODEV;
@@ -787,25 +839,40 @@ static int netvsc_set_channels(struct net_device *net,
if (count > nvdev->max_chn)
return -EINVAL;
- was_running = netif_running(net);
- if (was_running) {
- ret = netvsc_close(net);
- if (ret)
- return ret;
- }
+ orig = nvdev->num_chn;
+ was_opened = rndis_filter_opened(nvdev);
+ if (was_opened)
+ rndis_filter_close(nvdev);
+
+ memset(&device_info, 0, sizeof(device_info));
+ device_info.num_chn = count;
+ device_info.ring_size = ring_size;
+ device_info.send_sections = nvdev->send_section_cnt;
+ device_info.recv_sections = nvdev->recv_section_cnt;
rndis_filter_device_remove(dev, nvdev);
- ret = netvsc_set_queues(net, dev, count);
- if (ret == 0)
- nvdev->num_chn = count;
- else
- netvsc_set_queues(net, dev, nvdev->num_chn);
+ nvdev = rndis_filter_device_add(dev, &device_info);
+ if (!IS_ERR(nvdev)) {
+ netif_set_real_num_tx_queues(net, nvdev->num_chn);
+ netif_set_real_num_rx_queues(net, nvdev->num_chn);
+ } else {
+ ret = PTR_ERR(nvdev);
+ device_info.num_chn = orig;
+ nvdev = rndis_filter_device_add(dev, &device_info);
+
+ if (IS_ERR(nvdev)) {
+ netdev_err(net, "restoring channel setting failed: %ld\n",
+ PTR_ERR(nvdev));
+ return ret;
+ }
+ }
- if (was_running)
- ret = netvsc_open(net);
+ if (was_opened)
+ rndis_filter_open(nvdev);
/* We may have missed link change notifications */
+ net_device_ctx->last_reconfig = 0;
schedule_delayed_work(&net_device_ctx->dwork, 0);
return ret;
@@ -832,6 +899,9 @@ static void netvsc_init_settings(struct net_device *dev)
{
struct net_device_context *ndc = netdev_priv(dev);
+ ndc->udp4_l4_hash = true;
+ ndc->udp6_l4_hash = true;
+
ndc->speed = SPEED_UNKNOWN;
ndc->duplex = DUPLEX_FULL;
}
@@ -869,41 +939,61 @@ static int netvsc_set_link_ksettings(struct net_device *dev,
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
{
struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
struct hv_device *hdev = ndevctx->device_ctx;
+ int orig_mtu = ndev->mtu;
struct netvsc_device_info device_info;
- bool was_running;
+ bool was_opened;
int ret = 0;
if (!nvdev || nvdev->destroy)
return -ENODEV;
- was_running = netif_running(ndev);
- if (was_running) {
- ret = netvsc_close(ndev);
+ /* Change MTU of underlying VF netdev first. */
+ if (vf_netdev) {
+ ret = dev_set_mtu(vf_netdev, mtu);
if (ret)
return ret;
}
+ netif_device_detach(ndev);
+ was_opened = rndis_filter_opened(nvdev);
+ if (was_opened)
+ rndis_filter_close(nvdev);
+
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
device_info.num_chn = nvdev->num_chn;
- device_info.max_num_vrss_chns = nvdev->num_chn;
+ device_info.send_sections = nvdev->send_section_cnt;
+ device_info.recv_sections = nvdev->recv_section_cnt;
rndis_filter_device_remove(hdev, nvdev);
- /* 'nvdev' has been freed in rndis_filter_device_remove() ->
- * netvsc_device_remove () -> free_netvsc_device().
- * We mustn't access it before it's re-created in
- * rndis_filter_device_add() -> netvsc_device_add().
- */
-
ndev->mtu = mtu;
- rndis_filter_device_add(hdev, &device_info);
+ nvdev = rndis_filter_device_add(hdev, &device_info);
+ if (IS_ERR(nvdev)) {
+ ret = PTR_ERR(nvdev);
+
+ /* Attempt rollback to original MTU */
+ ndev->mtu = orig_mtu;
+ nvdev = rndis_filter_device_add(hdev, &device_info);
- if (was_running)
- ret = netvsc_open(ndev);
+ if (vf_netdev)
+ dev_set_mtu(vf_netdev, orig_mtu);
+
+ if (IS_ERR(nvdev)) {
+ netdev_err(ndev, "restoring mtu failed: %ld\n",
+ PTR_ERR(nvdev));
+ return ret;
+ }
+ }
+
+ if (was_opened)
+ rndis_filter_open(nvdev);
+
+ netif_device_attach(ndev);
/* We may have missed link change notifications */
schedule_delayed_work(&ndevctx->dwork, 0);
@@ -911,16 +1001,56 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
return ret;
}
+static void netvsc_get_vf_stats(struct net_device *net,
+ struct netvsc_vf_pcpu_stats *tot)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(net);
+ int i;
+
+ memset(tot, 0, sizeof(*tot));
+
+ for_each_possible_cpu(i) {
+ const struct netvsc_vf_pcpu_stats *stats
+ = per_cpu_ptr(ndev_ctx->vf_stats, i);
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&stats->syncp);
+ rx_packets = stats->rx_packets;
+ tx_packets = stats->tx_packets;
+ rx_bytes = stats->rx_bytes;
+ tx_bytes = stats->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+ tot->rx_packets += rx_packets;
+ tot->tx_packets += tx_packets;
+ tot->rx_bytes += rx_bytes;
+ tot->tx_bytes += tx_bytes;
+ tot->tx_dropped += stats->tx_dropped;
+ }
+}
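
The fetch_begin/fetch_retry pair is a seqcount read loop: the writer bumps a sequence counter around each update, and readers retry whenever they may have observed a torn 64-bit value. A userspace sketch of the pattern, as an analogy only (this is not the kernel's u64_stats implementation, and it glosses over the memory-ordering care a real multi-threaded version needs):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct stats {
        atomic_uint seq;        /* odd while an update is in progress */
        uint64_t rx_packets;
        uint64_t rx_bytes;
};

static void write_stats(struct stats *s, uint64_t pkts, uint64_t bytes)
{
        atomic_fetch_add(&s->seq, 1);   /* begin: seq goes odd */
        s->rx_packets += pkts;
        s->rx_bytes += bytes;
        atomic_fetch_add(&s->seq, 1);   /* end: seq goes even */
}

static void read_stats(struct stats *s, uint64_t *pkts, uint64_t *bytes)
{
        unsigned int start;

        do {
                start = atomic_load(&s->seq);
                *pkts = s->rx_packets;
                *bytes = s->rx_bytes;
        } while ((start & 1) || atomic_load(&s->seq) != start);
}

int main(void)
{
        struct stats s = { 0 };
        uint64_t p, b;

        write_stats(&s, 3, 4500);
        read_stats(&s, &p, &b);
        printf("%llu packets, %llu bytes\n",
               (unsigned long long)p, (unsigned long long)b);
        return 0;
}
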
+
static void netvsc_get_stats64(struct net_device *net,
struct rtnl_link_stats64 *t)
{
struct net_device_context *ndev_ctx = netdev_priv(net);
struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+ struct netvsc_vf_pcpu_stats vf_tot;
int i;
if (!nvdev)
return;
+ netdev_stats_to_stats64(t, &net->stats);
+
+ netvsc_get_vf_stats(net, &vf_tot);
+ t->rx_packets += vf_tot.rx_packets;
+ t->tx_packets += vf_tot.tx_packets;
+ t->rx_bytes += vf_tot.rx_bytes;
+ t->tx_bytes += vf_tot.tx_bytes;
+ t->tx_dropped += vf_tot.tx_dropped;
+
for (i = 0; i < nvdev->num_chn; i++) {
const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
const struct netvsc_stats *stats;
@@ -949,33 +1079,36 @@ static void netvsc_get_stats64(struct net_device *net,
t->rx_packets += packets;
t->multicast += multicast;
}
-
- t->tx_dropped = net->stats.tx_dropped;
- t->tx_errors = net->stats.tx_errors;
-
- t->rx_dropped = net->stats.rx_dropped;
- t->rx_errors = net->stats.rx_errors;
}
static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
{
+ struct net_device_context *ndc = netdev_priv(ndev);
+ struct net_device *vf_netdev = rtnl_dereference(ndc->vf_netdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
struct sockaddr *addr = p;
- char save_adr[ETH_ALEN];
- unsigned char save_aatype;
int err;
- memcpy(save_adr, ndev->dev_addr, ETH_ALEN);
- save_aatype = ndev->addr_assign_type;
-
- err = eth_mac_addr(ndev, p);
- if (err != 0)
+ err = eth_prepare_mac_addr_change(ndev, p);
+ if (err)
return err;
- err = rndis_filter_set_device_mac(ndev, addr->sa_data);
- if (err != 0) {
- /* roll back to saved MAC */
- memcpy(ndev->dev_addr, save_adr, ETH_ALEN);
- ndev->addr_assign_type = save_aatype;
+ if (!nvdev)
+ return -ENODEV;
+
+ if (vf_netdev) {
+ err = dev_set_mac_address(vf_netdev, addr);
+ if (err)
+ return err;
+ }
+
+ err = rndis_filter_set_device_mac(nvdev, addr->sa_data);
+ if (!err) {
+ eth_commit_mac_addr_change(ndev, p);
+ } else if (vf_netdev) {
+ /* rollback change on VF */
+ memcpy(addr->sa_data, ndev->dev_addr, ETH_ALEN);
+ dev_set_mac_address(vf_netdev, addr);
}
return err;
@@ -990,9 +1123,18 @@ static const struct {
{ "tx_no_space", offsetof(struct netvsc_ethtool_stats, tx_no_space) },
{ "tx_too_big", offsetof(struct netvsc_ethtool_stats, tx_too_big) },
{ "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) },
+ { "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) },
+ { "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) },
+}, vf_stats[] = {
+ { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
+ { "vf_rx_bytes", offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
+ { "vf_tx_packets", offsetof(struct netvsc_vf_pcpu_stats, tx_packets) },
+ { "vf_tx_bytes", offsetof(struct netvsc_vf_pcpu_stats, tx_bytes) },
+ { "vf_tx_dropped", offsetof(struct netvsc_vf_pcpu_stats, tx_dropped) },
};
#define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats)
+#define NETVSC_VF_STATS_LEN ARRAY_SIZE(vf_stats)
/* 4 statistics per queue (rx/tx packets/bytes) */
#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
@@ -1007,7 +1149,9 @@ static int netvsc_get_sset_count(struct net_device *dev, int string_set)
switch (string_set) {
case ETH_SS_STATS:
- return NETVSC_GLOBAL_STATS_LEN + NETVSC_QUEUE_STATS_LEN(nvdev);
+ return NETVSC_GLOBAL_STATS_LEN
+ + NETVSC_VF_STATS_LEN
+ + NETVSC_QUEUE_STATS_LEN(nvdev);
default:
return -EINVAL;
}
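The three ethtool callbacks must stay in agreement: the count returned for ETH_SS_STATS has to equal both the number of ETH_GSTRING_LEN names emitted by .get_strings and the number of u64 values written by .get_ethtool_stats, which is why the VF entries are added to all three in this patch. Informally:

/* get_sset_count(ETH_SS_STATS)
 *	== names written by get_strings(ETH_SS_STATS)
 *	== u64 values written by get_ethtool_stats()
 */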
@@ -1017,9 +1161,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
const void *nds = &ndc->eth_stats;
const struct netvsc_stats *qstats;
+ struct netvsc_vf_pcpu_stats sum;
unsigned int start;
u64 packets, bytes;
int i, j;
@@ -1030,6 +1175,10 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++)
data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
+ netvsc_get_vf_stats(dev, &sum);
+ for (j = 0; j < NETVSC_VF_STATS_LEN; j++)
+ data[i++] = *(u64 *)((void *)&sum + vf_stats[j].offset);
+
for (j = 0; j < nvdev->num_chn; j++) {
qstats = &nvdev->chan_table[j].tx_stats;
@@ -1055,7 +1204,7 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
u8 *p = data;
int i;
@@ -1064,11 +1213,16 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
switch (stringset) {
case ETH_SS_STATS:
- for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
- memcpy(p + i * ETH_GSTRING_LEN,
- netvsc_stats[i].name, ETH_GSTRING_LEN);
+ for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) {
+ memcpy(p, netvsc_stats[i].name, ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vf_stats); i++) {
+ memcpy(p, vf_stats[i].name, ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
- p += i * ETH_GSTRING_LEN;
for (i = 0; i < nvdev->num_chn; i++) {
sprintf(p, "tx_queue_%u_packets", i);
p += ETH_GSTRING_LEN;
@@ -1085,7 +1239,7 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
}
static int
-netvsc_get_rss_hash_opts(struct netvsc_device *nvdev,
+netvsc_get_rss_hash_opts(struct net_device_context *ndc,
struct ethtool_rxnfc *info)
{
info->data = RXH_IP_SRC | RXH_IP_DST;
@@ -1094,9 +1248,20 @@ netvsc_get_rss_hash_opts(struct netvsc_device *nvdev,
case TCP_V4_FLOW:
case TCP_V6_FLOW:
info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
- /* fallthrough */
+ break;
+
case UDP_V4_FLOW:
+ if (ndc->udp4_l4_hash)
+ info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+
+ break;
+
case UDP_V6_FLOW:
+ if (ndc->udp6_l4_hash)
+ info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+
+ break;
+
case IPV4_FLOW:
case IPV6_FLOW:
break;
@@ -1113,7 +1278,7 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
u32 *rules)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
if (!nvdev)
return -ENODEV;
@@ -1124,8 +1289,48 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
return 0;
case ETHTOOL_GRXFH:
- return netvsc_get_rss_hash_opts(nvdev, info);
+ return netvsc_get_rss_hash_opts(ndc, info);
+ }
+ return -EOPNOTSUPP;
+}
+
+static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
+ struct ethtool_rxnfc *info)
+{
+ if (info->data == (RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+ if (info->flow_type == UDP_V4_FLOW)
+ ndc->udp4_l4_hash = true;
+ else if (info->flow_type == UDP_V6_FLOW)
+ ndc->udp6_l4_hash = true;
+ else
+ return -EOPNOTSUPP;
+
+ return 0;
}
+
+ if (info->data == (RXH_IP_SRC | RXH_IP_DST)) {
+ if (info->flow_type == UDP_V4_FLOW)
+ ndc->udp4_l4_hash = false;
+ else if (info->flow_type == UDP_V6_FLOW)
+ ndc->udp6_l4_hash = false;
+ else
+ return -EOPNOTSUPP;
+
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int
+netvsc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *info)
+{
+ struct net_device_context *ndc = netdev_priv(ndev);
+
+ if (info->cmd == ETHTOOL_SRXFH)
+ return netvsc_set_rss_hash_opts(ndc, info);
+
return -EOPNOTSUPP;
}
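With the new .set_rxnfc hook, UDP 4-tuple hashing can be toggled from userspace. For example, assuming an interface named eth0:

ethtool -N eth0 rx-flow-hash udp4 sdfn	# hash on src/dst address and ports
ethtool -N eth0 rx-flow-hash udp4 sd	# back to 2-tuple (addresses only)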
@@ -1163,7 +1368,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
u8 *hfunc)
{
struct net_device_context *ndc = netdev_priv(dev);
- struct netvsc_device *ndev = rcu_dereference(ndc->nvdev);
+ struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
struct rndis_device *rndis_dev;
int i;
@@ -1202,7 +1407,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
rndis_dev = ndev->extension;
if (indir) {
for (i = 0; i < ITAB_NUM; i++)
- if (indir[i] >= VRSS_CHANNEL_MAX)
+ if (indir[i] >= ndev->num_chn)
return -EINVAL;
for (i = 0; i < ITAB_NUM; i++)
@@ -1216,7 +1421,105 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
key = rndis_dev->rss_key;
}
- return rndis_filter_set_rss_param(rndis_dev, key, ndev->num_chn);
+ return rndis_filter_set_rss_param(rndis_dev, key);
+}
+
+/* The Hyper-V RNDIS protocol does not have a ring in the HW sense.
+ * It does have a pre-allocated receive area which is divided into sections.
+ */
+static void __netvsc_get_ringparam(struct netvsc_device *nvdev,
+ struct ethtool_ringparam *ring)
+{
+ u32 max_buf_size;
+
+ ring->rx_pending = nvdev->recv_section_cnt;
+ ring->tx_pending = nvdev->send_section_cnt;
+
+ if (nvdev->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
+ max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
+ else
+ max_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
+
+ ring->rx_max_pending = max_buf_size / nvdev->recv_section_size;
+ ring->tx_max_pending = NETVSC_SEND_BUFFER_SIZE
+ / nvdev->send_section_size;
+}
+
+static void netvsc_get_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ring)
+{
+ struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+
+ if (!nvdev)
+ return;
+
+ __netvsc_get_ringparam(nvdev, ring);
+}
+
+static int netvsc_set_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ring)
+{
+ struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+ struct hv_device *hdev = ndevctx->device_ctx;
+ struct netvsc_device_info device_info;
+ struct ethtool_ringparam orig;
+ u32 new_tx, new_rx;
+ bool was_opened;
+ int ret = 0;
+
+ if (!nvdev || nvdev->destroy)
+ return -ENODEV;
+
+ memset(&orig, 0, sizeof(orig));
+ __netvsc_get_ringparam(nvdev, &orig);
+
+ new_tx = clamp_t(u32, ring->tx_pending,
+ NETVSC_MIN_TX_SECTIONS, orig.tx_max_pending);
+ new_rx = clamp_t(u32, ring->rx_pending,
+ NETVSC_MIN_RX_SECTIONS, orig.rx_max_pending);
+
+ if (new_tx == orig.tx_pending &&
+ new_rx == orig.rx_pending)
+ return 0; /* no change */
+
+ memset(&device_info, 0, sizeof(device_info));
+ device_info.num_chn = nvdev->num_chn;
+ device_info.ring_size = ring_size;
+ device_info.send_sections = new_tx;
+ device_info.recv_sections = new_rx;
+
+ netif_device_detach(ndev);
+ was_opened = rndis_filter_opened(nvdev);
+ if (was_opened)
+ rndis_filter_close(nvdev);
+
+ rndis_filter_device_remove(hdev, nvdev);
+
+ nvdev = rndis_filter_device_add(hdev, &device_info);
+ if (IS_ERR(nvdev)) {
+ ret = PTR_ERR(nvdev);
+
+ device_info.send_sections = orig.tx_pending;
+ device_info.recv_sections = orig.rx_pending;
+ nvdev = rndis_filter_device_add(hdev, &device_info);
+ if (IS_ERR(nvdev)) {
+ netdev_err(ndev, "restoring ringparam failed: %ld\n",
+ PTR_ERR(nvdev));
+ return ret;
+ }
+ }
+
+ if (was_opened)
+ rndis_filter_open(nvdev);
+ netif_device_attach(ndev);
+
+ /* We may have missed link change notifications */
+ ndevctx->last_reconfig = 0;
+ schedule_delayed_work(&ndevctx->dwork, 0);
+
+ return ret;
}
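Because the "rings" are really section counts within the preallocated send/receive buffers, resizing requires tearing the RNDIS device down and re-adding it, as above. From userspace the standard knobs apply, e.g. for a hypothetical eth0:

ethtool -g eth0			# show current and maximum ring sizes
ethtool -G eth0 rx 1024 tx 512	# request new section counts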
static const struct ethtool_ops ethtool_ops = {
@@ -1229,12 +1532,15 @@ static const struct ethtool_ops ethtool_ops = {
.set_channels = netvsc_set_channels,
.get_ts_info = ethtool_op_get_ts_info,
.get_rxnfc = netvsc_get_rxnfc,
+ .set_rxnfc = netvsc_set_rxnfc,
.get_rxfh_key_size = netvsc_get_rxfh_key_size,
.get_rxfh_indir_size = netvsc_rss_indir_size,
.get_rxfh = netvsc_get_rxfh,
.set_rxfh = netvsc_set_rxfh,
.get_link_ksettings = netvsc_get_link_ksettings,
.set_link_ksettings = netvsc_set_link_ksettings,
+ .get_ringparam = netvsc_get_ringparam,
+ .set_ringparam = netvsc_set_ringparam,
};
static const struct net_device_ops device_ops = {
@@ -1313,8 +1619,7 @@ static void netvsc_link_change(struct work_struct *w)
case RNDIS_STATUS_MEDIA_CONNECT:
if (rdev->link_state) {
rdev->link_state = false;
- if (!ndev_ctx->datapath)
- netif_carrier_on(net);
+ netif_carrier_on(net);
netif_tx_wake_all_queues(net);
} else {
notify = true;
@@ -1391,7 +1696,7 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
continue; /* not a netvsc device */
net_device_ctx = netdev_priv(dev);
- if (net_device_ctx->nvdev == NULL)
+ if (!rtnl_dereference(net_device_ctx->nvdev))
continue; /* device is removed */
if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev)
@@ -1401,6 +1706,108 @@ static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
return NULL;
}
+/* Called when the VF is injecting data into the network stack.
+ * Change the associated network device from the VF to netvsc.
+ * Note: already called with rcu_read_lock held.
+ */
+static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
+{
+ struct sk_buff *skb = *pskb;
+ struct net_device *ndev = rcu_dereference(skb->dev->rx_handler_data);
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ struct netvsc_vf_pcpu_stats *pcpu_stats
+ = this_cpu_ptr(ndev_ctx->vf_stats);
+
+ skb->dev = ndev;
+
+ u64_stats_update_begin(&pcpu_stats->syncp);
+ pcpu_stats->rx_packets++;
+ pcpu_stats->rx_bytes += skb->len;
+ u64_stats_update_end(&pcpu_stats->syncp);
+
+ return RX_HANDLER_ANOTHER;
+}
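This relies on the generic rx_handler mechanism: the lower (VF) device's handler retargets the skb at the upper device and returns RX_HANDLER_ANOTHER so the stack re-runs receive processing for the new device. A minimal sketch with hypothetical names:

static rx_handler_result_t my_handle_frame(struct sk_buff **pskb)
{
	struct net_device *upper = rcu_dereference((*pskb)->dev->rx_handler_data);

	(*pskb)->dev = upper;		/* hand the skb to the upper device */
	return RX_HANDLER_ANOTHER;	/* re-enter RX for the new dev */
}

/* registered on the lower device, under RTNL: */
err = netdev_rx_handler_register(lower, my_handle_frame, upper);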
+
+static int netvsc_vf_join(struct net_device *vf_netdev,
+ struct net_device *ndev)
+{
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ int ret;
+
+ ret = netdev_rx_handler_register(vf_netdev,
+ netvsc_vf_handle_frame, ndev);
+ if (ret != 0) {
+ netdev_err(vf_netdev,
+ "can not register netvsc VF receive handler (err = %d)\n",
+ ret);
+ goto rx_handler_failed;
+ }
+
+ ret = netdev_upper_dev_link(vf_netdev, ndev);
+ if (ret != 0) {
+ netdev_err(vf_netdev,
+ "can not set master device %s (err = %d)\n",
+ ndev->name, ret);
+ goto upper_link_failed;
+ }
+
+ /* set slave flag before open to prevent IPv6 addrconf */
+ vf_netdev->flags |= IFF_SLAVE;
+
+ schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
+
+ call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
+
+ netdev_info(vf_netdev, "joined to %s\n", ndev->name);
+ return 0;
+
+upper_link_failed:
+ netdev_rx_handler_unregister(vf_netdev);
+rx_handler_failed:
+ return ret;
+}
+
+static void __netvsc_vf_setup(struct net_device *ndev,
+ struct net_device *vf_netdev)
+{
+ int ret;
+
+ /* Align MTU of VF with master */
+ ret = dev_set_mtu(vf_netdev, ndev->mtu);
+ if (ret)
+ netdev_warn(vf_netdev,
+ "unable to change mtu to %u\n", ndev->mtu);
+
+ if (netif_running(ndev)) {
+ ret = dev_open(vf_netdev);
+ if (ret)
+ netdev_warn(vf_netdev,
+ "unable to open: %d\n", ret);
+ }
+}
+
+/* Set up the VF as a slave of the synthetic device.
+ * Runs in a workqueue to avoid recursion in netlink callbacks.
+ */
+static void netvsc_vf_setup(struct work_struct *w)
+{
+ struct net_device_context *ndev_ctx
+ = container_of(w, struct net_device_context, vf_takeover.work);
+ struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+ struct net_device *vf_netdev;
+
+ if (!rtnl_trylock()) {
+ schedule_delayed_work(&ndev_ctx->vf_takeover, 0);
+ return;
+ }
+
+ vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+ if (vf_netdev)
+ __netvsc_vf_setup(ndev, vf_netdev);
+
+ rtnl_unlock();
+}
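The trylock-and-reschedule step is the standard way for deferred work to take the RTNL without deadlocking against a concurrent cancel/unregister path; in outline:

if (!rtnl_trylock()) {			/* RTNL busy: don't block the workqueue */
	schedule_delayed_work(&work, 0);	/* try again shortly */
	return;
}
/* ... work that requires RTNL ... */
rtnl_unlock();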
+
static int netvsc_register_vf(struct net_device *vf_netdev)
{
struct net_device *ndev;
@@ -1424,56 +1831,23 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
return NOTIFY_DONE;
+ if (netvsc_vf_join(vf_netdev, ndev) != 0)
+ return NOTIFY_DONE;
+
netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
- /*
- * Take a reference on the module.
- */
- try_module_get(THIS_MODULE);
dev_hold(vf_netdev);
rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev);
return NOTIFY_OK;
}
-static int netvsc_vf_up(struct net_device *vf_netdev)
+/* VF up/down change detected, switch the data path accordingly */
+static int netvsc_vf_changed(struct net_device *vf_netdev)
{
- struct net_device *ndev;
- struct netvsc_device *netvsc_dev;
struct net_device_context *net_device_ctx;
-
- ndev = get_netvsc_byref(vf_netdev);
- if (!ndev)
- return NOTIFY_DONE;
-
- net_device_ctx = netdev_priv(ndev);
- netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
-
- netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-
- /*
- * Open the device before switching data path.
- */
- rndis_filter_open(netvsc_dev);
-
- /*
- * notify the host to switch the data path.
- */
- netvsc_switch_datapath(ndev, true);
- netdev_info(ndev, "Data path switched to VF: %s\n", vf_netdev->name);
-
- netif_carrier_off(ndev);
-
- /* Now notify peers through VF device. */
- call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
-
- return NOTIFY_OK;
-}
-
-static int netvsc_vf_down(struct net_device *vf_netdev)
-{
- struct net_device *ndev;
struct netvsc_device *netvsc_dev;
- struct net_device_context *net_device_ctx;
+ struct net_device *ndev;
+ bool vf_is_up = netif_running(vf_netdev);
ndev = get_netvsc_byref(vf_netdev);
if (!ndev)
@@ -1481,15 +1855,12 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
net_device_ctx = netdev_priv(ndev);
netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
+ if (!netvsc_dev)
+ return NOTIFY_DONE;
- netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
- netvsc_switch_datapath(ndev, false);
- netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
- rndis_filter_close(netvsc_dev);
- netif_carrier_on(ndev);
-
- /* Now notify peers through netvsc device. */
- call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
+ netvsc_switch_datapath(ndev, vf_is_up);
+ netdev_info(ndev, "Data path switched %s VF: %s\n",
+ vf_is_up ? "to" : "from", vf_netdev->name);
return NOTIFY_OK;
}
@@ -1504,12 +1875,15 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
+ cancel_delayed_work_sync(&net_device_ctx->vf_takeover);
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
+ netdev_rx_handler_unregister(vf_netdev);
+ netdev_upper_dev_unlink(vf_netdev, ndev);
RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
dev_put(vf_netdev);
- module_put(THIS_MODULE);
+
return NOTIFY_OK;
}
@@ -1520,12 +1894,12 @@ static int netvsc_probe(struct hv_device *dev,
struct net_device_context *net_device_ctx;
struct netvsc_device_info device_info;
struct netvsc_device *nvdev;
- int ret;
+ int ret = -ENOMEM;
net = alloc_etherdev_mq(sizeof(struct net_device_context),
VRSS_CHANNEL_MAX);
if (!net)
- return -ENOMEM;
+ goto no_net;
netif_carrier_off(net);
@@ -1544,6 +1918,12 @@ static int netvsc_probe(struct hv_device *dev,
spin_lock_init(&net_device_ctx->lock);
INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
+ INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
+
+ net_device_ctx->vf_stats
+ = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
+ if (!net_device_ctx->vf_stats)
+ goto no_stats;
net->netdev_ops = &device_ops;
net->ethtool_ops = &ethtool_ops;
@@ -1556,13 +1936,16 @@ static int netvsc_probe(struct hv_device *dev,
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
device_info.num_chn = VRSS_CHANNEL_DEFAULT;
- ret = rndis_filter_device_add(dev, &device_info);
- if (ret != 0) {
+ device_info.send_sections = NETVSC_DEFAULT_TX;
+ device_info.recv_sections = NETVSC_DEFAULT_RX;
+
+ nvdev = rndis_filter_device_add(dev, &device_info);
+ if (IS_ERR(nvdev)) {
+ ret = PTR_ERR(nvdev);
netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
- free_netdev(net);
- hv_set_drvdata(dev, NULL);
- return ret;
+ goto rndis_failed;
}
+
memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
/* hw_features computed in rndis_filter_device_add */
@@ -1571,11 +1954,11 @@ static int netvsc_probe(struct hv_device *dev,
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
net->vlan_features = net->features;
- /* RCU not necessary here, device not registered */
- nvdev = net_device_ctx->nvdev;
netif_set_real_num_tx_queues(net, nvdev->num_chn);
netif_set_real_num_rx_queues(net, nvdev->num_chn);
+ netdev_lockdep_set_classes(net);
+
/* MTU range: 68 - 1500 or 65521 */
net->min_mtu = NETVSC_MTU_MIN;
if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
@@ -1586,20 +1969,29 @@ static int netvsc_probe(struct hv_device *dev,
ret = register_netdev(net);
if (ret != 0) {
pr_err("Unable to register netdev.\n");
- rndis_filter_device_remove(dev, nvdev);
- free_netdev(net);
+ goto register_failed;
}
return ret;
+
+register_failed:
+ rndis_filter_device_remove(dev, nvdev);
+rndis_failed:
+ free_percpu(net_device_ctx->vf_stats);
+no_stats:
+ hv_set_drvdata(dev, NULL);
+ free_netdev(net);
+no_net:
+ return ret;
}
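netvsc_probe() now unwinds through a single goto ladder, the usual kernel error-handling shape: each label releases exactly what was acquired before the failing step, in reverse order. Condensed, with hypothetical resources:

	a = alloc_a();
	if (!a)
		goto no_a;

	b = alloc_b();
	if (!b)
		goto no_b;

	return 0;

no_b:
	free_a(a);
no_a:
	return -ENOMEM;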
static int netvsc_remove(struct hv_device *dev)
{
- struct net_device *net;
struct net_device_context *ndev_ctx;
+ struct net_device *vf_netdev;
+ struct net_device *net;
net = hv_get_drvdata(dev);
-
if (net == NULL) {
dev_err(&dev->device, "No net device to remove\n");
return 0;
@@ -1616,13 +2008,18 @@ static int netvsc_remove(struct hv_device *dev)
* removed. Also blocks mtu and channel changes.
*/
rtnl_lock();
- rndis_filter_device_remove(dev, ndev_ctx->nvdev);
- rtnl_unlock();
+ vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+ if (vf_netdev)
+ netvsc_unregister_vf(vf_netdev);
- unregister_netdev(net);
+ rndis_filter_device_remove(dev,
+ rtnl_dereference(ndev_ctx->nvdev));
+ unregister_netdevice(net);
+ rtnl_unlock();
hv_set_drvdata(dev, NULL);
+ free_percpu(ndev_ctx->vf_stats);
free_netdev(net);
return 0;
}
@@ -1677,9 +2074,8 @@ static int netvsc_netdev_event(struct notifier_block *this,
case NETDEV_UNREGISTER:
return netvsc_unregister_vf(event_dev);
case NETDEV_UP:
- return netvsc_vf_up(event_dev);
case NETDEV_DOWN:
- return netvsc_vf_down(event_dev);
+ return netvsc_vf_changed(event_dev);
default:
return NOTIFY_DONE;
}
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index d6308ff..69c40b8 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -28,6 +28,7 @@
#include <linux/if_vlan.h>
#include <linux/nls.h>
#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
#include "hyperv_net.h"
@@ -213,11 +214,11 @@ static void dump_rndis_message(struct hv_device *hv_dev,
static int rndis_filter_send_request(struct rndis_device *dev,
struct rndis_request *req)
{
- int ret;
struct hv_netvsc_packet *packet;
struct hv_page_buffer page_buf[2];
struct hv_page_buffer *pb = page_buf;
struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
+ int ret;
/* Setup the packet to send it */
packet = &req->pkt;
@@ -243,7 +244,10 @@ static int rndis_filter_send_request(struct rndis_device *dev,
pb[0].len;
}
- ret = netvsc_send(net_device_ctx->device_ctx, packet, NULL, &pb, NULL);
+ rcu_read_lock_bh();
+ ret = netvsc_send(net_device_ctx, packet, NULL, pb, NULL);
+ rcu_read_unlock_bh();
+
return ret;
}
@@ -443,8 +447,9 @@ int rndis_filter_receive(struct net_device *ndev,
return 0;
}
-static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
- void *result, u32 *result_size)
+static int rndis_filter_query_device(struct rndis_device *dev,
+ struct netvsc_device *nvdev,
+ u32 oid, void *result, u32 *result_size)
{
struct rndis_request *request;
u32 inresult_size = *result_size;
@@ -471,8 +476,6 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
query->dev_vc_handle = 0;
if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) {
- struct net_device_context *ndevctx = netdev_priv(dev->ndev);
- struct netvsc_device *nvdev = ndevctx->nvdev;
struct ndis_offload *hwcaps;
u32 nvsp_version = nvdev->nvsp_version;
u8 ndis_rev;
@@ -541,14 +544,15 @@ cleanup:
/* Get the hardware offload capabilities */
static int
-rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps)
+rndis_query_hwcaps(struct rndis_device *dev, struct netvsc_device *net_device,
+ struct ndis_offload *caps)
{
u32 caps_len = sizeof(*caps);
int ret;
memset(caps, 0, sizeof(*caps));
- ret = rndis_filter_query_device(dev,
+ ret = rndis_filter_query_device(dev, net_device,
OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
caps, &caps_len);
if (ret)
@@ -577,11 +581,12 @@ rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps)
return 0;
}
-static int rndis_filter_query_device_mac(struct rndis_device *dev)
+static int rndis_filter_query_device_mac(struct rndis_device *dev,
+ struct netvsc_device *net_device)
{
u32 size = ETH_ALEN;
- return rndis_filter_query_device(dev,
+ return rndis_filter_query_device(dev, net_device,
RNDIS_OID_802_3_PERMANENT_ADDRESS,
dev->hw_mac_adr, &size);
}
@@ -589,9 +594,9 @@ static int rndis_filter_query_device_mac(struct rndis_device *dev)
#define NWADR_STR "NetworkAddress"
#define NWADR_STRLEN 14
-int rndis_filter_set_device_mac(struct net_device *ndev, char *mac)
+int rndis_filter_set_device_mac(struct netvsc_device *nvdev,
+ const char *mac)
{
- struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev);
struct rndis_device *rdev = nvdev->extension;
struct rndis_request *request;
struct rndis_set_request *set;
@@ -645,11 +650,8 @@ int rndis_filter_set_device_mac(struct net_device *ndev, char *mac)
wait_for_completion(&request->wait_event);
set_complete = &request->response_msg.msg.set_complete;
- if (set_complete->status != RNDIS_STATUS_SUCCESS) {
- netdev_err(ndev, "Fail to set MAC on host side:0x%x\n",
- set_complete->status);
- ret = -EINVAL;
- }
+ if (set_complete->status != RNDIS_STATUS_SUCCESS)
+ ret = -EIO;
cleanup:
put_rndis_request(rdev, request);
@@ -658,9 +660,9 @@ cleanup:
static int
rndis_filter_set_offload_params(struct net_device *ndev,
+ struct netvsc_device *nvdev,
struct ndis_offload_params *req_offloads)
{
- struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev);
struct rndis_device *rdev = nvdev->extension;
struct rndis_request *request;
struct rndis_set_request *set;
@@ -715,7 +717,7 @@ cleanup:
}
int rndis_filter_set_rss_param(struct rndis_device *rdev,
- const u8 *rss_key, int num_queue)
+ const u8 *rss_key)
{
struct net_device *ndev = rdev->ndev;
struct rndis_request *request;
@@ -782,27 +784,27 @@ cleanup:
return ret;
}
-static int rndis_filter_query_device_link_status(struct rndis_device *dev)
+static int rndis_filter_query_device_link_status(struct rndis_device *dev,
+ struct netvsc_device *net_device)
{
u32 size = sizeof(u32);
u32 link_status;
- int ret;
-
- ret = rndis_filter_query_device(dev,
- RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
- &link_status, &size);
- return ret;
+ return rndis_filter_query_device(dev, net_device,
+ RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
+ &link_status, &size);
}
-static int rndis_filter_query_link_speed(struct rndis_device *dev)
+static int rndis_filter_query_link_speed(struct rndis_device *dev,
+ struct netvsc_device *net_device)
{
u32 size = sizeof(u32);
u32 link_speed;
struct net_device_context *ndc;
int ret;
- ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_LINK_SPEED,
+ ret = rndis_filter_query_device(dev, net_device,
+ RNDIS_OID_GEN_LINK_SPEED,
&link_speed, &size);
if (!ret) {
@@ -871,14 +873,14 @@ void rndis_filter_update(struct netvsc_device *nvdev)
schedule_work(&rdev->mcast_work);
}
-static int rndis_filter_init_device(struct rndis_device *dev)
+static int rndis_filter_init_device(struct rndis_device *dev,
+ struct netvsc_device *nvdev)
{
struct rndis_request *request;
struct rndis_initialize_request *init;
struct rndis_initialize_complete *init_complete;
u32 status;
int ret;
- struct netvsc_device *nvdev = net_device_to_netvsc_device(dev->ndev);
request = get_rndis_request(dev, RNDIS_MSG_INIT,
RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
@@ -926,12 +928,12 @@ static bool netvsc_device_idle(const struct netvsc_device *nvdev)
{
int i;
- if (atomic_read(&nvdev->num_outstanding_recvs) > 0)
- return false;
-
for (i = 0; i < nvdev->num_chn; i++) {
const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+ if (nvchan->mrc.first != nvchan->mrc.next)
+ return false;
+
if (atomic_read(&nvchan->queue_sends) > 0)
return false;
}
@@ -944,7 +946,7 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
struct rndis_request *request;
struct rndis_halt_request *halt;
struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
- struct netvsc_device *nvdev = net_device_ctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
/* Attempt to do a rndis device halt */
request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@ -1015,20 +1017,20 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
{
struct net_device *ndev =
hv_get_drvdata(new_sc->primary_channel->device_obj);
- struct netvsc_device *nvscdev = net_device_to_netvsc_device(ndev);
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
+ struct netvsc_device *nvscdev;
u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
struct netvsc_channel *nvchan;
int ret;
- if (chn_index >= nvscdev->num_chn)
+	/* This is safe because this callback only happens while the
+	 * new device is being set up and is waiting on channel_init_wait.
+	 */
+ nvscdev = rcu_dereference_raw(ndev_ctx->nvdev);
+ if (!nvscdev || chn_index >= nvscdev->num_chn)
return;
nvchan = nvscdev->chan_table + chn_index;
- nvchan->mrc.buf
- = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
-
- if (!nvchan->mrc.buf)
- return;
/* Because the device uses NAPI, all the interrupt batching and
* control is done via Net softirq, not the channel handling
@@ -1052,8 +1054,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
wake_up(&nvscdev->subchan_open);
}
-int rndis_filter_device_add(struct hv_device *dev,
- struct netvsc_device_info *device_info)
+struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
+ struct netvsc_device_info *device_info)
{
struct net_device *net = hv_get_drvdata(dev);
struct net_device_context *net_device_ctx = netdev_priv(net);
@@ -1065,28 +1067,27 @@ int rndis_filter_device_add(struct hv_device *dev,
struct ndis_recv_scale_cap rsscap;
u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
unsigned int gso_max_size = GSO_MAX_SIZE;
- u32 mtu, size, num_rss_qs;
+ u32 mtu, size;
const struct cpumask *node_cpu_mask;
u32 num_possible_rss_qs;
int i, ret;
rndis_device = get_rndis_device();
if (!rndis_device)
- return -ENODEV;
+ return ERR_PTR(-ENODEV);
/*
* Let the inner driver handle this first to create the netvsc channel
* NOTE! Once the channel is created, we may get a receive callback
* (RndisFilterOnReceive()) before this call is completed
*/
- ret = netvsc_device_add(dev, device_info);
- if (ret != 0) {
+ net_device = netvsc_device_add(dev, device_info);
+ if (IS_ERR(net_device)) {
kfree(rndis_device);
- return ret;
+ return net_device;
}
/* Initialize the rndis device */
- net_device = net_device_ctx->nvdev;
net_device->max_chn = 1;
net_device->num_chn = 1;
@@ -1094,35 +1095,29 @@ int rndis_filter_device_add(struct hv_device *dev,
rndis_device->ndev = net;
/* Send the rndis initialization message */
- ret = rndis_filter_init_device(rndis_device);
- if (ret != 0) {
- rndis_filter_device_remove(dev, net_device);
- return ret;
- }
+ ret = rndis_filter_init_device(rndis_device, net_device);
+ if (ret != 0)
+ goto err_dev_remv;
/* Get the MTU from the host */
size = sizeof(u32);
- ret = rndis_filter_query_device(rndis_device,
+ ret = rndis_filter_query_device(rndis_device, net_device,
RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE,
&mtu, &size);
if (ret == 0 && size == sizeof(u32) && mtu < net->mtu)
net->mtu = mtu;
/* Get the mac address */
- ret = rndis_filter_query_device_mac(rndis_device);
- if (ret != 0) {
- rndis_filter_device_remove(dev, net_device);
- return ret;
- }
+ ret = rndis_filter_query_device_mac(rndis_device, net_device);
+ if (ret != 0)
+ goto err_dev_remv;
memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
/* Find HW offload capabilities */
- ret = rndis_query_hwcaps(rndis_device, &hwcaps);
- if (ret != 0) {
- rndis_filter_device_remove(dev, net_device);
- return ret;
- }
+ ret = rndis_query_hwcaps(rndis_device, net_device, &hwcaps);
+ if (ret != 0)
+ goto err_dev_remv;
/* A value of zero means "no change"; now turn on what we want. */
memset(&offloads, 0, sizeof(struct ndis_offload_params));
@@ -1177,24 +1172,24 @@ int rndis_filter_device_add(struct hv_device *dev,
netif_set_gso_max_size(net, gso_max_size);
- ret = rndis_filter_set_offload_params(net, &offloads);
+ ret = rndis_filter_set_offload_params(net, net_device, &offloads);
if (ret)
goto err_dev_remv;
- rndis_filter_query_device_link_status(rndis_device);
+ rndis_filter_query_device_link_status(rndis_device, net_device);
netdev_dbg(net, "Device MAC %pM link state %s\n",
rndis_device->hw_mac_adr,
rndis_device->link_state ? "down" : "up");
if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
- return 0;
+ return net_device;
- rndis_filter_query_link_speed(rndis_device);
+ rndis_filter_query_link_speed(rndis_device, net_device);
/* vRSS setup */
memset(&rsscap, 0, rsscap_size);
- ret = rndis_filter_query_device(rndis_device,
+ ret = rndis_filter_query_device(rndis_device, net_device,
OID_GEN_RECEIVE_SCALE_CAPABILITIES,
&rsscap, &rsscap_size);
if (ret || rsscap.num_recv_que < 2)
@@ -1220,9 +1215,18 @@ int rndis_filter_device_add(struct hv_device *dev,
net_device->num_chn);
atomic_set(&net_device->open_chn, 1);
- num_rss_qs = net_device->num_chn - 1;
- if (num_rss_qs == 0)
- return 0;
+
+ if (net_device->num_chn == 1)
+ return net_device;
+
+ for (i = 1; i < net_device->num_chn; i++) {
+ ret = netvsc_alloc_recv_comp_ring(net_device, i);
+ if (ret) {
+ while (--i != 0)
+ vfree(net_device->chan_table[i].mrc.slots);
+ goto out;
+ }
+ }
vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
@@ -1254,19 +1258,18 @@ int rndis_filter_device_add(struct hv_device *dev,
atomic_read(&net_device->open_chn) == net_device->num_chn);
/* ignore failures from setting rss parameters, still have channels */
- rndis_filter_set_rss_param(rndis_device, netvsc_hash_key,
- net_device->num_chn);
+ rndis_filter_set_rss_param(rndis_device, netvsc_hash_key);
out:
if (ret) {
net_device->max_chn = 1;
net_device->num_chn = 1;
}
- return 0; /* return 0 because primary channel can be used alone */
+ return net_device;
err_dev_remv:
rndis_filter_device_remove(dev, net_device);
- return ret;
+ return ERR_PTR(ret);
}
void rndis_filter_device_remove(struct hv_device *dev,
@@ -1304,3 +1307,8 @@ int rndis_filter_close(struct netvsc_device *nvdev)
return rndis_filter_close_device(nvdev->extension);
}
+
+bool rndis_filter_opened(const struct netvsc_device *nvdev)
+{
+ return atomic_read(&nvdev->open_cnt) > 0;
+}
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index a626c53..24a1eab 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -66,6 +66,7 @@
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/workqueue.h>
+#include <linux/interrupt.h>
#include <net/ieee802154_netdev.h>
#include <net/mac802154.h>
@@ -916,10 +917,7 @@ static int ca8210_spi_transfer(
struct cas_control *cas_ctl;
if (!spi) {
- dev_crit(
- &spi->dev,
- "NULL spi device passed to ca8210_spi_transfer\n"
- );
+ pr_crit("NULL spi device passed to %s\n", __func__);
return -ENODEV;
}
diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c
index 7d33496..ee7084b 100644
--- a/drivers/net/ieee802154/mrf24j40.c
+++ b/drivers/net/ieee802154/mrf24j40.c
@@ -1330,7 +1330,8 @@ static int mrf24j40_probe(struct spi_device *spi)
if (spi->max_speed_hz > MAX_SPI_SPEED_HZ) {
dev_warn(&spi->dev, "spi clock above possible maximum: %d",
MAX_SPI_SPEED_HZ);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_register_device;
}
ret = mrf24j40_hw_init(devrec);
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 8dab74a..c74893c 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -15,7 +15,7 @@ struct ipvlan_netns {
unsigned int ipvl_nf_hook_refcnt;
};
-static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
+static const struct nf_hook_ops ipvl_nfops[] = {
{
.hook = ipvlan_nf_input,
.pf = NFPROTO_IPV4,
@@ -169,7 +169,7 @@ static void ipvlan_port_destroy(struct net_device *dev)
#define IPVLAN_FEATURES \
(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
- NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \
+ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \
NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
index 22f133e..5dea206 100644
--- a/drivers/net/ipvlan/ipvtap.c
+++ b/drivers/net/ipvlan/ipvtap.c
@@ -24,7 +24,7 @@
#include <linux/virtio_net.h>
#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
- NETIF_F_TSO6 | NETIF_F_UFO)
+ NETIF_F_TSO6)
static dev_t ipvtap_major;
static struct cdev ipvtap_cdev;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 0f581ee..d2aea96 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -835,13 +835,13 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
#define ALWAYS_ON_OFFLOADS \
(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \
- NETIF_F_GSO_ROBUST)
+ NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL)
#define ALWAYS_ON_FEATURES (ALWAYS_ON_OFFLOADS | NETIF_F_LLTX)
#define MACVLAN_FEATURES \
(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
- NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \
+ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_LRO | \
NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
@@ -874,6 +874,7 @@ static int macvlan_init(struct net_device *dev)
dev->hw_features |= NETIF_F_LRO;
dev->vlan_features = lowerdev->vlan_features & MACVLAN_FEATURES;
dev->vlan_features |= ALWAYS_ON_OFFLOADS;
+ dev->hw_enc_features |= dev->features;
dev->gso_max_size = lowerdev->gso_max_size;
dev->gso_max_segs = lowerdev->gso_max_segs;
dev->hard_header_len = lowerdev->hard_header_len;
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 91e7b19b..c2d0ea2 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -49,7 +49,7 @@ static struct class macvtap_class = {
static struct cdev macvtap_cdev;
#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
- NETIF_F_TSO6 | NETIF_F_UFO)
+ NETIF_F_TSO6)
static void macvtap_count_tx_dropped(struct tap_dev *tap)
{
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 928fd89..a9d16a3 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -5,7 +5,7 @@
menuconfig MDIO_DEVICE
tristate "MDIO bus device drivers"
help
- MDIO devices and driver infrastructure code.
+ MDIO devices and driver infrastructure code.
config MDIO_BUS
tristate
@@ -85,7 +85,7 @@ config MDIO_BUS_MUX_MMIOREG
parent bus. Child bus selection is under the control of one of
the FPGA's registers.
- Currently, only 8-bit registers are supported.
+	  Currently, only 8/16/32-bit registers are supported.
config MDIO_CAVIUM
tristate
@@ -106,12 +106,22 @@ config MDIO_HISI_FEMAC
This module provides a driver for the MDIO busses found in the
	  Hisilicon SoCs that have a Fast Ethernet MAC.
+config MDIO_I2C
+ tristate
+ depends on I2C
+ help
+	  Support I2C-based PHYs. This provides an MDIO bus bridged
+	  to I2C, allowing PHYs connected in I2C mode to be accessed
+	  through the existing MDIO infrastructure.
+
+	  This is a library module.
+
config MDIO_MOXART
- tristate "MOXA ART MDIO interface support"
- depends on ARCH_MOXART
- help
- This driver supports the MDIO interface found in the network
- interface units of the MOXA ART SoC
+ tristate "MOXA ART MDIO interface support"
+ depends on ARCH_MOXART
+ help
+ This driver supports the MDIO interface found in the network
+ interface units of the MOXA ART SoC
config MDIO_OCTEON
tristate "Octeon and some ThunderX SOCs MDIO buses"
@@ -159,6 +169,16 @@ menuconfig PHYLIB
devices. This option provides infrastructure for
managing PHY devices.
+config PHYLINK
+ tristate
+ depends on NETDEVICES
+ select PHYLIB
+ select SWPHY
+ help
+	  PHYlink models the link between the PHY and MAC, allowing
+	  fixed-configuration links, PHYs, and SerDes links with MAC-level
+	  autonegotiation modes.
+
if PHYLIB
config SWPHY
@@ -172,7 +192,7 @@ config LED_TRIGGER_PHY
state change will trigger the events, for consumption by an
LED class driver. There are triggers for each link speed currently
supported by the phy, and are of the form:
- <mii bus id>:<phy>:<speed>
+ <mii bus id>:<phy>:<speed>
Where speed is in the form:
<Speed in megabits>Mbps or <Speed in gigabits>Gbps
@@ -180,15 +200,20 @@ config LED_TRIGGER_PHY
comment "MII PHY device drivers"
+config SFP
+ tristate "SFP cage support"
+ depends on I2C && PHYLINK
+ select MDIO_I2C
+
config AMD_PHY
tristate "AMD PHYs"
---help---
Currently supports the am79c874
config AQUANTIA_PHY
- tristate "Aquantia PHYs"
- ---help---
- Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405
+ tristate "Aquantia PHYs"
+ ---help---
+ Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405
config AT803X_PHY
tristate "AT803X PHYs"
@@ -341,6 +366,11 @@ config REALTEK_PHY
---help---
Supports the Realtek 821x PHY.
+config ROCKCHIP_PHY
+ tristate "Driver for Rockchip Ethernet PHYs"
+ ---help---
+ Currently supports the integrated Ethernet PHY.
+
config SMSC_PHY
tristate "SMSC PHYs"
---help---
@@ -352,21 +382,21 @@ config STE10XP
This is the driver for the STe100p and STe101p PHYs.
config TERANETICS_PHY
- tristate "Teranetics PHYs"
- ---help---
- Currently supports the Teranetics TN2020
+ tristate "Teranetics PHYs"
+ ---help---
+ Currently supports the Teranetics TN2020
config VITESSE_PHY
- tristate "Vitesse PHYs"
- ---help---
- Currently supports the vsc8244
+ tristate "Vitesse PHYs"
+ ---help---
+ Currently supports the vsc8244
config XILINX_GMII2RGMII
- tristate "Xilinx GMII2RGMII converter driver"
- ---help---
- This driver support xilinx GMII to RGMII IP core it provides
- the Reduced Gigabit Media Independent Interface(RGMII) between
- Ethernet physical media devices and the Gigabit Ethernet controller.
+ tristate "Xilinx GMII2RGMII converter driver"
+ ---help---
+	  This driver supports the Xilinx GMII to RGMII IP core. It provides
+	  the Reduced Gigabit Media Independent Interface (RGMII) between
+	  Ethernet physical media devices and the Gigabit Ethernet controller.
endif # PHYLIB
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 8e9b9f3..416df92 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -18,6 +18,7 @@ endif
libphy-$(CONFIG_SWPHY) += swphy.o
libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_led_triggers.o
+obj-$(CONFIG_PHYLINK) += phylink.o
obj-$(CONFIG_PHYLIB) += libphy.o
obj-$(CONFIG_MDIO_BCM_IPROC) += mdio-bcm-iproc.o
@@ -30,12 +31,17 @@ obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o
obj-$(CONFIG_MDIO_CAVIUM) += mdio-cavium.o
obj-$(CONFIG_MDIO_GPIO) += mdio-gpio.o
obj-$(CONFIG_MDIO_HISI_FEMAC) += mdio-hisi-femac.o
+obj-$(CONFIG_MDIO_I2C) += mdio-i2c.o
obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o
obj-$(CONFIG_MDIO_OCTEON) += mdio-octeon.o
obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o
obj-$(CONFIG_MDIO_THUNDER) += mdio-thunder.o
obj-$(CONFIG_MDIO_XGENE) += mdio-xgene.o
+obj-$(CONFIG_SFP) += sfp.o
+sfp-obj-$(CONFIG_SFP) += sfp-bus.o
+obj-y += $(sfp-obj-y) $(sfp-obj-m)
+
obj-$(CONFIG_AMD_PHY) += amd.o
obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o
obj-$(CONFIG_AT803X_PHY) += at803x.o
@@ -66,6 +72,7 @@ obj-$(CONFIG_MICROSEMI_PHY) += mscc.o
obj-$(CONFIG_NATIONAL_PHY) += national.o
obj-$(CONFIG_QSEMI_PHY) += qsemi.o
obj-$(CONFIG_REALTEK_PHY) += realtek.o
+obj-$(CONFIG_ROCKCHIP_PHY) += rockchip.o
obj-$(CONFIG_SMSC_PHY) += smsc.o
obj-$(CONFIG_STE10XP) += ste10Xp.o
obj-$(CONFIG_TERANETICS_PHY) += teranetics.o
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index caa9f6e..8b33f68 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -511,7 +511,7 @@ static int bcm7xxx_config_init(struct phy_device *phydev)
static int bcm7xxx_suspend(struct phy_device *phydev)
{
int ret;
- const struct bcm7xxx_regs {
+ static const struct bcm7xxx_regs {
int reg;
u16 value;
} bcm7xxx_suspend_cfg[] = {
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index c3065236..cbd6298 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -874,7 +874,6 @@ static void decode_rxts(struct dp83640_private *dp83640,
shhwtstamps = skb_hwtstamps(skb);
memset(shhwtstamps, 0, sizeof(*shhwtstamps));
shhwtstamps->hwtstamp = ns_to_ktime(rxts->ns);
- netif_rx_ni(skb);
list_add(&rxts->list, &dp83640->rxpool);
break;
}
@@ -885,6 +884,9 @@ static void decode_rxts(struct dp83640_private *dp83640,
list_add_tail(&rxts->list, &dp83640->rxts);
out:
spin_unlock_irqrestore(&dp83640->rx_lock, flags);
+
+ if (shhwtstamps)
+ netif_rx_ni(skb);
}
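The dp83640 change defers netif_rx_ni() until after rx_lock is dropped: netif_rx_ni() may process softirqs, so it must not be called inside a spinlocked, IRQ-disabled section. The general shape:

spin_lock_irqsave(&priv->lock, flags);
skb = find_matching_skb(priv);		/* hypothetical lookup under the lock */
spin_unlock_irqrestore(&priv->lock, flags);

if (skb)
	netif_rx_ni(skb);		/* deliver only after unlocking */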
static void decode_txts(struct dp83640_private *dp83640,
@@ -1425,7 +1427,6 @@ static bool dp83640_rxtstamp(struct phy_device *phydev,
shhwtstamps = skb_hwtstamps(skb);
memset(shhwtstamps, 0, sizeof(*shhwtstamps));
shhwtstamps->hwtstamp = ns_to_ktime(rxts->ns);
- netif_rx_ni(skb);
list_del_init(&rxts->list);
list_add(&rxts->list, &dp83640->rxpool);
break;
@@ -1438,6 +1439,8 @@ static bool dp83640_rxtstamp(struct phy_device *phydev,
skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT;
skb_queue_tail(&dp83640->rx_queue, skb);
schedule_delayed_work(&dp83640->ts_work, SKB_TIMESTAMP_TIMEOUT);
+ } else {
+ netif_rx_ni(skb);
}
return true;
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 5d314f1..15cbcdb 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -55,43 +55,35 @@
#define MII_M1011_IMASK_INIT 0x6400
#define MII_M1011_IMASK_CLEAR 0x0000
-#define MII_M1011_PHY_SCR 0x10
-#define MII_M1011_PHY_SCR_MDI 0x0000
-#define MII_M1011_PHY_SCR_MDI_X 0x0020
-#define MII_M1011_PHY_SCR_AUTO_CROSS 0x0060
-
-#define MII_M1145_PHY_EXT_SR 0x1b
-#define MII_M1145_PHY_EXT_CR 0x14
-#define MII_M1145_RGMII_RX_DELAY 0x0080
-#define MII_M1145_RGMII_TX_DELAY 0x0002
-#define MII_M1145_HWCFG_MODE_SGMII_NO_CLK 0x4
-#define MII_M1145_HWCFG_MODE_MASK 0xf
-#define MII_M1145_HWCFG_FIBER_COPPER_AUTO 0x8000
-
-#define MII_M1145_HWCFG_MODE_SGMII_NO_CLK 0x4
-#define MII_M1145_HWCFG_MODE_MASK 0xf
-#define MII_M1145_HWCFG_FIBER_COPPER_AUTO 0x8000
+#define MII_M1011_PHY_SCR 0x10
+#define MII_M1011_PHY_SCR_DOWNSHIFT_EN BIT(11)
+#define MII_M1011_PHY_SCR_DOWNSHIFT_SHIFT 12
+#define MII_M1011_PHY_SRC_DOWNSHIFT_MASK 0x7800
+#define MII_M1011_PHY_SCR_MDI (0x0 << 5)
+#define MII_M1011_PHY_SCR_MDI_X (0x1 << 5)
+#define MII_M1011_PHY_SCR_AUTO_CROSS (0x3 << 5)
#define MII_M1111_PHY_LED_CONTROL 0x18
#define MII_M1111_PHY_LED_DIRECT 0x4100
#define MII_M1111_PHY_LED_COMBINE 0x411c
#define MII_M1111_PHY_EXT_CR 0x14
-#define MII_M1111_RX_DELAY 0x80
-#define MII_M1111_TX_DELAY 0x2
+#define MII_M1111_RGMII_RX_DELAY BIT(7)
+#define MII_M1111_RGMII_TX_DELAY BIT(1)
#define MII_M1111_PHY_EXT_SR 0x1b
#define MII_M1111_HWCFG_MODE_MASK 0xf
-#define MII_M1111_HWCFG_MODE_COPPER_RGMII 0xb
#define MII_M1111_HWCFG_MODE_FIBER_RGMII 0x3
#define MII_M1111_HWCFG_MODE_SGMII_NO_CLK 0x4
+#define MII_M1111_HWCFG_MODE_RTBI 0x7
#define MII_M1111_HWCFG_MODE_COPPER_RTBI 0x9
-#define MII_M1111_HWCFG_FIBER_COPPER_AUTO 0x8000
-#define MII_M1111_HWCFG_FIBER_COPPER_RES 0x2000
+#define MII_M1111_HWCFG_MODE_COPPER_RGMII 0xb
+#define MII_M1111_HWCFG_FIBER_COPPER_RES BIT(13)
+#define MII_M1111_HWCFG_FIBER_COPPER_AUTO BIT(15)
#define MII_88E1121_PHY_MSCR_REG 21
#define MII_88E1121_PHY_MSCR_RX_DELAY BIT(5)
#define MII_88E1121_PHY_MSCR_TX_DELAY BIT(4)
-#define MII_88E1121_PHY_MSCR_DELAY_MASK (~(0x3 << 4))
+#define MII_88E1121_PHY_MSCR_DELAY_MASK (~(BIT(5) | BIT(4)))
#define MII_88E1121_MISC_TEST 0x1a
#define MII_88E1510_MISC_TEST_TEMP_THRESHOLD_MASK 0x1f00
@@ -108,24 +100,24 @@
#define MII_88E1318S_PHY_MSCR1_PAD_ODD BIT(6)
/* Copper Specific Interrupt Enable Register */
-#define MII_88E1318S_PHY_CSIER 0x12
+#define MII_88E1318S_PHY_CSIER 0x12
/* WOL Event Interrupt Enable */
-#define MII_88E1318S_PHY_CSIER_WOL_EIE BIT(7)
+#define MII_88E1318S_PHY_CSIER_WOL_EIE BIT(7)
/* LED Timer Control Register */
-#define MII_88E1318S_PHY_LED_TCR 0x12
-#define MII_88E1318S_PHY_LED_TCR_FORCE_INT BIT(15)
-#define MII_88E1318S_PHY_LED_TCR_INTn_ENABLE BIT(7)
-#define MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW BIT(11)
+#define MII_88E1318S_PHY_LED_TCR 0x12
+#define MII_88E1318S_PHY_LED_TCR_FORCE_INT BIT(15)
+#define MII_88E1318S_PHY_LED_TCR_INTn_ENABLE BIT(7)
+#define MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW BIT(11)
/* Magic Packet MAC address registers */
-#define MII_88E1318S_PHY_MAGIC_PACKET_WORD2 0x17
-#define MII_88E1318S_PHY_MAGIC_PACKET_WORD1 0x18
-#define MII_88E1318S_PHY_MAGIC_PACKET_WORD0 0x19
+#define MII_88E1318S_PHY_MAGIC_PACKET_WORD2 0x17
+#define MII_88E1318S_PHY_MAGIC_PACKET_WORD1 0x18
+#define MII_88E1318S_PHY_MAGIC_PACKET_WORD0 0x19
-#define MII_88E1318S_PHY_WOL_CTRL 0x10
-#define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS BIT(12)
-#define MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE BIT(14)
+#define MII_88E1318S_PHY_WOL_CTRL 0x10
+#define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS BIT(12)
+#define MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE BIT(14)
#define MII_88E1121_PHY_LED_CTRL 16
#define MII_88E1121_PHY_LED_DEF 0x0030
@@ -138,8 +130,6 @@
#define MII_M1011_PHY_STATUS_RESOLVED 0x0800
#define MII_M1011_PHY_STATUS_LINK 0x0400
-#define MII_M1116R_CONTROL_REG_MAC 21
-
#define MII_88E3016_PHY_SPEC_CTRL 0x10
#define MII_88E3016_DISABLE_SCRAMBLER 0x0200
#define MII_88E3016_AUTO_MDIX_CROSSOVER 0x0030
@@ -152,7 +142,7 @@
#define LPA_FIBER_1000HALF 0x40
#define LPA_FIBER_1000FULL 0x20
-#define LPA_PAUSE_FIBER 0x180
+#define LPA_PAUSE_FIBER 0x180
#define LPA_PAUSE_ASYM_FIBER 0x100
#define ADVERTISE_FIBER_1000HALF 0x40
@@ -274,6 +264,23 @@ static int marvell_set_polarity(struct phy_device *phydev, int polarity)
return 0;
}
+static int marvell_set_downshift(struct phy_device *phydev, bool enable,
+ u8 retries)
+{
+ int reg;
+
+ reg = phy_read(phydev, MII_M1011_PHY_SCR);
+ if (reg < 0)
+ return reg;
+
+ reg &= MII_M1011_PHY_SRC_DOWNSHIFT_MASK;
+ reg |= ((retries - 1) << MII_M1011_PHY_SCR_DOWNSHIFT_SHIFT);
+ if (enable)
+ reg |= MII_M1011_PHY_SCR_DOWNSHIFT_EN;
+
+ return phy_write(phydev, MII_M1011_PHY_SCR, reg);
+}
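marvell_set_downshift() is a plain read-modify-write of a PHY register; the same shape applies to any masked field (a sketch, with hypothetical register and field names):

int val = phy_read(phydev, MY_REG);	/* hypothetical register */
if (val < 0)
	return val;			/* phy_read() returns -errno on failure */

val &= ~MY_FIELD_MASK;			/* clear the field */
val |= new_bits & MY_FIELD_MASK;	/* insert the new value */
return phy_write(phydev, MY_REG, val);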
+
static int marvell_config_aneg(struct phy_device *phydev)
{
int err;
@@ -292,17 +299,11 @@ static int marvell_config_aneg(struct phy_device *phydev)
return err;
if (phydev->autoneg != AUTONEG_ENABLE) {
- int bmcr;
-
/* A write to speed/duplex bits (that is performed by
* genphy_config_aneg() call above) must be followed by
* a software reset. Otherwise, the write has no effect.
*/
- bmcr = phy_read(phydev, MII_BMCR);
- if (bmcr < 0)
- return bmcr;
-
- err = phy_write(phydev, MII_BMCR, bmcr | BMCR_RESET);
+ err = genphy_soft_reset(phydev);
if (err < 0)
return err;
}
@@ -318,8 +319,7 @@ static int m88e1101_config_aneg(struct phy_device *phydev)
* that certain registers get written in order
* to restart autonegotiation
*/
- err = phy_write(phydev, MII_BMCR, BMCR_RESET);
-
+ err = genphy_soft_reset(phydev);
if (err < 0)
return err;
@@ -354,7 +354,7 @@ static int m88e1111_config_aneg(struct phy_device *phydev)
* that certain registers get written in order
* to restart autonegotiation
*/
- err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+ err = genphy_soft_reset(phydev);
err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
if (err < 0)
@@ -370,17 +370,11 @@ static int m88e1111_config_aneg(struct phy_device *phydev)
return err;
if (phydev->autoneg != AUTONEG_ENABLE) {
- int bmcr;
-
/* A write to speed/duplex bits (that is performed by
* genphy_config_aneg() call above) must be followed by
* a software reset. Otherwise, the write has no effect.
*/
- bmcr = phy_read(phydev, MII_BMCR);
- if (bmcr < 0)
- return bmcr;
-
- err = phy_write(phydev, MII_BMCR, bmcr | BMCR_RESET);
+ err = genphy_soft_reset(phydev);
if (err < 0)
return err;
}
@@ -466,7 +460,7 @@ static int marvell_of_reg_init(struct phy_device *phydev)
}
#endif /* CONFIG_OF_MDIO */
-static int m88e1121_config_aneg(struct phy_device *phydev)
+static int m88e1121_config_aneg_rgmii_delays(struct phy_device *phydev)
{
int err, oldpage, mscr;
@@ -474,31 +468,45 @@ static int m88e1121_config_aneg(struct phy_device *phydev)
if (oldpage < 0)
return oldpage;
- if (phy_interface_is_rgmii(phydev)) {
- mscr = phy_read(phydev, MII_88E1121_PHY_MSCR_REG) &
- MII_88E1121_PHY_MSCR_DELAY_MASK;
-
- if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
- mscr |= (MII_88E1121_PHY_MSCR_RX_DELAY |
- MII_88E1121_PHY_MSCR_TX_DELAY);
- else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
- mscr |= MII_88E1121_PHY_MSCR_RX_DELAY;
- else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
- mscr |= MII_88E1121_PHY_MSCR_TX_DELAY;
-
- err = phy_write(phydev, MII_88E1121_PHY_MSCR_REG, mscr);
- if (err < 0)
- return err;
+ mscr = phy_read(phydev, MII_88E1121_PHY_MSCR_REG);
+ if (mscr < 0) {
+ err = mscr;
+ goto out;
}
+ mscr &= MII_88E1121_PHY_MSCR_DELAY_MASK;
+
+ if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
+ mscr |= (MII_88E1121_PHY_MSCR_RX_DELAY |
+ MII_88E1121_PHY_MSCR_TX_DELAY);
+ else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
+ mscr |= MII_88E1121_PHY_MSCR_RX_DELAY;
+ else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
+ mscr |= MII_88E1121_PHY_MSCR_TX_DELAY;
+
+ err = phy_write(phydev, MII_88E1121_PHY_MSCR_REG, mscr);
+
+out:
marvell_set_page(phydev, oldpage);
- err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+ return err;
+}
+
+static int m88e1121_config_aneg(struct phy_device *phydev)
+{
+ int err = 0;
+
+ if (phy_interface_is_rgmii(phydev)) {
+ err = m88e1121_config_aneg_rgmii_delays(phydev);
+ if (err)
+ return err;
+ }
+
+ err = genphy_soft_reset(phydev);
if (err < 0)
return err;
- err = phy_write(phydev, MII_M1011_PHY_SCR,
- MII_M1011_PHY_SCR_AUTO_CROSS);
+ err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
if (err < 0)
return err;
@@ -596,7 +604,7 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev)
if (changed == 0) {
/* Advertisement hasn't changed, but maybe aneg was never on to
- * begin with? Or maybe phy was isolated?
+ * begin with? Or maybe phy was isolated?
*/
int ctl = phy_read(phydev, MII_BMCR);
@@ -653,12 +661,9 @@ static int marvell_config_init(struct phy_device *phydev)
static int m88e1116r_config_init(struct phy_device *phydev)
{
- int temp;
int err;
- temp = phy_read(phydev, MII_BMCR);
- temp |= BMCR_RESET;
- err = phy_write(phydev, MII_BMCR, temp);
+ err = genphy_soft_reset(phydev);
if (err < 0)
return err;
@@ -668,35 +673,22 @@ static int m88e1116r_config_init(struct phy_device *phydev)
if (err < 0)
return err;
- temp = phy_read(phydev, MII_M1011_PHY_SCR);
- temp |= (7 << 12); /* max number of gigabit attempts */
- temp |= (1 << 11); /* enable downshift */
- temp |= MII_M1011_PHY_SCR_AUTO_CROSS;
- err = phy_write(phydev, MII_M1011_PHY_SCR, temp);
+ err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
if (err < 0)
return err;
- err = marvell_set_page(phydev, MII_MARVELL_MSCR_PAGE);
- if (err < 0)
- return err;
- temp = phy_read(phydev, MII_M1116R_CONTROL_REG_MAC);
- temp |= (1 << 5);
- temp |= (1 << 4);
- err = phy_write(phydev, MII_M1116R_CONTROL_REG_MAC, temp);
+ err = marvell_set_downshift(phydev, true, 8);
if (err < 0)
return err;
- err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
+
+ err = m88e1121_config_aneg_rgmii_delays(phydev);
if (err < 0)
return err;
- temp = phy_read(phydev, MII_BMCR);
- temp |= BMCR_RESET;
- err = phy_write(phydev, MII_BMCR, temp);
+ err = genphy_soft_reset(phydev);
if (err < 0)
return err;
- mdelay(500);
-
return marvell_config_init(phydev);
}
@@ -719,9 +711,29 @@ static int m88e3016_config_init(struct phy_device *phydev)
return marvell_config_init(phydev);
}
-static int m88e1111_config_init_rgmii(struct phy_device *phydev)
+static int m88e1111_config_init_hwcfg_mode(struct phy_device *phydev,
+ u16 mode,
+ int fibre_copper_auto)
+{
+ int temp;
+
+ temp = phy_read(phydev, MII_M1111_PHY_EXT_SR);
+ if (temp < 0)
+ return temp;
+
+ temp &= ~(MII_M1111_HWCFG_MODE_MASK |
+ MII_M1111_HWCFG_FIBER_COPPER_AUTO |
+ MII_M1111_HWCFG_FIBER_COPPER_RES);
+ temp |= mode;
+
+ if (fibre_copper_auto)
+ temp |= MII_M1111_HWCFG_FIBER_COPPER_AUTO;
+
+ return phy_write(phydev, MII_M1111_PHY_EXT_SR, temp);
+}
+
+static int m88e1111_config_init_rgmii_delays(struct phy_device *phydev)
{
- int err;
int temp;
temp = phy_read(phydev, MII_M1111_PHY_EXT_CR);
@@ -729,16 +741,24 @@ static int m88e1111_config_init_rgmii(struct phy_device *phydev)
return temp;
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) {
- temp |= (MII_M1111_RX_DELAY | MII_M1111_TX_DELAY);
+ temp |= (MII_M1111_RGMII_RX_DELAY | MII_M1111_RGMII_TX_DELAY);
} else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) {
- temp &= ~MII_M1111_TX_DELAY;
- temp |= MII_M1111_RX_DELAY;
+ temp &= ~MII_M1111_RGMII_TX_DELAY;
+ temp |= MII_M1111_RGMII_RX_DELAY;
} else if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
- temp &= ~MII_M1111_RX_DELAY;
- temp |= MII_M1111_TX_DELAY;
+ temp &= ~MII_M1111_RGMII_RX_DELAY;
+ temp |= MII_M1111_RGMII_TX_DELAY;
}
- err = phy_write(phydev, MII_M1111_PHY_EXT_CR, temp);
+ return phy_write(phydev, MII_M1111_PHY_EXT_CR, temp);
+}
+
+static int m88e1111_config_init_rgmii(struct phy_device *phydev)
+{
+ int temp;
+ int err;
+
+ err = m88e1111_config_init_rgmii_delays(phydev);
if (err < 0)
return err;
@@ -759,17 +779,11 @@ static int m88e1111_config_init_rgmii(struct phy_device *phydev)
static int m88e1111_config_init_sgmii(struct phy_device *phydev)
{
int err;
- int temp;
-
- temp = phy_read(phydev, MII_M1111_PHY_EXT_SR);
- if (temp < 0)
- return temp;
-
- temp &= ~(MII_M1111_HWCFG_MODE_MASK);
- temp |= MII_M1111_HWCFG_MODE_SGMII_NO_CLK;
- temp |= MII_M1111_HWCFG_FIBER_COPPER_AUTO;
- err = phy_write(phydev, MII_M1111_PHY_EXT_SR, temp);
+ err = m88e1111_config_init_hwcfg_mode(
+ phydev,
+ MII_M1111_HWCFG_MODE_SGMII_NO_CLK,
+ MII_M1111_HWCFG_FIBER_COPPER_AUTO);
if (err < 0)
return err;
@@ -780,48 +794,27 @@ static int m88e1111_config_init_sgmii(struct phy_device *phydev)
static int m88e1111_config_init_rtbi(struct phy_device *phydev)
{
int err;
- int temp;
- temp = phy_read(phydev, MII_M1111_PHY_EXT_CR);
- if (temp < 0)
- return temp;
-
- temp |= (MII_M1111_RX_DELAY | MII_M1111_TX_DELAY);
- err = phy_write(phydev, MII_M1111_PHY_EXT_CR, temp);
- if (err < 0)
+ err = m88e1111_config_init_rgmii_delays(phydev);
+ if (err)
return err;
- temp = phy_read(phydev, MII_M1111_PHY_EXT_SR);
- if (temp < 0)
- return temp;
-
- temp &= ~(MII_M1111_HWCFG_MODE_MASK |
- MII_M1111_HWCFG_FIBER_COPPER_RES);
- temp |= 0x7 | MII_M1111_HWCFG_FIBER_COPPER_AUTO;
-
- err = phy_write(phydev, MII_M1111_PHY_EXT_SR, temp);
+ err = m88e1111_config_init_hwcfg_mode(
+ phydev,
+ MII_M1111_HWCFG_MODE_RTBI,
+ MII_M1111_HWCFG_FIBER_COPPER_AUTO);
if (err < 0)
return err;
/* soft reset */
- err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+ err = genphy_soft_reset(phydev);
if (err < 0)
return err;
- do
- temp = phy_read(phydev, MII_BMCR);
- while (temp & BMCR_RESET);
-
- temp = phy_read(phydev, MII_M1111_PHY_EXT_SR);
- if (temp < 0)
- return temp;
-
- temp &= ~(MII_M1111_HWCFG_MODE_MASK |
- MII_M1111_HWCFG_FIBER_COPPER_RES);
- temp |= MII_M1111_HWCFG_MODE_COPPER_RTBI |
- MII_M1111_HWCFG_FIBER_COPPER_AUTO;
-
- return phy_write(phydev, MII_M1111_PHY_EXT_SR, temp);
+ return m88e1111_config_init_hwcfg_mode(
+ phydev,
+ MII_M1111_HWCFG_MODE_RTBI,
+ MII_M1111_HWCFG_FIBER_COPPER_AUTO);
}
static int m88e1111_config_init(struct phy_device *phydev)
@@ -850,7 +843,7 @@ static int m88e1111_config_init(struct phy_device *phydev)
if (err < 0)
return err;
- return phy_write(phydev, MII_BMCR, BMCR_RESET);
+ return genphy_soft_reset(phydev);
}
static int m88e1121_config_init(struct phy_device *phydev)
@@ -912,12 +905,11 @@ static int m88e1118_config_aneg(struct phy_device *phydev)
{
int err;
- err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+ err = genphy_soft_reset(phydev);
if (err < 0)
return err;
- err = phy_write(phydev, MII_M1011_PHY_SCR,
- MII_M1011_PHY_SCR_AUTO_CROSS);
+ err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
if (err < 0)
return err;
@@ -961,7 +953,7 @@ static int m88e1118_config_init(struct phy_device *phydev)
if (err < 0)
return err;
- return phy_write(phydev, MII_BMCR, BMCR_RESET);
+ return genphy_soft_reset(phydev);
}
static int m88e1149_config_init(struct phy_device *phydev)
@@ -987,20 +979,15 @@ static int m88e1149_config_init(struct phy_device *phydev)
if (err < 0)
return err;
- return phy_write(phydev, MII_BMCR, BMCR_RESET);
+ return genphy_soft_reset(phydev);
}
static int m88e1145_config_init_rgmii(struct phy_device *phydev)
{
+ int temp;
int err;
- int temp = phy_read(phydev, MII_M1145_PHY_EXT_CR);
-
- if (temp < 0)
- return temp;
- temp |= (MII_M1145_RGMII_RX_DELAY | MII_M1145_RGMII_TX_DELAY);
-
- err = phy_write(phydev, MII_M1145_PHY_EXT_CR, temp);
+ err = m88e1111_config_init_rgmii_delays(phydev);
if (err < 0)
return err;
@@ -1032,16 +1019,9 @@ static int m88e1145_config_init_rgmii(struct phy_device *phydev)
static int m88e1145_config_init_sgmii(struct phy_device *phydev)
{
- int temp = phy_read(phydev, MII_M1145_PHY_EXT_SR);
-
- if (temp < 0)
- return temp;
-
- temp &= ~MII_M1145_HWCFG_MODE_MASK;
- temp |= MII_M1145_HWCFG_MODE_SGMII_NO_CLK;
- temp |= MII_M1145_HWCFG_FIBER_COPPER_AUTO;
-
- return phy_write(phydev, MII_M1145_PHY_EXT_SR, temp);
+ return m88e1111_config_init_hwcfg_mode(
+ phydev, MII_M1111_HWCFG_MODE_SGMII_NO_CLK,
+ MII_M1111_HWCFG_FIBER_COPPER_AUTO);
}
static int m88e1145_config_init(struct phy_device *phydev)
@@ -1515,7 +1495,7 @@ static void marvell_get_strings(struct phy_device *phydev, u8 *data)
}
#ifndef UINT64_MAX
-#define UINT64_MAX (u64)(~((u64)0))
+#define UINT64_MAX (u64)(~((u64)0))
#endif
static u64 marvell_get_stat(struct phy_device *phydev, int i)
{
diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c
index 3439523..08e0647 100644
--- a/drivers/net/phy/mdio-bcm-unimac.c
+++ b/drivers/net/phy/mdio-bcm-unimac.c
@@ -21,6 +21,8 @@
#include <linux/of_platform.h>
#include <linux/of_mdio.h>
+#include <linux/platform_data/mdio-bcm-unimac.h>
+
#define MDIO_CMD 0x00
#define MDIO_START_BUSY (1 << 29)
#define MDIO_READ_FAIL (1 << 28)
@@ -41,46 +43,80 @@
struct unimac_mdio_priv {
struct mii_bus *mii_bus;
void __iomem *base;
+ int (*wait_func)(void *wait_func_data);
+ void *wait_func_data;
};
+static inline u32 unimac_mdio_readl(struct unimac_mdio_priv *priv, u32 offset)
+{
+ /* MIPS chips strapped for BE will automagically configure the
+ * peripheral registers for CPU-native byte order.
+ */
+ if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ return __raw_readl(priv->base + offset);
+ else
+ return readl_relaxed(priv->base + offset);
+}
+
+static inline void unimac_mdio_writel(struct unimac_mdio_priv *priv, u32 val,
+ u32 offset)
+{
+ if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ __raw_writel(val, priv->base + offset);
+ else
+ writel_relaxed(val, priv->base + offset);
+}
+
static inline void unimac_mdio_start(struct unimac_mdio_priv *priv)
{
u32 reg;
- reg = __raw_readl(priv->base + MDIO_CMD);
+ reg = unimac_mdio_readl(priv, MDIO_CMD);
reg |= MDIO_START_BUSY;
- __raw_writel(reg, priv->base + MDIO_CMD);
+ unimac_mdio_writel(priv, reg, MDIO_CMD);
}
static inline unsigned int unimac_mdio_busy(struct unimac_mdio_priv *priv)
{
- return __raw_readl(priv->base + MDIO_CMD) & MDIO_START_BUSY;
+ return unimac_mdio_readl(priv, MDIO_CMD) & MDIO_START_BUSY;
+}
+
+static int unimac_mdio_poll(void *wait_func_data)
+{
+ struct unimac_mdio_priv *priv = wait_func_data;
+ unsigned int timeout = 1000;
+
+ do {
+ if (!unimac_mdio_busy(priv))
+ return 0;
+
+ usleep_range(1000, 2000);
+ } while (--timeout);
+
+ if (!timeout)
+ return -ETIMEDOUT;
+
+ return 0;
}
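
The wait_func indirection introduced above lets the MAC driver that owns the MDIO block substitute its own completion mechanism for this polling default; a hypothetical interrupt-driven variant, with my_mac_priv and its mdio_done completion standing in for driver-private state, might look like:

static int my_mac_mdio_wait(void *wait_func_data)
{
	struct my_mac_priv *priv = wait_func_data;	/* hypothetical type */

	/* Sleep until the MDIO-done interrupt completes us, or time out */
	if (!wait_for_completion_timeout(&priv->mdio_done,
					 msecs_to_jiffies(100)))
		return -ETIMEDOUT;

	return 0;
}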
static int unimac_mdio_read(struct mii_bus *bus, int phy_id, int reg)
{
struct unimac_mdio_priv *priv = bus->priv;
- unsigned int timeout = 1000;
+ int ret;
u32 cmd;
/* Prepare the read operation */
cmd = MDIO_RD | (phy_id << MDIO_PMD_SHIFT) | (reg << MDIO_REG_SHIFT);
- __raw_writel(cmd, priv->base + MDIO_CMD);
+ unimac_mdio_writel(priv, cmd, MDIO_CMD);
/* Start MDIO transaction */
unimac_mdio_start(priv);
- do {
- if (!unimac_mdio_busy(priv))
- break;
+ ret = priv->wait_func(priv->wait_func_data);
+ if (ret)
+ return ret;
- usleep_range(1000, 2000);
- } while (timeout--);
-
- if (!timeout)
- return -ETIMEDOUT;
-
- cmd = __raw_readl(priv->base + MDIO_CMD);
+ cmd = unimac_mdio_readl(priv, MDIO_CMD);
/* Some broken devices are known not to release the line during
* turn-around, e.g.: Broadcom BCM53125 external switches, so check for
@@ -97,27 +133,16 @@ static int unimac_mdio_write(struct mii_bus *bus, int phy_id,
int reg, u16 val)
{
struct unimac_mdio_priv *priv = bus->priv;
- unsigned int timeout = 1000;
u32 cmd;
/* Prepare the write operation */
cmd = MDIO_WR | (phy_id << MDIO_PMD_SHIFT) |
(reg << MDIO_REG_SHIFT) | (0xffff & val);
- __raw_writel(cmd, priv->base + MDIO_CMD);
+ unimac_mdio_writel(priv, cmd, MDIO_CMD);
unimac_mdio_start(priv);
- do {
- if (!unimac_mdio_busy(priv))
- break;
-
- usleep_range(1000, 2000);
- } while (timeout--);
-
- if (!timeout)
- return -ETIMEDOUT;
-
- return 0;
+ return priv->wait_func(priv->wait_func_data);
}
/* Workaround for integrated BCM7xxx Gigabit PHYs which have a problem with
@@ -155,8 +180,10 @@ static int unimac_mdio_reset(struct mii_bus *bus)
}
for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
- if (read_mask & 1 << addr)
+ if (read_mask & 1 << addr) {
+ dev_dbg(&bus->dev, "Workaround for PHY @ %d\n", addr);
mdiobus_read(bus, addr, MII_BMSR);
+ }
}
return 0;
@@ -164,6 +191,7 @@ static int unimac_mdio_reset(struct mii_bus *bus)
static int unimac_mdio_probe(struct platform_device *pdev)
{
+ struct unimac_mdio_pdata *pdata = pdev->dev.platform_data;
struct unimac_mdio_priv *priv;
struct device_node *np;
struct mii_bus *bus;
@@ -193,12 +221,21 @@ static int unimac_mdio_probe(struct platform_device *pdev)
bus = priv->mii_bus;
bus->priv = priv;
- bus->name = "unimac MII bus";
+ if (pdata) {
+ bus->name = pdata->bus_name;
+ priv->wait_func = pdata->wait_func;
+ priv->wait_func_data = pdata->wait_func_data;
+ bus->phy_mask = ~pdata->phy_mask;
+ } else {
+ bus->name = "unimac MII bus";
+ priv->wait_func_data = priv;
+ priv->wait_func = unimac_mdio_poll;
+ }
bus->parent = &pdev->dev;
bus->read = unimac_mdio_read;
bus->write = unimac_mdio_write;
bus->reset = unimac_mdio_reset;
- snprintf(bus->id, MII_BUS_ID_SIZE, "%s", pdev->name);
+ snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id);
ret = of_mdiobus_register(bus, np);
if (ret) {
@@ -240,7 +277,7 @@ MODULE_DEVICE_TABLE(of, unimac_mdio_ids);
static struct platform_driver unimac_mdio_driver = {
.driver = {
- .name = "unimac-mdio",
+ .name = UNIMAC_MDIO_DRV_NAME,
.of_match_table = unimac_mdio_ids,
},
.probe = unimac_mdio_probe,
@@ -251,4 +288,4 @@ module_platform_driver(unimac_mdio_driver);
MODULE_AUTHOR("Broadcom Corporation");
MODULE_DESCRIPTION("Broadcom UniMAC MDIO bus controller");
MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:unimac-mdio");
+MODULE_ALIAS("platform:" UNIMAC_MDIO_DRV_NAME);
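
With platform data now accepted, the owning MAC driver can hand the bus a name, a PHY mask and the wait callback when it creates the MDIO platform device; a minimal sketch, assuming the unimac_mdio_pdata fields used in the probe path above and the hypothetical my_mac_mdio_wait() from the earlier sketch:

static struct unimac_mdio_pdata my_mdio_pdata = {
	.phy_mask	= BIT(1),		/* expose PHY address 1 only */
	.wait_func	= my_mac_mdio_wait,	/* hypothetical callback */
	.wait_func_data	= &my_mac_priv,		/* hypothetical state */
	.bus_name	= "my-mac MII bus",
};

static struct platform_device my_mdio_dev = {
	.name			= UNIMAC_MDIO_DRV_NAME,
	.id			= 0,
	.dev.platform_data	= &my_mdio_pdata,
};

Note that the probe path inverts the mask (bus->phy_mask = ~pdata->phy_mask), so pdata->phy_mask names the PHY addresses to scan, not those to skip.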
diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 7faa79b..4333c6e 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -116,7 +116,7 @@ static void mdc_set(struct mdiobb_ctrl *ctrl, int what)
gpiod_set_value(bitbang->mdc, what);
}
-static struct mdiobb_ops mdio_gpio_ops = {
+static const struct mdiobb_ops mdio_gpio_ops = {
.owner = THIS_MODULE,
.set_mdc = mdc_set,
.set_mdio_dir = mdio_dir,
diff --git a/drivers/net/phy/mdio-i2c.c b/drivers/net/phy/mdio-i2c.c
new file mode 100644
index 0000000..6d24fd1
--- /dev/null
+++ b/drivers/net/phy/mdio-i2c.c
@@ -0,0 +1,109 @@
+/*
+ * MDIO I2C bridge
+ *
+ * Copyright (C) 2015-2016 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Network PHYs can appear on I2C buses when they are part of an SFP module.
+ * This driver exposes these PHYs to the networking PHY code, allowing
+ * our PHY drivers access to these PHYs, and so allowing configuration
+ * of their settings.
+ */
+#include <linux/i2c.h>
+#include <linux/phy.h>
+
+#include "mdio-i2c.h"
+
+/*
+ * I2C bus addresses 0x50 and 0x51 normally belong to the EEPROM that
+ * the SFP specification requires modules to provide. These correspond
+ * to PHY addresses 16 and 17 (0x10 and 0x11), so disallow access to
+ * these "phy" addresses.
+ */
+static bool i2c_mii_valid_phy_id(int phy_id)
+{
+ return phy_id != 0x10 && phy_id != 0x11;
+}
+
+static unsigned int i2c_mii_phy_addr(int phy_id)
+{
+ return phy_id + 0x40;
+}
+
+static int i2c_mii_read(struct mii_bus *bus, int phy_id, int reg)
+{
+ struct i2c_adapter *i2c = bus->priv;
+ struct i2c_msg msgs[2];
+ u8 data[2], dev_addr = reg;
+ int bus_addr, ret;
+
+ if (!i2c_mii_valid_phy_id(phy_id))
+ return 0xffff;
+
+ bus_addr = i2c_mii_phy_addr(phy_id);
+ msgs[0].addr = bus_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = 1;
+ msgs[0].buf = &dev_addr;
+ msgs[1].addr = bus_addr;
+ msgs[1].flags = I2C_M_RD;
+ msgs[1].len = sizeof(data);
+ msgs[1].buf = data;
+
+ ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+ if (ret != ARRAY_SIZE(msgs))
+ return 0xffff;
+
+ return data[0] << 8 | data[1];
+}
+
+static int i2c_mii_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
+{
+ struct i2c_adapter *i2c = bus->priv;
+ struct i2c_msg msg;
+ int ret;
+ u8 data[3];
+
+ if (!i2c_mii_valid_phy_id(phy_id))
+ return 0;
+
+ data[0] = reg;
+ data[1] = val >> 8;
+ data[2] = val;
+
+ msg.addr = i2c_mii_phy_addr(phy_id);
+ msg.flags = 0;
+ msg.len = 3;
+ msg.buf = data;
+
+ ret = i2c_transfer(i2c, &msg, 1);
+
+ return ret < 0 ? ret : 0;
+}
+
+struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c)
+{
+ struct mii_bus *mii;
+
+ if (!i2c_check_functionality(i2c, I2C_FUNC_I2C))
+ return ERR_PTR(-EINVAL);
+
+ mii = mdiobus_alloc();
+ if (!mii)
+ return ERR_PTR(-ENOMEM);
+
+ snprintf(mii->id, MII_BUS_ID_SIZE, "i2c:%s", dev_name(parent));
+ mii->parent = parent;
+ mii->read = i2c_mii_read;
+ mii->write = i2c_mii_write;
+ mii->priv = i2c;
+
+ return mii;
+}
+EXPORT_SYMBOL_GPL(mdio_i2c_alloc);
+
+MODULE_AUTHOR("Russell King");
+MODULE_DESCRIPTION("MDIO I2C bridge library");
+MODULE_LICENSE("GPL v2");
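
A consumer of this library (the SFP core is the intended user) allocates the bus here and then registers it itself; a minimal usage sketch under that assumption:

	struct mii_bus *mii;
	int err;

	mii = mdio_i2c_alloc(dev, i2c);
	if (IS_ERR(mii))
		return PTR_ERR(mii);

	/* An SFP module's PHY answers at I2C address 0x56, MDIO address 22 */
	mii->phy_mask = ~(u32)BIT(22);

	err = mdiobus_register(mii);
	if (err) {
		mdiobus_free(mii);
		return err;
	}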
diff --git a/drivers/net/phy/mdio-i2c.h b/drivers/net/phy/mdio-i2c.h
new file mode 100644
index 0000000..889ab57
--- /dev/null
+++ b/drivers/net/phy/mdio-i2c.h
@@ -0,0 +1,19 @@
+/*
+ * MDIO I2C bridge
+ *
+ * Copyright (C) 2015 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef MDIO_I2C_H
+#define MDIO_I2C_H
+
+struct device;
+struct i2c_adapter;
+struct mii_bus;
+
+struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c);
+
+#endif
diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c
index 0a5f62e..0831b71 100644
--- a/drivers/net/phy/mdio-mux-bcm-iproc.c
+++ b/drivers/net/phy/mdio-mux-bcm-iproc.c
@@ -199,7 +199,7 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, md);
- rc = mdio_mux_init(md->dev, mdio_mux_iproc_switch_fn,
+ rc = mdio_mux_init(md->dev, md->dev->of_node, mdio_mux_iproc_switch_fn,
&md->mux_handle, md, md->mii_bus);
if (rc) {
dev_info(md->dev, "mdiomux initialization failed\n");
diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/phy/mdio-mux-gpio.c
index 9199499..082ffef 100644
--- a/drivers/net/phy/mdio-mux-gpio.c
+++ b/drivers/net/phy/mdio-mux-gpio.c
@@ -54,7 +54,7 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev)
if (IS_ERR(s->gpios))
return PTR_ERR(s->gpios);
- r = mdio_mux_init(&pdev->dev,
+ r = mdio_mux_init(&pdev->dev, pdev->dev.of_node,
mdio_mux_gpio_switch_fn, &s->mux_handle, s, NULL);
if (r != 0) {
diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/phy/mdio-mux-mmioreg.c
index 6a33646..2573ab0 100644
--- a/drivers/net/phy/mdio-mux-mmioreg.c
+++ b/drivers/net/phy/mdio-mux-mmioreg.c
@@ -105,7 +105,7 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
const __be32 *iprop;
int len, ret;
- dev_dbg(&pdev->dev, "probing node %s\n", np->full_name);
+ dev_dbg(&pdev->dev, "probing node %pOF\n", np);
s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL);
if (!s)
@@ -113,8 +113,8 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
ret = of_address_to_resource(np, 0, &res);
if (ret) {
- dev_err(&pdev->dev, "could not obtain memory map for node %s\n",
- np->full_name);
+ dev_err(&pdev->dev, "could not obtain memory map for node %pOF\n",
+ np);
return ret;
}
s->phys = res.start;
@@ -145,25 +145,26 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
for_each_available_child_of_node(np, np2) {
iprop = of_get_property(np2, "reg", &len);
if (!iprop || len != sizeof(uint32_t)) {
- dev_err(&pdev->dev, "mdio-mux child node %s is "
- "missing a 'reg' property\n", np2->full_name);
+ dev_err(&pdev->dev, "mdio-mux child node %pOF is "
+ "missing a 'reg' property\n", np2);
of_node_put(np2);
return -ENODEV;
}
if (be32_to_cpup(iprop) & ~s->mask) {
- dev_err(&pdev->dev, "mdio-mux child node %s has "
+ dev_err(&pdev->dev, "mdio-mux child node %pOF has "
"a 'reg' value with unmasked bits\n",
- np2->full_name);
+ np2);
of_node_put(np2);
return -ENODEV;
}
}
- ret = mdio_mux_init(&pdev->dev, mdio_mux_mmioreg_switch_fn,
+ ret = mdio_mux_init(&pdev->dev, pdev->dev.of_node,
+ mdio_mux_mmioreg_switch_fn,
&s->mux_handle, s, NULL);
if (ret) {
- dev_err(&pdev->dev, "failed to register mdio-mux bus %s\n",
- np->full_name);
+ dev_err(&pdev->dev, "failed to register mdio-mux bus %pOF\n",
+ np);
return ret;
}
diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c
index c608e1d..0a86f1e 100644
--- a/drivers/net/phy/mdio-mux.c
+++ b/drivers/net/phy/mdio-mux.c
@@ -13,7 +13,6 @@
#include <linux/module.h>
#include <linux/phy.h>
-#define DRV_VERSION "1.0"
#define DRV_DESCRIPTION "MDIO bus multiplexer driver"
struct mdio_mux_child_bus;
@@ -87,6 +86,7 @@ out:
static int parent_count;
int mdio_mux_init(struct device *dev,
+ struct device_node *mux_node,
int (*switch_fn)(int cur, int desired, void *data),
void **mux_handle,
void *data,
@@ -99,11 +99,11 @@ int mdio_mux_init(struct device *dev,
struct mdio_mux_parent_bus *pb;
struct mdio_mux_child_bus *cb;
- if (!dev->of_node)
+ if (!mux_node)
return -ENODEV;
if (!mux_bus) {
- parent_bus_node = of_parse_phandle(dev->of_node,
+ parent_bus_node = of_parse_phandle(mux_node,
"mdio-parent-bus", 0);
if (!parent_bus_node)
@@ -117,10 +117,11 @@ int mdio_mux_init(struct device *dev,
} else {
parent_bus_node = NULL;
parent_bus = mux_bus;
+ get_device(&parent_bus->dev);
}
pb = devm_kzalloc(dev, sizeof(*pb), GFP_KERNEL);
- if (pb == NULL) {
+ if (!pb) {
ret_val = -ENOMEM;
goto err_pb_kz;
}
@@ -132,22 +133,19 @@ int mdio_mux_init(struct device *dev,
pb->mii_bus = parent_bus;
ret_val = -ENODEV;
- for_each_available_child_of_node(dev->of_node, child_bus_node) {
+ for_each_available_child_of_node(mux_node, child_bus_node) {
int v;
r = of_property_read_u32(child_bus_node, "reg", &v);
if (r) {
dev_err(dev,
- "Error: Failed to find reg for child %s\n",
- of_node_full_name(child_bus_node));
+ "Error: Failed to find reg for child %pOF\n",
+ child_bus_node);
continue;
}
cb = devm_kzalloc(dev, sizeof(*cb), GFP_KERNEL);
- if (cb == NULL) {
- dev_err(dev,
- "Error: Failed to allocate memory for child %s\n",
- of_node_full_name(child_bus_node));
+ if (!cb) {
ret_val = -ENOMEM;
continue;
}
@@ -156,9 +154,6 @@ int mdio_mux_init(struct device *dev,
cb->mii_bus = mdiobus_alloc();
if (!cb->mii_bus) {
- dev_err(dev,
- "Error: Failed to allocate MDIO bus for child %s\n",
- of_node_full_name(child_bus_node));
ret_val = -ENOMEM;
devm_kfree(dev, cb);
continue;
@@ -174,8 +169,8 @@ int mdio_mux_init(struct device *dev,
r = of_mdiobus_register(cb->mii_bus, child_bus_node);
if (r) {
dev_err(dev,
- "Error: Failed to register MDIO bus for child %s\n",
- of_node_full_name(child_bus_node));
+ "Error: Failed to register MDIO bus for child %pOF\n",
+ child_bus_node);
mdiobus_free(cb->mii_bus);
devm_kfree(dev, cb);
} else {
@@ -185,16 +180,13 @@ int mdio_mux_init(struct device *dev,
}
if (pb->children) {
*mux_handle = pb;
- dev_info(dev, "Version " DRV_VERSION "\n");
return 0;
}
dev_err(dev, "Error: No acceptable child buses found\n");
devm_kfree(dev, pb);
err_pb_kz:
- /* balance the reference of_mdio_find_bus() took */
- if (!mux_bus)
- put_device(&parent_bus->dev);
+ put_device(&parent_bus->dev);
err_parent_bus:
of_node_put(parent_bus_node);
return ret_val;
@@ -212,12 +204,10 @@ void mdio_mux_uninit(void *mux_handle)
cb = cb->next;
}
- /* balance the reference of_mdio_find_bus() in mdio_mux_init() took */
put_device(&pb->mii_bus->dev);
}
EXPORT_SYMBOL_GPL(mdio_mux_uninit);
MODULE_DESCRIPTION(DRV_DESCRIPTION);
-MODULE_VERSION(DRV_VERSION);
MODULE_AUTHOR("David Daney");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 2df7b62c..b6f9fa6 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -399,8 +399,7 @@ error:
}
/* Put PHYs in RESET to save power */
- if (bus->reset_gpiod)
- gpiod_set_value_cansleep(bus->reset_gpiod, 1);
+ gpiod_set_value_cansleep(bus->reset_gpiod, 1);
device_del(&bus->dev);
return err;
@@ -425,8 +424,7 @@ void mdiobus_unregister(struct mii_bus *bus)
}
/* Put PHYs in RESET to save power */
- if (bus->reset_gpiod)
- gpiod_set_value_cansleep(bus->reset_gpiod, 1);
+ gpiod_set_value_cansleep(bus->reset_gpiod, 1);
device_del(&bus->dev);
}
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 6739b73..21f75ae 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -9,6 +9,186 @@
#include <linux/export.h>
#include <linux/phy.h>
+const char *phy_speed_to_str(int speed)
+{
+ switch (speed) {
+ case SPEED_10:
+ return "10Mbps";
+ case SPEED_100:
+ return "100Mbps";
+ case SPEED_1000:
+ return "1Gbps";
+ case SPEED_2500:
+ return "2.5Gbps";
+ case SPEED_5000:
+ return "5Gbps";
+ case SPEED_10000:
+ return "10Gbps";
+ case SPEED_14000:
+ return "14Gbps";
+ case SPEED_20000:
+ return "20Gbps";
+ case SPEED_25000:
+ return "25Gbps";
+ case SPEED_40000:
+ return "40Gbps";
+ case SPEED_50000:
+ return "50Gbps";
+ case SPEED_56000:
+ return "56Gbps";
+ case SPEED_100000:
+ return "100Gbps";
+ case SPEED_UNKNOWN:
+ return "Unknown";
+ default:
+ return "Unsupported (update phy-core.c)";
+ }
+}
+EXPORT_SYMBOL_GPL(phy_speed_to_str);
+
+const char *phy_duplex_to_str(unsigned int duplex)
+{
+ if (duplex == DUPLEX_HALF)
+ return "Half";
+ if (duplex == DUPLEX_FULL)
+ return "Full";
+ if (duplex == DUPLEX_UNKNOWN)
+ return "Unknown";
+ return "Unsupported (update phy-core.c)";
+}
+EXPORT_SYMBOL_GPL(phy_duplex_to_str);
+
+/* A mapping of all SUPPORTED settings to speed/duplex. This table
+ * must be grouped by speed and sorted in descending match priority
+ * - i.e., descending speed.
+ */
+static const struct phy_setting settings[] = {
+ {
+ .speed = SPEED_10000,
+ .duplex = DUPLEX_FULL,
+ .bit = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+ },
+ {
+ .speed = SPEED_10000,
+ .duplex = DUPLEX_FULL,
+ .bit = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
+ },
+ {
+ .speed = SPEED_10000,
+ .duplex = DUPLEX_FULL,
+ .bit = ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+ },
+ {
+ .speed = SPEED_2500,
+ .duplex = DUPLEX_FULL,
+ .bit = ETHTOOL_LINK_MODE_2500baseX_Full_BIT,
+ },
+ {
+ .speed = SPEED_1000,
+ .duplex = DUPLEX_FULL,
+ .bit = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+ },
+ {
+ .speed = SPEED_1000,
+ .duplex = DUPLEX_FULL,
+ .bit = ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+ },
+ {
+ .speed = SPEED_1000,
+ .duplex = DUPLEX_FULL,
+ .bit = ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+ },
+ {
+ .speed = SPEED_1000,
+ .duplex = DUPLEX_HALF,
+ .bit = ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+ },
+ {
+ .speed = SPEED_100,
+ .duplex = DUPLEX_FULL,
+ .bit = ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+ },
+ {
+ .speed = SPEED_100,
+ .duplex = DUPLEX_HALF,
+ .bit = ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+ },
+ {
+ .speed = SPEED_10,
+ .duplex = DUPLEX_FULL,
+ .bit = ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+ },
+ {
+ .speed = SPEED_10,
+ .duplex = DUPLEX_HALF,
+ .bit = ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+ },
+};
+
+/**
+ * phy_lookup_setting - lookup a PHY setting
+ * @speed: speed to match
+ * @duplex: duplex to match
+ * @mask: allowed link modes
+ * @maxbit: bit size of link modes
+ * @exact: an exact match is required
+ *
+ * Search the settings array for a setting that matches the speed and
+ * duplex, and which is supported.
+ *
+ * If @exact is unset, either an exact match or %NULL for no match will
+ * be returned.
+ *
+ * If @exact is set, an exact match, the fastest supported setting at
+ * or below the specified speed, the slowest supported setting, or if
+ * they all fail, %NULL will be returned.
+ */
+const struct phy_setting *
+phy_lookup_setting(int speed, int duplex, const unsigned long *mask,
+ size_t maxbit, bool exact)
+{
+ const struct phy_setting *p, *match = NULL, *last = NULL;
+ int i;
+
+ for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) {
+ if (p->bit < maxbit && test_bit(p->bit, mask)) {
+ last = p;
+ if (p->speed == speed && p->duplex == duplex) {
+ /* Exact match for speed and duplex */
+ match = p;
+ break;
+ } else if (!exact) {
+ if (!match && p->speed <= speed)
+ /* Candidate */
+ match = p;
+
+ if (p->speed < speed)
+ break;
+ }
+ }
+ }
+
+ if (!match && !exact)
+ match = last;
+
+ return match;
+}
+EXPORT_SYMBOL_GPL(phy_lookup_setting);
+
+size_t phy_speeds(unsigned int *speeds, size_t size,
+ unsigned long *mask, size_t maxbit)
+{
+ size_t count;
+ int i;
+
+ for (i = 0, count = 0; i < ARRAY_SIZE(settings) && count < size; i++)
+ if (settings[i].bit < maxbit &&
+ test_bit(settings[i].bit, mask) &&
+ (count == 0 || speeds[count - 1] != settings[i].speed))
+ speeds[count++] = settings[i].speed;
+
+ return count;
+}
+
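
These helpers take an ethtool link-mode bitmap rather than the legacy u32 SUPPORTED_* word; a minimal sketch of a caller:

	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
	const struct phy_setting *s;
	unsigned int speeds[4];
	size_t n;

	__set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mask);
	__set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, mask);

	/* Exact lookup: 1000baseT/Full is set, so its entry is returned */
	s = phy_lookup_setting(SPEED_1000, DUPLEX_FULL, mask,
			       __ETHTOOL_LINK_MODE_MASK_NBITS, true);

	/* Distinct supported speeds, fastest first: { 1000, 100 } */
	n = phy_speeds(speeds, ARRAY_SIZE(speeds), mask,
		       __ETHTOOL_LINK_MODE_MASK_NBITS);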
static void mmd_phy_indirect(struct mii_bus *bus, int phy_addr, int devad,
u16 regnum)
{
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index d0626bf..e842d2c 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -30,7 +30,6 @@
#include <linux/ethtool.h>
#include <linux/phy.h>
#include <linux/phy_led_triggers.h>
-#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/mdio.h>
#include <linux/io.h>
@@ -39,42 +38,6 @@
#include <asm/irq.h>
-static const char *phy_speed_to_str(int speed)
-{
- switch (speed) {
- case SPEED_10:
- return "10Mbps";
- case SPEED_100:
- return "100Mbps";
- case SPEED_1000:
- return "1Gbps";
- case SPEED_2500:
- return "2.5Gbps";
- case SPEED_5000:
- return "5Gbps";
- case SPEED_10000:
- return "10Gbps";
- case SPEED_14000:
- return "14Gbps";
- case SPEED_20000:
- return "20Gbps";
- case SPEED_25000:
- return "25Gbps";
- case SPEED_40000:
- return "40Gbps";
- case SPEED_50000:
- return "50Gbps";
- case SPEED_56000:
- return "56Gbps";
- case SPEED_100000:
- return "100Gbps";
- case SPEED_UNKNOWN:
- return "Unknown";
- default:
- return "Unsupported (update phy.c)";
- }
-}
-
#define PHY_STATE_STR(_state) \
case PHY_##_state: \
return __stringify(_state); \
@@ -110,7 +73,7 @@ void phy_print_status(struct phy_device *phydev)
netdev_info(phydev->attached_dev,
"Link is Up - %s/%s - flow control %s\n",
phy_speed_to_str(phydev->speed),
- DUPLEX_FULL == phydev->duplex ? "Full" : "Half",
+ phy_duplex_to_str(phydev->duplex),
phydev->pause ? "rx/tx" : "off");
} else {
netdev_info(phydev->attached_dev, "Link is Down\n");
@@ -194,123 +157,6 @@ int phy_aneg_done(struct phy_device *phydev)
}
EXPORT_SYMBOL(phy_aneg_done);
-/* A structure for mapping a particular speed and duplex
- * combination to a particular SUPPORTED and ADVERTISED value
- */
-struct phy_setting {
- int speed;
- int duplex;
- u32 setting;
-};
-
-/* A mapping of all SUPPORTED settings to speed/duplex. This table
- * must be grouped by speed and sorted in descending match priority
- * - iow, descending speed. */
-static const struct phy_setting settings[] = {
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .setting = SUPPORTED_10000baseKR_Full,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .setting = SUPPORTED_10000baseKX4_Full,
- },
- {
- .speed = SPEED_10000,
- .duplex = DUPLEX_FULL,
- .setting = SUPPORTED_10000baseT_Full,
- },
- {
- .speed = SPEED_2500,
- .duplex = DUPLEX_FULL,
- .setting = SUPPORTED_2500baseX_Full,
- },
- {
- .speed = SPEED_1000,
- .duplex = DUPLEX_FULL,
- .setting = SUPPORTED_1000baseKX_Full,
- },
- {
- .speed = SPEED_1000,
- .duplex = DUPLEX_FULL,
- .setting = SUPPORTED_1000baseT_Full,
- },
- {
- .speed = SPEED_1000,
- .duplex = DUPLEX_HALF,
- .setting = SUPPORTED_1000baseT_Half,
- },
- {
- .speed = SPEED_100,
- .duplex = DUPLEX_FULL,
- .setting = SUPPORTED_100baseT_Full,
- },
- {
- .speed = SPEED_100,
- .duplex = DUPLEX_HALF,
- .setting = SUPPORTED_100baseT_Half,
- },
- {
- .speed = SPEED_10,
- .duplex = DUPLEX_FULL,
- .setting = SUPPORTED_10baseT_Full,
- },
- {
- .speed = SPEED_10,
- .duplex = DUPLEX_HALF,
- .setting = SUPPORTED_10baseT_Half,
- },
-};
-
-/**
- * phy_lookup_setting - lookup a PHY setting
- * @speed: speed to match
- * @duplex: duplex to match
- * @features: allowed link modes
- * @exact: an exact match is required
- *
- * Search the settings array for a setting that matches the speed and
- * duplex, and which is supported.
- *
- * If @exact is unset, either an exact match or %NULL for no match will
- * be returned.
- *
- * If @exact is set, an exact match, the fastest supported setting at
- * or below the specified speed, the slowest supported setting, or if
- * they all fail, %NULL will be returned.
- */
-static const struct phy_setting *
-phy_lookup_setting(int speed, int duplex, u32 features, bool exact)
-{
- const struct phy_setting *p, *match = NULL, *last = NULL;
- int i;
-
- for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) {
- if (p->setting & features) {
- last = p;
- if (p->speed == speed && p->duplex == duplex) {
- /* Exact match for speed and duplex */
- match = p;
- break;
- } else if (!exact) {
- if (!match && p->speed <= speed)
- /* Candidate */
- match = p;
-
- if (p->speed < speed)
- break;
- }
- }
- }
-
- if (!match && !exact)
- match = last;
-
- return match;
-}
-
/**
* phy_find_valid - find a PHY setting that matches the requested parameters
* @speed: desired speed
@@ -327,7 +173,9 @@ phy_lookup_setting(int speed, int duplex, u32 features, bool exact)
static const struct phy_setting *
phy_find_valid(int speed, int duplex, u32 supported)
{
- return phy_lookup_setting(speed, duplex, supported, false);
+ unsigned long mask = supported;
+
+ return phy_lookup_setting(speed, duplex, &mask, BITS_PER_LONG, false);
}
/**
@@ -344,16 +192,9 @@ unsigned int phy_supported_speeds(struct phy_device *phy,
unsigned int *speeds,
unsigned int size)
{
- unsigned int count = 0;
- unsigned int idx = 0;
+ unsigned long supported = phy->supported;
- for (idx = 0; idx < ARRAY_SIZE(settings) && count < size; idx++)
- /* Assumes settings are grouped by speed */
- if ((settings[idx].setting & phy->supported) &&
- (count == 0 || speeds[count - 1] != settings[idx].speed))
- speeds[count++] = settings[idx].speed;
-
- return count;
+ return phy_speeds(speeds, size, &supported, BITS_PER_LONG);
}
/**
@@ -367,7 +208,9 @@ unsigned int phy_supported_speeds(struct phy_device *phy,
*/
static inline bool phy_check_valid(int speed, int duplex, u32 features)
{
- return !!phy_lookup_setting(speed, duplex, features, true);
+ unsigned long mask = features;
+
+ return !!phy_lookup_setting(speed, duplex, &mask, BITS_PER_LONG, true);
}
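
Copying the legacy u32 into an unsigned long and passing BITS_PER_LONG, here as in phy_find_valid() above, restricts the lookup to the first 32 link-mode bits, which is all the legacy mask can express.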
/**
@@ -705,14 +548,15 @@ EXPORT_SYMBOL(phy_start_aneg);
*
* Description: The PHY infrastructure can run a state machine
* which tracks whether the PHY is starting up, negotiating,
- * etc. This function starts the timer which tracks the state
- * of the PHY. If you want to maintain your own state machine,
+ * etc. This function starts the delayed workqueue which tracks
+ * the state of the PHY. If you want to maintain your own state machine,
* do not call this function.
*/
void phy_start_machine(struct phy_device *phydev)
{
queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ);
}
+EXPORT_SYMBOL_GPL(phy_start_machine);
/**
* phy_trigger_machine - trigger the state machine to run
@@ -737,9 +581,9 @@ void phy_trigger_machine(struct phy_device *phydev, bool sync)
* phy_stop_machine - stop the PHY state machine tracking
* @phydev: target phy_device struct
*
- * Description: Stops the state machine timer, sets the state to UP
- * (unless it wasn't up yet). This function must be called BEFORE
- * phy_detach.
+ * Description: Stops the state machine delayed workqueue, sets the
+ * state to UP (unless it wasn't up yet). This function must be
+ * called BEFORE phy_detach.
*/
void phy_stop_machine(struct phy_device *phydev)
{
@@ -1019,9 +863,15 @@ void phy_start(struct phy_device *phydev)
}
EXPORT_SYMBOL(phy_start);
-static void phy_adjust_link(struct phy_device *phydev)
+static void phy_link_up(struct phy_device *phydev)
{
- phydev->adjust_link(phydev->attached_dev);
+ phydev->phy_link_change(phydev, true, true);
+ phy_led_trigger_change_speed(phydev);
+}
+
+static void phy_link_down(struct phy_device *phydev, bool do_carrier)
+{
+ phydev->phy_link_change(phydev, false, do_carrier);
phy_led_trigger_change_speed(phydev);
}
@@ -1066,8 +916,7 @@ void phy_state_machine(struct work_struct *work)
/* If the link is down, give up on negotiation for now */
if (!phydev->link) {
phydev->state = PHY_NOLINK;
- netif_carrier_off(phydev->attached_dev);
- phy_adjust_link(phydev);
+ phy_link_down(phydev, true);
break;
}
@@ -1079,9 +928,7 @@ void phy_state_machine(struct work_struct *work)
/* If AN is done, we're running */
if (err > 0) {
phydev->state = PHY_RUNNING;
- netif_carrier_on(phydev->attached_dev);
- phy_adjust_link(phydev);
-
+ phy_link_up(phydev);
} else if (0 == phydev->link_timeout--)
needs_aneg = true;
break;
@@ -1106,8 +953,7 @@ void phy_state_machine(struct work_struct *work)
}
}
phydev->state = PHY_RUNNING;
- netif_carrier_on(phydev->attached_dev);
- phy_adjust_link(phydev);
+ phy_link_up(phydev);
}
break;
case PHY_FORCING:
@@ -1117,13 +963,12 @@ void phy_state_machine(struct work_struct *work)
if (phydev->link) {
phydev->state = PHY_RUNNING;
- netif_carrier_on(phydev->attached_dev);
+ phy_link_up(phydev);
} else {
if (0 == phydev->link_timeout--)
needs_aneg = true;
+ phy_link_down(phydev, false);
}
-
- phy_adjust_link(phydev);
break;
case PHY_RUNNING:
/* Only register a CHANGE if we are polling and link changed
@@ -1155,14 +1000,12 @@ void phy_state_machine(struct work_struct *work)
if (phydev->link) {
phydev->state = PHY_RUNNING;
- netif_carrier_on(phydev->attached_dev);
+ phy_link_up(phydev);
} else {
phydev->state = PHY_NOLINK;
- netif_carrier_off(phydev->attached_dev);
+ phy_link_down(phydev, true);
}
- phy_adjust_link(phydev);
-
if (phy_interrupt_is_valid(phydev))
err = phy_config_interrupt(phydev,
PHY_INTERRUPT_ENABLED);
@@ -1170,8 +1013,7 @@ void phy_state_machine(struct work_struct *work)
case PHY_HALTED:
if (phydev->link) {
phydev->link = 0;
- netif_carrier_off(phydev->attached_dev);
- phy_adjust_link(phydev);
+ phy_link_down(phydev, true);
do_suspend = true;
}
break;
@@ -1191,11 +1033,11 @@ void phy_state_machine(struct work_struct *work)
if (phydev->link) {
phydev->state = PHY_RUNNING;
- netif_carrier_on(phydev->attached_dev);
+ phy_link_up(phydev);
} else {
phydev->state = PHY_NOLINK;
+ phy_link_down(phydev, false);
}
- phy_adjust_link(phydev);
} else {
phydev->state = PHY_AN;
phydev->link_timeout = PHY_AN_TIMEOUT;
@@ -1207,11 +1049,11 @@ void phy_state_machine(struct work_struct *work)
if (phydev->link) {
phydev->state = PHY_RUNNING;
- netif_carrier_on(phydev->attached_dev);
+ phy_link_up(phydev);
} else {
phydev->state = PHY_NOLINK;
+ phy_link_down(phydev, false);
}
- phy_adjust_link(phydev);
}
break;
}
@@ -1226,9 +1068,10 @@ void phy_state_machine(struct work_struct *work)
if (err < 0)
phy_error(phydev);
- phydev_dbg(phydev, "PHY state change %s -> %s\n",
- phy_state_to_str(old_state),
- phy_state_to_str(phydev->state));
+ if (old_state != phydev->state)
+ phydev_dbg(phydev, "PHY state change %s -> %s\n",
+ phy_state_to_str(old_state),
+ phy_state_to_str(phydev->state));
/* Only re-schedule a PHY state machine change if we are polling the
* PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 2f742ae..8cf0c59 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -688,6 +688,19 @@ struct phy_device *phy_find_first(struct mii_bus *bus)
}
EXPORT_SYMBOL(phy_find_first);
+static void phy_link_change(struct phy_device *phydev, bool up, bool do_carrier)
+{
+ struct net_device *netdev = phydev->attached_dev;
+
+ if (do_carrier) {
+ if (up)
+ netif_carrier_on(netdev);
+ else
+ netif_carrier_off(netdev);
+ }
+ phydev->adjust_link(netdev);
+}
+
/**
* phy_prepare_link - prepares the PHY layer to monitor link status
* @phydev: target phy_device struct
@@ -861,21 +874,37 @@ void phy_attached_info(struct phy_device *phydev)
}
EXPORT_SYMBOL(phy_attached_info);
-#define ATTACHED_FMT "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)"
+#define ATTACHED_FMT "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%s)"
void phy_attached_print(struct phy_device *phydev, const char *fmt, ...)
{
const char *drv_name = phydev->drv ? phydev->drv->name : "unbound";
+ char *irq_str;
+ char irq_num[4];
+
+ switch (phydev->irq) {
+ case PHY_POLL:
+ irq_str = "POLL";
+ break;
+ case PHY_IGNORE_INTERRUPT:
+ irq_str = "IGNORE";
+ break;
+ default:
+ snprintf(irq_num, sizeof(irq_num), "%d", phydev->irq);
+ irq_str = irq_num;
+ break;
+ }
+
if (!fmt) {
dev_info(&phydev->mdio.dev, ATTACHED_FMT "\n",
drv_name, phydev_name(phydev),
- phydev->irq);
+ irq_str);
} else {
va_list ap;
dev_info(&phydev->mdio.dev, ATTACHED_FMT,
drv_name, phydev_name(phydev),
- phydev->irq);
+ irq_str);
va_start(ap, fmt);
vprintk(fmt, ap);
@@ -953,6 +982,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
goto error;
}
+ phydev->phy_link_change = phy_link_change;
phydev->attached_dev = dev;
dev->phydev = phydev;
@@ -1072,6 +1102,7 @@ void phy_detach(struct phy_device *phydev)
phydev->attached_dev->phydev = NULL;
phydev->attached_dev = NULL;
phy_suspend(phydev);
+ phydev->phylink = NULL;
phy_led_triggers_unregister(phydev);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
new file mode 100644
index 0000000..bcb4755
--- /dev/null
+++ b/drivers/net/phy/phylink.c
@@ -0,0 +1,1462 @@
+/*
+ * phylink models the MAC to optional PHY connection, supporting
+ * technologies such as SFP cages where the PHY is hot-pluggable.
+ *
+ * Copyright (C) 2015 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/ethtool.h>
+#include <linux/export.h>
+#include <linux/gpio/consumer.h>
+#include <linux/netdevice.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/phy_fixed.h>
+#include <linux/phylink.h>
+#include <linux/rtnetlink.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "sfp.h"
+#include "swphy.h"
+
+#define SUPPORTED_INTERFACES \
+ (SUPPORTED_TP | SUPPORTED_MII | SUPPORTED_FIBRE | \
+ SUPPORTED_BNC | SUPPORTED_AUI | SUPPORTED_Backplane)
+#define ADVERTISED_INTERFACES \
+ (ADVERTISED_TP | ADVERTISED_MII | ADVERTISED_FIBRE | \
+ ADVERTISED_BNC | ADVERTISED_AUI | ADVERTISED_Backplane)
+
+enum {
+ PHYLINK_DISABLE_STOPPED,
+ PHYLINK_DISABLE_LINK,
+};
+
+struct phylink {
+ struct net_device *netdev;
+ const struct phylink_mac_ops *ops;
+
+ unsigned long phylink_disable_state; /* bitmask of disables */
+ struct phy_device *phydev;
+ phy_interface_t link_interface; /* PHY_INTERFACE_xxx */
+ u8 link_an_mode; /* MLO_AN_xxx */
+ u8 link_port; /* The current non-phy ethtool port */
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+
+ /* The link configuration settings */
+ struct phylink_link_state link_config;
+ struct gpio_desc *link_gpio;
+
+ struct mutex state_mutex;
+ struct phylink_link_state phy_state;
+ struct work_struct resolve;
+
+ bool mac_link_dropped;
+
+ struct sfp_bus *sfp_bus;
+};
+
+static inline void linkmode_zero(unsigned long *dst)
+{
+ bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_copy(unsigned long *dst, const unsigned long *src)
+{
+ bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_and(unsigned long *dst, const unsigned long *a,
+ const unsigned long *b)
+{
+ bitmap_and(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_or(unsigned long *dst, const unsigned long *a,
+ const unsigned long *b)
+{
+ bitmap_or(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline bool linkmode_empty(const unsigned long *src)
+{
+ return bitmap_empty(src, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+void phylink_set_port_modes(unsigned long *mask)
+{
+ phylink_set(mask, TP);
+ phylink_set(mask, AUI);
+ phylink_set(mask, MII);
+ phylink_set(mask, FIBRE);
+ phylink_set(mask, BNC);
+ phylink_set(mask, Backplane);
+}
+EXPORT_SYMBOL_GPL(phylink_set_port_modes);
+
+static int phylink_is_empty_linkmode(const unsigned long *linkmode)
+{
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp) = { 0, };
+
+ phylink_set_port_modes(tmp);
+ phylink_set(tmp, Autoneg);
+ phylink_set(tmp, Pause);
+ phylink_set(tmp, Asym_Pause);
+
+ bitmap_andnot(tmp, linkmode, tmp, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ return linkmode_empty(tmp);
+}
+
+static const char *phylink_an_mode_str(unsigned int mode)
+{
+ static const char *modestr[] = {
+ [MLO_AN_PHY] = "phy",
+ [MLO_AN_FIXED] = "fixed",
+ [MLO_AN_SGMII] = "SGMII",
+ [MLO_AN_8023Z] = "802.3z",
+ };
+
+ return mode < ARRAY_SIZE(modestr) ? modestr[mode] : "unknown";
+}
+
+static int phylink_validate(struct phylink *pl, unsigned long *supported,
+ struct phylink_link_state *state)
+{
+ pl->ops->validate(pl->netdev, supported, state);
+
+ return phylink_is_empty_linkmode(supported) ? -EINVAL : 0;
+}
+
+static int phylink_parse_fixedlink(struct phylink *pl, struct device_node *np)
+{
+ struct device_node *fixed_node;
+ const struct phy_setting *s;
+ struct gpio_desc *desc;
+ const __be32 *fixed_prop;
+ u32 speed;
+ int ret, len;
+
+ fixed_node = of_get_child_by_name(np, "fixed-link");
+ if (fixed_node) {
+ ret = of_property_read_u32(fixed_node, "speed", &speed);
+
+ pl->link_config.speed = speed;
+ pl->link_config.duplex = DUPLEX_HALF;
+
+ if (of_property_read_bool(fixed_node, "full-duplex"))
+ pl->link_config.duplex = DUPLEX_FULL;
+
+ /* We treat the "pause" and "asym-pause" terminology as
+ * defining the link partner's ability. */
+ if (of_property_read_bool(fixed_node, "pause"))
+ pl->link_config.pause |= MLO_PAUSE_SYM;
+ if (of_property_read_bool(fixed_node, "asym-pause"))
+ pl->link_config.pause |= MLO_PAUSE_ASYM;
+
+ if (ret == 0) {
+ desc = fwnode_get_named_gpiod(&fixed_node->fwnode,
+ "link-gpios", 0,
+ GPIOD_IN, "?");
+
+ if (!IS_ERR(desc))
+ pl->link_gpio = desc;
+ else if (desc == ERR_PTR(-EPROBE_DEFER))
+ ret = -EPROBE_DEFER;
+ }
+ of_node_put(fixed_node);
+
+ if (ret)
+ return ret;
+ } else {
+ fixed_prop = of_get_property(np, "fixed-link", &len);
+ if (!fixed_prop) {
+ netdev_err(pl->netdev, "broken fixed-link?\n");
+ return -EINVAL;
+ }
+ if (len == 5 * sizeof(*fixed_prop)) {
+ pl->link_config.duplex = be32_to_cpu(fixed_prop[1]) ?
+ DUPLEX_FULL : DUPLEX_HALF;
+ pl->link_config.speed = be32_to_cpu(fixed_prop[2]);
+ if (be32_to_cpu(fixed_prop[3]))
+ pl->link_config.pause |= MLO_PAUSE_SYM;
+ if (be32_to_cpu(fixed_prop[4]))
+ pl->link_config.pause |= MLO_PAUSE_ASYM;
+ }
+ }
+
+ if (pl->link_config.speed > SPEED_1000 &&
+ pl->link_config.duplex != DUPLEX_FULL)
+ netdev_warn(pl->netdev, "fixed link specifies half duplex for %dMbps link?\n",
+ pl->link_config.speed);
+
+ bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ linkmode_copy(pl->link_config.advertising, pl->supported);
+ phylink_validate(pl, pl->supported, &pl->link_config);
+
+ s = phy_lookup_setting(pl->link_config.speed, pl->link_config.duplex,
+ pl->supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS, true);
+ linkmode_zero(pl->supported);
+ phylink_set(pl->supported, MII);
+ if (s) {
+ __set_bit(s->bit, pl->supported);
+ } else {
+ netdev_warn(pl->netdev, "fixed link %s duplex %dMbps not recognised\n",
+ pl->link_config.duplex == DUPLEX_FULL ? "full" : "half",
+ pl->link_config.speed);
+ }
+
+ linkmode_and(pl->link_config.advertising, pl->link_config.advertising,
+ pl->supported);
+
+ pl->link_config.link = 1;
+ pl->link_config.an_complete = 1;
+
+ return 0;
+}
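
For reference, the legacy fixed-link property decoded in the else branch is five u32 cells, historically <phy-id duplex speed pause asym-pause>, which matches the fixed_prop[1]..[4] indices read above.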
+
+static int phylink_parse_mode(struct phylink *pl, struct device_node *np)
+{
+ struct device_node *dn;
+ const char *managed;
+
+ dn = of_get_child_by_name(np, "fixed-link");
+ if (dn || of_find_property(np, "fixed-link", NULL))
+ pl->link_an_mode = MLO_AN_FIXED;
+ of_node_put(dn);
+
+ if (of_property_read_string(np, "managed", &managed) == 0 &&
+ strcmp(managed, "in-band-status") == 0) {
+ if (pl->link_an_mode == MLO_AN_FIXED) {
+ netdev_err(pl->netdev,
+ "can't use both fixed-link and in-band-status\n");
+ return -EINVAL;
+ }
+
+ linkmode_zero(pl->supported);
+ phylink_set(pl->supported, MII);
+ phylink_set(pl->supported, Autoneg);
+ phylink_set(pl->supported, Asym_Pause);
+ phylink_set(pl->supported, Pause);
+ pl->link_config.an_enabled = true;
+
+ switch (pl->link_config.interface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ phylink_set(pl->supported, 10baseT_Half);
+ phylink_set(pl->supported, 10baseT_Full);
+ phylink_set(pl->supported, 100baseT_Half);
+ phylink_set(pl->supported, 100baseT_Full);
+ phylink_set(pl->supported, 1000baseT_Half);
+ phylink_set(pl->supported, 1000baseT_Full);
+ pl->link_an_mode = MLO_AN_SGMII;
+ break;
+
+ case PHY_INTERFACE_MODE_1000BASEX:
+ phylink_set(pl->supported, 1000baseX_Full);
+ pl->link_an_mode = MLO_AN_8023Z;
+ break;
+
+ case PHY_INTERFACE_MODE_2500BASEX:
+ phylink_set(pl->supported, 2500baseX_Full);
+ pl->link_an_mode = MLO_AN_8023Z;
+ break;
+
+ case PHY_INTERFACE_MODE_10GKR:
+ phylink_set(pl->supported, 10baseT_Half);
+ phylink_set(pl->supported, 10baseT_Full);
+ phylink_set(pl->supported, 100baseT_Half);
+ phylink_set(pl->supported, 100baseT_Full);
+ phylink_set(pl->supported, 1000baseT_Half);
+ phylink_set(pl->supported, 1000baseT_Full);
+ phylink_set(pl->supported, 1000baseX_Full);
+ phylink_set(pl->supported, 10000baseKR_Full);
+ phylink_set(pl->supported, 10000baseCR_Full);
+ phylink_set(pl->supported, 10000baseSR_Full);
+ phylink_set(pl->supported, 10000baseLR_Full);
+ phylink_set(pl->supported, 10000baseLRM_Full);
+ phylink_set(pl->supported, 10000baseER_Full);
+ pl->link_an_mode = MLO_AN_SGMII;
+ break;
+
+ default:
+ netdev_err(pl->netdev,
+ "incorrect link mode %s for in-band status\n",
+ phy_modes(pl->link_config.interface));
+ return -EINVAL;
+ }
+
+ linkmode_copy(pl->link_config.advertising, pl->supported);
+
+ if (phylink_validate(pl, pl->supported, &pl->link_config)) {
+ netdev_err(pl->netdev,
+ "failed to validate link configuration for in-band status\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static void phylink_mac_config(struct phylink *pl,
+ const struct phylink_link_state *state)
+{
+ netdev_dbg(pl->netdev,
+ "%s: mode=%s/%s/%s/%s adv=%*pb pause=%02x link=%u an=%u\n",
+ __func__, phylink_an_mode_str(pl->link_an_mode),
+ phy_modes(state->interface),
+ phy_speed_to_str(state->speed),
+ phy_duplex_to_str(state->duplex),
+ __ETHTOOL_LINK_MODE_MASK_NBITS, state->advertising,
+ state->pause, state->link, state->an_enabled);
+
+ pl->ops->mac_config(pl->netdev, pl->link_an_mode, state);
+}
+
+static void phylink_mac_an_restart(struct phylink *pl)
+{
+ if (pl->link_config.an_enabled &&
+ (pl->link_config.interface == PHY_INTERFACE_MODE_1000BASEX ||
+ pl->link_config.interface == PHY_INTERFACE_MODE_2500BASEX))
+ pl->ops->mac_an_restart(pl->netdev);
+}
+
+static int phylink_get_mac_state(struct phylink *pl, struct phylink_link_state *state)
+{
+ struct net_device *ndev = pl->netdev;
+
+ linkmode_copy(state->advertising, pl->link_config.advertising);
+ linkmode_zero(state->lp_advertising);
+ state->interface = pl->link_config.interface;
+ state->an_enabled = pl->link_config.an_enabled;
+ state->link = 1;
+
+ return pl->ops->mac_link_state(ndev, state);
+}
+
+/* The fixed state is... fixed except for the link state,
+ * which may be determined by a GPIO.
+ */
+static void phylink_get_fixed_state(struct phylink *pl, struct phylink_link_state *state)
+{
+ *state = pl->link_config;
+ if (pl->link_gpio)
+ state->link = !!gpiod_get_value(pl->link_gpio);
+}
+
+/* Flow control is resolved according to our and the link partner's
+ * advertisements, using the following table drawn from the 802.3 specs:
+ * Local device Link partner
+ * Pause AsymDir Pause AsymDir Result
+ * 1 X 1 X TX+RX
+ * 0 1 1 1 RX
+ * 1 1 0 1 TX
+ */
+static void phylink_resolve_flow(struct phylink *pl,
+ struct phylink_link_state *state)
+{
+ int new_pause = 0;
+
+ if (pl->link_config.pause & MLO_PAUSE_AN) {
+ int pause = 0;
+
+ if (phylink_test(pl->link_config.advertising, Pause))
+ pause |= MLO_PAUSE_SYM;
+ if (phylink_test(pl->link_config.advertising, Asym_Pause))
+ pause |= MLO_PAUSE_ASYM;
+
+ pause &= state->pause;
+
+ if (pause & MLO_PAUSE_SYM)
+ new_pause = MLO_PAUSE_TX | MLO_PAUSE_RX;
+ else if (pause & MLO_PAUSE_ASYM)
+ new_pause = state->pause & MLO_PAUSE_SYM ?
+ MLO_PAUSE_RX : MLO_PAUSE_TX;
+ } else {
+ new_pause = pl->link_config.pause & MLO_PAUSE_TXRX_MASK;
+ }
+
+ state->pause &= ~MLO_PAUSE_TXRX_MASK;
+ state->pause |= new_pause;
+}
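
As a worked example of the resolution above: if the local end advertises Pause and Asym_Pause while the link partner advertises only Asym_Pause, the AND of the two leaves just MLO_PAUSE_ASYM; the partner's advertisement lacks MLO_PAUSE_SYM, so new_pause resolves to MLO_PAUSE_TX (transmit-only pause), matching the third row of the table.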
+
+static const char *phylink_pause_to_str(int pause)
+{
+ switch (pause & MLO_PAUSE_TXRX_MASK) {
+ case MLO_PAUSE_TX | MLO_PAUSE_RX:
+ return "rx/tx";
+ case MLO_PAUSE_TX:
+ return "tx";
+ case MLO_PAUSE_RX:
+ return "rx";
+ default:
+ return "off";
+ }
+}
+
+static void phylink_resolve(struct work_struct *w)
+{
+ struct phylink *pl = container_of(w, struct phylink, resolve);
+ struct phylink_link_state link_state;
+ struct net_device *ndev = pl->netdev;
+
+ mutex_lock(&pl->state_mutex);
+ if (pl->phylink_disable_state) {
+ pl->mac_link_dropped = false;
+ link_state.link = false;
+ } else if (pl->mac_link_dropped) {
+ link_state.link = false;
+ } else {
+ switch (pl->link_an_mode) {
+ case MLO_AN_PHY:
+ link_state = pl->phy_state;
+ phylink_resolve_flow(pl, &link_state);
+ phylink_mac_config(pl, &link_state);
+ break;
+
+ case MLO_AN_FIXED:
+ phylink_get_fixed_state(pl, &link_state);
+ phylink_mac_config(pl, &link_state);
+ break;
+
+ case MLO_AN_SGMII:
+ phylink_get_mac_state(pl, &link_state);
+ if (pl->phydev) {
+ bool changed = false;
+
+ link_state.link = link_state.link &&
+ pl->phy_state.link;
+
+ if (pl->phy_state.interface !=
+ link_state.interface) {
+ link_state.interface = pl->phy_state.interface;
+ changed = true;
+ }
+
+ /* Propagate the flow control from the PHY
+ * to the MAC. Also propagate the interface
+ * if changed.
+ */
+ if (pl->phy_state.link || changed) {
+ link_state.pause |= pl->phy_state.pause;
+ phylink_resolve_flow(pl, &link_state);
+
+ phylink_mac_config(pl, &link_state);
+ }
+ }
+ break;
+
+ case MLO_AN_8023Z:
+ phylink_get_mac_state(pl, &link_state);
+ break;
+ }
+ }
+
+ if (link_state.link != netif_carrier_ok(ndev)) {
+ if (!link_state.link) {
+ netif_carrier_off(ndev);
+ pl->ops->mac_link_down(ndev, pl->link_an_mode);
+ netdev_info(ndev, "Link is Down\n");
+ } else {
+ pl->ops->mac_link_up(ndev, pl->link_an_mode,
+ pl->phydev);
+
+ netif_carrier_on(ndev);
+
+ netdev_info(ndev,
+ "Link is Up - %s/%s - flow control %s\n",
+ phy_speed_to_str(link_state.speed),
+ phy_duplex_to_str(link_state.duplex),
+ phylink_pause_to_str(link_state.pause));
+ }
+ }
+ if (!link_state.link && pl->mac_link_dropped) {
+ pl->mac_link_dropped = false;
+ queue_work(system_power_efficient_wq, &pl->resolve);
+ }
+ mutex_unlock(&pl->state_mutex);
+}
+
+static void phylink_run_resolve(struct phylink *pl)
+{
+ if (!pl->phylink_disable_state)
+ queue_work(system_power_efficient_wq, &pl->resolve);
+}
+
+static const struct sfp_upstream_ops sfp_phylink_ops;
+
+static int phylink_register_sfp(struct phylink *pl, struct device_node *np)
+{
+ struct device_node *sfp_np;
+
+ sfp_np = of_parse_phandle(np, "sfp", 0);
+ if (!sfp_np)
+ return 0;
+
+ pl->sfp_bus = sfp_register_upstream(sfp_np, pl->netdev, pl,
+ &sfp_phylink_ops);
+ if (!pl->sfp_bus)
+ return -ENOMEM;
+
+ return 0;
+}
+
+struct phylink *phylink_create(struct net_device *ndev, struct device_node *np,
+ phy_interface_t iface, const struct phylink_mac_ops *ops)
+{
+ struct phylink *pl;
+ int ret;
+
+ pl = kzalloc(sizeof(*pl), GFP_KERNEL);
+ if (!pl)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&pl->state_mutex);
+ INIT_WORK(&pl->resolve, phylink_resolve);
+ pl->netdev = ndev;
+ pl->phy_state.interface = iface;
+ pl->link_interface = iface;
+ pl->link_port = PORT_MII;
+ pl->link_config.interface = iface;
+ pl->link_config.pause = MLO_PAUSE_AN;
+ pl->link_config.speed = SPEED_UNKNOWN;
+ pl->link_config.duplex = DUPLEX_UNKNOWN;
+ pl->ops = ops;
+ __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
+
+ bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ linkmode_copy(pl->link_config.advertising, pl->supported);
+ phylink_validate(pl, pl->supported, &pl->link_config);
+
+ ret = phylink_parse_mode(pl, np);
+ if (ret < 0) {
+ kfree(pl);
+ return ERR_PTR(ret);
+ }
+
+ if (pl->link_an_mode == MLO_AN_FIXED) {
+ ret = phylink_parse_fixedlink(pl, np);
+ if (ret < 0) {
+ kfree(pl);
+ return ERR_PTR(ret);
+ }
+ }
+
+ ret = phylink_register_sfp(pl, np);
+ if (ret < 0) {
+ kfree(pl);
+ return ERR_PTR(ret);
+ }
+
+ return pl;
+}
+EXPORT_SYMBOL_GPL(phylink_create);
+
+void phylink_destroy(struct phylink *pl)
+{
+ if (pl->sfp_bus)
+ sfp_unregister_upstream(pl->sfp_bus);
+
+ cancel_work_sync(&pl->resolve);
+ kfree(pl);
+}
+EXPORT_SYMBOL_GPL(phylink_destroy);
+
+void phylink_phy_change(struct phy_device *phydev, bool up, bool do_carrier)
+{
+ struct phylink *pl = phydev->phylink;
+
+ mutex_lock(&pl->state_mutex);
+ pl->phy_state.speed = phydev->speed;
+ pl->phy_state.duplex = phydev->duplex;
+ pl->phy_state.pause = MLO_PAUSE_NONE;
+ if (phydev->pause)
+ pl->phy_state.pause |= MLO_PAUSE_SYM;
+ if (phydev->asym_pause)
+ pl->phy_state.pause |= MLO_PAUSE_ASYM;
+ pl->phy_state.interface = phydev->interface;
+ pl->phy_state.link = up;
+ mutex_unlock(&pl->state_mutex);
+
+ phylink_run_resolve(pl);
+
+ netdev_dbg(pl->netdev, "phy link %s %s/%s/%s\n", up ? "up" : "down",
+ phy_modes(phydev->interface),
+ phy_speed_to_str(phydev->speed),
+ phy_duplex_to_str(phydev->duplex));
+}
+
+static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
+{
+ struct phylink_link_state config;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+ u32 advertising;
+ int ret;
+
+ memset(&config, 0, sizeof(config));
+ ethtool_convert_legacy_u32_to_link_mode(supported, phy->supported);
+ ethtool_convert_legacy_u32_to_link_mode(config.advertising,
+ phy->advertising);
+ config.interface = pl->link_config.interface;
+
+ /*
+ * This is the new way of dealing with flow control for PHYs,
+ * as described by Timur Tabi in commit 529ed1275263 ("net: phy:
+ * phy drivers should not set SUPPORTED_[Asym_]Pause"), except
+ * that, via our validate call to the MAC, we rely upon the MAC
+ * clearing the bits from both the supported and advertising fields.
+ */
+ if (phylink_test(supported, Pause))
+ phylink_set(config.advertising, Pause);
+ if (phylink_test(supported, Asym_Pause))
+ phylink_set(config.advertising, Asym_Pause);
+
+ ret = phylink_validate(pl, supported, &config);
+ if (ret)
+ return ret;
+
+ phy->phylink = pl;
+ phy->phy_link_change = phylink_phy_change;
+
+ netdev_info(pl->netdev,
+ "PHY [%s] driver [%s]\n", dev_name(&phy->mdio.dev),
+ phy->drv->name);
+
+ mutex_lock(&phy->lock);
+ mutex_lock(&pl->state_mutex);
+ pl->netdev->phydev = phy;
+ pl->phydev = phy;
+ linkmode_copy(pl->supported, supported);
+ linkmode_copy(pl->link_config.advertising, config.advertising);
+
+ /* Restrict the phy advertisement according to the MAC support. */
+ ethtool_convert_link_mode_to_legacy_u32(&advertising, config.advertising);
+ phy->advertising = advertising;
+ mutex_unlock(&pl->state_mutex);
+ mutex_unlock(&phy->lock);
+
+ netdev_dbg(pl->netdev,
+ "phy: setting supported %*pb advertising 0x%08x\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, pl->supported,
+ phy->advertising);
+
+ phy_start_machine(phy);
+ if (phy->irq > 0)
+ phy_start_interrupts(phy);
+
+ return 0;
+}
+
+int phylink_connect_phy(struct phylink *pl, struct phy_device *phy)
+{
+ int ret;
+
+ ret = phy_attach_direct(pl->netdev, phy, 0, pl->link_interface);
+ if (ret)
+ return ret;
+
+ ret = phylink_bringup_phy(pl, phy);
+ if (ret)
+ phy_detach(phy);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_connect_phy);
+
+int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn)
+{
+ struct device_node *phy_node;
+ struct phy_device *phy_dev;
+ int ret;
+
+ /* Fixed links are handled without needing a PHY */
+ if (pl->link_an_mode == MLO_AN_FIXED)
+ return 0;
+
+ phy_node = of_parse_phandle(dn, "phy-handle", 0);
+ if (!phy_node)
+ phy_node = of_parse_phandle(dn, "phy", 0);
+ if (!phy_node)
+ phy_node = of_parse_phandle(dn, "phy-device", 0);
+
+ if (!phy_node) {
+ if (pl->link_an_mode == MLO_AN_PHY) {
+ netdev_err(pl->netdev, "unable to find PHY node\n");
+ return -ENODEV;
+ }
+ return 0;
+ }
+
+ phy_dev = of_phy_attach(pl->netdev, phy_node, 0, pl->link_interface);
+ /* We're done with the phy_node handle */
+ of_node_put(phy_node);
+
+ if (!phy_dev)
+ return -ENODEV;
+
+ ret = phylink_bringup_phy(pl, phy_dev);
+ if (ret)
+ phy_detach(phy_dev);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_of_phy_connect);
+
+void phylink_disconnect_phy(struct phylink *pl)
+{
+ struct phy_device *phy;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ phy = pl->phydev;
+ if (phy) {
+ mutex_lock(&phy->lock);
+ mutex_lock(&pl->state_mutex);
+ pl->netdev->phydev = NULL;
+ pl->phydev = NULL;
+ mutex_unlock(&pl->state_mutex);
+ mutex_unlock(&phy->lock);
+ flush_work(&pl->resolve);
+
+ phy_disconnect(phy);
+ }
+}
+EXPORT_SYMBOL_GPL(phylink_disconnect_phy);
+
+void phylink_mac_change(struct phylink *pl, bool up)
+{
+ if (!up)
+ pl->mac_link_dropped = true;
+ phylink_run_resolve(pl);
+ netdev_dbg(pl->netdev, "mac link %s\n", up ? "up" : "down");
+}
+EXPORT_SYMBOL_GPL(phylink_mac_change);
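
A MAC driver reporting in-band link events would call this from its link interrupt handler; a hypothetical sketch, with my_mac_priv, its phylink pointer and my_mac_link_ok() standing in for driver specifics:

static irqreturn_t my_mac_link_isr(int irq, void *dev_id)
{
	struct my_mac_priv *priv = dev_id;	/* hypothetical type */
	bool up = my_mac_link_ok(priv);		/* hypothetical helper */

	/* Record the event and kick the resolver work item */
	phylink_mac_change(priv->phylink, up);

	return IRQ_HANDLED;
}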
+
+void phylink_start(struct phylink *pl)
+{
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ netdev_info(pl->netdev, "configuring for %s/%s link mode\n",
+ phylink_an_mode_str(pl->link_an_mode),
+ phy_modes(pl->link_config.interface));
+
+ /* Apply the link configuration to the MAC when starting. This allows
+ * a fixed-link to start with the correct parameters, and also
+ * ensures that we set the appropriate advertisement for Serdes links.
+ */
+ phylink_resolve_flow(pl, &pl->link_config);
+ phylink_mac_config(pl, &pl->link_config);
+
+ clear_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
+ phylink_run_resolve(pl);
+
+ if (pl->sfp_bus)
+ sfp_upstream_start(pl->sfp_bus);
+ if (pl->phydev)
+ phy_start(pl->phydev);
+}
+EXPORT_SYMBOL_GPL(phylink_start);
+
+void phylink_stop(struct phylink *pl)
+{
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (pl->phydev)
+ phy_stop(pl->phydev);
+ if (pl->sfp_bus)
+ sfp_upstream_stop(pl->sfp_bus);
+
+ set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
+ flush_work(&pl->resolve);
+}
+EXPORT_SYMBOL_GPL(phylink_stop);
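
Putting the pieces together, a MAC driver would drive this API roughly as follows; a hedged sketch, with my_mac_ops standing in for the driver's phylink_mac_ops implementation:

	struct phylink *pl;
	int err;

	/* probe */
	pl = phylink_create(ndev, pdev->dev.of_node,
			    PHY_INTERFACE_MODE_SGMII, &my_mac_ops);
	if (IS_ERR(pl))
		return PTR_ERR(pl);

	/* ndo_open */
	err = phylink_of_phy_connect(pl, pdev->dev.of_node);
	if (err)
		return err;
	phylink_start(pl);

	/* ndo_stop */
	phylink_stop(pl);
	phylink_disconnect_phy(pl);

	/* remove */
	phylink_destroy(pl);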
+
+void phylink_ethtool_get_wol(struct phylink *pl, struct ethtool_wolinfo *wol)
+{
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ wol->supported = 0;
+ wol->wolopts = 0;
+
+ if (pl->phydev)
+ phy_ethtool_get_wol(pl->phydev, wol);
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_get_wol);
+
+int phylink_ethtool_set_wol(struct phylink *pl, struct ethtool_wolinfo *wol)
+{
+ int ret = -EOPNOTSUPP;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (pl->phydev)
+ ret = phy_ethtool_set_wol(pl->phydev, wol);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_set_wol);
+
+static void phylink_merge_link_mode(unsigned long *dst, const unsigned long *b)
+{
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(mask);
+
+ linkmode_zero(mask);
+ phylink_set_port_modes(mask);
+
+ linkmode_and(dst, dst, mask);
+ linkmode_or(dst, dst, b);
+}
+
+static void phylink_get_ksettings(const struct phylink_link_state *state,
+ struct ethtool_link_ksettings *kset)
+{
+ phylink_merge_link_mode(kset->link_modes.advertising, state->advertising);
+ linkmode_copy(kset->link_modes.lp_advertising, state->lp_advertising);
+ kset->base.speed = state->speed;
+ kset->base.duplex = state->duplex;
+ kset->base.autoneg = state->an_enabled ? AUTONEG_ENABLE :
+ AUTONEG_DISABLE;
+}
+
+int phylink_ethtool_ksettings_get(struct phylink *pl,
+ struct ethtool_link_ksettings *kset)
+{
+ struct phylink_link_state link_state;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (pl->phydev) {
+ phy_ethtool_ksettings_get(pl->phydev, kset);
+ } else {
+ kset->base.port = pl->link_port;
+ }
+
+ linkmode_copy(kset->link_modes.supported, pl->supported);
+
+ switch (pl->link_an_mode) {
+ case MLO_AN_FIXED:
+ /* We are using fixed settings. Report these as the
+ * current link settings - and note that these also
+ * represent the supported speeds/duplex/pause modes.
+ */
+ phylink_get_fixed_state(pl, &link_state);
+ phylink_get_ksettings(&link_state, kset);
+ break;
+
+ case MLO_AN_SGMII:
+ /* If there is a phy attached, then use the reported
+ * settings from the phy with no modification.
+ */
+ if (pl->phydev)
+ break;
+
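+ /* fall through */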
+ case MLO_AN_8023Z:
+ phylink_get_mac_state(pl, &link_state);
+
+ /* The MAC is reporting the link results from its own PCS
+ * layer via in-band status. Report these as the current
+ * link settings.
+ */
+ phylink_get_ksettings(&link_state, kset);
+ break;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_ksettings_get);
+
+int phylink_ethtool_ksettings_set(struct phylink *pl,
+ const struct ethtool_link_ksettings *kset)
+{
+ struct ethtool_link_ksettings our_kset;
+ struct phylink_link_state config;
+ int ret;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (kset->base.autoneg != AUTONEG_DISABLE &&
+ kset->base.autoneg != AUTONEG_ENABLE)
+ return -EINVAL;
+
+ config = pl->link_config;
+
+ /* Mask out unsupported advertisements */
+ linkmode_and(config.advertising, kset->link_modes.advertising,
+ pl->supported);
+
+ /* FIXME: should we reject autoneg if phy/mac does not support it? */
+ if (kset->base.autoneg == AUTONEG_DISABLE) {
+ const struct phy_setting *s;
+
+ /* Autonegotiation disabled, select a suitable speed and
+ * duplex.
+ */
+ s = phy_lookup_setting(kset->base.speed, kset->base.duplex,
+ pl->supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS, false);
+ if (!s)
+ return -EINVAL;
+
+ /* If we have a fixed link (as specified by firmware), refuse
+ * to change link parameters.
+ */
+ if (pl->link_an_mode == MLO_AN_FIXED &&
+ (s->speed != pl->link_config.speed ||
+ s->duplex != pl->link_config.duplex))
+ return -EINVAL;
+
+ config.speed = s->speed;
+ config.duplex = s->duplex;
+ config.an_enabled = false;
+
+ __clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising);
+ } else {
+ /* If we have a fixed link, refuse to enable autonegotiation */
+ if (pl->link_an_mode == MLO_AN_FIXED)
+ return -EINVAL;
+
+ config.speed = SPEED_UNKNOWN;
+ config.duplex = DUPLEX_UNKNOWN;
+ config.an_enabled = true;
+
+ __set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising);
+ }
+
+ if (phylink_validate(pl, pl->supported, &config))
+ return -EINVAL;
+
+ /* If autonegotiation is enabled, we must have an advertisement */
+ if (config.an_enabled && phylink_is_empty_linkmode(config.advertising))
+ return -EINVAL;
+
+ our_kset = *kset;
+ linkmode_copy(our_kset.link_modes.advertising, config.advertising);
+ our_kset.base.speed = config.speed;
+ our_kset.base.duplex = config.duplex;
+
+ /* If we have a PHY, configure the phy */
+ if (pl->phydev) {
+ ret = phy_ethtool_ksettings_set(pl->phydev, &our_kset);
+ if (ret)
+ return ret;
+ }
+
+ mutex_lock(&pl->state_mutex);
+ /* Configure the MAC to match the new settings */
+ linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising);
+ pl->link_config.speed = our_kset.base.speed;
+ pl->link_config.duplex = our_kset.base.duplex;
+ pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE;
+
+ if (!test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) {
+ phylink_mac_config(pl, &pl->link_config);
+ phylink_mac_an_restart(pl);
+ }
+ mutex_unlock(&pl->state_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_ksettings_set);
+
+int phylink_ethtool_nway_reset(struct phylink *pl)
+{
+ int ret = 0;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (pl->phydev)
+ ret = phy_restart_aneg(pl->phydev);
+ phylink_mac_an_restart(pl);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_nway_reset);
+
+void phylink_ethtool_get_pauseparam(struct phylink *pl,
+ struct ethtool_pauseparam *pause)
+{
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ pause->autoneg = !!(pl->link_config.pause & MLO_PAUSE_AN);
+ pause->rx_pause = !!(pl->link_config.pause & MLO_PAUSE_RX);
+ pause->tx_pause = !!(pl->link_config.pause & MLO_PAUSE_TX);
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_get_pauseparam);
+
+int phylink_ethtool_set_pauseparam(struct phylink *pl,
+ struct ethtool_pauseparam *pause)
+{
+ struct phylink_link_state *config = &pl->link_config;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (!phylink_test(pl->supported, Pause) &&
+ !phylink_test(pl->supported, Asym_Pause))
+ return -EOPNOTSUPP;
+
+ if (!phylink_test(pl->supported, Asym_Pause) &&
+ !pause->autoneg && pause->rx_pause != pause->tx_pause)
+ return -EINVAL;
+
+ config->pause &= ~(MLO_PAUSE_AN | MLO_PAUSE_TXRX_MASK);
+
+ if (pause->autoneg)
+ config->pause |= MLO_PAUSE_AN;
+ if (pause->rx_pause)
+ config->pause |= MLO_PAUSE_RX;
+ if (pause->tx_pause)
+ config->pause |= MLO_PAUSE_TX;
+
+ if (!test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) {
+ switch (pl->link_an_mode) {
+ case MLO_AN_PHY:
+ /* Silently mark the carrier down, and then trigger a resolve */
+ netif_carrier_off(pl->netdev);
+ phylink_run_resolve(pl);
+ break;
+
+ case MLO_AN_FIXED:
+ /* Should we allow fixed links to change against the config? */
+ phylink_resolve_flow(pl, config);
+ phylink_mac_config(pl, config);
+ break;
+
+ case MLO_AN_SGMII:
+ case MLO_AN_8023Z:
+ phylink_mac_config(pl, config);
+ phylink_mac_an_restart(pl);
+ break;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_set_pauseparam);
+
+int phylink_ethtool_get_module_info(struct phylink *pl,
+ struct ethtool_modinfo *modinfo)
+{
+ int ret = -EOPNOTSUPP;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (pl->sfp_bus)
+ ret = sfp_get_module_info(pl->sfp_bus, modinfo);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_get_module_info);
+
+int phylink_ethtool_get_module_eeprom(struct phylink *pl,
+ struct ethtool_eeprom *ee, u8 *buf)
+{
+ int ret = -EOPNOTSUPP;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (pl->sfp_bus)
+ ret = sfp_get_module_eeprom(pl->sfp_bus, ee, buf);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_get_module_eeprom);
+
+int phylink_init_eee(struct phylink *pl, bool clk_stop_enable)
+{
+ int ret = -EPROTONOSUPPORT;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (pl->phydev)
+ ret = phy_init_eee(pl->phydev, clk_stop_enable);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_init_eee);
+
+int phylink_get_eee_err(struct phylink *pl)
+{
+ int ret = 0;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (pl->phydev)
+ ret = phy_get_eee_err(pl->phydev);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_get_eee_err);
+
+int phylink_ethtool_get_eee(struct phylink *pl, struct ethtool_eee *eee)
+{
+ int ret = -EOPNOTSUPP;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (pl->phydev)
+ ret = phy_ethtool_get_eee(pl->phydev, eee);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_get_eee);
+
+int phylink_ethtool_set_eee(struct phylink *pl, struct ethtool_eee *eee)
+{
+ int ret = -EOPNOTSUPP;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (pl->phydev)
+ ret = phy_ethtool_set_eee(pl->phydev, eee);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_ethtool_set_eee);
+
+/* This emulates MII registers for a fixed-mode phy operating as per the
+ * passed in state. "aneg" defines if we report negotiation is possible.
+ *
+ * FIXME: should deal with negotiation state too.
+ */
+static int phylink_mii_emul_read(struct net_device *ndev, unsigned int reg,
+ struct phylink_link_state *state, bool aneg)
+{
+ struct fixed_phy_status fs;
+ int val;
+
+ fs.link = state->link;
+ fs.speed = state->speed;
+ fs.duplex = state->duplex;
+ fs.pause = state->pause & MLO_PAUSE_SYM;
+ fs.asym_pause = state->pause & MLO_PAUSE_ASYM;
+
+ val = swphy_read_reg(reg, &fs);
+ if (reg == MII_BMSR) {
+ if (!state->an_complete)
+ val &= ~BMSR_ANEGCOMPLETE;
+ if (!aneg)
+ val &= ~BMSR_ANEGCAPABLE;
+ }
+ return val;
+}
+
+static int phylink_phy_read(struct phylink *pl, unsigned int phy_id,
+ unsigned int reg)
+{
+ struct phy_device *phydev = pl->phydev;
+ int prtad, devad;
+
+ if (mdio_phy_id_is_c45(phy_id)) {
+ prtad = mdio_phy_id_prtad(phy_id);
+ devad = mdio_phy_id_devad(phy_id);
+ devad = MII_ADDR_C45 | devad << 16 | reg;
+ } else if (phydev->is_c45) {
+ switch (reg) {
+ case MII_BMCR:
+ case MII_BMSR:
+ case MII_PHYSID1:
+ case MII_PHYSID2:
+ devad = __ffs(phydev->c45_ids.devices_in_package);
+ break;
+ case MII_ADVERTISE:
+ case MII_LPA:
+ if (!(phydev->c45_ids.devices_in_package & MDIO_DEVS_AN))
+ return -EINVAL;
+ devad = MDIO_MMD_AN;
+ if (reg == MII_ADVERTISE)
+ reg = MDIO_AN_ADVERTISE;
+ else
+ reg = MDIO_AN_LPA;
+ break;
+ default:
+ return -EINVAL;
+ }
+ prtad = phy_id;
+ devad = MII_ADDR_C45 | devad << 16 | reg;
+ } else {
+ prtad = phy_id;
+ devad = reg;
+ }
+ return mdiobus_read(phydev->mdio.bus, prtad, devad);
+}
+
+static int phylink_phy_write(struct phylink *pl, unsigned int phy_id,
+ unsigned int reg, unsigned int val)
+{
+ struct phy_device *phydev = pl->phydev;
+ int prtad, devad;
+
+ if (mdio_phy_id_is_c45(phy_id)) {
+ prtad = mdio_phy_id_prtad(phy_id);
+ devad = mdio_phy_id_devad(phy_id);
+ devad = MII_ADDR_C45 | devad << 16 | reg;
+ } else if (phydev->is_c45) {
+ switch (reg) {
+ case MII_BMCR:
+ case MII_BMSR:
+ case MII_PHYSID1:
+ case MII_PHYSID2:
+ devad = __ffs(phydev->c45_ids.devices_in_package);
+ break;
+ case MII_ADVERTISE:
+ case MII_LPA:
+ if (!(phydev->c45_ids.devices_in_package & MDIO_DEVS_AN))
+ return -EINVAL;
+ devad = MDIO_MMD_AN;
+ if (reg == MII_ADVERTISE)
+ reg = MDIO_AN_ADVERTISE;
+ else
+ reg = MDIO_AN_LPA;
+ break;
+ default:
+ return -EINVAL;
+ }
+ prtad = phy_id;
+ devad = MII_ADDR_C45 | devad << 16 | reg;
+ } else {
+ prtad = phy_id;
+ devad = reg;
+ }
+
+ return mdiobus_write(phydev->mdio.bus, prtad, devad, val);
+}
+
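+/* Emulate a PHY at address 0 for the fixed and in-band (SGMII/802.3z)
+ * modes using the software-PHY register translation. MLO_AN_PHY is not
+ * handled here; the real PHY is accessed via phylink_phy_read().
+ */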
+static int phylink_mii_read(struct phylink *pl, unsigned int phy_id,
+ unsigned int reg)
+{
+ struct phylink_link_state state;
+ int val = 0xffff;
+
+ switch (pl->link_an_mode) {
+ case MLO_AN_FIXED:
+ if (phy_id == 0) {
+ phylink_get_fixed_state(pl, &state);
+ val = phylink_mii_emul_read(pl->netdev, reg, &state,
+ true);
+ }
+ break;
+
+ case MLO_AN_PHY:
+ return -EOPNOTSUPP;
+
+ case MLO_AN_SGMII:
+ /* No phy, fall through to 8023z method */
+ case MLO_AN_8023Z:
+ if (phy_id == 0) {
+ val = phylink_get_mac_state(pl, &state);
+ if (val < 0)
+ return val;
+
+ val = phylink_mii_emul_read(pl->netdev, reg, &state,
+ true);
+ }
+ break;
+ }
+
+ return val & 0xffff;
+}
+
+static int phylink_mii_write(struct phylink *pl, unsigned int phy_id,
+ unsigned int reg, unsigned int val)
+{
+ switch (pl->link_an_mode) {
+ case MLO_AN_FIXED:
+ break;
+
+ case MLO_AN_PHY:
+ return -EOPNOTSUPP;
+
+ case MLO_AN_SGMII:
+ /* No phy, fall through to 8023z method */
+ case MLO_AN_8023Z:
+ break;
+ }
+
+ return 0;
+}
+
+int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd)
+{
+ struct mii_ioctl_data *mii = if_mii(ifr);
+ int ret;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ if (pl->phydev) {
+ /* PHYs only exist for MLO_AN_PHY and MLO_AN_SGMII */
+ switch (cmd) {
+ case SIOCGMIIPHY:
+ mii->phy_id = pl->phydev->mdio.addr;
+
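+ /* fall through */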
+ case SIOCGMIIREG:
+ ret = phylink_phy_read(pl, mii->phy_id, mii->reg_num);
+ if (ret >= 0) {
+ mii->val_out = ret;
+ ret = 0;
+ }
+ break;
+
+ case SIOCSMIIREG:
+ ret = phylink_phy_write(pl, mii->phy_id, mii->reg_num,
+ mii->val_in);
+ break;
+
+ default:
+ ret = phy_mii_ioctl(pl->phydev, ifr, cmd);
+ break;
+ }
+ } else {
+ switch (cmd) {
+ case SIOCGMIIPHY:
+ mii->phy_id = 0;
+
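+ /* fall through */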
+ case SIOCGMIIREG:
+ ret = phylink_mii_read(pl, mii->phy_id, mii->reg_num);
+ if (ret >= 0) {
+ mii->val_out = ret;
+ ret = 0;
+ }
+ break;
+
+ case SIOCSMIIREG:
+ ret = phylink_mii_write(pl, mii->phy_id, mii->reg_num,
+ mii->val_in);
+ break;
+
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_mii_ioctl);
+
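+/* An SFP module has been inserted: work out which link mode and MAC
+ * configuration it needs, and reprogram the MAC if anything changed.
+ */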
+static int phylink_sfp_module_insert(void *upstream,
+ const struct sfp_eeprom_id *id)
+{
+ struct phylink *pl = upstream;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(support) = { 0, };
+ struct phylink_link_state config;
+ phy_interface_t iface;
+ int mode, ret = 0;
+ bool changed;
+ u8 port;
+
+ sfp_parse_support(pl->sfp_bus, id, support);
+ port = sfp_parse_port(pl->sfp_bus, id, support);
+ iface = sfp_parse_interface(pl->sfp_bus, id);
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ switch (iface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ mode = MLO_AN_SGMII;
+ break;
+ case PHY_INTERFACE_MODE_1000BASEX:
+ mode = MLO_AN_8023Z;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ memset(&config, 0, sizeof(config));
+ linkmode_copy(config.advertising, support);
+ config.interface = iface;
+ config.speed = SPEED_UNKNOWN;
+ config.duplex = DUPLEX_UNKNOWN;
+ config.pause = MLO_PAUSE_AN;
+ config.an_enabled = pl->link_config.an_enabled;
+
+ /* Validate the configuration against the MAC's capabilities */
+ ret = phylink_validate(pl, support, &config);
+ if (ret) {
+ netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
+ phylink_an_mode_str(mode), phy_modes(config.interface),
+ __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+ return ret;
+ }
+
+ netdev_dbg(pl->netdev, "requesting link mode %s/%s with support %*pb\n",
+ phylink_an_mode_str(mode), phy_modes(config.interface),
+ __ETHTOOL_LINK_MODE_MASK_NBITS, support);
+
+ if (mode == MLO_AN_8023Z && pl->phydev)
+ return -EINVAL;
+
+ changed = !bitmap_equal(pl->supported, support,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+ if (changed) {
+ linkmode_copy(pl->supported, support);
+ linkmode_copy(pl->link_config.advertising, config.advertising);
+ }
+
+ if (pl->link_an_mode != mode ||
+ pl->link_config.interface != config.interface) {
+ pl->link_config.interface = config.interface;
+ pl->link_an_mode = mode;
+
+ changed = true;
+
+ netdev_info(pl->netdev, "switched to %s/%s link mode\n",
+ phylink_an_mode_str(mode),
+ phy_modes(config.interface));
+ }
+
+ pl->link_port = port;
+
+ if (changed && !test_bit(PHYLINK_DISABLE_STOPPED,
+ &pl->phylink_disable_state))
+ phylink_mac_config(pl, &pl->link_config);
+
+ return ret;
+}
+
+static void phylink_sfp_link_down(void *upstream)
+{
+ struct phylink *pl = upstream;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state);
+ flush_work(&pl->resolve);
+
+ netif_carrier_off(pl->netdev);
+}
+
+static void phylink_sfp_link_up(void *upstream)
+{
+ struct phylink *pl = upstream;
+
+ WARN_ON(!lockdep_rtnl_is_held());
+
+ clear_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state);
+ phylink_run_resolve(pl);
+}
+
+static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy)
+{
+ return phylink_connect_phy(upstream, phy);
+}
+
+static void phylink_sfp_disconnect_phy(void *upstream)
+{
+ phylink_disconnect_phy(upstream);
+}
+
+static const struct sfp_upstream_ops sfp_phylink_ops = {
+ .module_insert = phylink_sfp_module_insert,
+ .link_up = phylink_sfp_link_up,
+ .link_down = phylink_sfp_link_down,
+ .connect_phy = phylink_sfp_connect_phy,
+ .disconnect_phy = phylink_sfp_disconnect_phy,
+};
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/rockchip.c b/drivers/net/phy/rockchip.c
new file mode 100644
index 0000000..c092af1
--- /dev/null
+++ b/drivers/net/phy/rockchip.c
@@ -0,0 +1,233 @@
+/**
+ * drivers/net/phy/rockchip.c
+ *
+ * Driver for ROCKCHIP Ethernet PHYs
+ *
+ * Copyright (c) 2017, Fuzhou Rockchip Electronics Co., Ltd
+ *
+ * David Wu <david.wu@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mii.h>
+#include <linux/netdevice.h>
+#include <linux/phy.h>
+
+#define INTERNAL_EPHY_ID 0x1234d400
+
+#define MII_INTERNAL_CTRL_STATUS 17
+#define SMI_ADDR_TSTCNTL 20
+#define SMI_ADDR_TSTREAD1 21
+#define SMI_ADDR_TSTREAD2 22
+#define SMI_ADDR_TSTWRITE 23
+#define MII_SPECIAL_CONTROL_STATUS 31
+
+#define MII_AUTO_MDIX_EN BIT(7)
+#define MII_MDIX_EN BIT(6)
+
+#define MII_SPEED_10 BIT(2)
+#define MII_SPEED_100 BIT(3)
+
+#define TSTCNTL_RD (BIT(15) | BIT(10))
+#define TSTCNTL_WR (BIT(14) | BIT(10))
+
+#define TSTMODE_ENABLE 0x400
+#define TSTMODE_DISABLE 0x0
+
+#define WR_ADDR_A7CFG 0x18
+
+static int rockchip_init_tstmode(struct phy_device *phydev)
+{
+ int ret;
+
+ /* Enable access to Analog and DSP register banks */
+ ret = phy_write(phydev, SMI_ADDR_TSTCNTL, TSTMODE_ENABLE);
+ if (ret)
+ return ret;
+
+ ret = phy_write(phydev, SMI_ADDR_TSTCNTL, TSTMODE_DISABLE);
+ if (ret)
+ return ret;
+
+ return phy_write(phydev, SMI_ADDR_TSTCNTL, TSTMODE_ENABLE);
+}
+
+static int rockchip_close_tstmode(struct phy_device *phydev)
+{
+ /* Back to basic register bank */
+ return phy_write(phydev, SMI_ADDR_TSTCNTL, TSTMODE_DISABLE);
+}
+
+static int rockchip_integrated_phy_analog_init(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = rockchip_init_tstmode(phydev);
+ if (ret)
+ return ret;
+
+ /*
+ * Adjust the tx amplitude to improve signal quality;
+ * the default value is 0x8.
+ */
+ ret = phy_write(phydev, SMI_ADDR_TSTWRITE, 0xB);
+ if (ret)
+ return ret;
+ ret = phy_write(phydev, SMI_ADDR_TSTCNTL, TSTCNTL_WR | WR_ADDR_A7CFG);
+ if (ret)
+ return ret;
+
+ return rockchip_close_tstmode(phydev);
+}
+
+static int rockchip_integrated_phy_config_init(struct phy_device *phydev)
+{
+ int val, ret;
+
+ /*
+ * Auto MDIX causes link problems on some boards, so
+ * disable it as a workaround.
+ */
+ val = phy_read(phydev, MII_INTERNAL_CTRL_STATUS);
+ if (val < 0)
+ return val;
+ val &= ~MII_AUTO_MDIX_EN;
+ ret = phy_write(phydev, MII_INTERNAL_CTRL_STATUS, val);
+ if (ret)
+ return ret;
+
+ return rockchip_integrated_phy_analog_init(phydev);
+}
+
+static void rockchip_link_change_notify(struct phy_device *phydev)
+{
+ int speed = SPEED_10;
+
+ if (phydev->autoneg == AUTONEG_ENABLE) {
+ int reg = phy_read(phydev, MII_SPECIAL_CONTROL_STATUS);
+
+ if (reg < 0) {
+ phydev_err(phydev, "phy_read err: %d.\n", reg);
+ return;
+ }
+
+ if (reg & MII_SPEED_100)
+ speed = SPEED_100;
+ else if (reg & MII_SPEED_10)
+ speed = SPEED_10;
+ } else {
+ int bmcr = phy_read(phydev, MII_BMCR);
+
+ if (bmcr < 0) {
+ phydev_err(phydev, "phy_read err: %d.\n", bmcr);
+ return;
+ }
+
+ if (bmcr & BMCR_SPEED100)
+ speed = SPEED_100;
+ else
+ speed = SPEED_10;
+ }
+
+ /*
+ * If the mode switches from 10BT to 100BT, all DSP/AFE
+ * registers are reset to their default values, so the
+ * AFE/DSP registers have to be re-initialized.
+ */
+ if ((phydev->speed == SPEED_10) && (speed == SPEED_100)) {
+ int ret = rockchip_integrated_phy_analog_init(phydev);
+
+ if (ret)
+ phydev_err(phydev, "rockchip_integrated_phy_analog_init err: %d.\n",
+ ret);
+ }
+}
+
+static int rockchip_set_polarity(struct phy_device *phydev, int polarity)
+{
+ int reg, err, val;
+
+ /* get the current settings */
+ reg = phy_read(phydev, MII_INTERNAL_CTRL_STATUS);
+ if (reg < 0)
+ return reg;
+
+ reg &= ~MII_AUTO_MDIX_EN;
+ val = reg;
+ switch (polarity) {
+ case ETH_TP_MDI:
+ val &= ~MII_MDIX_EN;
+ break;
+ case ETH_TP_MDI_X:
+ val |= MII_MDIX_EN;
+ break;
+ case ETH_TP_MDI_AUTO:
+ case ETH_TP_MDI_INVALID:
+ default:
+ return 0;
+ }
+
+ if (val != reg) {
+ /* Set the new polarity value in the register */
+ err = phy_write(phydev, MII_INTERNAL_CTRL_STATUS, val);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int rockchip_config_aneg(struct phy_device *phydev)
+{
+ int err;
+
+ err = rockchip_set_polarity(phydev, phydev->mdix);
+ if (err < 0)
+ return err;
+
+ return genphy_config_aneg(phydev);
+}
+
+static int rockchip_phy_resume(struct phy_device *phydev)
+{
+ genphy_resume(phydev);
+
+ return rockchip_integrated_phy_config_init(phydev);
+}
+
+static struct phy_driver rockchip_phy_driver[] = {
+{
+ .phy_id = INTERNAL_EPHY_ID,
+ .phy_id_mask = 0xfffffff0,
+ .name = "Rockchip integrated EPHY",
+ .features = PHY_BASIC_FEATURES,
+ .flags = 0,
+ .link_change_notify = rockchip_link_change_notify,
+ .soft_reset = genphy_soft_reset,
+ .config_init = rockchip_integrated_phy_config_init,
+ .config_aneg = rockchip_config_aneg,
+ .read_status = genphy_read_status,
+ .suspend = genphy_suspend,
+ .resume = rockchip_phy_resume,
+},
+};
+
+module_phy_driver(rockchip_phy_driver);
+
+static struct mdio_device_id __maybe_unused rockchip_phy_tbl[] = {
+ { INTERNAL_EPHY_ID, 0xfffffff0 },
+ { }
+};
+
+MODULE_DEVICE_TABLE(mdio, rockchip_phy_tbl);
+
+MODULE_AUTHOR("David Wu <david.wu@rock-chips.com>");
+MODULE_DESCRIPTION("Rockchip Ethernet PHY driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
new file mode 100644
index 0000000..5cb5384
--- /dev/null
+++ b/drivers/net/phy/sfp-bus.c
@@ -0,0 +1,475 @@
+#include <linux/export.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/phylink.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+
+#include "sfp.h"
+
+struct sfp_bus {
+ struct kref kref;
+ struct list_head node;
+ struct device_node *device_node;
+
+ const struct sfp_socket_ops *socket_ops;
+ struct device *sfp_dev;
+ struct sfp *sfp;
+
+ const struct sfp_upstream_ops *upstream_ops;
+ void *upstream;
+ struct net_device *netdev;
+ struct phy_device *phydev;
+
+ bool registered;
+ bool started;
+};
+
+int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
+ unsigned long *support)
+{
+ int port;
+
+ /* The port is the physical connector; set it from the connector field. */
+ switch (id->base.connector) {
+ case SFP_CONNECTOR_SC:
+ case SFP_CONNECTOR_FIBERJACK:
+ case SFP_CONNECTOR_LC:
+ case SFP_CONNECTOR_MT_RJ:
+ case SFP_CONNECTOR_MU:
+ case SFP_CONNECTOR_OPTICAL_PIGTAIL:
+ if (support)
+ phylink_set(support, FIBRE);
+ port = PORT_FIBRE;
+ break;
+
+ case SFP_CONNECTOR_RJ45:
+ if (support)
+ phylink_set(support, TP);
+ port = PORT_TP;
+ break;
+
+ case SFP_CONNECTOR_UNSPEC:
+ if (id->base.e1000_base_t) {
+ if (support)
+ phylink_set(support, TP);
+ port = PORT_TP;
+ break;
+ }
+ /* fallthrough */
+ case SFP_CONNECTOR_SG: /* guess */
+ case SFP_CONNECTOR_MPO_1X12:
+ case SFP_CONNECTOR_MPO_2X16:
+ case SFP_CONNECTOR_HSSDC_II:
+ case SFP_CONNECTOR_COPPER_PIGTAIL:
+ case SFP_CONNECTOR_NOSEPARATE:
+ case SFP_CONNECTOR_MXC_2X16:
+ port = PORT_OTHER;
+ break;
+ default:
+ dev_warn(bus->sfp_dev, "SFP: unknown connector id 0x%02x\n",
+ id->base.connector);
+ port = PORT_OTHER;
+ break;
+ }
+
+ return port;
+}
+EXPORT_SYMBOL_GPL(sfp_parse_port);
+
+phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id)
+{
+ phy_interface_t iface;
+
+ /* Setting the serdes link mode is guesswork: there's no field in
+ * the EEPROM which indicates what mode should be used.
+ *
+ * If the module wants 64b66b, then it must be >= 10G.
+ *
+ * If it's a gigabit-only fiber module, it probably does not have
+ * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch
+ * to SGMII mode (which is required to support non-gigabit speeds).
+ */
+ switch (id->base.encoding) {
+ case SFP_ENCODING_8472_64B66B:
+ iface = PHY_INTERFACE_MODE_10GKR;
+ break;
+
+ case SFP_ENCODING_8B10B:
+ if (!id->base.e1000_base_t &&
+ !id->base.e100_base_lx &&
+ !id->base.e100_base_fx)
+ iface = PHY_INTERFACE_MODE_1000BASEX;
+ else
+ iface = PHY_INTERFACE_MODE_SGMII;
+ break;
+
+ default:
+ iface = PHY_INTERFACE_MODE_NA;
+ dev_err(bus->sfp_dev,
+ "SFP module encoding does not support 8b10b nor 64b66b\n");
+ break;
+ }
+
+ return iface;
+}
+EXPORT_SYMBOL_GPL(sfp_parse_interface);
+
+void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
+ unsigned long *support)
+{
+ phylink_set(support, Autoneg);
+ phylink_set(support, Pause);
+ phylink_set(support, Asym_Pause);
+
+ /* Set ethtool support from the compliance fields. */
+ if (id->base.e10g_base_sr)
+ phylink_set(support, 10000baseSR_Full);
+ if (id->base.e10g_base_lr)
+ phylink_set(support, 10000baseLR_Full);
+ if (id->base.e10g_base_lrm)
+ phylink_set(support, 10000baseLRM_Full);
+ if (id->base.e10g_base_er)
+ phylink_set(support, 10000baseER_Full);
+ if (id->base.e1000_base_sx ||
+ id->base.e1000_base_lx ||
+ id->base.e1000_base_cx)
+ phylink_set(support, 1000baseX_Full);
+ if (id->base.e1000_base_t) {
+ phylink_set(support, 1000baseT_Half);
+ phylink_set(support, 1000baseT_Full);
+ }
+
+ switch (id->base.extended_cc) {
+ case 0x00: /* Unspecified */
+ break;
+ case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
+ phylink_set(support, 100000baseSR4_Full);
+ phylink_set(support, 25000baseSR_Full);
+ break;
+ case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
+ case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
+ phylink_set(support, 100000baseLR4_ER4_Full);
+ break;
+ case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
+ case 0x0c: /* 25Gbase-CR CA-S */
+ case 0x0d: /* 25Gbase-CR CA-N */
+ phylink_set(support, 100000baseCR4_Full);
+ phylink_set(support, 25000baseCR_Full);
+ break;
+ default:
+ dev_warn(bus->sfp_dev,
+ "Unknown/unsupported extended compliance code: 0x%02x\n",
+ id->base.extended_cc);
+ break;
+ }
+
+ /* For fibre channel SFP, derive possible BaseX modes */
+ if (id->base.fc_speed_100 ||
+ id->base.fc_speed_200 ||
+ id->base.fc_speed_400) {
+ if (id->base.br_nominal >= 31)
+ phylink_set(support, 2500baseX_Full);
+ if (id->base.br_nominal >= 12)
+ phylink_set(support, 1000baseX_Full);
+ }
+
+ switch (id->base.connector) {
+ case SFP_CONNECTOR_SC:
+ case SFP_CONNECTOR_FIBERJACK:
+ case SFP_CONNECTOR_LC:
+ case SFP_CONNECTOR_MT_RJ:
+ case SFP_CONNECTOR_MU:
+ case SFP_CONNECTOR_OPTICAL_PIGTAIL:
+ break;
+
+ case SFP_CONNECTOR_UNSPEC:
+ if (id->base.e1000_base_t)
+ break;
+
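+ /* fallthrough */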
+ case SFP_CONNECTOR_SG: /* guess */
+ case SFP_CONNECTOR_MPO_1X12:
+ case SFP_CONNECTOR_MPO_2X16:
+ case SFP_CONNECTOR_HSSDC_II:
+ case SFP_CONNECTOR_COPPER_PIGTAIL:
+ case SFP_CONNECTOR_NOSEPARATE:
+ case SFP_CONNECTOR_MXC_2X16:
+ default:
+ /* a guess at the supported link modes */
+ dev_warn(bus->sfp_dev,
+ "Guessing link modes, please report...\n");
+ phylink_set(support, 1000baseT_Half);
+ phylink_set(support, 1000baseT_Full);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(sfp_parse_support);
+
+static LIST_HEAD(sfp_buses);
+static DEFINE_MUTEX(sfp_mutex);
+
+static const struct sfp_upstream_ops *sfp_get_upstream_ops(struct sfp_bus *bus)
+{
+ return bus->registered ? bus->upstream_ops : NULL;
+}
+
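+/* Look up an existing bus for this device_node, or allocate and list
+ * a new one; either way, the caller holds a reference on the result.
+ */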
+static struct sfp_bus *sfp_bus_get(struct device_node *np)
+{
+ struct sfp_bus *sfp, *new, *found = NULL;
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+
+ mutex_lock(&sfp_mutex);
+
+ list_for_each_entry(sfp, &sfp_buses, node) {
+ if (sfp->device_node == np) {
+ kref_get(&sfp->kref);
+ found = sfp;
+ break;
+ }
+ }
+
+ if (!found && new) {
+ kref_init(&new->kref);
+ new->device_node = np;
+ list_add(&new->node, &sfp_buses);
+ found = new;
+ new = NULL;
+ }
+
+ mutex_unlock(&sfp_mutex);
+
+ kfree(new);
+
+ return found;
+}
+
+static void sfp_bus_release(struct kref *kref) __releases(sfp_mutex)
+{
+ struct sfp_bus *bus = container_of(kref, struct sfp_bus, kref);
+
+ list_del(&bus->node);
+ mutex_unlock(&sfp_mutex);
+ kfree(bus);
+}
+
+static void sfp_bus_put(struct sfp_bus *bus)
+{
+ kref_put_mutex(&bus->kref, sfp_bus_release, &sfp_mutex);
+}
+
+static int sfp_register_bus(struct sfp_bus *bus)
+{
+ const struct sfp_upstream_ops *ops = bus->upstream_ops;
+ int ret;
+
+ if (ops) {
+ if (ops->link_down)
+ ops->link_down(bus->upstream);
+ if (ops->connect_phy && bus->phydev) {
+ ret = ops->connect_phy(bus->upstream, bus->phydev);
+ if (ret)
+ return ret;
+ }
+ }
+ if (bus->started)
+ bus->socket_ops->start(bus->sfp);
+ bus->registered = true;
+ return 0;
+}
+
+static void sfp_unregister_bus(struct sfp_bus *bus)
+{
+ const struct sfp_upstream_ops *ops = bus->upstream_ops;
+
+ if (bus->registered) {
+ if (bus->started)
+ bus->socket_ops->stop(bus->sfp);
+ if (bus->phydev && ops && ops->disconnect_phy)
+ ops->disconnect_phy(bus->upstream);
+ }
+ bus->registered = false;
+}
+
+int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo)
+{
+ if (!bus->registered)
+ return -ENOIOCTLCMD;
+ return bus->socket_ops->module_info(bus->sfp, modinfo);
+}
+EXPORT_SYMBOL_GPL(sfp_get_module_info);
+
+int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
+ u8 *data)
+{
+ if (!bus->registered)
+ return -ENOIOCTLCMD;
+ return bus->socket_ops->module_eeprom(bus->sfp, ee, data);
+}
+EXPORT_SYMBOL_GPL(sfp_get_module_eeprom);
+
+void sfp_upstream_start(struct sfp_bus *bus)
+{
+ if (bus->registered)
+ bus->socket_ops->start(bus->sfp);
+ bus->started = true;
+}
+EXPORT_SYMBOL_GPL(sfp_upstream_start);
+
+void sfp_upstream_stop(struct sfp_bus *bus)
+{
+ if (bus->registered)
+ bus->socket_ops->stop(bus->sfp);
+ bus->started = false;
+}
+EXPORT_SYMBOL_GPL(sfp_upstream_stop);
+
+struct sfp_bus *sfp_register_upstream(struct device_node *np,
+ struct net_device *ndev, void *upstream,
+ const struct sfp_upstream_ops *ops)
+{
+ struct sfp_bus *bus = sfp_bus_get(np);
+ int ret = 0;
+
+ if (bus) {
+ rtnl_lock();
+ bus->upstream_ops = ops;
+ bus->upstream = upstream;
+ bus->netdev = ndev;
+
+ if (bus->sfp)
+ ret = sfp_register_bus(bus);
+ rtnl_unlock();
+ }
+
+ if (ret) {
+ sfp_bus_put(bus);
+ bus = NULL;
+ }
+
+ return bus;
+}
+EXPORT_SYMBOL_GPL(sfp_register_upstream);
+
+void sfp_unregister_upstream(struct sfp_bus *bus)
+{
+ rtnl_lock();
+ sfp_unregister_bus(bus);
+ bus->upstream = NULL;
+ bus->netdev = NULL;
+ rtnl_unlock();
+
+ sfp_bus_put(bus);
+}
+EXPORT_SYMBOL_GPL(sfp_unregister_upstream);
+
+/* Socket driver entry points */
+int sfp_add_phy(struct sfp_bus *bus, struct phy_device *phydev)
+{
+ const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+ int ret = 0;
+
+ if (ops && ops->connect_phy)
+ ret = ops->connect_phy(bus->upstream, phydev);
+
+ if (ret == 0)
+ bus->phydev = phydev;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sfp_add_phy);
+
+void sfp_remove_phy(struct sfp_bus *bus)
+{
+ const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+
+ if (ops && ops->disconnect_phy)
+ ops->disconnect_phy(bus->upstream);
+ bus->phydev = NULL;
+}
+EXPORT_SYMBOL_GPL(sfp_remove_phy);
+
+void sfp_link_up(struct sfp_bus *bus)
+{
+ const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+
+ if (ops && ops->link_up)
+ ops->link_up(bus->upstream);
+}
+EXPORT_SYMBOL_GPL(sfp_link_up);
+
+void sfp_link_down(struct sfp_bus *bus)
+{
+ const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+
+ if (ops && ops->link_down)
+ ops->link_down(bus->upstream);
+}
+EXPORT_SYMBOL_GPL(sfp_link_down);
+
+int sfp_module_insert(struct sfp_bus *bus, const struct sfp_eeprom_id *id)
+{
+ const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+ int ret = 0;
+
+ if (ops && ops->module_insert)
+ ret = ops->module_insert(bus->upstream, id);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sfp_module_insert);
+
+void sfp_module_remove(struct sfp_bus *bus)
+{
+ const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+
+ if (ops && ops->module_remove)
+ ops->module_remove(bus->upstream);
+}
+EXPORT_SYMBOL_GPL(sfp_module_remove);
+
+struct sfp_bus *sfp_register_socket(struct device *dev, struct sfp *sfp,
+ const struct sfp_socket_ops *ops)
+{
+ struct sfp_bus *bus = sfp_bus_get(dev->of_node);
+ int ret = 0;
+
+ if (bus) {
+ rtnl_lock();
+ bus->sfp_dev = dev;
+ bus->sfp = sfp;
+ bus->socket_ops = ops;
+
+ if (bus->netdev)
+ ret = sfp_register_bus(bus);
+ rtnl_unlock();
+ }
+
+ if (ret) {
+ sfp_bus_put(bus);
+ bus = NULL;
+ }
+
+ return bus;
+}
+EXPORT_SYMBOL_GPL(sfp_register_socket);
+
+void sfp_unregister_socket(struct sfp_bus *bus)
+{
+ rtnl_lock();
+ sfp_unregister_bus(bus);
+ bus->sfp_dev = NULL;
+ bus->sfp = NULL;
+ bus->socket_ops = NULL;
+ rtnl_unlock();
+
+ sfp_bus_put(bus);
+}
+EXPORT_SYMBOL_GPL(sfp_unregister_socket);
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
new file mode 100644
index 0000000..fb2cf43
--- /dev/null
+++ b/drivers/net/phy/sfp.c
@@ -0,0 +1,915 @@
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#include "mdio-i2c.h"
+#include "sfp.h"
+#include "swphy.h"
+
+enum {
+ GPIO_MODDEF0,
+ GPIO_LOS,
+ GPIO_TX_FAULT,
+ GPIO_TX_DISABLE,
+ GPIO_RATE_SELECT,
+ GPIO_MAX,
+
+ SFP_F_PRESENT = BIT(GPIO_MODDEF0),
+ SFP_F_LOS = BIT(GPIO_LOS),
+ SFP_F_TX_FAULT = BIT(GPIO_TX_FAULT),
+ SFP_F_TX_DISABLE = BIT(GPIO_TX_DISABLE),
+ SFP_F_RATE_SELECT = BIT(GPIO_RATE_SELECT),
+
+ SFP_E_INSERT = 0,
+ SFP_E_REMOVE,
+ SFP_E_DEV_DOWN,
+ SFP_E_DEV_UP,
+ SFP_E_TX_FAULT,
+ SFP_E_TX_CLEAR,
+ SFP_E_LOS_HIGH,
+ SFP_E_LOS_LOW,
+ SFP_E_TIMEOUT,
+
+ SFP_MOD_EMPTY = 0,
+ SFP_MOD_PROBE,
+ SFP_MOD_PRESENT,
+ SFP_MOD_ERROR,
+
+ SFP_DEV_DOWN = 0,
+ SFP_DEV_UP,
+
+ SFP_S_DOWN = 0,
+ SFP_S_INIT,
+ SFP_S_WAIT_LOS,
+ SFP_S_LINK_UP,
+ SFP_S_TX_FAULT,
+ SFP_S_REINIT,
+ SFP_S_TX_DISABLE,
+};
+
+static const char *gpio_of_names[] = {
+ "moddef0",
+ "los",
+ "tx-fault",
+ "tx-disable",
+ "rate-select",
+};
+
+static const enum gpiod_flags gpio_flags[] = {
+ GPIOD_IN,
+ GPIOD_IN,
+ GPIOD_IN,
+ GPIOD_ASIS,
+ GPIOD_ASIS,
+};
+
+#define T_INIT_JIFFIES msecs_to_jiffies(300)
+#define T_RESET_US 10
+#define T_FAULT_RECOVER msecs_to_jiffies(1000)
+
+/* SFP module presence detection is poor: the three MOD DEF signals are
+ * the same length on the PCB, which means it's possible for MOD DEF 0 to
+ * connect before the I2C bus on MOD DEF 1/2.
+ *
+ * The SFP MSA specifies 300ms as t_init (the time taken for TX_FAULT to
+ * be deasserted) but makes no mention of the earliest time before we can
+ * access the I2C EEPROM. However, Avago modules require 300ms.
+ */
+#define T_PROBE_INIT msecs_to_jiffies(300)
+#define T_PROBE_RETRY msecs_to_jiffies(100)
+
+/*
+ * SFP modules appear to always have their PHY configured for bus address
+ * 0x56 (which, with mdio-i2c, translates to a PHY address of 22).
+ */
+#define SFP_PHY_ADDR 22
+
+/*
+ * Give the PHY this long to reset.
+ */
+#define T_PHY_RESET_MS 50
+
+static DEFINE_MUTEX(sfp_mutex);
+
+struct sfp {
+ struct device *dev;
+ struct i2c_adapter *i2c;
+ struct mii_bus *i2c_mii;
+ struct sfp_bus *sfp_bus;
+ struct phy_device *mod_phy;
+
+ unsigned int (*get_state)(struct sfp *);
+ void (*set_state)(struct sfp *, unsigned int);
+ int (*read)(struct sfp *, bool, u8, void *, size_t);
+
+ struct gpio_desc *gpio[GPIO_MAX];
+
+ unsigned int state;
+ struct delayed_work poll;
+ struct delayed_work timeout;
+ struct mutex sm_mutex;
+ unsigned char sm_mod_state;
+ unsigned char sm_dev_state;
+ unsigned short sm_state;
+ unsigned int sm_retries;
+
+ struct sfp_eeprom_id id;
+};
+
+static unsigned long poll_jiffies;
+
+static unsigned int sfp_gpio_get_state(struct sfp *sfp)
+{
+ unsigned int i, state, v;
+
+ for (i = state = 0; i < GPIO_MAX; i++) {
+ if (gpio_flags[i] != GPIOD_IN || !sfp->gpio[i])
+ continue;
+
+ v = gpiod_get_value_cansleep(sfp->gpio[i]);
+ if (v)
+ state |= BIT(i);
+ }
+
+ return state;
+}
+
+static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state)
+{
+ if (state & SFP_F_PRESENT) {
+ /* If the module is present, drive the signals */
+ if (sfp->gpio[GPIO_TX_DISABLE])
+ gpiod_direction_output(sfp->gpio[GPIO_TX_DISABLE],
+ state & SFP_F_TX_DISABLE);
+ if (state & SFP_F_RATE_SELECT)
+ gpiod_direction_output(sfp->gpio[GPIO_RATE_SELECT],
+ state & SFP_F_RATE_SELECT);
+ } else {
+ /* Otherwise, let them float to the pull-ups */
+ if (sfp->gpio[GPIO_TX_DISABLE])
+ gpiod_direction_input(sfp->gpio[GPIO_TX_DISABLE]);
+ if (state & SFP_F_RATE_SELECT)
+ gpiod_direction_input(sfp->gpio[GPIO_RATE_SELECT]);
+ }
+}
+
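+/* Read from the module using a two-message I2C transfer: write the
+ * single-byte device address, then read back @len bytes of data.
+ */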
+static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr,
+ void *buf, size_t len)
+{
+ struct i2c_msg msgs[2];
+ int ret;
+
+ msgs[0].addr = bus_addr;
+ msgs[0].flags = 0;
+ msgs[0].len = 1;
+ msgs[0].buf = &dev_addr;
+ msgs[1].addr = bus_addr;
+ msgs[1].flags = I2C_M_RD;
+ msgs[1].len = len;
+ msgs[1].buf = buf;
+
+ ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs));
+ if (ret < 0)
+ return ret;
+
+ return ret == ARRAY_SIZE(msgs) ? len : 0;
+}
+
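+/* Bus address 0x50 selects the A0h identification EEPROM; 0x51 selects
+ * the A2h diagnostics area.
+ */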
+static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf,
+ size_t len)
+{
+ return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len);
+}
+
+static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c)
+{
+ struct mii_bus *i2c_mii;
+ int ret;
+
+ if (!i2c_check_functionality(i2c, I2C_FUNC_I2C))
+ return -EINVAL;
+
+ sfp->i2c = i2c;
+ sfp->read = sfp_i2c_read;
+
+ i2c_mii = mdio_i2c_alloc(sfp->dev, i2c);
+ if (IS_ERR(i2c_mii))
+ return PTR_ERR(i2c_mii);
+
+ i2c_mii->name = "SFP I2C Bus";
+ i2c_mii->phy_mask = ~0;
+
+ ret = mdiobus_register(i2c_mii);
+ if (ret < 0) {
+ mdiobus_free(i2c_mii);
+ return ret;
+ }
+
+ sfp->i2c_mii = i2c_mii;
+
+ return 0;
+}
+
+/* Interface */
+static unsigned int sfp_get_state(struct sfp *sfp)
+{
+ return sfp->get_state(sfp);
+}
+
+static void sfp_set_state(struct sfp *sfp, unsigned int state)
+{
+ sfp->set_state(sfp, state);
+}
+
+static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
+{
+ return sfp->read(sfp, a2, addr, buf, len);
+}
+
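+/* The SFF-8472 checksum fields hold the low eight bits of the sum of
+ * the preceding bytes, so a simple byte-wise sum suffices here.
+ */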
+static unsigned int sfp_check(void *buf, size_t len)
+{
+ u8 *p, check;
+
+ for (p = buf, check = 0; len; p++, len--)
+ check += *p;
+
+ return check;
+}
+
+/* Helpers */
+static void sfp_module_tx_disable(struct sfp *sfp)
+{
+ dev_dbg(sfp->dev, "tx disable %u -> %u\n",
+ sfp->state & SFP_F_TX_DISABLE ? 1 : 0, 1);
+ sfp->state |= SFP_F_TX_DISABLE;
+ sfp_set_state(sfp, sfp->state);
+}
+
+static void sfp_module_tx_enable(struct sfp *sfp)
+{
+ dev_dbg(sfp->dev, "tx disable %u -> %u\n",
+ sfp->state & SFP_F_TX_DISABLE ? 1 : 0, 0);
+ sfp->state &= ~SFP_F_TX_DISABLE;
+ sfp_set_state(sfp, sfp->state);
+}
+
+static void sfp_module_tx_fault_reset(struct sfp *sfp)
+{
+ unsigned int state = sfp->state;
+
+ if (state & SFP_F_TX_DISABLE)
+ return;
+
+ sfp_set_state(sfp, state | SFP_F_TX_DISABLE);
+
+ udelay(T_RESET_US);
+
+ sfp_set_state(sfp, state);
+}
+
+/* SFP state machine */
+static void sfp_sm_set_timer(struct sfp *sfp, unsigned int timeout)
+{
+ if (timeout)
+ mod_delayed_work(system_power_efficient_wq, &sfp->timeout,
+ timeout);
+ else
+ cancel_delayed_work(&sfp->timeout);
+}
+
+static void sfp_sm_next(struct sfp *sfp, unsigned int state,
+ unsigned int timeout)
+{
+ sfp->sm_state = state;
+ sfp_sm_set_timer(sfp, timeout);
+}
+
+static void sfp_sm_ins_next(struct sfp *sfp, unsigned int state,
+ unsigned int timeout)
+{
+ sfp->sm_mod_state = state;
+ sfp_sm_set_timer(sfp, timeout);
+}
+
+static void sfp_sm_phy_detach(struct sfp *sfp)
+{
+ phy_stop(sfp->mod_phy);
+ sfp_remove_phy(sfp->sfp_bus);
+ phy_device_remove(sfp->mod_phy);
+ phy_device_free(sfp->mod_phy);
+ sfp->mod_phy = NULL;
+}
+
+static void sfp_sm_probe_phy(struct sfp *sfp)
+{
+ struct phy_device *phy;
+ int err;
+
+ msleep(T_PHY_RESET_MS);
+
+ phy = mdiobus_scan(sfp->i2c_mii, SFP_PHY_ADDR);
+ if (IS_ERR(phy)) {
+ dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy));
+ return;
+ }
+ if (!phy) {
+ dev_info(sfp->dev, "no PHY detected\n");
+ return;
+ }
+
+ err = sfp_add_phy(sfp->sfp_bus, phy);
+ if (err) {
+ phy_device_remove(phy);
+ phy_device_free(phy);
+ dev_err(sfp->dev, "sfp_add_phy failed: %d\n", err);
+ return;
+ }
+
+ sfp->mod_phy = phy;
+ phy_start(phy);
+}
+
+static void sfp_sm_link_up(struct sfp *sfp)
+{
+ sfp_link_up(sfp->sfp_bus);
+ sfp_sm_next(sfp, SFP_S_LINK_UP, 0);
+}
+
+static void sfp_sm_link_down(struct sfp *sfp)
+{
+ sfp_link_down(sfp->sfp_bus);
+}
+
+static void sfp_sm_link_check_los(struct sfp *sfp)
+{
+ unsigned int los = sfp->state & SFP_F_LOS;
+
+ /* FIXME: what if neither SFP_OPTIONS_LOS_INVERTED nor
+ * SFP_OPTIONS_LOS_NORMAL is set? For now, treat it as if
+ * SFP_OPTIONS_LOS_NORMAL were set.
+ */
+ if (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED)
+ los ^= SFP_F_LOS;
+
+ if (los)
+ sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0);
+ else
+ sfp_sm_link_up(sfp);
+}
+
+static void sfp_sm_fault(struct sfp *sfp, bool warn)
+{
+ if (sfp->sm_retries && !--sfp->sm_retries) {
+ dev_err(sfp->dev, "module persistently indicates fault, disabling\n");
+ sfp_sm_next(sfp, SFP_S_TX_DISABLE, 0);
+ } else {
+ if (warn)
+ dev_err(sfp->dev, "module transmit fault indicated\n");
+
+ sfp_sm_next(sfp, SFP_S_TX_FAULT, T_FAULT_RECOVER);
+ }
+}
+
+static void sfp_sm_mod_init(struct sfp *sfp)
+{
+ sfp_module_tx_enable(sfp);
+
+ /* Wait t_init before indicating that the link is up, provided the
+ * current state indicates no TX_FAULT. If TX_FAULT clears before
+ * this time, that's fine too.
+ */
+ sfp_sm_next(sfp, SFP_S_INIT, T_INIT_JIFFIES);
+ sfp->sm_retries = 5;
+
+ /* Setting the serdes link mode is guesswork: there's no
+ * field in the EEPROM which indicates what mode should
+ * be used.
+ *
+ * If it's a gigabit-only fiber module, it probably does
+ * not have a PHY, so switch to 802.3z negotiation mode.
+ * Otherwise, switch to SGMII mode (which is required to
+ * support non-gigabit speeds) and probe for a PHY.
+ */
+ if (sfp->id.base.e1000_base_t ||
+ sfp->id.base.e100_base_lx ||
+ sfp->id.base.e100_base_fx)
+ sfp_sm_probe_phy(sfp);
+}
+
+static int sfp_sm_mod_probe(struct sfp *sfp)
+{
+ /* SFP module inserted - read I2C data */
+ struct sfp_eeprom_id id;
+ char vendor[17];
+ char part[17];
+ char sn[17];
+ char date[9];
+ char rev[5];
+ u8 check;
+ int err;
+
+ err = sfp_read(sfp, false, 0, &id, sizeof(id));
+ if (err < 0) {
+ dev_err(sfp->dev, "failed to read EEPROM: %d\n", err);
+ return -EAGAIN;
+ }
+
+ if (err != sizeof(id)) {
+ dev_err(sfp->dev, "EEPROM short read: %d\n", err);
+ return -EAGAIN;
+ }
+
+ /* Validate the checksum over the base structure */
+ check = sfp_check(&id.base, sizeof(id.base) - 1);
+ if (check != id.base.cc_base) {
+ dev_err(sfp->dev,
+ "EEPROM base structure checksum failure: 0x%02x\n",
+ check);
+ print_hex_dump(KERN_ERR, "sfp EE: ", DUMP_PREFIX_OFFSET,
+ 16, 1, &id, sizeof(id.base) - 1, true);
+ return -EINVAL;
+ }
+
+ check = sfp_check(&id.ext, sizeof(id.ext) - 1);
+ if (check != id.ext.cc_ext) {
+ dev_err(sfp->dev,
+ "EEPROM extended structure checksum failure: 0x%02x\n",
+ check);
+ memset(&id.ext, 0, sizeof(id.ext));
+ }
+
+ sfp->id = id;
+
+ memcpy(vendor, sfp->id.base.vendor_name, 16);
+ vendor[16] = '\0';
+ memcpy(part, sfp->id.base.vendor_pn, 16);
+ part[16] = '\0';
+ memcpy(rev, sfp->id.base.vendor_rev, 4);
+ rev[4] = '\0';
+ memcpy(sn, sfp->id.ext.vendor_sn, 16);
+ sn[16] = '\0';
+ memcpy(date, sfp->id.ext.datecode, 8);
+ date[8] = '\0';
+
+ dev_info(sfp->dev, "module %s %s rev %s sn %s dc %s\n", vendor, part, rev, sn, date);
+
+ /* We only support SFP modules, not the legacy GBIC modules. */
+ if (sfp->id.base.phys_id != SFP_PHYS_ID_SFP ||
+ sfp->id.base.phys_ext_id != SFP_PHYS_EXT_ID_SFP) {
+ dev_err(sfp->dev, "module is not SFP - phys id 0x%02x 0x%02x\n",
+ sfp->id.base.phys_id, sfp->id.base.phys_ext_id);
+ return -EINVAL;
+ }
+
+ return sfp_module_insert(sfp->sfp_bus, &sfp->id);
+}
+
+static void sfp_sm_mod_remove(struct sfp *sfp)
+{
+ sfp_module_remove(sfp->sfp_bus);
+
+ if (sfp->mod_phy)
+ sfp_sm_phy_detach(sfp);
+
+ sfp_module_tx_disable(sfp);
+
+ memset(&sfp->id, 0, sizeof(sfp->id));
+
+ dev_info(sfp->dev, "module removed\n");
+}
+
+static void sfp_sm_event(struct sfp *sfp, unsigned int event)
+{
+ mutex_lock(&sfp->sm_mutex);
+
+ dev_dbg(sfp->dev, "SM: enter %u:%u:%u event %u\n",
+ sfp->sm_mod_state, sfp->sm_dev_state, sfp->sm_state, event);
+
+ /* This state machine tracks the insert/remove state of
+ * the module, and handles probing the on-board EEPROM.
+ */
+ switch (sfp->sm_mod_state) {
+ default:
+ if (event == SFP_E_INSERT) {
+ sfp_module_tx_disable(sfp);
+ sfp_sm_ins_next(sfp, SFP_MOD_PROBE, T_PROBE_INIT);
+ }
+ break;
+
+ case SFP_MOD_PROBE:
+ if (event == SFP_E_REMOVE) {
+ sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
+ } else if (event == SFP_E_TIMEOUT) {
+ int err = sfp_sm_mod_probe(sfp);
+
+ if (err == 0)
+ sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
+ else if (err == -EAGAIN)
+ sfp_sm_set_timer(sfp, T_PROBE_RETRY);
+ else
+ sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
+ }
+ break;
+
+ case SFP_MOD_PRESENT:
+ case SFP_MOD_ERROR:
+ if (event == SFP_E_REMOVE) {
+ sfp_sm_mod_remove(sfp);
+ sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
+ }
+ break;
+ }
+
+ /* This state machine tracks the netdev up/down state */
+ switch (sfp->sm_dev_state) {
+ default:
+ if (event == SFP_E_DEV_UP)
+ sfp->sm_dev_state = SFP_DEV_UP;
+ break;
+
+ case SFP_DEV_UP:
+ if (event == SFP_E_DEV_DOWN) {
+ /* If the module has a PHY, avoid raising TX disable
+ * as this resets the PHY. Otherwise, raise it to
+ * turn the laser off.
+ */
+ if (!sfp->mod_phy)
+ sfp_module_tx_disable(sfp);
+ sfp->sm_dev_state = SFP_DEV_DOWN;
+ }
+ break;
+ }
+
+ /* Some events are global */
+ if (sfp->sm_state != SFP_S_DOWN &&
+ (sfp->sm_mod_state != SFP_MOD_PRESENT ||
+ sfp->sm_dev_state != SFP_DEV_UP)) {
+ if (sfp->sm_state == SFP_S_LINK_UP &&
+ sfp->sm_dev_state == SFP_DEV_UP)
+ sfp_sm_link_down(sfp);
+ if (sfp->mod_phy)
+ sfp_sm_phy_detach(sfp);
+ sfp_sm_next(sfp, SFP_S_DOWN, 0);
+ mutex_unlock(&sfp->sm_mutex);
+ return;
+ }
+
+ /* The main state machine */
+ switch (sfp->sm_state) {
+ case SFP_S_DOWN:
+ if (sfp->sm_mod_state == SFP_MOD_PRESENT &&
+ sfp->sm_dev_state == SFP_DEV_UP)
+ sfp_sm_mod_init(sfp);
+ break;
+
+ case SFP_S_INIT:
+ if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT)
+ sfp_sm_fault(sfp, true);
+ else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR)
+ sfp_sm_link_check_los(sfp);
+ break;
+
+ case SFP_S_WAIT_LOS:
+ if (event == SFP_E_TX_FAULT)
+ sfp_sm_fault(sfp, true);
+ else if (event ==
+ (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED ?
+ SFP_E_LOS_HIGH : SFP_E_LOS_LOW))
+ sfp_sm_link_up(sfp);
+ break;
+
+ case SFP_S_LINK_UP:
+ if (event == SFP_E_TX_FAULT) {
+ sfp_sm_link_down(sfp);
+ sfp_sm_fault(sfp, true);
+ } else if (event ==
+ (sfp->id.ext.options & SFP_OPTIONS_LOS_INVERTED ?
+ SFP_E_LOS_LOW : SFP_E_LOS_HIGH)) {
+ sfp_sm_link_down(sfp);
+ sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0);
+ }
+ break;
+
+ case SFP_S_TX_FAULT:
+ if (event == SFP_E_TIMEOUT) {
+ sfp_module_tx_fault_reset(sfp);
+ sfp_sm_next(sfp, SFP_S_REINIT, T_INIT_JIFFIES);
+ }
+ break;
+
+ case SFP_S_REINIT:
+ if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) {
+ sfp_sm_fault(sfp, false);
+ } else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) {
+ dev_info(sfp->dev, "module transmit fault recovered\n");
+ sfp_sm_link_check_los(sfp);
+ }
+ break;
+
+ case SFP_S_TX_DISABLE:
+ break;
+ }
+
+ dev_dbg(sfp->dev, "SM: exit %u:%u:%u\n",
+ sfp->sm_mod_state, sfp->sm_dev_state, sfp->sm_state);
+
+ mutex_unlock(&sfp->sm_mutex);
+}
+
+static void sfp_start(struct sfp *sfp)
+{
+ sfp_sm_event(sfp, SFP_E_DEV_UP);
+}
+
+static void sfp_stop(struct sfp *sfp)
+{
+ sfp_sm_event(sfp, SFP_E_DEV_DOWN);
+}
+
+static int sfp_module_info(struct sfp *sfp, struct ethtool_modinfo *modinfo)
+{
+ /* FIXME: locking, and check that the module is present */
+
+ if (sfp->id.ext.sff8472_compliance) {
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ }
+ return 0;
+}
+
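+/* ethtool presents the two EEPROM areas as one linear address space:
+ * offsets below ETH_MODULE_SFF_8079_LEN read from the A0h area, the
+ * remainder from the A2h diagnostics area.
+ */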
+static int sfp_module_eeprom(struct sfp *sfp, struct ethtool_eeprom *ee,
+ u8 *data)
+{
+ unsigned int first, last, len;
+ int ret;
+
+ if (ee->len == 0)
+ return -EINVAL;
+
+ first = ee->offset;
+ last = ee->offset + ee->len;
+ if (first < ETH_MODULE_SFF_8079_LEN) {
+ len = min_t(unsigned int, last, ETH_MODULE_SFF_8079_LEN);
+ len -= first;
+
+ ret = sfp->read(sfp, false, first, data, len);
+ if (ret < 0)
+ return ret;
+
+ first += len;
+ data += len;
+ }
+ if (first >= ETH_MODULE_SFF_8079_LEN &&
+ first < ETH_MODULE_SFF_8472_LEN) {
+ len = min_t(unsigned int, last, ETH_MODULE_SFF_8472_LEN);
+ len -= first;
+ first -= ETH_MODULE_SFF_8079_LEN;
+
+ ret = sfp->read(sfp, true, first, data, len);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static const struct sfp_socket_ops sfp_module_ops = {
+ .start = sfp_start,
+ .stop = sfp_stop,
+ .module_info = sfp_module_info,
+ .module_eeprom = sfp_module_eeprom,
+};
+
+static void sfp_timeout(struct work_struct *work)
+{
+ struct sfp *sfp = container_of(work, struct sfp, timeout.work);
+
+ rtnl_lock();
+ sfp_sm_event(sfp, SFP_E_TIMEOUT);
+ rtnl_unlock();
+}
+
+static void sfp_check_state(struct sfp *sfp)
+{
+ unsigned int state, i, changed;
+
+ state = sfp_get_state(sfp);
+ changed = state ^ sfp->state;
+ changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT;
+
+ for (i = 0; i < GPIO_MAX; i++)
+ if (changed & BIT(i))
+ dev_dbg(sfp->dev, "%s %u -> %u\n", gpio_of_names[i],
+ !!(sfp->state & BIT(i)), !!(state & BIT(i)));
+
+ state |= sfp->state & (SFP_F_TX_DISABLE | SFP_F_RATE_SELECT);
+ sfp->state = state;
+
+ rtnl_lock();
+ if (changed & SFP_F_PRESENT)
+ sfp_sm_event(sfp, state & SFP_F_PRESENT ?
+ SFP_E_INSERT : SFP_E_REMOVE);
+
+ if (changed & SFP_F_TX_FAULT)
+ sfp_sm_event(sfp, state & SFP_F_TX_FAULT ?
+ SFP_E_TX_FAULT : SFP_E_TX_CLEAR);
+
+ if (changed & SFP_F_LOS)
+ sfp_sm_event(sfp, state & SFP_F_LOS ?
+ SFP_E_LOS_HIGH : SFP_E_LOS_LOW);
+ rtnl_unlock();
+}
+
+static irqreturn_t sfp_irq(int irq, void *data)
+{
+ struct sfp *sfp = data;
+
+ sfp_check_state(sfp);
+
+ return IRQ_HANDLED;
+}
+
+static void sfp_poll(struct work_struct *work)
+{
+ struct sfp *sfp = container_of(work, struct sfp, poll.work);
+
+ sfp_check_state(sfp);
+ mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
+}
+
+static struct sfp *sfp_alloc(struct device *dev)
+{
+ struct sfp *sfp;
+
+ sfp = kzalloc(sizeof(*sfp), GFP_KERNEL);
+ if (!sfp)
+ return ERR_PTR(-ENOMEM);
+
+ sfp->dev = dev;
+
+ mutex_init(&sfp->sm_mutex);
+ INIT_DELAYED_WORK(&sfp->poll, sfp_poll);
+ INIT_DELAYED_WORK(&sfp->timeout, sfp_timeout);
+
+ return sfp;
+}
+
+static void sfp_cleanup(void *data)
+{
+ struct sfp *sfp = data;
+
+ cancel_delayed_work_sync(&sfp->poll);
+ cancel_delayed_work_sync(&sfp->timeout);
+ if (sfp->i2c_mii) {
+ mdiobus_unregister(sfp->i2c_mii);
+ mdiobus_free(sfp->i2c_mii);
+ }
+ if (sfp->i2c)
+ i2c_put_adapter(sfp->i2c);
+ kfree(sfp);
+}
+
+static int sfp_probe(struct platform_device *pdev)
+{
+ struct sfp *sfp;
+ bool poll = false;
+ int irq, err, i;
+
+ sfp = sfp_alloc(&pdev->dev);
+ if (IS_ERR(sfp))
+ return PTR_ERR(sfp);
+
+ platform_set_drvdata(pdev, sfp);
+
+ err = devm_add_action(sfp->dev, sfp_cleanup, sfp);
+ if (err < 0)
+ return err;
+
+ if (pdev->dev.of_node) {
+ struct device_node *node = pdev->dev.of_node;
+ struct device_node *np;
+
+ np = of_parse_phandle(node, "i2c-bus", 0);
+ if (np) {
+ struct i2c_adapter *i2c;
+
+ i2c = of_find_i2c_adapter_by_node(np);
+ of_node_put(np);
+ if (!i2c)
+ return -EPROBE_DEFER;
+
+ err = sfp_i2c_configure(sfp, i2c);
+ if (err < 0) {
+ i2c_put_adapter(i2c);
+ return err;
+ }
+ }
+
+ for (i = 0; i < GPIO_MAX; i++) {
+ sfp->gpio[i] = devm_gpiod_get_optional(sfp->dev,
+ gpio_of_names[i], gpio_flags[i]);
+ if (IS_ERR(sfp->gpio[i]))
+ return PTR_ERR(sfp->gpio[i]);
+ }
+
+ sfp->get_state = sfp_gpio_get_state;
+ sfp->set_state = sfp_gpio_set_state;
+ }
+
+ sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops);
+ if (!sfp->sfp_bus)
+ return -ENOMEM;
+
+ /* Get the initial state, and always signal TX disable,
+ * since the network interface will not be up.
+ */
+ sfp->state = sfp_get_state(sfp) | SFP_F_TX_DISABLE;
+
+ if (sfp->gpio[GPIO_RATE_SELECT] &&
+ gpiod_get_value_cansleep(sfp->gpio[GPIO_RATE_SELECT]))
+ sfp->state |= SFP_F_RATE_SELECT;
+ sfp_set_state(sfp, sfp->state);
+ sfp_module_tx_disable(sfp);
+ rtnl_lock();
+ if (sfp->state & SFP_F_PRESENT)
+ sfp_sm_event(sfp, SFP_E_INSERT);
+ rtnl_unlock();
+
+ for (i = 0; i < GPIO_MAX; i++) {
+ if (gpio_flags[i] != GPIOD_IN || !sfp->gpio[i])
+ continue;
+
+ irq = gpiod_to_irq(sfp->gpio[i]);
+ if (!irq) {
+ poll = true;
+ continue;
+ }
+
+ err = devm_request_threaded_irq(sfp->dev, irq, NULL, sfp_irq,
+ IRQF_ONESHOT |
+ IRQF_TRIGGER_RISING |
+ IRQF_TRIGGER_FALLING,
+ dev_name(sfp->dev), sfp);
+ if (err)
+ poll = true;
+ }
+
+ if (poll)
+ mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
+
+ return 0;
+}
+
+static int sfp_remove(struct platform_device *pdev)
+{
+ struct sfp *sfp = platform_get_drvdata(pdev);
+
+ sfp_unregister_socket(sfp->sfp_bus);
+
+ return 0;
+}
+
+static const struct of_device_id sfp_of_match[] = {
+ { .compatible = "sff,sfp", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, sfp_of_match);
+
+static struct platform_driver sfp_driver = {
+ .probe = sfp_probe,
+ .remove = sfp_remove,
+ .driver = {
+ .name = "sfp",
+ .of_match_table = sfp_of_match,
+ },
+};
+
+static int sfp_init(void)
+{
+ poll_jiffies = msecs_to_jiffies(100);
+
+ return platform_driver_register(&sfp_driver);
+}
+module_init(sfp_init);
+
+static void sfp_exit(void)
+{
+ platform_driver_unregister(&sfp_driver);
+}
+module_exit(sfp_exit);
+
+MODULE_ALIAS("platform:sfp");
+MODULE_AUTHOR("Russell King");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/phy/sfp.h b/drivers/net/phy/sfp.h
new file mode 100644
index 0000000..31b0acf
--- /dev/null
+++ b/drivers/net/phy/sfp.h
@@ -0,0 +1,28 @@
+#ifndef SFP_H
+#define SFP_H
+
+#include <linux/ethtool.h>
+#include <linux/sfp.h>
+
+struct sfp;
+
+struct sfp_socket_ops {
+ void (*start)(struct sfp *sfp);
+ void (*stop)(struct sfp *sfp);
+ int (*module_info)(struct sfp *sfp, struct ethtool_modinfo *modinfo);
+ int (*module_eeprom)(struct sfp *sfp, struct ethtool_eeprom *ee,
+ u8 *data);
+};
+
+int sfp_add_phy(struct sfp_bus *bus, struct phy_device *phydev);
+void sfp_remove_phy(struct sfp_bus *bus);
+void sfp_link_up(struct sfp_bus *bus);
+void sfp_link_down(struct sfp_bus *bus);
+int sfp_module_insert(struct sfp_bus *bus, const struct sfp_eeprom_id *id);
+void sfp_module_remove(struct sfp_bus *bus);
+int sfp_link_configure(struct sfp_bus *bus, const struct sfp_eeprom_id *id);
+struct sfp_bus *sfp_register_socket(struct device *dev, struct sfp *sfp,
+ const struct sfp_socket_ops *ops);
+void sfp_unregister_socket(struct sfp_bus *bus);
+
+#endif
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 3570c75..21b71ae 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -943,9 +943,6 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
if (arg & TUN_F_TSO6)
feature_mask |= NETIF_F_TSO6;
}
-
- if (arg & TUN_F_UFO)
- feature_mask |= NETIF_F_UFO;
}
/* tun/tap driver inverts the usage for TSO offloads, where
@@ -956,7 +953,7 @@ static int set_offload(struct tap_queue *q, unsigned long arg)
* When user space turns off TSO, we turn off GSO/LRO so that
* user-space will not receive TSO frames.
*/
- if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO))
+ if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6))
features |= RX_OFFLOADS;
else
features &= ~RX_OFFLOADS;
@@ -1078,7 +1075,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
case TUNSETOFFLOAD:
/* let the user check for future flags */
if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
- TUN_F_TSO_ECN | TUN_F_UFO))
+ TUN_F_TSO_ECN))
return -EINVAL;
rtnl_lock();
@@ -1130,7 +1127,7 @@ static long tap_compat_ioctl(struct file *file, unsigned int cmd,
}
#endif
-const struct file_operations tap_fops = {
+static const struct file_operations tap_fops = {
.owner = THIS_MODULE,
.open = tap_open,
.release = tap_release,
@@ -1218,7 +1215,7 @@ int tap_queue_resize(struct tap_dev *tap)
int n = tap->numqueues;
int ret, i = 0;
- arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL);
+ arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL);
if (!arrays)
return -ENOMEM;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 0a2c0a4..3c9985f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -73,6 +73,8 @@
#include <linux/seq_file.h>
#include <linux/uio.h>
#include <linux/skb_array.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <linux/uaccess.h>
@@ -105,6 +107,9 @@ do { \
} while (0)
#endif
+#define TUN_HEADROOM 256
+#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
+
/* TUN device flags */
/* IFF_ATTACH_QUEUE is never stored in device flags,
@@ -199,7 +204,7 @@ struct tun_struct {
struct net_device *dev;
netdev_features_t set_features;
#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
- NETIF_F_TSO6|NETIF_F_UFO)
+ NETIF_F_TSO6)
int align;
int vnet_hdr_sz;
@@ -221,6 +226,7 @@ struct tun_struct {
u32 flow_count;
u32 rx_batched;
struct tun_pcpu_stats __percpu *pcpu_stats;
+ struct bpf_prog __rcu *xdp_prog;
};
#ifdef CONFIG_TUN_VNET_CROSS_LE
@@ -585,6 +591,7 @@ static void tun_detach(struct tun_file *tfile, bool clean)
static void tun_detach_all(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
+ struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog);
struct tun_file *tfile, *tmp;
int i, n = tun->numqueues;
@@ -617,6 +624,9 @@ static void tun_detach_all(struct net_device *dev)
}
BUG_ON(tun->numdisabled != 0);
+ if (xdp_prog)
+ bpf_prog_put(xdp_prog);
+
if (tun->flags & IFF_PERSIST)
module_put(THIS_MODULE);
}
@@ -892,7 +902,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
sk_filter(tfile->socket.sk, skb))
goto drop;
- if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+ if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
goto drop;
skb_tx_timestamp(skb);
@@ -1003,6 +1013,46 @@ tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
stats->tx_dropped = tx_dropped;
}
+static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+ struct bpf_prog *old_prog;
+
+ old_prog = rtnl_dereference(tun->xdp_prog);
+ rcu_assign_pointer(tun->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ return 0;
+}
+
+static u32 tun_xdp_query(struct net_device *dev)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+ const struct bpf_prog *xdp_prog;
+
+ xdp_prog = rtnl_dereference(tun->xdp_prog);
+ if (xdp_prog)
+ return xdp_prog->aux->id;
+
+ return 0;
+}
+
+static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return tun_xdp_set(dev, xdp->prog, xdp->extack);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = tun_xdp_query(dev);
+ xdp->prog_attached = !!xdp->prog_id;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops tun_netdev_ops = {
.ndo_uninit = tun_net_uninit,
.ndo_open = tun_net_open,
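
The program installed through XDP_SETUP_PROG is an ordinary XDP program. A minimal sketch of one, assuming current clang/libbpf conventions (not part of the patch):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_drop_runts(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	/* Drop frames too short for an Ethernet header, pass the rest. */
	if (data + 14 > data_end)
		return XDP_DROP;
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
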
@@ -1033,6 +1083,7 @@ static const struct net_device_ops tap_netdev_ops = {
.ndo_features_check = passthru_features_check,
.ndo_set_rx_headroom = tun_set_headroom,
.ndo_get_stats64 = tun_net_get_stats64,
+ .ndo_xdp = tun_xdp,
};
static void tun_flow_init(struct tun_struct *tun)
@@ -1190,6 +1241,138 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
}
}
+static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
+ int len, int noblock, bool zerocopy)
+{
+ if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
+ return false;
+
+ if (tfile->socket.sk->sk_sndbuf != INT_MAX)
+ return false;
+
+ if (!noblock)
+ return false;
+
+ if (zerocopy)
+ return false;
+
+ if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
+ return false;
+
+ return true;
+}
+
+static struct sk_buff *tun_build_skb(struct tun_struct *tun,
+ struct tun_file *tfile,
+ struct iov_iter *from,
+ struct virtio_net_hdr *hdr,
+ int len, int *skb_xdp)
+{
+ struct page_frag *alloc_frag = &current->task_frag;
+ struct sk_buff *skb;
+ struct bpf_prog *xdp_prog;
+ int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ unsigned int delta = 0;
+ char *buf;
+ size_t copied;
+ bool xdp_xmit = false;
+ int err, pad = TUN_RX_PAD;
+
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(tun->xdp_prog);
+ if (xdp_prog)
+ pad += TUN_HEADROOM;
+ buflen += SKB_DATA_ALIGN(len + pad);
+ rcu_read_unlock();
+
+ if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
+ return ERR_PTR(-ENOMEM);
+
+ buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+ copied = copy_page_from_iter(alloc_frag->page,
+ alloc_frag->offset + pad,
+ len, from);
+ if (copied != len)
+ return ERR_PTR(-EFAULT);
+
+ /* There's a small window in which an XDP program may be attached
+ * after the check of xdp_prog above. This should be rare and, for
+ * simplicity, we fall back to doing XDP on the skb when the
+ * headroom is not enough.
+ */
+ if (hdr->gso_type || !xdp_prog)
+ *skb_xdp = 1;
+ else
+ *skb_xdp = 0;
+
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(tun->xdp_prog);
+ if (xdp_prog && !*skb_xdp) {
+ struct xdp_buff xdp;
+ void *orig_data;
+ u32 act;
+
+ xdp.data_hard_start = buf;
+ xdp.data = buf + pad;
+ xdp.data_end = xdp.data + len;
+ orig_data = xdp.data;
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ switch (act) {
+ case XDP_REDIRECT:
+ get_page(alloc_frag->page);
+ alloc_frag->offset += buflen;
+ err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
+ if (err)
+ goto err_redirect;
+ return NULL;
+ case XDP_TX:
+ xdp_xmit = true;
+ /* fall through */
+ case XDP_PASS:
+ delta = orig_data - xdp.data;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fall through */
+ case XDP_ABORTED:
+ trace_xdp_exception(tun->dev, xdp_prog, act);
+ /* fall through */
+ case XDP_DROP:
+ goto err_xdp;
+ }
+ }
+
+ skb = build_skb(buf, buflen);
+ if (!skb) {
+ rcu_read_unlock();
+ return ERR_PTR(-ENOMEM);
+ }
+
+ skb_reserve(skb, pad - delta);
+ skb_put(skb, len + delta);
+ get_page(alloc_frag->page);
+ alloc_frag->offset += buflen;
+
+ if (xdp_xmit) {
+ skb->dev = tun->dev;
+ generic_xdp_tx(skb, xdp_prog);
+ rcu_read_lock();
+ return NULL;
+ }
+
+ rcu_read_unlock();
+
+ return skb;
+
+err_redirect:
+ put_page(alloc_frag->page);
+err_xdp:
+ rcu_read_unlock();
+ this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ return NULL;
+}
+
/* Get packet from user space buffer */
static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
void *msg_control, struct iov_iter *from,
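
All of the tun_can_build_skb() conditions above are under user-space control. A hedged sketch of configuring a tap fd so that writes qualify for the new path (names simplified and error handling abbreviated; not from the patch):

#include <fcntl.h>
#include <limits.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>

static int make_fast_path_tap(int fd, const char *name)
{
	struct ifreq ifr;
	int sndbuf = INT_MAX;	/* the default; fast path requires INT_MAX */

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;	/* fast path is IFF_TAP only */
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
	if (ioctl(fd, TUNSETIFF, &ifr) < 0)
		return -1;
	if (ioctl(fd, TUNSETSNDBUF, &sndbuf) < 0)
		return -1;
	/* Non-blocking writes satisfy the 'noblock' condition. */
	return fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
}
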
@@ -1206,6 +1389,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
bool zerocopy = false;
int err;
u32 rxhash;
+ int skb_xdp = 1;
if (!(tun->dev->flags & IFF_UP))
return -EIO;
@@ -1263,30 +1447,44 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
zerocopy = true;
}
- if (!zerocopy) {
- copylen = len;
- if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
- linear = good_linear;
- else
- linear = tun16_to_cpu(tun, gso.hdr_len);
- }
-
- skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
- if (IS_ERR(skb)) {
- if (PTR_ERR(skb) != -EAGAIN)
+ if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
+ /* Packets that are not easy to process here (e.g. GSO or
+ * jumbo packets) are handled after the skb has been created,
+ * via the generic XDP routine.
+ */
+ skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
+ if (IS_ERR(skb)) {
this_cpu_inc(tun->pcpu_stats->rx_dropped);
- return PTR_ERR(skb);
- }
+ return PTR_ERR(skb);
+ }
+ if (!skb)
+ return total_len;
+ } else {
+ if (!zerocopy) {
+ copylen = len;
+ if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
+ linear = good_linear;
+ else
+ linear = tun16_to_cpu(tun, gso.hdr_len);
+ }
- if (zerocopy)
- err = zerocopy_sg_from_iter(skb, from);
- else
- err = skb_copy_datagram_from_iter(skb, 0, from, len);
+ skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
+ if (IS_ERR(skb)) {
+ if (PTR_ERR(skb) != -EAGAIN)
+ this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ return PTR_ERR(skb);
+ }
- if (err) {
- this_cpu_inc(tun->pcpu_stats->rx_dropped);
- kfree_skb(skb);
- return -EFAULT;
+ if (zerocopy)
+ err = zerocopy_sg_from_iter(skb, from);
+ else
+ err = skb_copy_datagram_from_iter(skb, 0, from, len);
+
+ if (err) {
+ this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ kfree_skb(skb);
+ return -EFAULT;
+ }
}
if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
@@ -1334,6 +1532,22 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
skb_reset_network_header(skb);
skb_probe_transport_header(skb, 0);
+ if (skb_xdp) {
+ struct bpf_prog *xdp_prog;
+ int ret;
+
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(tun->xdp_prog);
+ if (xdp_prog) {
+ ret = do_xdp_generic(xdp_prog, skb);
+ if (ret != XDP_PASS) {
+ rcu_read_unlock();
+ return total_len;
+ }
+ }
+ rcu_read_unlock();
+ }
+
rxhash = __skb_get_hash_symmetric(skb);
#ifndef CONFIG_4KSTACKS
tun_rx_batched(tun, tfile, skb, more);
@@ -1924,11 +2138,6 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
features |= NETIF_F_TSO6;
arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
}
-
- if (arg & TUN_F_UFO) {
- features |= NETIF_F_UFO;
- arg &= ~TUN_F_UFO;
- }
}
/* This gives the user a way to test for new features in future by
@@ -2540,7 +2749,7 @@ static int tun_queue_resize(struct tun_struct *tun)
int n = tun->numqueues + tun->numdisabled;
int ret, i;
- arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL);
+ arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL);
if (!arrays)
return -ENOMEM;
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index fce92f0..dbc9031 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -961,7 +961,7 @@ static void catc_disconnect(struct usb_interface *intf)
* Module functions and tables.
*/
-static struct usb_device_id catc_id_table [] = {
+static const struct usb_device_id catc_id_table[] = {
{ USB_DEVICE(0x0423, 0xa) }, /* CATC Netmate, Belkin F5U011 */
{ USB_DEVICE(0x0423, 0xc) }, /* CATC Netmate II, Belkin F5U111 */
{ USB_DEVICE(0x08d1, 0x1) }, /* smartBridges smartNIC */
diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c
index 2952cb5..288ecd9 100644
--- a/drivers/net/usb/cdc-phonet.c
+++ b/drivers/net/usb/cdc-phonet.c
@@ -304,7 +304,7 @@ static void usbpn_setup(struct net_device *dev)
/*
* USB driver callbacks
*/
-static struct usb_device_id usbpn_ids[] = {
+static const struct usb_device_id usbpn_ids[] = {
{
.match_flags = USB_DEVICE_ID_MATCH_VENDOR
| USB_DEVICE_ID_MATCH_INT_CLASS
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 9c80e80..47cab1b 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -367,7 +367,7 @@ static struct attribute *cdc_ncm_sysfs_attrs[] = {
NULL,
};
-static struct attribute_group cdc_ncm_sysfs_attr_group = {
+static const struct attribute_group cdc_ncm_sysfs_attr_group = {
.name = "cdc_ncm",
.attrs = cdc_ncm_sysfs_attrs,
};
diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index 0f213ea..d49c710 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -87,7 +87,7 @@
#define IPHETH_CARRIER_CHECK_TIMEOUT round_jiffies_relative(1 * HZ)
#define IPHETH_CARRIER_ON 0x04
-static struct usb_device_id ipheth_table[] = {
+static const struct usb_device_id ipheth_table[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(
USB_VENDOR_APPLE, USB_PRODUCT_IPHONE,
IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index 92e4fd2..f160583 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -125,7 +125,7 @@ static int kaweth_resume(struct usb_interface *intf);
/****************************************************************
* usb_device_id
****************************************************************/
-static struct usb_device_id usb_klsi_table[] = {
+static const struct usb_device_id usb_klsi_table[] = {
{ USB_DEVICE(0x03e8, 0x0008) }, /* AOX Endpoints USB Ethernet */
{ USB_DEVICE(0x04bb, 0x0901) }, /* I-O DATA USB-ET/T */
{ USB_DEVICE(0x0506, 0x03e8) }, /* 3Com 3C19250 */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 6cfffef..ceb78e2 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -5303,7 +5303,7 @@ static void rtl8152_disconnect(struct usb_interface *intf)
.bInterfaceProtocol = USB_CDC_PROTO_NONE
/* table of devices that work with this driver */
-static struct usb_device_id rtl8152_table[] = {
+static const struct usb_device_id rtl8152_table[] = {
{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8050)},
{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)},
{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)},
diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index daaa88a..5f565bd 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -112,7 +112,7 @@
#undef EEPROM_WRITE
/* table of devices that work with this driver */
-static struct usb_device_id rtl8150_table[] = {
+static const struct usb_device_id rtl8150_table[] = {
{USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8150)},
{USB_DEVICE(VENDOR_ID_MELCO, PRODUCT_ID_LUAKTX)},
{USB_DEVICE(VENDOR_ID_MICRONET, PRODUCT_ID_SP128AR)},
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b06169e..511f833 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -57,6 +57,13 @@ DECLARE_EWMA(pkt_len, 0, 64)
#define VIRTNET_DRIVER_VERSION "1.0.0"
+static const unsigned long guest_offloads[] = {
+ VIRTIO_NET_F_GUEST_TSO4,
+ VIRTIO_NET_F_GUEST_TSO6,
+ VIRTIO_NET_F_GUEST_ECN,
+ VIRTIO_NET_F_GUEST_UFO
+};
+
struct virtnet_stats {
struct u64_stats_sync tx_syncp;
struct u64_stats_sync rx_syncp;
@@ -164,10 +171,13 @@ struct virtnet_info {
u8 ctrl_promisc;
u8 ctrl_allmulti;
u16 ctrl_vid;
+ u64 ctrl_offloads;
/* Ethtool settings */
u8 duplex;
u32 speed;
+
+ unsigned long guest_offloads;
};
struct padded_vnet_hdr {
@@ -270,6 +280,23 @@ static void skb_xmit_done(struct virtqueue *vq)
netif_wake_subqueue(vi->dev, vq2txq(vq));
}
+#define MRG_CTX_HEADER_SHIFT 22
+static void *mergeable_len_to_ctx(unsigned int truesize,
+ unsigned int headroom)
+{
+ return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
+}
+
+static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
+{
+ return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
+}
+
+static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
+{
+ return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
+}
+
/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
struct receive_queue *rq,
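
The packing in the three helpers above relies on truesize fitting below bit 22. A quick userspace demonstration of the round-trip (illustrative only):

#include <assert.h>

#define MRG_CTX_HEADER_SHIFT 22

int main(void)
{
	unsigned int truesize = 1536, headroom = 256;
	unsigned long ctx;

	ctx = ((unsigned long)headroom << MRG_CTX_HEADER_SHIFT) | truesize;

	/* Both fields survive intact as long as truesize < 2^22. */
	assert((ctx >> MRG_CTX_HEADER_SHIFT) == headroom);
	assert((ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1)) == truesize);
	return 0;
}
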
@@ -292,7 +319,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
hdr_len = vi->hdr_len;
if (vi->mergeable_rx_bufs)
- hdr_padded_len = sizeof *hdr;
+ hdr_padded_len = sizeof(*hdr);
else
hdr_padded_len = sizeof(struct padded_vnet_hdr);
@@ -390,19 +417,85 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
}
+/* We copy the packet for XDP in the following cases:
+ *
+ * 1) Packet is scattered across multiple rx buffers.
+ * 2) Headroom space is insufficient.
+ *
+ * This is inefficient but it's a temporary condition that we hit right
+ * after XDP is enabled and until the queue is refilled with large
+ * buffers with sufficient headroom - so it should affect at most
+ * queue-size packets. Afterwards, the conditions to enable XDP should
+ * preclude the underlying device from sending packets across multiple
+ * buffers (num_buf > 1), and we make sure buffers have enough headroom.
+ */
+static struct page *xdp_linearize_page(struct receive_queue *rq,
+ u16 *num_buf,
+ struct page *p,
+ int offset,
+ int page_off,
+ unsigned int *len)
+{
+ struct page *page = alloc_page(GFP_ATOMIC);
+
+ if (!page)
+ return NULL;
+
+ memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
+ page_off += *len;
+
+ while (--*num_buf) {
+ unsigned int buflen;
+ void *buf;
+ int off;
+
+ buf = virtqueue_get_buf(rq->vq, &buflen);
+ if (unlikely(!buf))
+ goto err_buf;
+
+ p = virt_to_head_page(buf);
+ off = buf - page_address(p);
+
+ /* Guard against a misconfigured or uncooperative backend that
+ * sends packets larger than the MTU.
+ */
+ if ((page_off + buflen) > PAGE_SIZE) {
+ put_page(p);
+ goto err_buf;
+ }
+
+ memcpy(page_address(page) + page_off,
+ page_address(p) + off, buflen);
+ page_off += buflen;
+ put_page(p);
+ }
+
+ /* Headroom does not contribute to packet length */
+ *len = page_off - VIRTIO_XDP_HEADROOM;
+ return page;
+err_buf:
+ __free_pages(page, 0);
+ return NULL;
+}
+
static struct sk_buff *receive_small(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
- void *buf, unsigned int len)
+ void *buf, void *ctx,
+ unsigned int len)
{
struct sk_buff *skb;
struct bpf_prog *xdp_prog;
- unsigned int xdp_headroom = virtnet_get_headroom(vi);
+ unsigned int xdp_headroom = (unsigned long)ctx;
unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
unsigned int headroom = vi->hdr_len + header_offset;
unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ struct page *page = virt_to_head_page(buf);
unsigned int delta = 0;
+ struct page *xdp_page;
len -= vi->hdr_len;
rcu_read_lock();
@@ -416,6 +509,27 @@ static struct sk_buff *receive_small(struct net_device *dev,
if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
goto err_xdp;
+ if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
+ int offset = buf - page_address(page) + header_offset;
+ unsigned int tlen = len + vi->hdr_len;
+ u16 num_buf = 1;
+
+ xdp_headroom = virtnet_get_headroom(vi);
+ header_offset = VIRTNET_RX_PAD + xdp_headroom;
+ headroom = vi->hdr_len + header_offset;
+ buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ xdp_page = xdp_linearize_page(rq, &num_buf, page,
+ offset, header_offset,
+ &tlen);
+ if (!xdp_page)
+ goto err_xdp;
+
+ buf = page_address(xdp_page);
+ put_page(page);
+ page = xdp_page;
+ }
+
xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
xdp.data = xdp.data_hard_start + xdp_headroom;
xdp.data_end = xdp.data + len;
@@ -444,7 +558,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
skb = build_skb(buf, buflen);
if (!skb) {
- put_page(virt_to_head_page(buf));
+ put_page(page);
goto err;
}
skb_reserve(skb, headroom - delta);
@@ -460,7 +574,7 @@ err:
err_xdp:
rcu_read_unlock();
dev->stats.rx_dropped++;
- put_page(virt_to_head_page(buf));
+ put_page(page);
xdp_xmit:
return NULL;
}
@@ -485,66 +599,6 @@ err:
return NULL;
}
-/* The conditions to enable XDP should preclude the underlying device from
- * sending packets across multiple buffers (num_buf > 1). However per spec
- * it does not appear to be illegal to do so but rather just against convention.
- * So in order to avoid making a system unresponsive the packets are pushed
- * into a page and the XDP program is run. This will be extremely slow and we
- * push a warning to the user to fix this as soon as possible. Fixing this may
- * require resolving the underlying hardware to determine why multiple buffers
- * are being received or simply loading the XDP program in the ingress stack
- * after the skb is built because there is no advantage to running it here
- * anymore.
- */
-static struct page *xdp_linearize_page(struct receive_queue *rq,
- u16 *num_buf,
- struct page *p,
- int offset,
- unsigned int *len)
-{
- struct page *page = alloc_page(GFP_ATOMIC);
- unsigned int page_off = VIRTIO_XDP_HEADROOM;
-
- if (!page)
- return NULL;
-
- memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
- page_off += *len;
-
- while (--*num_buf) {
- unsigned int buflen;
- void *buf;
- int off;
-
- buf = virtqueue_get_buf(rq->vq, &buflen);
- if (unlikely(!buf))
- goto err_buf;
-
- p = virt_to_head_page(buf);
- off = buf - page_address(p);
-
- /* guard against a misconfigured or uncooperative backend that
- * is sending packet larger than the MTU.
- */
- if ((page_off + buflen) > PAGE_SIZE) {
- put_page(p);
- goto err_buf;
- }
-
- memcpy(page_address(page) + page_off,
- page_address(p) + off, buflen);
- page_off += buflen;
- put_page(p);
- }
-
- /* Headroom does not contribute to packet length */
- *len = page_off - VIRTIO_XDP_HEADROOM;
- return page;
-err_buf:
- __free_pages(page, 0);
- return NULL;
-}
-
static struct sk_buff *receive_mergeable(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
@@ -559,6 +613,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
struct sk_buff *head_skb, *curr_skb;
struct bpf_prog *xdp_prog;
unsigned int truesize;
+ unsigned int headroom = mergeable_ctx_to_headroom(ctx);
head_skb = NULL;
@@ -571,10 +626,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
u32 act;
/* This happens when rx buffer size is underestimated */
- if (unlikely(num_buf > 1)) {
+ if (unlikely(num_buf > 1 ||
+ headroom < virtnet_get_headroom(vi))) {
/* linearize data for XDP */
xdp_page = xdp_linearize_page(rq, &num_buf,
- page, offset, &len);
+ page, offset,
+ VIRTIO_XDP_HEADROOM,
+ &len);
if (!xdp_page)
goto err_xdp;
offset = VIRTIO_XDP_HEADROOM;
@@ -639,13 +697,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
}
rcu_read_unlock();
- if (unlikely(len > (unsigned long)ctx)) {
+ truesize = mergeable_ctx_to_truesize(ctx);
+ if (unlikely(len > truesize)) {
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
dev->name, len, (unsigned long)ctx);
dev->stats.rx_length_errors++;
goto err_skb;
}
- truesize = (unsigned long)ctx;
+
head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
curr_skb = head_skb;
@@ -665,13 +724,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
}
page = virt_to_head_page(buf);
- if (unlikely(len > (unsigned long)ctx)) {
+
+ truesize = mergeable_ctx_to_truesize(ctx);
+ if (unlikely(len > truesize)) {
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
dev->name, len, (unsigned long)ctx);
dev->stats.rx_length_errors++;
goto err_skb;
}
- truesize = (unsigned long)ctx;
num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
@@ -754,7 +814,7 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
else if (vi->big_packets)
skb = receive_big(dev, vi, rq, buf, len);
else
- skb = receive_small(dev, vi, rq, buf, len);
+ skb = receive_small(dev, vi, rq, buf, ctx, len);
if (unlikely(!skb))
return 0;
@@ -787,12 +847,18 @@ frame_err:
return 0;
}
+/* Unlike mergeable buffers, all buffers here are allocated with the
+ * same size, apart from the headroom. For this reason we do not need
+ * mergeable_len_to_ctx - it is enough to store the headroom as the
+ * context, ignoring the truesize.
+ */
static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
gfp_t gfp)
{
struct page_frag *alloc_frag = &rq->alloc_frag;
char *buf;
unsigned int xdp_headroom = virtnet_get_headroom(vi);
+ void *ctx = (void *)(unsigned long)xdp_headroom;
int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
int err;
@@ -806,10 +872,9 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
alloc_frag->offset += len;
sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
vi->hdr_len + GOOD_PACKET_LEN);
- err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
+ err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
if (err < 0)
put_page(virt_to_head_page(buf));
-
return err;
}
@@ -902,7 +967,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
}
sg_init_one(rq->sg, buf, len);
- ctx = (void *)(unsigned long)len;
+ ctx = mergeable_len_to_ctx(len, headroom);
err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
if (err < 0)
put_page(virt_to_head_page(buf));
@@ -1014,7 +1079,7 @@ static int virtnet_receive(struct receive_queue *rq, int budget)
void *buf;
struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
- if (vi->mergeable_rx_bufs) {
+ if (!vi->big_packets || vi->mergeable_rx_bufs) {
void *ctx;
while (received < budget &&
@@ -1813,7 +1878,6 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
}
static int init_vqs(struct virtnet_info *vi);
-static void _remove_vq_common(struct virtnet_info *vi);
static int virtnet_restore_up(struct virtio_device *vdev)
{
@@ -1842,37 +1906,45 @@ static int virtnet_restore_up(struct virtio_device *vdev)
return err;
}
-static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp)
+static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
{
- struct virtio_device *dev = vi->vdev;
- int ret;
+ struct scatterlist sg;
+ vi->ctrl_offloads = cpu_to_virtio64(vi->vdev, offloads);
- virtio_config_disable(dev);
- dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
- virtnet_freeze_down(dev);
- _remove_vq_common(vi);
+ sg_init_one(&sg, &vi->ctrl_offloads, sizeof(vi->ctrl_offloads));
- virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
- virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+ if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
+ VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &sg)) {
+ dev_warn(&vi->dev->dev, "Failed to set guest offloads.\n");
+ return -EINVAL;
+ }
- ret = virtio_finalize_features(dev);
- if (ret)
- goto err;
+ return 0;
+}
- vi->xdp_queue_pairs = xdp_qp;
- ret = virtnet_restore_up(dev);
- if (ret)
- goto err;
- ret = _virtnet_set_queues(vi, curr_qp);
- if (ret)
- goto err;
+static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
+{
+ u64 offloads = 0;
- virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
- virtio_config_enable(dev);
- return 0;
-err:
- virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
- return ret;
+ if (!vi->guest_offloads)
+ return 0;
+
+ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
+ offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM;
+
+ return virtnet_set_guest_offloads(vi, offloads);
+}
+
+static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
+{
+ u64 offloads = vi->guest_offloads;
+
+ if (!vi->guest_offloads)
+ return 0;
+ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
+ offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;
+
+ return virtnet_set_guest_offloads(vi, offloads);
}
static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
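
The mask handed to virtnet_set_guest_offloads() is the feature-bit set expressed as a u64, built from the guest_offloads[] table near the top of the file. A sketch of how such a mask is assembled, mirroring the logic this patch adds to virtnet_probe() (the function name is illustrative):

static u64 example_offload_mask(struct virtnet_info *vi)
{
	u64 offloads = 0;
	int i;

	for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
		if (virtio_has_feature(vi->vdev, guest_offloads[i]))
			offloads |= 1ULL << guest_offloads[i];
	return offloads;	/* pass to virtnet_set_guest_offloads() */
}
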
@@ -1884,10 +1956,11 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
u16 xdp_qp = 0, curr_qp;
int i, err;
- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
- virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
- virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
- virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) {
+ if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
+ && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) {
NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
return -EOPNOTSUPP;
}
@@ -1921,35 +1994,35 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
return PTR_ERR(prog);
}
- /* Changing the headroom in buffers is a disruptive operation because
- * existing buffers must be flushed and reallocated. This will happen
- * when a xdp program is initially added or xdp is disabled by removing
- * the xdp program resulting in number of XDP queues changing.
- */
- if (vi->xdp_queue_pairs != xdp_qp) {
- err = virtnet_reset(vi, curr_qp + xdp_qp, xdp_qp);
- if (err) {
- dev_warn(&dev->dev, "XDP reset failure.\n");
- goto virtio_reset_err;
- }
- }
+ /* Make sure NAPI is not using any XDP TX queues for RX. */
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ napi_disable(&vi->rq[i].napi);
netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+ err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
+ if (err)
+ goto err;
+ vi->xdp_queue_pairs = xdp_qp;
for (i = 0; i < vi->max_queue_pairs; i++) {
old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+ if (i == 0) {
+ if (!old_prog)
+ virtnet_clear_guest_offloads(vi);
+ if (!prog)
+ virtnet_restore_guest_offloads(vi);
+ }
if (old_prog)
bpf_prog_put(old_prog);
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
}
return 0;
-virtio_reset_err:
- /* On reset error do our best to unwind XDP changes inflight and return
- * error up to user space for resolution. The underlying reset hung on
- * us so not much we can do here.
- */
+err:
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
if (prog)
bpf_prog_sub(prog, vi->max_queue_pairs - 1);
return err;
@@ -2182,7 +2255,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
if (!names)
goto err_names;
- if (vi->mergeable_rx_bufs) {
+ if (!vi->big_packets || vi->mergeable_rx_bufs) {
ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
if (!ctx)
goto err_ctx;
@@ -2303,7 +2376,7 @@ err:
#ifdef CONFIG_SYSFS
static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
- struct rx_queue_attribute *attribute, char *buf)
+ char *buf)
{
struct virtnet_info *vi = netdev_priv(queue->dev);
unsigned int queue_index = get_netdev_rx_queue_index(queue);
@@ -2428,7 +2501,7 @@ static int virtnet_probe(struct virtio_device *vdev)
dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
- dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO
+ dev->hw_features |= NETIF_F_TSO
| NETIF_F_TSO_ECN | NETIF_F_TSO6;
}
/* Individual feature bits: what can host handle? */
@@ -2438,13 +2511,11 @@ static int virtnet_probe(struct virtio_device *vdev)
dev->hw_features |= NETIF_F_TSO6;
if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
dev->hw_features |= NETIF_F_TSO_ECN;
- if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
- dev->hw_features |= NETIF_F_UFO;
dev->features |= NETIF_F_GSO_ROBUST;
if (gso)
- dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO);
+ dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
/* (!csum && gso) case will be fixed by register_netdev() */
}
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
@@ -2577,6 +2648,10 @@ static int virtnet_probe(struct virtio_device *vdev)
netif_carrier_on(dev);
}
+ for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
+ if (virtio_has_feature(vi->vdev, guest_offloads[i]))
+ set_bit(guest_offloads[i], &vi->guest_offloads);
+
pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
dev->name, max_queue_pairs);
@@ -2597,15 +2672,6 @@ free:
return err;
}
-static void _remove_vq_common(struct virtnet_info *vi)
-{
- vi->vdev->config->reset(vi->vdev);
- free_unused_bufs(vi);
- _free_receive_bufs(vi);
- free_receive_page_frags(vi);
- virtnet_del_vqs(vi);
-}
-
static void remove_vq_common(struct virtnet_info *vi)
{
vi->vdev->config->reset(vi->vdev);
@@ -2637,8 +2703,7 @@ static void virtnet_remove(struct virtio_device *vdev)
free_netdev(vi->dev);
}
-#ifdef CONFIG_PM_SLEEP
-static int virtnet_freeze(struct virtio_device *vdev)
+static __maybe_unused int virtnet_freeze(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
@@ -2649,7 +2714,7 @@ static int virtnet_freeze(struct virtio_device *vdev)
return 0;
}
-static int virtnet_restore(struct virtio_device *vdev)
+static __maybe_unused int virtnet_restore(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
int err;
@@ -2665,7 +2730,6 @@ static int virtnet_restore(struct virtio_device *vdev)
return 0;
}
-#endif
static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
@@ -2682,7 +2746,7 @@ static struct virtio_device_id id_table[] = {
VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
VIRTIO_NET_F_CTRL_MAC_ADDR, \
- VIRTIO_NET_F_MTU
+ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
static unsigned int features[] = {
VIRTNET_FEATURES,
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 8a1eaf3..7e19051 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -47,9 +47,7 @@ static unsigned int vrf_net_id;
struct net_vrf {
struct rtable __rcu *rth;
- struct rtable __rcu *rth_local;
struct rt6_info __rcu *rt6;
- struct rt6_info __rcu *rt6_local;
u32 tb_id;
};
@@ -194,42 +192,10 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
/* if dst.dev is loopback or the VRF device again this is locally
* originated traffic destined to a local address. Short circuit
- * to Rx path using our local dst
+ * to Rx path
*/
- if (dst->dev == net->loopback_dev || dst->dev == dev) {
- struct net_vrf *vrf = netdev_priv(dev);
- struct rt6_info *rt6_local;
-
- /* release looked up dst and use cached local dst */
- dst_release(dst);
-
- rcu_read_lock();
-
- rt6_local = rcu_dereference(vrf->rt6_local);
- if (unlikely(!rt6_local)) {
- rcu_read_unlock();
- goto err;
- }
-
- /* Ordering issue: cached local dst is created on newlink
- * before the IPv6 initialization. Using the local dst
- * requires rt6i_idev to be set so make sure it is.
- */
- if (unlikely(!rt6_local->rt6i_idev)) {
- rt6_local->rt6i_idev = in6_dev_get(dev);
- if (!rt6_local->rt6i_idev) {
- rcu_read_unlock();
- goto err;
- }
- }
-
- dst = &rt6_local->dst;
- dst_hold(dst);
-
- rcu_read_unlock();
-
- return vrf_local_xmit(skb, dev, &rt6_local->dst);
- }
+ if (dst->dev == dev)
+ return vrf_local_xmit(skb, dev, dst);
skb_dst_set(skb, dst);
@@ -296,30 +262,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
/* if dst.dev is loopback or the VRF device again this is locally
* originated traffic destined to a local address. Short circuit
- * to Rx path using our local dst
+ * to Rx path
*/
- if (rt->dst.dev == net->loopback_dev || rt->dst.dev == vrf_dev) {
- struct net_vrf *vrf = netdev_priv(vrf_dev);
- struct rtable *rth_local;
- struct dst_entry *dst = NULL;
-
- ip_rt_put(rt);
-
- rcu_read_lock();
-
- rth_local = rcu_dereference(vrf->rth_local);
- if (likely(rth_local)) {
- dst = &rth_local->dst;
- dst_hold(dst);
- }
-
- rcu_read_unlock();
-
- if (unlikely(!dst))
- goto err;
-
- return vrf_local_xmit(skb, vrf_dev, dst);
- }
+ if (rt->dst.dev == vrf_dev)
+ return vrf_local_xmit(skb, vrf_dev, &rt->dst);
skb_dst_set(skb, &rt->dst);
@@ -528,12 +474,10 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
struct rt6_info *rt6 = rtnl_dereference(vrf->rt6);
- struct rt6_info *rt6_local = rtnl_dereference(vrf->rt6_local);
struct net *net = dev_net(dev);
struct dst_entry *dst;
RCU_INIT_POINTER(vrf->rt6, NULL);
- RCU_INIT_POINTER(vrf->rt6_local, NULL);
synchronize_rcu();
/* move dev in dst's to loopback so this VRF device can be deleted
@@ -546,19 +490,6 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
dev_hold(dst->dev);
dst_release(dst);
}
-
- if (rt6_local) {
- if (rt6_local->rt6i_idev) {
- in6_dev_put(rt6_local->rt6i_idev);
- rt6_local->rt6i_idev = NULL;
- }
-
- dst = &rt6_local->dst;
- dev_put(dst->dev);
- dst->dev = net->loopback_dev;
- dev_hold(dst->dev);
- dst_release(dst);
- }
}
static int vrf_rt6_create(struct net_device *dev)
@@ -567,7 +498,7 @@ static int vrf_rt6_create(struct net_device *dev)
struct net_vrf *vrf = netdev_priv(dev);
struct net *net = dev_net(dev);
struct fib6_table *rt6i_table;
- struct rt6_info *rt6, *rt6_local;
+ struct rt6_info *rt6;
int rc = -ENOMEM;
/* IPv6 can be CONFIG enabled and then disabled runtime */
@@ -586,22 +517,7 @@ static int vrf_rt6_create(struct net_device *dev)
rt6->rt6i_table = rt6i_table;
rt6->dst.output = vrf_output6;
- /* create a dst for local routing - packets sent locally
- * to local address via the VRF device as a loopback
- */
- rt6_local = ip6_dst_alloc(net, dev, flags);
- if (!rt6_local) {
- dst_release(&rt6->dst);
- goto out;
- }
-
- rt6_local->rt6i_idev = in6_dev_get(dev);
- rt6_local->rt6i_flags = RTF_UP | RTF_NONEXTHOP | RTF_LOCAL;
- rt6_local->rt6i_table = rt6i_table;
- rt6_local->dst.input = ip6_input;
-
rcu_assign_pointer(vrf->rt6, rt6);
- rcu_assign_pointer(vrf->rt6_local, rt6_local);
rc = 0;
out:
@@ -788,12 +704,10 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
{
struct rtable *rth = rtnl_dereference(vrf->rth);
- struct rtable *rth_local = rtnl_dereference(vrf->rth_local);
struct net *net = dev_net(dev);
struct dst_entry *dst;
RCU_INIT_POINTER(vrf->rth, NULL);
- RCU_INIT_POINTER(vrf->rth_local, NULL);
synchronize_rcu();
/* move dev in dst's to loopback so this VRF device can be deleted
@@ -806,20 +720,12 @@ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
dev_hold(dst->dev);
dst_release(dst);
}
-
- if (rth_local) {
- dst = &rth_local->dst;
- dev_put(dst->dev);
- dst->dev = net->loopback_dev;
- dev_hold(dst->dev);
- dst_release(dst);
- }
}
static int vrf_rtable_create(struct net_device *dev)
{
struct net_vrf *vrf = netdev_priv(dev);
- struct rtable *rth, *rth_local;
+ struct rtable *rth;
if (!fib_new_table(dev_net(dev), vrf->tb_id))
return -ENOMEM;
@@ -829,22 +735,10 @@ static int vrf_rtable_create(struct net_device *dev)
if (!rth)
return -ENOMEM;
- /* create a dst for local ingress routing - packets sent locally
- * to local address via the VRF device as a loopback
- */
- rth_local = rt_dst_alloc(dev, RTCF_LOCAL, RTN_LOCAL, 1, 1, 0);
- if (!rth_local) {
- dst_release(&rth->dst);
- return -ENOMEM;
- }
-
rth->dst.output = vrf_output;
rth->rt_table_id = vrf->tb_id;
- rth_local->rt_table_id = vrf->tb_id;
-
rcu_assign_pointer(vrf->rth, rth);
- rcu_assign_pointer(vrf->rth_local, rth_local);
return 0;
}
@@ -1371,10 +1265,14 @@ static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
if (tb[IFLA_ADDRESS]) {
- if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
+ NL_SET_ERR_MSG(extack, "Invalid hardware address");
return -EINVAL;
- if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+ }
+ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
+ NL_SET_ERR_MSG(extack, "Invalid hardware address");
return -EADDRNOTAVAIL;
+ }
}
return 0;
}
@@ -1399,12 +1297,17 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
struct net *net;
int err;
- if (!data || !data[IFLA_VRF_TABLE])
+ if (!data || !data[IFLA_VRF_TABLE]) {
+ NL_SET_ERR_MSG(extack, "VRF table id is missing");
return -EINVAL;
+ }
vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]);
- if (vrf->tb_id == RT_TABLE_UNSPEC)
+ if (vrf->tb_id == RT_TABLE_UNSPEC) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VRF_TABLE],
+ "Invalid VRF table id");
return -EINVAL;
+ }
dev->priv_flags |= IFF_L3MDEV_MASTER;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index e17baac..d7c49cf 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -26,6 +26,7 @@
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/tun_proto.h>
#include <net/vxlan.h>
#if IS_ENABLED(CONFIG_IPV6)
@@ -1261,19 +1262,9 @@ static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
if (gpe->oam_flag)
return false;
- switch (gpe->next_protocol) {
- case VXLAN_GPE_NP_IPV4:
- *protocol = htons(ETH_P_IP);
- break;
- case VXLAN_GPE_NP_IPV6:
- *protocol = htons(ETH_P_IPV6);
- break;
- case VXLAN_GPE_NP_ETHERNET:
- *protocol = htons(ETH_P_TEB);
- break;
- default:
+ *protocol = tun_p_to_eth_p(gpe->next_protocol);
+ if (!*protocol)
return false;
- }
unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
return true;
@@ -1799,19 +1790,10 @@ static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
gpe->np_applied = 1;
-
- switch (protocol) {
- case htons(ETH_P_IP):
- gpe->next_protocol = VXLAN_GPE_NP_IPV4;
- return 0;
- case htons(ETH_P_IPV6):
- gpe->next_protocol = VXLAN_GPE_NP_IPV6;
- return 0;
- case htons(ETH_P_TEB):
- gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
- return 0;
- }
- return -EPFNOSUPPORT;
+ gpe->next_protocol = tun_p_from_eth_p(protocol);
+ if (!gpe->next_protocol)
+ return -EPFNOSUPPORT;
+ return 0;
}
static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
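
tun_p_to_eth_p() and tun_p_from_eth_p() from the new <net/tun_proto.h> centralize the mapping that both switch statements used to open-code. Conceptually, the forward direction looks like this - a sketch of the mapping, not a verbatim copy of the header, assuming the NVO3 numbering 1/2/3 for IPv4/IPv6/Ethernet:

static inline __be16 example_tun_p_to_eth_p(u8 proto)
{
	switch (proto) {
	case 1:	/* IPv4 */
		return htons(ETH_P_IP);
	case 2:	/* IPv6 */
		return htons(ETH_P_IPV6);
	case 3:	/* Ethernet */
		return htons(ETH_P_TEB);
	}
	return 0;	/* unknown: callers treat this as unsupported */
}
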
@@ -2609,7 +2591,7 @@ static struct device_type vxlan_type = {
* supply the listening VXLAN udp ports. Callers are expected
* to implement the ndo_udp_tunnel_add.
*/
-static void vxlan_push_rx_ports(struct net_device *dev)
+static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
{
struct vxlan_sock *vs;
struct net *net = dev_net(dev);
@@ -2618,11 +2600,19 @@ static void vxlan_push_rx_ports(struct net_device *dev)
spin_lock(&vn->sock_lock);
for (i = 0; i < PORT_HASH_SIZE; ++i) {
- hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist)
- udp_tunnel_push_rx_port(dev, vs->sock,
- (vs->flags & VXLAN_F_GPE) ?
- UDP_TUNNEL_TYPE_VXLAN_GPE :
- UDP_TUNNEL_TYPE_VXLAN);
+ hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
+ unsigned short type;
+
+ if (vs->flags & VXLAN_F_GPE)
+ type = UDP_TUNNEL_TYPE_VXLAN_GPE;
+ else
+ type = UDP_TUNNEL_TYPE_VXLAN;
+
+ if (push)
+ udp_tunnel_push_rx_port(dev, vs->sock, type);
+ else
+ udp_tunnel_drop_rx_port(dev, vs->sock, type);
+ }
}
spin_unlock(&vn->sock_lock);
}
@@ -2721,12 +2711,14 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
{
if (tb[IFLA_ADDRESS]) {
if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
- pr_debug("invalid link address (not ethernet)\n");
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
+ "Provided link layer address is not Ethernet");
return -EINVAL;
}
if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
- pr_debug("invalid all zero ethernet address\n");
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
+ "Provided Ethernet address is not unicast");
return -EADDRNOTAVAIL;
}
}
@@ -2734,18 +2726,27 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
if (tb[IFLA_MTU]) {
u32 mtu = nla_get_u32(tb[IFLA_MTU]);
- if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU)
+ if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
+ "MTU must be between 68 and 65535");
return -EINVAL;
+ }
}
- if (!data)
+ if (!data) {
+ NL_SET_ERR_MSG(extack,
+ "Required attributes not provided to perform the operation");
return -EINVAL;
+ }
if (data[IFLA_VXLAN_ID]) {
u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
- if (id >= VXLAN_N_VID)
+ if (id >= VXLAN_N_VID) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_ID],
+ "VXLAN ID must be lower than 16777216");
return -ERANGE;
+ }
}
if (data[IFLA_VXLAN_PORT_RANGE]) {
@@ -2753,8 +2754,8 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
= nla_data(data[IFLA_VXLAN_PORT_RANGE]);
if (ntohs(p->high) < ntohs(p->low)) {
- pr_debug("port range %u .. %u not valid\n",
- ntohs(p->low), ntohs(p->high));
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_PORT_RANGE],
+ "Invalid source port range");
return -EINVAL;
}
}
@@ -2911,7 +2912,8 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan)
static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
struct net_device **lower,
- struct vxlan_dev *old)
+ struct vxlan_dev *old,
+ struct netlink_ext_ack *extack)
{
struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
struct vxlan_dev *tmp;
@@ -2925,6 +2927,8 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
*/
if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
!(conf->flags & VXLAN_F_COLLECT_METADATA)) {
+ NL_SET_ERR_MSG(extack,
+ "VXLAN GPE does not support this combination of attributes");
return -EINVAL;
}
}
@@ -2939,15 +2943,23 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
conf->saddr.sa.sa_family = conf->remote_ip.sa.sa_family;
}
- if (conf->saddr.sa.sa_family != conf->remote_ip.sa.sa_family)
+ if (conf->saddr.sa.sa_family != conf->remote_ip.sa.sa_family) {
+ NL_SET_ERR_MSG(extack,
+ "Local and remote address must be from the same family");
return -EINVAL;
+ }
- if (vxlan_addr_multicast(&conf->saddr))
+ if (vxlan_addr_multicast(&conf->saddr)) {
+ NL_SET_ERR_MSG(extack, "Local address cannot be multicast");
return -EINVAL;
+ }
if (conf->saddr.sa.sa_family == AF_INET6) {
- if (!IS_ENABLED(CONFIG_IPV6))
+ if (!IS_ENABLED(CONFIG_IPV6)) {
+ NL_SET_ERR_MSG(extack,
+ "IPv6 support not enabled in the kernel");
return -EPFNOSUPPORT;
+ }
use_ipv6 = true;
conf->flags |= VXLAN_F_IPV6;
@@ -2959,46 +2971,68 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
if (local_type & IPV6_ADDR_LINKLOCAL) {
if (!(remote_type & IPV6_ADDR_LINKLOCAL) &&
- (remote_type != IPV6_ADDR_ANY))
+ (remote_type != IPV6_ADDR_ANY)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid combination of local and remote address scopes");
return -EINVAL;
+ }
conf->flags |= VXLAN_F_IPV6_LINKLOCAL;
} else {
if (remote_type ==
- (IPV6_ADDR_UNICAST | IPV6_ADDR_LINKLOCAL))
+ (IPV6_ADDR_UNICAST | IPV6_ADDR_LINKLOCAL)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid combination of local and remote address scopes");
return -EINVAL;
+ }
conf->flags &= ~VXLAN_F_IPV6_LINKLOCAL;
}
}
}
- if (conf->label && !use_ipv6)
+ if (conf->label && !use_ipv6) {
+ NL_SET_ERR_MSG(extack,
+ "Label attribute only applies to IPv6 VXLAN devices");
return -EINVAL;
+ }
if (conf->remote_ifindex) {
struct net_device *lowerdev;
lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
- if (!lowerdev)
+ if (!lowerdev) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid local interface, device not found");
return -ENODEV;
+ }
#if IS_ENABLED(CONFIG_IPV6)
if (use_ipv6) {
struct inet6_dev *idev = __in6_dev_get(lowerdev);
- if (idev && idev->cnf.disable_ipv6)
+ if (idev && idev->cnf.disable_ipv6) {
+ NL_SET_ERR_MSG(extack,
+ "IPv6 support disabled by administrator");
return -EPERM;
+ }
}
#endif
*lower = lowerdev;
} else {
- if (vxlan_addr_multicast(&conf->remote_ip))
+ if (vxlan_addr_multicast(&conf->remote_ip)) {
+ NL_SET_ERR_MSG(extack,
+ "Local interface required for multicast remote destination");
+
return -EINVAL;
+ }
#if IS_ENABLED(CONFIG_IPV6)
- if (conf->flags & VXLAN_F_IPV6_LINKLOCAL)
+ if (conf->flags & VXLAN_F_IPV6_LINKLOCAL) {
+ NL_SET_ERR_MSG(extack,
+ "Local interface required for link-local local/remote addresses");
return -EINVAL;
+ }
#endif
*lower = NULL;
@@ -3030,6 +3064,8 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
tmp->cfg.remote_ifindex != conf->remote_ifindex)
continue;
+ NL_SET_ERR_MSG(extack,
+ "A VXLAN device with the specified VNI already exists");
return -EEXIST;
}
@@ -3089,14 +3125,14 @@ static void vxlan_config_apply(struct net_device *dev,
}
static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
- struct vxlan_config *conf,
- bool changelink)
+ struct vxlan_config *conf, bool changelink,
+ struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct net_device *lowerdev;
int ret;
- ret = vxlan_config_validate(src_net, conf, &lowerdev, vxlan);
+ ret = vxlan_config_validate(src_net, conf, &lowerdev, vxlan, extack);
if (ret)
return ret;
@@ -3106,13 +3142,14 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
}
static int __vxlan_dev_create(struct net *net, struct net_device *dev,
- struct vxlan_config *conf)
+ struct vxlan_config *conf,
+ struct netlink_ext_ack *extack)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
int err;
- err = vxlan_dev_configure(net, dev, conf, false);
+ err = vxlan_dev_configure(net, dev, conf, false, extack);
if (err)
return err;
@@ -3358,7 +3395,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
if (err)
return err;
- return __vxlan_dev_create(src_net, dev, &conf);
+ return __vxlan_dev_create(src_net, dev, &conf, extack);
}
static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
@@ -3378,7 +3415,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
memcpy(&old_dst, dst, sizeof(struct vxlan_rdst));
- err = vxlan_dev_configure(vxlan->net, dev, &conf, true);
+ err = vxlan_dev_configure(vxlan->net, dev, &conf, true, extack);
if (err)
return err;
@@ -3584,7 +3621,7 @@ struct net_device *vxlan_dev_create(struct net *net, const char *name,
if (IS_ERR(dev))
return dev;
- err = __vxlan_dev_create(net, dev, conf);
+ err = __vxlan_dev_create(net, dev, conf, NULL);
if (err < 0) {
free_netdev(dev);
return ERR_PTR(err);
@@ -3631,10 +3668,15 @@ static int vxlan_netdevice_event(struct notifier_block *unused,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
- if (event == NETDEV_UNREGISTER)
+ if (event == NETDEV_UNREGISTER) {
+ vxlan_offload_rx_ports(dev, false);
vxlan_handle_lowerdev_unregister(vn, dev);
- else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
- vxlan_push_rx_ports(dev);
+ } else if (event == NETDEV_REGISTER) {
+ vxlan_offload_rx_ports(dev, true);
+ } else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
+ event == NETDEV_UDP_TUNNEL_DROP_INFO) {
+ vxlan_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
+ }
return NOTIFY_DONE;
}
diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c
index 799830f..a043fb1 100644
--- a/drivers/net/wan/dscc4.c
+++ b/drivers/net/wan/dscc4.c
@@ -483,20 +483,20 @@ static void dscc4_tx_print(struct net_device *dev,
static void dscc4_release_ring(struct dscc4_dev_priv *dpriv)
{
- struct pci_dev *pdev = dpriv->pci_priv->pdev;
+ struct device *d = &dpriv->pci_priv->pdev->dev;
struct TxFD *tx_fd = dpriv->tx_fd;
struct RxFD *rx_fd = dpriv->rx_fd;
struct sk_buff **skbuff;
int i;
- pci_free_consistent(pdev, TX_TOTAL_SIZE, tx_fd, dpriv->tx_fd_dma);
- pci_free_consistent(pdev, RX_TOTAL_SIZE, rx_fd, dpriv->rx_fd_dma);
+ dma_free_coherent(d, TX_TOTAL_SIZE, tx_fd, dpriv->tx_fd_dma);
+ dma_free_coherent(d, RX_TOTAL_SIZE, rx_fd, dpriv->rx_fd_dma);
skbuff = dpriv->tx_skbuff;
for (i = 0; i < TX_RING_SIZE; i++) {
if (*skbuff) {
- pci_unmap_single(pdev, le32_to_cpu(tx_fd->data),
- (*skbuff)->len, PCI_DMA_TODEVICE);
+ dma_unmap_single(d, le32_to_cpu(tx_fd->data),
+ (*skbuff)->len, DMA_TO_DEVICE);
dev_kfree_skb(*skbuff);
}
skbuff++;
@@ -506,8 +506,9 @@ static void dscc4_release_ring(struct dscc4_dev_priv *dpriv)
skbuff = dpriv->rx_skbuff;
for (i = 0; i < RX_RING_SIZE; i++) {
if (*skbuff) {
- pci_unmap_single(pdev, le32_to_cpu(rx_fd->data),
- RX_MAX(HDLC_MAX_MRU), PCI_DMA_FROMDEVICE);
+ dma_unmap_single(d, le32_to_cpu(rx_fd->data),
+ RX_MAX(HDLC_MAX_MRU),
+ DMA_FROM_DEVICE);
dev_kfree_skb(*skbuff);
}
skbuff++;
@@ -519,22 +520,30 @@ static inline int try_get_rx_skb(struct dscc4_dev_priv *dpriv,
struct net_device *dev)
{
unsigned int dirty = dpriv->rx_dirty%RX_RING_SIZE;
+ struct device *d = &dpriv->pci_priv->pdev->dev;
struct RxFD *rx_fd = dpriv->rx_fd + dirty;
const int len = RX_MAX(HDLC_MAX_MRU);
struct sk_buff *skb;
- int ret = 0;
+ dma_addr_t addr;
skb = dev_alloc_skb(len);
+ if (!skb)
+ goto err_out;
+
+ skb->protocol = hdlc_type_trans(skb, dev);
+ addr = dma_map_single(d, skb->data, len, DMA_FROM_DEVICE);
+ if (dma_mapping_error(d, addr))
+ goto err_free_skb;
+
dpriv->rx_skbuff[dirty] = skb;
- if (skb) {
- skb->protocol = hdlc_type_trans(skb, dev);
- rx_fd->data = cpu_to_le32(pci_map_single(dpriv->pci_priv->pdev,
- skb->data, len, PCI_DMA_FROMDEVICE));
- } else {
- rx_fd->data = 0;
- ret = -1;
- }
- return ret;
+ rx_fd->data = cpu_to_le32(addr);
+ return 0;
+
+err_free_skb:
+ dev_kfree_skb_any(skb);
+err_out:
+ rx_fd->data = 0;
+ return -1;
}
/*
@@ -646,7 +655,7 @@ static inline void dscc4_rx_skb(struct dscc4_dev_priv *dpriv,
struct net_device *dev)
{
struct RxFD *rx_fd = dpriv->rx_fd + dpriv->rx_current%RX_RING_SIZE;
- struct pci_dev *pdev = dpriv->pci_priv->pdev;
+ struct device *d = &dpriv->pci_priv->pdev->dev;
struct sk_buff *skb;
int pkt_len;
@@ -656,8 +665,8 @@ static inline void dscc4_rx_skb(struct dscc4_dev_priv *dpriv,
goto refill;
}
pkt_len = TO_SIZE(le32_to_cpu(rx_fd->state2));
- pci_unmap_single(pdev, le32_to_cpu(rx_fd->data),
- RX_MAX(HDLC_MAX_MRU), PCI_DMA_FROMDEVICE);
+ dma_unmap_single(d, le32_to_cpu(rx_fd->data),
+ RX_MAX(HDLC_MAX_MRU), DMA_FROM_DEVICE);
if ((skb->data[--pkt_len] & FrameOk) == FrameOk) {
dev->stats.rx_packets++;
dev->stats.rx_bytes += pkt_len;
@@ -774,8 +783,8 @@ static int dscc4_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
rc = -ENOMEM;
- priv->iqcfg = (__le32 *) pci_alloc_consistent(pdev,
- IRQ_RING_SIZE*sizeof(__le32), &priv->iqcfg_dma);
+ priv->iqcfg = (__le32 *)dma_alloc_coherent(&pdev->dev,
+ IRQ_RING_SIZE*sizeof(__le32), &priv->iqcfg_dma, GFP_KERNEL);
if (!priv->iqcfg)
goto err_free_irq_5;
writel(priv->iqcfg_dma, ioaddr + IQCFG);
@@ -786,16 +795,18 @@ static int dscc4_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
for (i = 0; i < dev_per_card; i++) {
dpriv = priv->root + i;
- dpriv->iqtx = (__le32 *) pci_alloc_consistent(pdev,
- IRQ_RING_SIZE*sizeof(u32), &dpriv->iqtx_dma);
+ dpriv->iqtx = (__le32 *)dma_alloc_coherent(&pdev->dev,
+ IRQ_RING_SIZE*sizeof(u32), &dpriv->iqtx_dma,
+ GFP_KERNEL);
if (!dpriv->iqtx)
goto err_free_iqtx_6;
writel(dpriv->iqtx_dma, ioaddr + IQTX0 + i*4);
}
for (i = 0; i < dev_per_card; i++) {
dpriv = priv->root + i;
- dpriv->iqrx = (__le32 *) pci_alloc_consistent(pdev,
- IRQ_RING_SIZE*sizeof(u32), &dpriv->iqrx_dma);
+ dpriv->iqrx = (__le32 *)dma_alloc_coherent(&pdev->dev,
+ IRQ_RING_SIZE*sizeof(u32), &dpriv->iqrx_dma,
+ GFP_KERNEL);
if (!dpriv->iqrx)
goto err_free_iqrx_7;
writel(dpriv->iqrx_dma, ioaddr + IQRX0 + i*4);
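
One behavioral nuance of this conversion: the old pci_alloc_consistent() wrapper always allocated with GFP_ATOMIC, whereas the dma_alloc_coherent() calls above pass GFP_KERNEL, which is appropriate in probe context and may sleep to satisfy the allocation. The probe-time DMA mask setup has a generic-API equivalent as well; a sketch, not part of this patch:

/* Generic-API replacement for pci_set_dma_mask(pdev, DMA_BIT_MASK(32)),
 * covering both streaming and coherent allocations.
 */
if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
	return -EIO;
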
@@ -819,18 +830,18 @@ out:
err_free_iqrx_7:
while (--i >= 0) {
dpriv = priv->root + i;
- pci_free_consistent(pdev, IRQ_RING_SIZE*sizeof(u32),
- dpriv->iqrx, dpriv->iqrx_dma);
+ dma_free_coherent(&pdev->dev, IRQ_RING_SIZE*sizeof(u32),
+ dpriv->iqrx, dpriv->iqrx_dma);
}
i = dev_per_card;
err_free_iqtx_6:
while (--i >= 0) {
dpriv = priv->root + i;
- pci_free_consistent(pdev, IRQ_RING_SIZE*sizeof(u32),
- dpriv->iqtx, dpriv->iqtx_dma);
+ dma_free_coherent(&pdev->dev, IRQ_RING_SIZE*sizeof(u32),
+ dpriv->iqtx, dpriv->iqtx_dma);
}
- pci_free_consistent(pdev, IRQ_RING_SIZE*sizeof(u32), priv->iqcfg,
- priv->iqcfg_dma);
+ dma_free_coherent(&pdev->dev, IRQ_RING_SIZE*sizeof(u32), priv->iqcfg,
+ priv->iqcfg_dma);
err_free_irq_5:
free_irq(pdev->irq, priv->root);
err_release_4:
@@ -1145,16 +1156,23 @@ static netdev_tx_t dscc4_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct dscc4_dev_priv *dpriv = dscc4_priv(dev);
- struct dscc4_pci_priv *ppriv = dpriv->pci_priv;
+ struct device *d = &dpriv->pci_priv->pdev->dev;
struct TxFD *tx_fd;
+ dma_addr_t addr;
int next;
+ addr = dma_map_single(d, skb->data, skb->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(d, addr)) {
+ dev_kfree_skb_any(skb);
+ dev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
+
next = dpriv->tx_current%TX_RING_SIZE;
dpriv->tx_skbuff[next] = skb;
tx_fd = dpriv->tx_fd + next;
tx_fd->state = FrameEnd | TO_STATE_TX(skb->len);
- tx_fd->data = cpu_to_le32(pci_map_single(ppriv->pdev, skb->data, skb->len,
- PCI_DMA_TODEVICE));
+ tx_fd->data = cpu_to_le32(addr);
tx_fd->complete = 0x00000000;
tx_fd->jiffies = jiffies;
mb();
@@ -1572,8 +1590,9 @@ try:
tx_fd = dpriv->tx_fd + cur;
skb = dpriv->tx_skbuff[cur];
if (skb) {
- pci_unmap_single(ppriv->pdev, le32_to_cpu(tx_fd->data),
- skb->len, PCI_DMA_TODEVICE);
+ dma_unmap_single(&ppriv->pdev->dev,
+ le32_to_cpu(tx_fd->data),
+ skb->len, DMA_TO_DEVICE);
if (tx_fd->state & FrameEnd) {
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
@@ -1887,16 +1906,22 @@ static struct sk_buff *dscc4_init_dummy_skb(struct dscc4_dev_priv *dpriv)
skb = dev_alloc_skb(DUMMY_SKB_SIZE);
if (skb) {
+ struct device *d = &dpriv->pci_priv->pdev->dev;
int last = dpriv->tx_dirty%TX_RING_SIZE;
struct TxFD *tx_fd = dpriv->tx_fd + last;
+ dma_addr_t addr;
skb->len = DUMMY_SKB_SIZE;
skb_copy_to_linear_data(skb, version,
strlen(version) % DUMMY_SKB_SIZE);
+ addr = dma_map_single(d, skb->data, DUMMY_SKB_SIZE,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(d, addr)) {
+ dev_kfree_skb_any(skb);
+ return NULL;
+ }
tx_fd->state = FrameEnd | TO_STATE_TX(DUMMY_SKB_SIZE);
- tx_fd->data = cpu_to_le32(pci_map_single(dpriv->pci_priv->pdev,
- skb->data, DUMMY_SKB_SIZE,
- PCI_DMA_TODEVICE));
+ tx_fd->data = cpu_to_le32(addr);
dpriv->tx_skbuff[last] = skb;
}
return skb;
@@ -1905,18 +1930,20 @@ static struct sk_buff *dscc4_init_dummy_skb(struct dscc4_dev_priv *dpriv)
static int dscc4_init_ring(struct net_device *dev)
{
struct dscc4_dev_priv *dpriv = dscc4_priv(dev);
- struct pci_dev *pdev = dpriv->pci_priv->pdev;
+ struct device *d = &dpriv->pci_priv->pdev->dev;
struct TxFD *tx_fd;
struct RxFD *rx_fd;
void *ring;
int i;
- ring = pci_alloc_consistent(pdev, RX_TOTAL_SIZE, &dpriv->rx_fd_dma);
+ ring = dma_alloc_coherent(d, RX_TOTAL_SIZE, &dpriv->rx_fd_dma,
+ GFP_KERNEL);
if (!ring)
goto err_out;
dpriv->rx_fd = rx_fd = (struct RxFD *) ring;
- ring = pci_alloc_consistent(pdev, TX_TOTAL_SIZE, &dpriv->tx_fd_dma);
+ ring = dma_alloc_coherent(d, TX_TOTAL_SIZE, &dpriv->tx_fd_dma,
+ GFP_KERNEL);
if (!ring)
goto err_free_dma_rx;
dpriv->tx_fd = tx_fd = (struct TxFD *) ring;
@@ -1954,9 +1981,9 @@ static int dscc4_init_ring(struct net_device *dev)
return 0;
err_free_dma_tx:
- pci_free_consistent(pdev, TX_TOTAL_SIZE, ring, dpriv->tx_fd_dma);
+ dma_free_coherent(d, TX_TOTAL_SIZE, ring, dpriv->tx_fd_dma);
err_free_dma_rx:
- pci_free_consistent(pdev, RX_TOTAL_SIZE, rx_fd, dpriv->rx_fd_dma);
+ dma_free_coherent(d, RX_TOTAL_SIZE, rx_fd, dpriv->rx_fd_dma);
err_out:
return -ENOMEM;
}
@@ -1976,16 +2003,16 @@ static void dscc4_remove_one(struct pci_dev *pdev)
dscc4_pci_reset(pdev, ioaddr);
free_irq(pdev->irq, root);
- pci_free_consistent(pdev, IRQ_RING_SIZE*sizeof(u32), ppriv->iqcfg,
- ppriv->iqcfg_dma);
+ dma_free_coherent(&pdev->dev, IRQ_RING_SIZE*sizeof(u32), ppriv->iqcfg,
+ ppriv->iqcfg_dma);
for (i = 0; i < dev_per_card; i++) {
struct dscc4_dev_priv *dpriv = root + i;
dscc4_release_ring(dpriv);
- pci_free_consistent(pdev, IRQ_RING_SIZE*sizeof(u32),
- dpriv->iqrx, dpriv->iqrx_dma);
- pci_free_consistent(pdev, IRQ_RING_SIZE*sizeof(u32),
- dpriv->iqtx, dpriv->iqtx_dma);
+ dma_free_coherent(&pdev->dev, IRQ_RING_SIZE*sizeof(u32),
+ dpriv->iqrx, dpriv->iqrx_dma);
+ dma_free_coherent(&pdev->dev, IRQ_RING_SIZE*sizeof(u32),
+ dpriv->iqtx, dpriv->iqtx_dma);
}
dscc4_free1(pdev);
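
For reference, a minimal sketch of the conversion pattern applied in the dscc4 hunks above: the legacy pci_map_single()/pci_alloc_consistent() wrappers are replaced by the generic DMA API on &pdev->dev, and, unlike the legacy helpers, every streaming mapping must now be checked with dma_mapping_error() before the address reaches hardware. Driver and field names (foo_priv, foo_start_xmit) are hypothetical.

#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/skbuff.h>

struct foo_priv {			/* hypothetical driver private data */
	struct pci_dev *pdev;
};

static netdev_tx_t foo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct foo_priv *priv = netdev_priv(dev);
	struct device *d = &priv->pdev->dev;	/* generic device, as above */
	dma_addr_t addr;

	/* dma_map_single() replaces pci_map_single(); its result must be
	 * validated with dma_mapping_error() before use.
	 */
	addr = dma_map_single(d, skb->data, skb->len, DMA_TO_DEVICE);
	if (dma_mapping_error(d, addr)) {
		dev_kfree_skb_any(skb);
		dev->stats.tx_dropped++;
		return NETDEV_TX_OK;	/* skb consumed: drop, don't requeue */
	}

	/* ... queue a descriptor carrying 'addr', and later release it with
	 * dma_unmap_single(d, addr, skb->len, DMA_TO_DEVICE);
	 */
	return NETDEV_TX_OK;
}
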
diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c
index 106d6f8..68f0463 100644
--- a/drivers/net/wireless/ath/ar5523/ar5523.c
+++ b/drivers/net/wireless/ath/ar5523/ar5523.c
@@ -1749,7 +1749,7 @@ static void ar5523_disconnect(struct usb_interface *intf)
{ USB_DEVICE((vendor), (device) + 1), \
.driver_info = AR5523_FLAG_ABG|AR5523_FLAG_PRE_FIRMWARE }
-static struct usb_device_id ar5523_id_table[] = {
+static const struct usb_device_id ar5523_id_table[] = {
AR5523_DEVICE_UG(0x168c, 0x0001), /* Atheros / AR5523 */
AR5523_DEVICE_UG(0x0cf3, 0x0001), /* Atheros2 / AR5523_1 */
AR5523_DEVICE_UG(0x0cf3, 0x0003), /* Atheros2 / AR5523_2 */
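
The ar5523 hunk above only adds const to the device table; a sketch of why that is safe (table contents hypothetical): the USB core and MODULE_DEVICE_TABLE() only ever read such tables, so they can live in rodata.

#include <linux/module.h>
#include <linux/usb.h>

static const struct usb_device_id foo_id_table[] = {
	{ USB_DEVICE(0x168c, 0x0001) },	/* vendor/product for illustration */
	{ }				/* terminating entry */
};
MODULE_DEVICE_TABLE(usb, foo_id_table);
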
diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig
index 412eb13..87f56d0 100644
--- a/drivers/net/wireless/ath/ath10k/Kconfig
+++ b/drivers/net/wireless/ath/ath10k/Kconfig
@@ -29,6 +29,13 @@ config ATH10K_SDIO
This module adds experimental support for SDIO/MMC bus. Currently
work in progress and will not fully work.
+config ATH10K_USB
+ tristate "Atheros ath10k USB support (EXPERIMENTAL)"
+ depends on ATH10K && USB
+	  ---help---
+	  This module adds experimental support for the USB bus. It is
+	  currently a work in progress and will not fully work.
+
config ATH10K_DEBUG
bool "Atheros ath10k debugging"
depends on ATH10K
diff --git a/drivers/net/wireless/ath/ath10k/Makefile b/drivers/net/wireless/ath/ath10k/Makefile
index b0b19a7..899b9b7 100644
--- a/drivers/net/wireless/ath/ath10k/Makefile
+++ b/drivers/net/wireless/ath/ath10k/Makefile
@@ -30,5 +30,8 @@ ath10k_pci-$(CONFIG_ATH10K_AHB) += ahb.o
obj-$(CONFIG_ATH10K_SDIO) += ath10k_sdio.o
ath10k_sdio-y += sdio.o
+obj-$(CONFIG_ATH10K_USB) += ath10k_usb.o
+ath10k_usb-y += usb.o
+
# for tracing framework to find trace.h
CFLAGS_trace.o := -I$(src)
diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c
index da770af..ff6815e 100644
--- a/drivers/net/wireless/ath/ath10k/ahb.c
+++ b/drivers/net/wireless/ath/ath10k/ahb.c
@@ -197,35 +197,40 @@ static int ath10k_ahb_rst_ctrl_init(struct ath10k *ar)
dev = &ar_ahb->pdev->dev;
- ar_ahb->core_cold_rst = devm_reset_control_get(dev, "wifi_core_cold");
+ ar_ahb->core_cold_rst = devm_reset_control_get_exclusive(dev,
+ "wifi_core_cold");
if (IS_ERR(ar_ahb->core_cold_rst)) {
ath10k_err(ar, "failed to get core cold rst ctrl: %ld\n",
PTR_ERR(ar_ahb->core_cold_rst));
return PTR_ERR(ar_ahb->core_cold_rst);
}
- ar_ahb->radio_cold_rst = devm_reset_control_get(dev, "wifi_radio_cold");
+ ar_ahb->radio_cold_rst = devm_reset_control_get_exclusive(dev,
+ "wifi_radio_cold");
if (IS_ERR(ar_ahb->radio_cold_rst)) {
ath10k_err(ar, "failed to get radio cold rst ctrl: %ld\n",
PTR_ERR(ar_ahb->radio_cold_rst));
return PTR_ERR(ar_ahb->radio_cold_rst);
}
- ar_ahb->radio_warm_rst = devm_reset_control_get(dev, "wifi_radio_warm");
+ ar_ahb->radio_warm_rst = devm_reset_control_get_exclusive(dev,
+ "wifi_radio_warm");
if (IS_ERR(ar_ahb->radio_warm_rst)) {
ath10k_err(ar, "failed to get radio warm rst ctrl: %ld\n",
PTR_ERR(ar_ahb->radio_warm_rst));
return PTR_ERR(ar_ahb->radio_warm_rst);
}
- ar_ahb->radio_srif_rst = devm_reset_control_get(dev, "wifi_radio_srif");
+ ar_ahb->radio_srif_rst = devm_reset_control_get_exclusive(dev,
+ "wifi_radio_srif");
if (IS_ERR(ar_ahb->radio_srif_rst)) {
ath10k_err(ar, "failed to get radio srif rst ctrl: %ld\n",
PTR_ERR(ar_ahb->radio_srif_rst));
return PTR_ERR(ar_ahb->radio_srif_rst);
}
- ar_ahb->cpu_init_rst = devm_reset_control_get(dev, "wifi_cpu_init");
+ ar_ahb->cpu_init_rst = devm_reset_control_get_exclusive(dev,
+ "wifi_cpu_init");
if (IS_ERR(ar_ahb->cpu_init_rst)) {
ath10k_err(ar, "failed to get cpu init rst ctrl: %ld\n",
PTR_ERR(ar_ahb->cpu_init_rst));
@@ -787,8 +792,9 @@ static int ath10k_ahb_probe(struct platform_device *pdev)
ar_pci->mem = ar_ahb->mem;
ar_pci->mem_len = ar_ahb->mem_len;
ar_pci->ar = ar;
- ar_pci->bus_ops = &ath10k_ahb_bus_ops;
+ ar_pci->ce.bus_ops = &ath10k_ahb_bus_ops;
ar_pci->targ_cpu_to_ce_addr = ath10k_ahb_qca4019_targ_cpu_to_ce_addr;
+ ar->ce_priv = &ar_pci->ce;
ret = ath10k_pci_setup_resource(ar);
if (ret) {
diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c
index 08b84c8..a8afd69 100644
--- a/drivers/net/wireless/ath/ath10k/ce.c
+++ b/drivers/net/wireless/ath/ath10k/ce.c
@@ -16,7 +16,6 @@
*/
#include "hif.h"
-#include "pci.h"
#include "ce.h"
#include "debug.h"
@@ -33,7 +32,7 @@
* Each ring consists of a number of descriptors which specify
* an address, length, and meta-data.
*
- * Typically, one side of the PCIe interconnect (Host or Target)
+ * Typically, one side of the PCIe/AHB/SNOC interconnect (Host or Target)
* controls one ring and the other side controls the other ring.
* The source side chooses when to initiate a transfer and it
* chooses what to send (buffer address, length). The destination
@@ -73,57 +72,71 @@ ath10k_get_ring_byte(unsigned int offset,
return ((offset & addr_map->mask) >> (addr_map->lsb));
}
+static inline u32 ath10k_ce_read32(struct ath10k *ar, u32 offset)
+{
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
+
+ return ce->bus_ops->read32(ar, offset);
+}
+
+static inline void ath10k_ce_write32(struct ath10k *ar, u32 offset, u32 value)
+{
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
+
+ ce->bus_ops->write32(ar, offset, value);
+}
+
static inline void ath10k_ce_dest_ring_write_index_set(struct ath10k *ar,
u32 ce_ctrl_addr,
unsigned int n)
{
- ath10k_pci_write32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->dst_wr_index_addr, n);
+ ath10k_ce_write32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->dst_wr_index_addr, n);
}
static inline u32 ath10k_ce_dest_ring_write_index_get(struct ath10k *ar,
u32 ce_ctrl_addr)
{
- return ath10k_pci_read32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->dst_wr_index_addr);
+ return ath10k_ce_read32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->dst_wr_index_addr);
}
static inline void ath10k_ce_src_ring_write_index_set(struct ath10k *ar,
u32 ce_ctrl_addr,
unsigned int n)
{
- ath10k_pci_write32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->sr_wr_index_addr, n);
+ ath10k_ce_write32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->sr_wr_index_addr, n);
}
static inline u32 ath10k_ce_src_ring_write_index_get(struct ath10k *ar,
u32 ce_ctrl_addr)
{
- return ath10k_pci_read32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->sr_wr_index_addr);
+ return ath10k_ce_read32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->sr_wr_index_addr);
}
static inline u32 ath10k_ce_src_ring_read_index_get(struct ath10k *ar,
u32 ce_ctrl_addr)
{
- return ath10k_pci_read32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->current_srri_addr);
+ return ath10k_ce_read32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->current_srri_addr);
}
static inline void ath10k_ce_src_ring_base_addr_set(struct ath10k *ar,
u32 ce_ctrl_addr,
unsigned int addr)
{
- ath10k_pci_write32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->sr_base_addr, addr);
+ ath10k_ce_write32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->sr_base_addr, addr);
}
static inline void ath10k_ce_src_ring_size_set(struct ath10k *ar,
u32 ce_ctrl_addr,
unsigned int n)
{
- ath10k_pci_write32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->sr_size_addr, n);
+ ath10k_ce_write32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->sr_size_addr, n);
}
static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar,
@@ -131,12 +144,13 @@ static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar,
unsigned int n)
{
struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
- u32 ctrl1_addr = ath10k_pci_read32(ar,
- ce_ctrl_addr + ctrl_regs->addr);
- ath10k_pci_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
- (ctrl1_addr & ~(ctrl_regs->dmax->mask)) |
- ath10k_set_ring_byte(n, ctrl_regs->dmax));
+ u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
+ ctrl_regs->addr);
+
+ ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
+ (ctrl1_addr & ~(ctrl_regs->dmax->mask)) |
+ ath10k_set_ring_byte(n, ctrl_regs->dmax));
}
static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar,
@@ -144,11 +158,13 @@ static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar,
unsigned int n)
{
struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
- u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + ctrl_regs->addr);
- ath10k_pci_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
- (ctrl1_addr & ~(ctrl_regs->src_ring->mask)) |
- ath10k_set_ring_byte(n, ctrl_regs->src_ring));
+ u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
+ ctrl_regs->addr);
+
+ ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
+ (ctrl1_addr & ~(ctrl_regs->src_ring->mask)) |
+ ath10k_set_ring_byte(n, ctrl_regs->src_ring));
}
static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar,
@@ -156,34 +172,36 @@ static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar,
unsigned int n)
{
struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
- u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + ctrl_regs->addr);
- ath10k_pci_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
- (ctrl1_addr & ~(ctrl_regs->dst_ring->mask)) |
- ath10k_set_ring_byte(n, ctrl_regs->dst_ring));
+ u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
+ ctrl_regs->addr);
+
+ ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
+ (ctrl1_addr & ~(ctrl_regs->dst_ring->mask)) |
+ ath10k_set_ring_byte(n, ctrl_regs->dst_ring));
}
static inline u32 ath10k_ce_dest_ring_read_index_get(struct ath10k *ar,
u32 ce_ctrl_addr)
{
- return ath10k_pci_read32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->current_drri_addr);
+ return ath10k_ce_read32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->current_drri_addr);
}
static inline void ath10k_ce_dest_ring_base_addr_set(struct ath10k *ar,
u32 ce_ctrl_addr,
u32 addr)
{
- ath10k_pci_write32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->dr_base_addr, addr);
+ ath10k_ce_write32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->dr_base_addr, addr);
}
static inline void ath10k_ce_dest_ring_size_set(struct ath10k *ar,
u32 ce_ctrl_addr,
unsigned int n)
{
- ath10k_pci_write32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->dr_size_addr, n);
+ ath10k_ce_write32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->dr_size_addr, n);
}
static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar,
@@ -191,11 +209,11 @@ static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar,
unsigned int n)
{
struct ath10k_hw_ce_dst_src_wm_regs *srcr_wm = ar->hw_ce_regs->wm_srcr;
- u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + srcr_wm->addr);
+ u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + srcr_wm->addr);
- ath10k_pci_write32(ar, ce_ctrl_addr + srcr_wm->addr,
- (addr & ~(srcr_wm->wm_high->mask)) |
- (ath10k_set_ring_byte(n, srcr_wm->wm_high)));
+ ath10k_ce_write32(ar, ce_ctrl_addr + srcr_wm->addr,
+ (addr & ~(srcr_wm->wm_high->mask)) |
+ (ath10k_set_ring_byte(n, srcr_wm->wm_high)));
}
static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar,
@@ -203,11 +221,11 @@ static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar,
unsigned int n)
{
struct ath10k_hw_ce_dst_src_wm_regs *srcr_wm = ar->hw_ce_regs->wm_srcr;
- u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + srcr_wm->addr);
+ u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + srcr_wm->addr);
- ath10k_pci_write32(ar, ce_ctrl_addr + srcr_wm->addr,
- (addr & ~(srcr_wm->wm_low->mask)) |
- (ath10k_set_ring_byte(n, srcr_wm->wm_low)));
+ ath10k_ce_write32(ar, ce_ctrl_addr + srcr_wm->addr,
+ (addr & ~(srcr_wm->wm_low->mask)) |
+ (ath10k_set_ring_byte(n, srcr_wm->wm_low)));
}
static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar,
@@ -215,11 +233,11 @@ static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar,
unsigned int n)
{
struct ath10k_hw_ce_dst_src_wm_regs *dstr_wm = ar->hw_ce_regs->wm_dstr;
- u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + dstr_wm->addr);
+ u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + dstr_wm->addr);
- ath10k_pci_write32(ar, ce_ctrl_addr + dstr_wm->addr,
- (addr & ~(dstr_wm->wm_high->mask)) |
- (ath10k_set_ring_byte(n, dstr_wm->wm_high)));
+ ath10k_ce_write32(ar, ce_ctrl_addr + dstr_wm->addr,
+ (addr & ~(dstr_wm->wm_high->mask)) |
+ (ath10k_set_ring_byte(n, dstr_wm->wm_high)));
}
static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar,
@@ -227,66 +245,73 @@ static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar,
unsigned int n)
{
struct ath10k_hw_ce_dst_src_wm_regs *dstr_wm = ar->hw_ce_regs->wm_dstr;
- u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + dstr_wm->addr);
+ u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + dstr_wm->addr);
- ath10k_pci_write32(ar, ce_ctrl_addr + dstr_wm->addr,
- (addr & ~(dstr_wm->wm_low->mask)) |
- (ath10k_set_ring_byte(n, dstr_wm->wm_low)));
+ ath10k_ce_write32(ar, ce_ctrl_addr + dstr_wm->addr,
+ (addr & ~(dstr_wm->wm_low->mask)) |
+ (ath10k_set_ring_byte(n, dstr_wm->wm_low)));
}
static inline void ath10k_ce_copy_complete_inter_enable(struct ath10k *ar,
u32 ce_ctrl_addr)
{
struct ath10k_hw_ce_host_ie *host_ie = ar->hw_ce_regs->host_ie;
- u32 host_ie_addr = ath10k_pci_read32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->host_ie_addr);
- ath10k_pci_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
- host_ie_addr | host_ie->copy_complete->mask);
+ u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->host_ie_addr);
+
+ ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
+ host_ie_addr | host_ie->copy_complete->mask);
}
static inline void ath10k_ce_copy_complete_intr_disable(struct ath10k *ar,
u32 ce_ctrl_addr)
{
struct ath10k_hw_ce_host_ie *host_ie = ar->hw_ce_regs->host_ie;
- u32 host_ie_addr = ath10k_pci_read32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->host_ie_addr);
- ath10k_pci_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
- host_ie_addr & ~(host_ie->copy_complete->mask));
+ u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->host_ie_addr);
+
+ ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
+ host_ie_addr & ~(host_ie->copy_complete->mask));
}
static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar,
u32 ce_ctrl_addr)
{
struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
- u32 host_ie_addr = ath10k_pci_read32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->host_ie_addr);
- ath10k_pci_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
- host_ie_addr & ~(wm_regs->wm_mask));
+ u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->host_ie_addr);
+
+ ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
+ host_ie_addr & ~(wm_regs->wm_mask));
}
static inline void ath10k_ce_error_intr_enable(struct ath10k *ar,
u32 ce_ctrl_addr)
{
struct ath10k_hw_ce_misc_regs *misc_regs = ar->hw_ce_regs->misc_regs;
- u32 misc_ie_addr = ath10k_pci_read32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->misc_ie_addr);
- ath10k_pci_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr,
- misc_ie_addr | misc_regs->err_mask);
+ u32 misc_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
+ ar->hw_ce_regs->misc_ie_addr);
+
+ ath10k_ce_write32(ar,
+ ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr,
+ misc_ie_addr | misc_regs->err_mask);
}
static inline void ath10k_ce_error_intr_disable(struct ath10k *ar,
u32 ce_ctrl_addr)
{
struct ath10k_hw_ce_misc_regs *misc_regs = ar->hw_ce_regs->misc_regs;
- u32 misc_ie_addr = ath10k_pci_read32(ar, ce_ctrl_addr +
- ar->hw_ce_regs->misc_ie_addr);
- ath10k_pci_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr,
- misc_ie_addr & ~(misc_regs->err_mask));
+ u32 misc_ie_addr = ath10k_ce_read32(ar,
+ ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr);
+
+ ath10k_ce_write32(ar,
+ ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr,
+ misc_ie_addr & ~(misc_regs->err_mask));
}
static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar,
@@ -295,7 +320,7 @@ static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar,
{
struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
- ath10k_pci_write32(ar, ce_ctrl_addr + wm_regs->addr, mask);
+ ath10k_ce_write32(ar, ce_ctrl_addr + wm_regs->addr, mask);
}
/*
@@ -362,11 +387,11 @@ exit:
void __ath10k_ce_send_revert(struct ath10k_ce_pipe *pipe)
{
struct ath10k *ar = pipe->ar;
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
struct ath10k_ce_ring *src_ring = pipe->src_ring;
u32 ctrl_addr = pipe->ctrl_addr;
- lockdep_assert_held(&ar_pci->ce_lock);
+ lockdep_assert_held(&ce->ce_lock);
/*
* This function must be called only if there is an incomplete
@@ -394,13 +419,13 @@ int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
unsigned int flags)
{
struct ath10k *ar = ce_state->ar;
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
int ret;
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
ret = ath10k_ce_send_nolock(ce_state, per_transfer_context,
buffer, nbytes, transfer_id, flags);
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
return ret;
}
@@ -408,14 +433,14 @@ int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe)
{
struct ath10k *ar = pipe->ar;
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
int delta;
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
delta = CE_RING_DELTA(pipe->src_ring->nentries_mask,
pipe->src_ring->write_index,
pipe->src_ring->sw_index - 1);
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
return delta;
}
@@ -423,13 +448,13 @@ int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe)
int __ath10k_ce_rx_num_free_bufs(struct ath10k_ce_pipe *pipe)
{
struct ath10k *ar = pipe->ar;
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
unsigned int nentries_mask = dest_ring->nentries_mask;
unsigned int write_index = dest_ring->write_index;
unsigned int sw_index = dest_ring->sw_index;
- lockdep_assert_held(&ar_pci->ce_lock);
+ lockdep_assert_held(&ce->ce_lock);
return CE_RING_DELTA(nentries_mask, write_index, sw_index - 1);
}
@@ -437,7 +462,7 @@ int __ath10k_ce_rx_num_free_bufs(struct ath10k_ce_pipe *pipe)
int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
{
struct ath10k *ar = pipe->ar;
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
unsigned int nentries_mask = dest_ring->nentries_mask;
unsigned int write_index = dest_ring->write_index;
@@ -446,7 +471,7 @@ int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, write_index);
u32 ctrl_addr = pipe->ctrl_addr;
- lockdep_assert_held(&ar_pci->ce_lock);
+ lockdep_assert_held(&ce->ce_lock);
if ((pipe->id != 5) &&
CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
@@ -486,12 +511,12 @@ void ath10k_ce_rx_update_write_idx(struct ath10k_ce_pipe *pipe, u32 nentries)
int ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
{
struct ath10k *ar = pipe->ar;
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
int ret;
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
ret = __ath10k_ce_rx_post_buf(pipe, ctx, paddr);
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
return ret;
}
@@ -554,14 +579,14 @@ int ath10k_ce_completed_recv_next(struct ath10k_ce_pipe *ce_state,
unsigned int *nbytesp)
{
struct ath10k *ar = ce_state->ar;
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
int ret;
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
ret = ath10k_ce_completed_recv_next_nolock(ce_state,
per_transfer_contextp,
nbytesp);
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
return ret;
}
@@ -576,7 +601,7 @@ int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
unsigned int write_index;
int ret;
struct ath10k *ar;
- struct ath10k_pci *ar_pci;
+ struct ath10k_ce *ce;
dest_ring = ce_state->dest_ring;
@@ -584,9 +609,9 @@ int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
return -EIO;
ar = ce_state->ar;
- ar_pci = ath10k_pci_priv(ar);
+ ce = ath10k_ce_priv(ar);
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
nentries_mask = dest_ring->nentries_mask;
sw_index = dest_ring->sw_index;
@@ -614,7 +639,7 @@ int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
ret = -EIO;
}
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
return ret;
}
@@ -686,7 +711,7 @@ int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
unsigned int write_index;
int ret;
struct ath10k *ar;
- struct ath10k_pci *ar_pci;
+ struct ath10k_ce *ce;
src_ring = ce_state->src_ring;
@@ -694,9 +719,9 @@ int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
return -EIO;
ar = ce_state->ar;
- ar_pci = ath10k_pci_priv(ar);
+ ce = ath10k_ce_priv(ar);
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
nentries_mask = src_ring->nentries_mask;
sw_index = src_ring->sw_index;
@@ -727,7 +752,7 @@ int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
ret = -EIO;
}
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
return ret;
}
@@ -736,13 +761,13 @@ int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state,
void **per_transfer_contextp)
{
struct ath10k *ar = ce_state->ar;
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
int ret;
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
ret = ath10k_ce_completed_send_next_nolock(ce_state,
per_transfer_contextp);
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
return ret;
}
@@ -755,17 +780,18 @@ int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state,
*/
void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
- struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
+ struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
u32 ctrl_addr = ce_state->ctrl_addr;
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
/* Clear the copy-complete interrupts that will be handled here. */
- ath10k_ce_engine_int_status_clear(ar, ctrl_addr, wm_regs->cc_mask);
+ ath10k_ce_engine_int_status_clear(ar, ctrl_addr,
+ wm_regs->cc_mask);
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
if (ce_state->recv_cb)
ce_state->recv_cb(ce_state);
@@ -773,7 +799,7 @@ void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
if (ce_state->send_cb)
ce_state->send_cb(ce_state);
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
/*
* Misc CE interrupts are not being handled, but still need
@@ -781,7 +807,7 @@ void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
*/
ath10k_ce_engine_int_status_clear(ar, ctrl_addr, wm_regs->wm_mask);
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
}
/*
@@ -795,7 +821,7 @@ void ath10k_ce_per_engine_service_any(struct ath10k *ar)
int ce_id;
u32 intr_summary;
- intr_summary = CE_INTERRUPT_SUMMARY(ar);
+ intr_summary = ath10k_ce_interrupt_summary(ar);
for (ce_id = 0; intr_summary && (ce_id < CE_COUNT); ce_id++) {
if (intr_summary & (1 << ce_id))
@@ -847,22 +873,25 @@ int ath10k_ce_disable_interrupts(struct ath10k *ar)
void ath10k_ce_enable_interrupts(struct ath10k *ar)
{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
int ce_id;
+ struct ath10k_ce_pipe *ce_state;
	/* Skip the last copy engine, CE7, the diagnostic window, as that
* uses polling and isn't initialized for interrupts.
*/
- for (ce_id = 0; ce_id < CE_COUNT - 1; ce_id++)
- ath10k_ce_per_engine_handler_adjust(&ar_pci->ce_states[ce_id]);
+ for (ce_id = 0; ce_id < CE_COUNT - 1; ce_id++) {
+ ce_state = &ce->ce_states[ce_id];
+ ath10k_ce_per_engine_handler_adjust(ce_state);
+ }
}
static int ath10k_ce_init_src_ring(struct ath10k *ar,
unsigned int ce_id,
const struct ce_attr *attr)
{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
- struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
+ struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
struct ath10k_ce_ring *src_ring = ce_state->src_ring;
u32 nentries, ctrl_addr = ath10k_ce_base_address(ar, ce_id);
@@ -898,8 +927,8 @@ static int ath10k_ce_init_dest_ring(struct ath10k *ar,
unsigned int ce_id,
const struct ce_attr *attr)
{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
- struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
+ struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
u32 nentries, ctrl_addr = ath10k_ce_base_address(ar, ce_id);
@@ -1081,8 +1110,8 @@ void ath10k_ce_deinit_pipe(struct ath10k *ar, unsigned int ce_id)
int ath10k_ce_alloc_pipe(struct ath10k *ar, int ce_id,
const struct ce_attr *attr)
{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
- struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
+ struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
int ret;
/*
@@ -1138,8 +1167,8 @@ int ath10k_ce_alloc_pipe(struct ath10k *ar, int ce_id,
void ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
- struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
+ struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
if (ce_state->src_ring) {
dma_free_coherent(ar->dev,
@@ -1168,38 +1197,38 @@ void ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
void ath10k_ce_dump_registers(struct ath10k *ar,
struct ath10k_fw_crash_data *crash_data)
{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
- struct ath10k_ce_crash_data ce;
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
+ struct ath10k_ce_crash_data ce_data;
u32 addr, id;
lockdep_assert_held(&ar->data_lock);
ath10k_err(ar, "Copy Engine register dump:\n");
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
for (id = 0; id < CE_COUNT; id++) {
addr = ath10k_ce_base_address(ar, id);
- ce.base_addr = cpu_to_le32(addr);
+ ce_data.base_addr = cpu_to_le32(addr);
- ce.src_wr_idx =
+ ce_data.src_wr_idx =
cpu_to_le32(ath10k_ce_src_ring_write_index_get(ar, addr));
- ce.src_r_idx =
+ ce_data.src_r_idx =
cpu_to_le32(ath10k_ce_src_ring_read_index_get(ar, addr));
- ce.dst_wr_idx =
+ ce_data.dst_wr_idx =
cpu_to_le32(ath10k_ce_dest_ring_write_index_get(ar, addr));
- ce.dst_r_idx =
+ ce_data.dst_r_idx =
cpu_to_le32(ath10k_ce_dest_ring_read_index_get(ar, addr));
if (crash_data)
- crash_data->ce_crash_data[id] = ce;
+ crash_data->ce_crash_data[id] = ce_data;
ath10k_err(ar, "[%02d]: 0x%08x %3u %3u %3u %3u", id,
- le32_to_cpu(ce.base_addr),
- le32_to_cpu(ce.src_wr_idx),
- le32_to_cpu(ce.src_r_idx),
- le32_to_cpu(ce.dst_wr_idx),
- le32_to_cpu(ce.dst_r_idx));
+ le32_to_cpu(ce_data.base_addr),
+ le32_to_cpu(ce_data.src_wr_idx),
+ le32_to_cpu(ce_data.src_r_idx),
+ le32_to_cpu(ce_data.dst_wr_idx),
+ le32_to_cpu(ce_data.dst_r_idx));
}
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
}
diff --git a/drivers/net/wireless/ath/ath10k/ce.h b/drivers/net/wireless/ath/ath10k/ce.h
index 95743a5..bdec794 100644
--- a/drivers/net/wireless/ath/ath10k/ce.h
+++ b/drivers/net/wireless/ath/ath10k/ce.h
@@ -122,6 +122,24 @@ struct ath10k_ce_pipe {
/* Copy Engine settable attributes */
struct ce_attr;
+struct ath10k_bus_ops {
+ u32 (*read32)(struct ath10k *ar, u32 offset);
+ void (*write32)(struct ath10k *ar, u32 offset, u32 value);
+ int (*get_num_banks)(struct ath10k *ar);
+};
+
+static inline struct ath10k_ce *ath10k_ce_priv(struct ath10k *ar)
+{
+ return (struct ath10k_ce *)ar->ce_priv;
+}
+
+struct ath10k_ce {
+ /* protects CE info */
+ spinlock_t ce_lock;
+ const struct ath10k_bus_ops *bus_ops;
+ struct ath10k_ce_pipe ce_states[CE_COUNT_MAX];
+};
+
/*==================Send====================*/
/* ath10k_ce_send flags */
@@ -291,9 +309,13 @@ static inline u32 ath10k_ce_base_address(struct ath10k *ar, unsigned int ce_id)
CE_WRAPPER_INTERRUPT_SUMMARY_HOST_MSI_LSB)
#define CE_WRAPPER_INTERRUPT_SUMMARY_ADDRESS 0x0000
-#define CE_INTERRUPT_SUMMARY(ar) \
- CE_WRAPPER_INTERRUPT_SUMMARY_HOST_MSI_GET( \
- ath10k_pci_read32((ar), CE_WRAPPER_BASE_ADDRESS + \
- CE_WRAPPER_INTERRUPT_SUMMARY_ADDRESS))
+static inline u32 ath10k_ce_interrupt_summary(struct ath10k *ar)
+{
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
+
+ return CE_WRAPPER_INTERRUPT_SUMMARY_HOST_MSI_GET(
+ ce->bus_ops->read32((ar), CE_WRAPPER_BASE_ADDRESS +
+ CE_WRAPPER_INTERRUPT_SUMMARY_ADDRESS));
+}
#endif /* _CE_H_ */
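
Building on the ath10k_bus_ops and ath10k_ce structures introduced above: the CE layer now touches registers only through ce->bus_ops, so each bus backend supplies accessors and publishes its embedded ath10k_ce through ar->ce_priv, exactly as the pci.c and ahb.c hunks do. A sketch of that wiring for a hypothetical bus (foo_bus, and the drv_priv-based lookup, are assumptions for illustration):

#include <linux/io.h>
#include <linux/spinlock.h>

struct foo_bus {			/* hypothetical bus glue */
	void __iomem *mem;
	struct ath10k_ce ce;
};

static struct foo_bus *foo_bus_priv(struct ath10k *ar)
{
	return (struct foo_bus *)ar->drv_priv;	/* hypothetical layout */
}

static u32 foo_bus_read32(struct ath10k *ar, u32 offset)
{
	return ioread32(foo_bus_priv(ar)->mem + offset);
}

static void foo_bus_write32(struct ath10k *ar, u32 offset, u32 value)
{
	iowrite32(value, foo_bus_priv(ar)->mem + offset);
}

static const struct ath10k_bus_ops foo_bus_ops = {
	.read32	 = foo_bus_read32,
	.write32 = foo_bus_write32,
};

static void foo_bus_wire_ce(struct ath10k *ar)
{
	struct foo_bus *fb = foo_bus_priv(ar);

	fb->ce.bus_ops = &foo_bus_ops;
	spin_lock_init(&fb->ce.ce_lock);
	ar->ce_priv = &fb->ce;	/* ath10k_ce_priv(ar) now resolves here */
}
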
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 75c5c90..a4f6358 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -519,7 +519,7 @@ static const struct firmware *ath10k_fetch_fw_file(struct ath10k *ar,
dir = ".";
snprintf(filename, sizeof(filename), "%s/%s", dir, file);
- ret = request_firmware_direct(&fw, filename, ar->dev);
+ ret = request_firmware(&fw, filename, ar->dev);
ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot fw request '%s': %d\n",
filename, ret);
@@ -1454,6 +1454,7 @@ static void ath10k_core_get_fw_name(struct ath10k *ar, char *fw_name,
{
switch (ar->hif.bus) {
case ATH10K_BUS_SDIO:
+ case ATH10K_BUS_USB:
scnprintf(fw_name, fw_name_len, "%s-%s-%d.bin",
ATH10K_FW_FILE_BASE, ath10k_bus_str(ar->hif.bus),
fw_api);
@@ -1885,6 +1886,7 @@ static int ath10k_core_init_firmware_features(struct ath10k *ar)
ar->fw_stats_req_mask = WMI_10_4_STAT_PEER |
WMI_10_4_STAT_PEER_EXTD;
ar->max_spatial_stream = ar->hw_params.max_spatial_stream;
+ ar->max_num_tdls_vdevs = TARGET_10_4_NUM_TDLS_VDEVS;
if (test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL,
fw_file->fw_features))
@@ -2055,6 +2057,12 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode,
goto err_wmi_detach;
}
+ /* If firmware indicates Full Rx Reorder support it must be used in a
+ * slightly different manner. Let HTT code know.
+ */
+ ar->htt.rx_ring.in_ord_rx = !!(test_bit(WMI_SERVICE_RX_FULL_REORDER,
+ ar->wmi.svc_map));
+
status = ath10k_htt_rx_alloc(&ar->htt);
if (status) {
ath10k_err(ar, "failed to alloc htt rx: %d\n", status);
@@ -2123,6 +2131,14 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode,
ar->running_fw->fw_file.fw_features))
val |= WMI_10_4_COEX_GPIO_SUPPORT;
+ if (test_bit(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY,
+ ar->wmi.svc_map))
+ val |= WMI_10_4_TDLS_EXPLICIT_MODE_ONLY;
+
+ if (test_bit(WMI_SERVICE_TDLS_UAPSD_BUFFER_STA,
+ ar->wmi.svc_map))
+ val |= WMI_10_4_TDLS_UAPSD_BUFFER_STA;
+
status = ath10k_mac_ext_resource_config(ar, val);
if (status) {
ath10k_err(ar,
@@ -2167,12 +2183,6 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode,
}
}
- /* If firmware indicates Full Rx Reorder support it must be used in a
- * slightly different manner. Let HTT code know.
- */
- ar->htt.rx_ring.in_ord_rx = !!(test_bit(WMI_SERVICE_RX_FULL_REORDER,
- ar->wmi.svc_map));
-
status = ath10k_htt_rx_ring_refill(ar);
if (status) {
ath10k_err(ar, "failed to refill htt rx ring: %d\n", status);
@@ -2516,6 +2526,11 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
ar->hw_ce_regs = &qcax_ce_regs;
ar->hw_values = &qca4019_values;
break;
+ case ATH10K_HW_WCN3990:
+ ar->regs = &wcn3990_regs;
+ ar->hw_ce_regs = &wcn3990_ce_regs;
+ ar->hw_values = &wcn3990_values;
+ break;
default:
ath10k_err(ar, "unsupported core hardware revision %d\n",
hw_rev);
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 1aa5cf1..949ebb3 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -92,6 +92,7 @@ enum ath10k_bus {
ATH10K_BUS_PCI,
ATH10K_BUS_AHB,
ATH10K_BUS_SDIO,
+ ATH10K_BUS_USB,
};
static inline const char *ath10k_bus_str(enum ath10k_bus bus)
@@ -103,6 +104,8 @@ static inline const char *ath10k_bus_str(enum ath10k_bus bus)
return "ahb";
case ATH10K_BUS_SDIO:
return "sdio";
+ case ATH10K_BUS_USB:
+ return "usb";
}
return "unknown";
@@ -459,7 +462,7 @@ struct ath10k_ce_crash_hdr {
struct ath10k_fw_crash_data {
bool crashed_since_read;
- uuid_le uuid;
+ guid_t guid;
struct timespec timestamp;
__le32 registers[REG_DUMP_COUNT_QCA988X];
struct ath10k_ce_crash_data ce_crash_data[CE_COUNT_MAX];
@@ -993,6 +996,10 @@ struct ath10k {
u32 reg_ack_cts_timeout_orig;
} fw_coverage;
+ u32 ampdu_reference;
+
+ void *ce_priv;
+
/* must be last */
u8 drv_priv[0] __aligned(sizeof(void *));
};
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 389fcb7..df51450 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -70,7 +70,7 @@ struct ath10k_dump_file_data {
/* some info we can get from ath10k struct that might help */
- u8 uuid[16];
+ guid_t guid;
__le32 chip_id;
@@ -237,7 +237,7 @@ static ssize_t ath10k_read_wmi_services(struct file *file,
{
struct ath10k *ar = file->private_data;
char *buf;
- size_t len = 0, buf_len = 4096;
+ size_t len = 0, buf_len = 8192;
const char *name;
ssize_t ret_cnt;
bool enabled;
@@ -719,7 +719,7 @@ ath10k_debug_get_new_fw_crash_data(struct ath10k *ar)
lockdep_assert_held(&ar->data_lock);
crash_data->crashed_since_read = true;
- uuid_le_gen(&crash_data->uuid);
+ guid_gen(&crash_data->guid);
getnstimeofday(&crash_data->timestamp);
return crash_data;
@@ -766,7 +766,7 @@ static struct ath10k_dump_file_data *ath10k_build_dump_file(struct ath10k *ar,
dump_data->version = cpu_to_le32(ATH10K_FW_CRASH_DUMP_VERSION);
- memcpy(dump_data->uuid, &crash_data->uuid, sizeof(dump_data->uuid));
+ guid_copy(&dump_data->guid, &crash_data->guid);
dump_data->chip_id = cpu_to_le32(ar->chip_id);
dump_data->bus_type = cpu_to_le32(0);
dump_data->target_version = cpu_to_le32(ar->target_version);
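
A compact sketch of the uuid_le-to-guid_t swap seen in the core.h/debug.c hunks: guid_gen() replaces uuid_le_gen(), guid_copy() replaces the open-coded memcpy(), "%pUl" prints the value, and UUID_STRING_LEN bounds the output buffer exactly. The function name is hypothetical.

#include <linux/kernel.h>
#include <linux/uuid.h>

static void foo_report_crash_guid(void)
{
	guid_t guid;
	char buf[UUID_STRING_LEN + 1];	/* 36 chars + NUL, vs. the old 50 */

	guid_gen(&guid);		/* random GUID, replaces uuid_le_gen() */
	snprintf(buf, sizeof(buf), "%pUl", &guid);
	pr_err("firmware crashed! (guid %s)\n", buf);
}
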
diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index 257d109..548ad54 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -40,6 +40,8 @@ enum ath10k_debug_mask {
ATH10K_DBG_AHB = 0x00008000,
ATH10K_DBG_SDIO = 0x00010000,
ATH10K_DBG_SDIO_DUMP = 0x00020000,
+ ATH10K_DBG_USB = 0x00040000,
+ ATH10K_DBG_USB_BULK = 0x00080000,
ATH10K_DBG_ANY = 0xffffffff,
};
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 398dda9..a3f5dc7 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -890,16 +890,26 @@ static void ath10k_htt_rx_h_ppdu(struct ath10k *ar,
status->nss = 0;
status->encoding = RX_ENC_LEGACY;
status->bw = RATE_INFO_BW_20;
+
status->flag &= ~RX_FLAG_MACTIME_END;
status->flag |= RX_FLAG_NO_SIGNAL_VAL;
+ status->flag &= ~(RX_FLAG_AMPDU_IS_LAST);
+ status->flag |= RX_FLAG_AMPDU_DETAILS | RX_FLAG_AMPDU_LAST_KNOWN;
+ status->ampdu_reference = ar->ampdu_reference;
+
ath10k_htt_rx_h_signal(ar, status, rxd);
ath10k_htt_rx_h_channel(ar, status, rxd, vdev_id);
ath10k_htt_rx_h_rates(ar, status, rxd);
}
- if (is_last_ppdu)
+ if (is_last_ppdu) {
ath10k_htt_rx_h_mactime(ar, status, rxd);
+
+ /* set ampdu last segment flag */
+ status->flag |= RX_FLAG_AMPDU_IS_LAST;
+ ar->ampdu_reference++;
+ }
}
static const char * const tid_to_ac[] = {
@@ -1514,7 +1524,7 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar,
*/
if (!rx_status->freq) {
- ath10k_warn(ar, "no channel configured; ignoring frame(s)!\n");
+ ath10k_dbg(ar, ATH10K_DBG_HTT, "no channel configured; ignoring frame(s)!\n");
return false;
}
@@ -1735,7 +1745,8 @@ static void ath10k_htt_rx_delba(struct ath10k *ar, struct htt_resp *resp)
}
static int ath10k_htt_rx_extract_amsdu(struct sk_buff_head *list,
- struct sk_buff_head *amsdu)
+ struct sk_buff_head *amsdu,
+ int budget_left)
{
struct sk_buff *msdu;
struct htt_rx_desc *rxd;
@@ -1746,8 +1757,9 @@ static int ath10k_htt_rx_extract_amsdu(struct sk_buff_head *list,
if (WARN_ON(!skb_queue_empty(amsdu)))
return -EINVAL;
- while ((msdu = __skb_dequeue(list))) {
+ while ((msdu = __skb_dequeue(list)) && budget_left) {
__skb_queue_tail(amsdu, msdu);
+ budget_left--;
rxd = (void *)msdu->data - sizeof(*rxd);
if (rxd->msdu_end.common.info0 &
@@ -1838,7 +1850,8 @@ static int ath10k_htt_rx_h_rx_offload(struct ath10k *ar,
return num_msdu;
}
-static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
+static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb,
+ int budget_left)
{
struct ath10k_htt *htt = &ar->htt;
struct htt_resp *resp = (void *)skb->data;
@@ -1895,9 +1908,9 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
if (offload)
num_msdus = ath10k_htt_rx_h_rx_offload(ar, &list);
- while (!skb_queue_empty(&list)) {
+ while (!skb_queue_empty(&list) && budget_left) {
__skb_queue_head_init(&amsdu);
- ret = ath10k_htt_rx_extract_amsdu(&list, &amsdu);
+ ret = ath10k_htt_rx_extract_amsdu(&list, &amsdu, budget_left);
switch (ret) {
case 0:
/* Note: The in-order indication may report interleaved
@@ -1907,6 +1920,7 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
* should still give an idea about rx rate to the user.
*/
num_msdus += skb_queue_len(&amsdu);
+ budget_left -= skb_queue_len(&amsdu);
ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id);
ath10k_htt_rx_h_filter(ar, &amsdu, status);
ath10k_htt_rx_h_mpdu(ar, &amsdu, status);
@@ -2549,7 +2563,8 @@ int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget)
}
spin_lock_bh(&htt->rx_ring.lock);
- num_rx_msdus = ath10k_htt_rx_in_ord_ind(ar, skb);
+ num_rx_msdus = ath10k_htt_rx_in_ord_ind(ar, skb,
+ (budget - quota));
spin_unlock_bh(&htt->rx_ring.lock);
if (num_rx_msdus < 0) {
resched_napi = true;
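
The htt_rx.c hunks above thread the remaining NAPI budget (budget_left) through the in-order RX path so a single large indication cannot overrun the softirq quota. A minimal sketch of the pattern, with hypothetical names (foo_deliver, foo_process_rx):

#include <linux/skbuff.h>

static void foo_deliver(struct sk_buff *skb)	/* hypothetical handler */
{
	dev_kfree_skb_any(skb);
}

static int foo_process_rx(struct sk_buff_head *list, int budget)
{
	struct sk_buff *skb;
	int done = 0;

	/* Stop when the list drains or the budget is spent, so one large
	 * in-order indication cannot monopolize the NAPI poll.
	 */
	while (done < budget && (skb = __skb_dequeue(list))) {
		foo_deliver(skb);
		done++;
	}

	return done;	/* returning < budget lets the caller complete NAPI */
}
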
diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c
index afb0c01..a860691 100644
--- a/drivers/net/wireless/ath/ath10k/hw.c
+++ b/drivers/net/wireless/ath/ath10k/hw.c
@@ -192,6 +192,156 @@ const struct ath10k_hw_values qca4019_values = {
.ce_desc_meta_data_lsb = 4,
};
+const struct ath10k_hw_regs wcn3990_regs = {
+ .rtc_soc_base_address = 0x00000000,
+ .rtc_wmac_base_address = 0x00000000,
+ .soc_core_base_address = 0x00000000,
+ .ce_wrapper_base_address = 0x0024C000,
+ .ce0_base_address = 0x00240000,
+ .ce1_base_address = 0x00241000,
+ .ce2_base_address = 0x00242000,
+ .ce3_base_address = 0x00243000,
+ .ce4_base_address = 0x00244000,
+ .ce5_base_address = 0x00245000,
+ .ce6_base_address = 0x00246000,
+ .ce7_base_address = 0x00247000,
+ .ce8_base_address = 0x00248000,
+ .ce9_base_address = 0x00249000,
+ .ce10_base_address = 0x0024A000,
+ .ce11_base_address = 0x0024B000,
+ .soc_chip_id_address = 0x000000f0,
+ .soc_reset_control_si0_rst_mask = 0x00000001,
+ .soc_reset_control_ce_rst_mask = 0x00000100,
+ .ce_wrap_intr_sum_host_msi_lsb = 0x0000000c,
+ .ce_wrap_intr_sum_host_msi_mask = 0x00fff000,
+ .pcie_intr_fw_mask = 0x00100000,
+};
+
+static struct ath10k_hw_ce_regs_addr_map wcn3990_src_ring = {
+ .msb = 0x00000010,
+ .lsb = 0x00000010,
+ .mask = GENMASK(17, 17),
+};
+
+static struct ath10k_hw_ce_regs_addr_map wcn3990_dst_ring = {
+ .msb = 0x00000012,
+ .lsb = 0x00000012,
+ .mask = GENMASK(18, 18),
+};
+
+static struct ath10k_hw_ce_regs_addr_map wcn3990_dmax = {
+ .msb = 0x00000000,
+ .lsb = 0x00000000,
+ .mask = GENMASK(15, 0),
+};
+
+static struct ath10k_hw_ce_ctrl1 wcn3990_ctrl1 = {
+ .addr = 0x00000018,
+ .src_ring = &wcn3990_src_ring,
+ .dst_ring = &wcn3990_dst_ring,
+ .dmax = &wcn3990_dmax,
+};
+
+static struct ath10k_hw_ce_regs_addr_map wcn3990_host_ie_cc = {
+ .mask = GENMASK(0, 0),
+};
+
+static struct ath10k_hw_ce_host_ie wcn3990_host_ie = {
+ .copy_complete = &wcn3990_host_ie_cc,
+};
+
+static struct ath10k_hw_ce_host_wm_regs wcn3990_wm_reg = {
+ .dstr_lmask = 0x00000010,
+ .dstr_hmask = 0x00000008,
+ .srcr_lmask = 0x00000004,
+ .srcr_hmask = 0x00000002,
+ .cc_mask = 0x00000001,
+ .wm_mask = 0x0000001E,
+ .addr = 0x00000030,
+};
+
+static struct ath10k_hw_ce_misc_regs wcn3990_misc_reg = {
+ .axi_err = 0x00000100,
+ .dstr_add_err = 0x00000200,
+ .srcr_len_err = 0x00000100,
+ .dstr_mlen_vio = 0x00000080,
+ .dstr_overflow = 0x00000040,
+ .srcr_overflow = 0x00000020,
+ .err_mask = 0x000003E0,
+ .addr = 0x00000038,
+};
+
+static struct ath10k_hw_ce_regs_addr_map wcn3990_src_wm_low = {
+ .msb = 0x00000000,
+ .lsb = 0x00000010,
+ .mask = GENMASK(31, 16),
+};
+
+static struct ath10k_hw_ce_regs_addr_map wcn3990_src_wm_high = {
+ .msb = 0x0000000f,
+ .lsb = 0x00000000,
+ .mask = GENMASK(15, 0),
+};
+
+static struct ath10k_hw_ce_dst_src_wm_regs wcn3990_wm_src_ring = {
+ .addr = 0x0000004c,
+ .low_rst = 0x00000000,
+ .high_rst = 0x00000000,
+ .wm_low = &wcn3990_src_wm_low,
+ .wm_high = &wcn3990_src_wm_high,
+};
+
+static struct ath10k_hw_ce_regs_addr_map wcn3990_dst_wm_low = {
+ .lsb = 0x00000010,
+ .mask = GENMASK(31, 16),
+};
+
+static struct ath10k_hw_ce_regs_addr_map wcn3990_dst_wm_high = {
+ .msb = 0x0000000f,
+ .lsb = 0x00000000,
+ .mask = GENMASK(15, 0),
+};
+
+static struct ath10k_hw_ce_dst_src_wm_regs wcn3990_wm_dst_ring = {
+ .addr = 0x00000050,
+ .low_rst = 0x00000000,
+ .high_rst = 0x00000000,
+ .wm_low = &wcn3990_dst_wm_low,
+ .wm_high = &wcn3990_dst_wm_high,
+};
+
+struct ath10k_hw_ce_regs wcn3990_ce_regs = {
+ .sr_base_addr = 0x00000000,
+ .sr_size_addr = 0x00000008,
+ .dr_base_addr = 0x0000000c,
+ .dr_size_addr = 0x00000014,
+ .misc_ie_addr = 0x00000034,
+ .sr_wr_index_addr = 0x0000003c,
+ .dst_wr_index_addr = 0x00000040,
+ .current_srri_addr = 0x00000044,
+ .current_drri_addr = 0x00000048,
+ .ddr_addr_for_rri_low = 0x00000004,
+ .ddr_addr_for_rri_high = 0x00000008,
+ .ce_rri_low = 0x0024C004,
+ .ce_rri_high = 0x0024C008,
+ .host_ie_addr = 0x0000002c,
+ .ctrl1_regs = &wcn3990_ctrl1,
+ .host_ie = &wcn3990_host_ie,
+ .wm_regs = &wcn3990_wm_reg,
+ .misc_regs = &wcn3990_misc_reg,
+ .wm_srcr = &wcn3990_wm_src_ring,
+ .wm_dstr = &wcn3990_wm_dst_ring,
+};
+
+const struct ath10k_hw_values wcn3990_values = {
+ .rtc_state_val_on = 5,
+ .ce_count = 12,
+ .msi_assign_ce_max = 12,
+ .num_target_ce_config_wlan = 12,
+ .ce_desc_meta_data_mask = 0xFFF0,
+ .ce_desc_meta_data_lsb = 4,
+};
+
static struct ath10k_hw_ce_regs_addr_map qcax_src_ring = {
.msb = 0x00000010,
.lsb = 0x00000010,
diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
index 97dc147..0c089f6 100644
--- a/drivers/net/wireless/ath/ath10k/hw.h
+++ b/drivers/net/wireless/ath/ath10k/hw.h
@@ -231,6 +231,7 @@ enum ath10k_hw_rev {
ATH10K_HW_QCA9377,
ATH10K_HW_QCA4019,
ATH10K_HW_QCA9887,
+ ATH10K_HW_WCN3990,
};
struct ath10k_hw_regs {
@@ -247,6 +248,10 @@ struct ath10k_hw_regs {
u32 ce5_base_address;
u32 ce6_base_address;
u32 ce7_base_address;
+ u32 ce8_base_address;
+ u32 ce9_base_address;
+ u32 ce10_base_address;
+ u32 ce11_base_address;
u32 soc_reset_control_si0_rst_mask;
u32 soc_reset_control_ce_rst_mask;
u32 soc_chip_id_address;
@@ -267,6 +272,7 @@ extern const struct ath10k_hw_regs qca988x_regs;
extern const struct ath10k_hw_regs qca6174_regs;
extern const struct ath10k_hw_regs qca99x0_regs;
extern const struct ath10k_hw_regs qca4019_regs;
+extern const struct ath10k_hw_regs wcn3990_regs;
struct ath10k_hw_ce_regs_addr_map {
u32 msb;
@@ -362,6 +368,8 @@ extern const struct ath10k_hw_values qca6174_values;
extern const struct ath10k_hw_values qca99x0_values;
extern const struct ath10k_hw_values qca9888_values;
extern const struct ath10k_hw_values qca4019_values;
+extern const struct ath10k_hw_values wcn3990_values;
+extern struct ath10k_hw_ce_regs wcn3990_ce_regs;
extern struct ath10k_hw_ce_regs qcax_ce_regs;
void ath10k_hw_fill_survey_time(struct ath10k *ar, struct survey_info *survey,
@@ -375,6 +383,7 @@ void ath10k_hw_fill_survey_time(struct ath10k *ar, struct survey_info *survey,
#define QCA_REV_9984(ar) ((ar)->hw_rev == ATH10K_HW_QCA9984)
#define QCA_REV_9377(ar) ((ar)->hw_rev == ATH10K_HW_QCA9377)
#define QCA_REV_40XX(ar) ((ar)->hw_rev == ATH10K_HW_QCA4019)
+#define QCA_REV_WCN3990(ar) ((ar)->hw_rev == ATH10K_HW_WCN3990)
/* Known peculiarities:
* - raw appears in nwifi decap, raw and nwifi appear in ethernet decap
@@ -711,6 +720,11 @@ ath10k_rx_desc_get_l3_pad_bytes(struct ath10k_hw_params *hw,
#define TARGET_10_4_IPHDR_PAD_CONFIG 1
#define TARGET_10_4_QWRAP_CONFIG 0
+/* TDLS config */
+#define TARGET_10_4_NUM_TDLS_VDEVS 1
+#define TARGET_10_4_NUM_TDLS_BUFFER_STA 1
+#define TARGET_10_4_NUM_TDLS_SLEEP_STA 1
+
/* Maximum number of Copy Engines supported */
#define CE_COUNT_MAX 12
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 55c808f..5683f1a 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -7644,6 +7644,7 @@ static const struct ieee80211_ops ath10k_ops = {
#ifdef CONFIG_PM
.suspend = ath10k_wow_op_suspend,
.resume = ath10k_wow_op_resume,
+ .set_wakeup = ath10k_wow_op_set_wakeup,
#endif
#ifdef CONFIG_MAC80211_DEBUGFS
.sta_add_debugfs = ath10k_sta_add_debugfs,
@@ -8197,8 +8198,11 @@ int ath10k_mac_register(struct ath10k *ar)
NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P;
}
- if (test_bit(WMI_SERVICE_TDLS, ar->wmi.svc_map))
+ if (test_bit(WMI_SERVICE_TDLS, ar->wmi.svc_map) ||
+ test_bit(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY, ar->wmi.svc_map)) {
ar->hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_TDLS;
+ ieee80211_hw_set(ar->hw, TDLS_WIDER_BW);
+ }
ar->hw->wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL;
ar->hw->wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH;
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 7ebfc40..bc163394 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -672,16 +672,16 @@ static u32 ath10k_bus_pci_read32(struct ath10k *ar, u32 offset)
inline void ath10k_pci_write32(struct ath10k *ar, u32 offset, u32 value)
{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
- ar_pci->bus_ops->write32(ar, offset, value);
+ ce->bus_ops->write32(ar, offset, value);
}
inline u32 ath10k_pci_read32(struct ath10k *ar, u32 offset)
{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
- return ar_pci->bus_ops->read32(ar, offset);
+ return ce->bus_ops->read32(ar, offset);
}
u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr)
@@ -761,7 +761,7 @@ static inline const char *ath10k_pci_get_irq_method(struct ath10k *ar)
static int __ath10k_pci_rx_post_buf(struct ath10k_pci_pipe *pipe)
{
struct ath10k *ar = pipe->hif_ce_state;
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
struct ath10k_ce_pipe *ce_pipe = pipe->ce_hdl;
struct sk_buff *skb;
dma_addr_t paddr;
@@ -784,9 +784,9 @@ static int __ath10k_pci_rx_post_buf(struct ath10k_pci_pipe *pipe)
ATH10K_SKB_RXCB(skb)->paddr = paddr;
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
ret = __ath10k_ce_rx_post_buf(ce_pipe, skb, paddr);
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
if (ret) {
dma_unmap_single(ar->dev, paddr, skb->len + skb_tailroom(skb),
DMA_FROM_DEVICE);
@@ -801,6 +801,7 @@ static void ath10k_pci_rx_post_pipe(struct ath10k_pci_pipe *pipe)
{
struct ath10k *ar = pipe->hif_ce_state;
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
struct ath10k_ce_pipe *ce_pipe = pipe->ce_hdl;
int ret, num;
@@ -810,9 +811,9 @@ static void ath10k_pci_rx_post_pipe(struct ath10k_pci_pipe *pipe)
if (!ce_pipe->dest_ring)
return;
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
num = __ath10k_ce_rx_num_free_bufs(ce_pipe);
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
while (num >= 0) {
ret = __ath10k_pci_rx_post_buf(pipe);
@@ -882,6 +883,7 @@ static int ath10k_pci_diag_read_mem(struct ath10k *ar, u32 address, void *data,
int nbytes)
{
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
int ret = 0;
u32 *buf;
unsigned int completed_nbytes, alloc_nbytes, remaining_bytes;
@@ -892,7 +894,7 @@ static int ath10k_pci_diag_read_mem(struct ath10k *ar, u32 address, void *data,
void *data_buf = NULL;
int i;
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
ce_diag = ar_pci->ce_diag;
@@ -986,7 +988,7 @@ done:
dma_free_coherent(ar->dev, alloc_nbytes, data_buf,
ce_data_base);
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
return ret;
}
@@ -1034,6 +1036,7 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address,
const void *data, int nbytes)
{
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
int ret = 0;
u32 *buf;
unsigned int completed_nbytes, orig_nbytes, remaining_bytes;
@@ -1043,7 +1046,7 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address,
dma_addr_t ce_data_base = 0;
int i;
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
ce_diag = ar_pci->ce_diag;
@@ -1147,7 +1150,7 @@ done:
ath10k_warn(ar, "failed to write diag value at 0x%x: %d\n",
address, ret);
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
return ret;
}
@@ -1342,6 +1345,7 @@ int ath10k_pci_hif_tx_sg(struct ath10k *ar, u8 pipe_id,
struct ath10k_hif_sg_item *items, int n_items)
{
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
struct ath10k_pci_pipe *pci_pipe = &ar_pci->pipe_info[pipe_id];
struct ath10k_ce_pipe *ce_pipe = pci_pipe->ce_hdl;
struct ath10k_ce_ring *src_ring = ce_pipe->src_ring;
@@ -1350,7 +1354,7 @@ int ath10k_pci_hif_tx_sg(struct ath10k *ar, u8 pipe_id,
unsigned int write_index;
int err, i = 0;
- spin_lock_bh(&ar_pci->ce_lock);
+ spin_lock_bh(&ce->ce_lock);
nentries_mask = src_ring->nentries_mask;
sw_index = src_ring->sw_index;
@@ -1396,14 +1400,14 @@ int ath10k_pci_hif_tx_sg(struct ath10k *ar, u8 pipe_id,
if (err)
goto err;
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
return 0;
err:
for (; i > 0; i--)
__ath10k_ce_send_revert(ce_pipe);
- spin_unlock_bh(&ar_pci->ce_lock);
+ spin_unlock_bh(&ce->ce_lock);
return err;
}
@@ -1459,7 +1463,7 @@ static void ath10k_pci_dump_registers(struct ath10k *ar,
static void ath10k_pci_fw_crashed_dump(struct ath10k *ar)
{
struct ath10k_fw_crash_data *crash_data;
- char uuid[50];
+ char guid[UUID_STRING_LEN + 1];
spin_lock_bh(&ar->data_lock);
@@ -1468,11 +1472,11 @@ static void ath10k_pci_fw_crashed_dump(struct ath10k *ar)
crash_data = ath10k_debug_get_new_fw_crash_data(ar);
if (crash_data)
- scnprintf(uuid, sizeof(uuid), "%pUl", &crash_data->uuid);
+ scnprintf(guid, sizeof(guid), "%pUl", &crash_data->guid);
else
- scnprintf(uuid, sizeof(uuid), "n/a");
+ scnprintf(guid, sizeof(guid), "n/a");
- ath10k_err(ar, "firmware crashed! (uuid %s)\n", uuid);
+ ath10k_err(ar, "firmware crashed! (guid %s)\n", guid);
ath10k_print_driver_info(ar);
ath10k_pci_dump_registers(ar, crash_data);
ath10k_ce_dump_registers(ar, crash_data);
@@ -1593,6 +1597,8 @@ void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar)
* to mask irq/MSI.
*/
break;
+ case ATH10K_HW_WCN3990:
+ break;
}
}
@@ -1619,6 +1625,8 @@ static void ath10k_pci_irq_msi_fw_unmask(struct ath10k *ar)
* to unmask irq/MSI.
*/
break;
+ case ATH10K_HW_WCN3990:
+ break;
}
}
@@ -2000,9 +2008,9 @@ static int ath10k_pci_get_num_banks(struct ath10k *ar)
static int ath10k_bus_get_num_banks(struct ath10k *ar)
{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
- return ar_pci->bus_ops->get_num_banks(ar);
+ return ce->bus_ops->get_num_banks(ar);
}
int ath10k_pci_init_config(struct ath10k *ar)
@@ -2173,11 +2181,12 @@ int ath10k_pci_alloc_pipes(struct ath10k *ar)
{
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
struct ath10k_pci_pipe *pipe;
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
int i, ret;
for (i = 0; i < CE_COUNT; i++) {
pipe = &ar_pci->pipe_info[i];
- pipe->ce_hdl = &ar_pci->ce_states[i];
+ pipe->ce_hdl = &ce->ce_states[i];
pipe->pipe_num = i;
pipe->hif_ce_state = ar;
@@ -2825,7 +2834,7 @@ static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget)
* interrupts safer to check for pending interrupts for
* immediate servicing.
*/
- if (CE_INTERRUPT_SUMMARY(ar)) {
+ if (ath10k_ce_interrupt_summary(ar)) {
napi_reschedule(ctx);
goto out;
}
@@ -3142,9 +3151,10 @@ static bool ath10k_pci_chip_is_supported(u32 dev_id, u32 chip_id)
int ath10k_pci_setup_resource(struct ath10k *ar)
{
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ struct ath10k_ce *ce = ath10k_ce_priv(ar);
int ret;
- spin_lock_init(&ar_pci->ce_lock);
+ spin_lock_init(&ce->ce_lock);
spin_lock_init(&ar_pci->ps_lock);
setup_timer(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry,
@@ -3263,10 +3273,11 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
ar_pci->ar = ar;
ar->dev_id = pci_dev->device;
ar_pci->pci_ps = pci_ps;
- ar_pci->bus_ops = &ath10k_pci_bus_ops;
+ ar_pci->ce.bus_ops = &ath10k_pci_bus_ops;
ar_pci->pci_soft_reset = pci_soft_reset;
ar_pci->pci_hard_reset = pci_hard_reset;
ar_pci->targ_cpu_to_ce_addr = targ_cpu_to_ce_addr;
+ ar->ce_priv = &ar_pci->ce;
ar->id.vendor = pdev->vendor;
ar->id.device = pdev->device;
@@ -3385,11 +3396,53 @@ static void ath10k_pci_remove(struct pci_dev *pdev)
MODULE_DEVICE_TABLE(pci, ath10k_pci_id_table);
+#ifdef CONFIG_PM
+
+static int ath10k_pci_pm_suspend(struct device *dev)
+{
+ struct ath10k *ar = dev_get_drvdata(dev);
+ int ret;
+
+ if (test_bit(ATH10K_FW_FEATURE_WOWLAN_SUPPORT,
+ ar->running_fw->fw_file.fw_features))
+ return 0;
+
+ ret = ath10k_hif_suspend(ar);
+ if (ret)
+ ath10k_warn(ar, "failed to suspend hif: %d\n", ret);
+
+ return ret;
+}
+
+static int ath10k_pci_pm_resume(struct device *dev)
+{
+ struct ath10k *ar = dev_get_drvdata(dev);
+ int ret;
+
+ if (test_bit(ATH10K_FW_FEATURE_WOWLAN_SUPPORT,
+ ar->running_fw->fw_file.fw_features))
+ return 0;
+
+ ret = ath10k_hif_resume(ar);
+ if (ret)
+ ath10k_warn(ar, "failed to resume hif: %d\n", ret);
+
+ return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(ath10k_pci_pm_ops,
+ ath10k_pci_pm_suspend,
+ ath10k_pci_pm_resume);
+#endif
+
static struct pci_driver ath10k_pci_driver = {
.name = "ath10k_pci",
.id_table = ath10k_pci_id_table,
.probe = ath10k_pci_probe,
.remove = ath10k_pci_remove,
+#ifdef CONFIG_PM
+ .driver.pm = &ath10k_pci_pm_ops,
+#endif
};
static int __init ath10k_pci_init(void)
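
A sketch of the SIMPLE_DEV_PM_OPS wiring the pci.c hunk above adds (all foo_* names are hypothetical): the macro expands to a struct dev_pm_ops covering system suspend/resume only, and is hooked up through .driver.pm on the pci_driver.

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pm.h>

static int foo_pm_suspend(struct device *dev)
{
	return 0;	/* quiesce the device here */
}

static int foo_pm_resume(struct device *dev)
{
	return 0;	/* bring the device back here */
}

/* Expands to: static const struct dev_pm_ops foo_pm_ops = { ... }; */
static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_pm_suspend, foo_pm_resume);

static const struct pci_device_id foo_pci_ids[] = { { } };

static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	return -ENODEV;	/* stub */
}

static void foo_remove(struct pci_dev *pdev)
{
}

static struct pci_driver foo_pci_driver = {
	.name		= "foo_pci",
	.id_table	= foo_pci_ids,
	.probe		= foo_probe,
	.remove		= foo_remove,
	.driver.pm	= &foo_pm_ops,	/* same hookup as the hunk above */
};
module_pci_driver(foo_pci_driver);
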
diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index c1e08ad6..424ff32 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -150,12 +150,6 @@ struct ath10k_pci_supp_chip {
u32 rev_id;
};
-struct ath10k_bus_ops {
- u32 (*read32)(struct ath10k *ar, u32 offset);
- void (*write32)(struct ath10k *ar, u32 offset, u32 value);
- int (*get_num_banks)(struct ath10k *ar);
-};
-
enum ath10k_pci_irq_mode {
ATH10K_PCI_IRQ_AUTO = 0,
ATH10K_PCI_IRQ_LEGACY = 1,
@@ -177,11 +171,7 @@ struct ath10k_pci {
/* Copy Engine used for Diagnostic Accesses */
struct ath10k_ce_pipe *ce_diag;
- /* FIXME: document what this really protects */
- spinlock_t ce_lock;
-
- /* Map CE id to ce_state */
- struct ath10k_ce_pipe ce_states[CE_COUNT_MAX];
+ struct ath10k_ce ce;
struct timer_list rx_post_retry;
/* Due to HW quirks it is recommended to disable ASPM during device
@@ -225,8 +215,6 @@ struct ath10k_pci {
*/
bool pci_ps;
- const struct ath10k_bus_ops *bus_ops;
-
/* Chip specific pci reset routine used to do a safe reset */
int (*pci_soft_reset)(struct ath10k *ar);
diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
index 859ed87..03a69e5 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.c
+++ b/drivers/net/wireless/ath/ath10k/sdio.c
@@ -683,7 +683,7 @@ static int ath10k_sdio_mbox_rxmsg_pending_handler(struct ath10k *ar,
lookaheads[0] = msg_lookahead;
timeout = jiffies + SDIO_MBOX_PROCESSING_TIMEOUT_HZ;
- while (time_before(jiffies, timeout)) {
+ do {
/* Try to allocate as many HTC RX packets as indicated by
* n_lookaheads.
*/
@@ -719,7 +719,7 @@ static int ath10k_sdio_mbox_rxmsg_pending_handler(struct ath10k *ar,
* performance in high throughput situations.
*/
*done = false;
- }
+ } while (time_before(jiffies, timeout));
if (ret && (ret != -ECANCELED))
ath10k_warn(ar, "failed to get pending recv messages: %d\n",
@@ -1336,16 +1336,14 @@ static void ath10k_sdio_irq_handler(struct sdio_func *func)
sdio_release_host(ar_sdio->func);
timeout = jiffies + ATH10K_SDIO_HIF_COMMUNICATION_TIMEOUT_HZ;
- while (time_before(jiffies, timeout) && !done) {
+ do {
ret = ath10k_sdio_mbox_proc_pending_irqs(ar, &done);
if (ret)
break;
- }
+ } while (time_before(jiffies, timeout) && !done);
sdio_claim_host(ar_sdio->func);
- wake_up(&ar_sdio->irq_wq);
-
if (ret && ret != -ECANCELED)
ath10k_warn(ar, "failed to process pending SDIO interrupts: %d\n",
ret);
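The two hunks above convert polling loops from while () {} to do {} while (), so the body runs at least once even if the work was scheduled so late that the timeout has already expired; otherwise a pending message could be skipped outright. A minimal sketch of the pattern (illustration only; process_pending() is a hypothetical stand-in for the driver's real handlers):

    #include <linux/jiffies.h>

    extern int process_pending(void);

    /* do-while: one guaranteed pass even when 'timeout' has already passed */
    static int drain_pending(unsigned long timeout)
    {
            int ret;

            do {
                    ret = process_pending();
                    if (ret)
                            break;
            } while (time_before(jiffies, timeout));

            return ret;
    }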
@@ -2000,8 +1998,6 @@ static int ath10k_sdio_probe(struct sdio_func *func,
goto err_free_bmi_buf;
}
- init_waitqueue_head(&ar_sdio->irq_wq);
-
for (i = 0; i < ATH10K_SDIO_BUS_REQUEST_MAX_NUM; i++)
ath10k_sdio_free_bus_req(ar, &ar_sdio->bus_req[i]);
diff --git a/drivers/net/wireless/ath/ath10k/sdio.h b/drivers/net/wireless/ath/ath10k/sdio.h
index 3f61c67..4ff7b54 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.h
+++ b/drivers/net/wireless/ath/ath10k/sdio.h
@@ -210,8 +210,6 @@ struct ath10k_sdio {
/* temporary buffer for BMI requests */
u8 *bmi_buf;
- wait_queue_head_t irq_wq;
-
bool is_disabled;
struct workqueue_struct *workqueue;
diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c
new file mode 100644
index 0000000..d4803ff
--- /dev/null
+++ b/drivers/net/wireless/ath/ath10k/usb.c
@@ -0,0 +1,1106 @@
+/*
+ * Copyright (c) 2007-2011 Atheros Communications Inc.
+ * Copyright (c) 2011-2012,2017 Qualcomm Atheros, Inc.
+ * Copyright (c) 2016-2017 Erik Stromdahl <erik.stromdahl@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/usb.h>
+
+#include "debug.h"
+#include "core.h"
+#include "bmi.h"
+#include "hif.h"
+#include "htc.h"
+#include "usb.h"
+
+static void ath10k_usb_post_recv_transfers(struct ath10k *ar,
+ struct ath10k_usb_pipe *recv_pipe);
+
+/* inlined helper functions */
+
+static inline enum ath10k_htc_ep_id
+eid_from_htc_hdr(struct ath10k_htc_hdr *htc_hdr)
+{
+ return (enum ath10k_htc_ep_id)htc_hdr->eid;
+}
+
+static inline bool is_trailer_only_msg(struct ath10k_htc_hdr *htc_hdr)
+{
+ return __le16_to_cpu(htc_hdr->len) == htc_hdr->trailer_len;
+}
+
+/* pipe/urb operations */
+static struct ath10k_urb_context *
+ath10k_usb_alloc_urb_from_pipe(struct ath10k_usb_pipe *pipe)
+{
+ struct ath10k_urb_context *urb_context = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags);
+ if (!list_empty(&pipe->urb_list_head)) {
+ urb_context = list_first_entry(&pipe->urb_list_head,
+ struct ath10k_urb_context, link);
+ list_del(&urb_context->link);
+ pipe->urb_cnt--;
+ }
+ spin_unlock_irqrestore(&pipe->ar_usb->cs_lock, flags);
+
+ return urb_context;
+}
+
+static void ath10k_usb_free_urb_to_pipe(struct ath10k_usb_pipe *pipe,
+ struct ath10k_urb_context *urb_context)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags);
+
+ pipe->urb_cnt++;
+ list_add(&urb_context->link, &pipe->urb_list_head);
+
+ spin_unlock_irqrestore(&pipe->ar_usb->cs_lock, flags);
+}
+
+static void ath10k_usb_cleanup_recv_urb(struct ath10k_urb_context *urb_context)
+{
+ dev_kfree_skb(urb_context->skb);
+ urb_context->skb = NULL;
+
+ ath10k_usb_free_urb_to_pipe(urb_context->pipe, urb_context);
+}
+
+static void ath10k_usb_free_pipe_resources(struct ath10k *ar,
+ struct ath10k_usb_pipe *pipe)
+{
+ struct ath10k_urb_context *urb_context;
+
+ if (!pipe->ar_usb) {
+ /* nothing allocated for this pipe */
+ return;
+ }
+
+ ath10k_dbg(ar, ATH10K_DBG_USB,
+ "usb free resources lpipe %d hpipe 0x%x urbs %d avail %d\n",
+ pipe->logical_pipe_num, pipe->usb_pipe_handle,
+ pipe->urb_alloc, pipe->urb_cnt);
+
+ if (pipe->urb_alloc != pipe->urb_cnt) {
+ ath10k_dbg(ar, ATH10K_DBG_USB,
+ "usb urb leak lpipe %d hpipe 0x%x urbs %d avail %d\n",
+ pipe->logical_pipe_num, pipe->usb_pipe_handle,
+ pipe->urb_alloc, pipe->urb_cnt);
+ }
+
+ for (;;) {
+ urb_context = ath10k_usb_alloc_urb_from_pipe(pipe);
+
+ if (!urb_context)
+ break;
+
+ kfree(urb_context);
+ }
+}
+
+static void ath10k_usb_cleanup_pipe_resources(struct ath10k *ar)
+{
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+ int i;
+
+ for (i = 0; i < ATH10K_USB_PIPE_MAX; i++)
+ ath10k_usb_free_pipe_resources(ar, &ar_usb->pipes[i]);
+}
+
+/* hif usb rx/tx completion functions */
+
+static void ath10k_usb_recv_complete(struct urb *urb)
+{
+ struct ath10k_urb_context *urb_context = urb->context;
+ struct ath10k_usb_pipe *pipe = urb_context->pipe;
+ struct ath10k *ar = pipe->ar_usb->ar;
+ struct sk_buff *skb;
+ int status = 0;
+
+ ath10k_dbg(ar, ATH10K_DBG_USB_BULK,
+ "usb recv pipe %d stat %d len %d urb 0x%pK\n",
+ pipe->logical_pipe_num, urb->status, urb->actual_length,
+ urb);
+
+ if (urb->status != 0) {
+ status = -EIO;
+ switch (urb->status) {
+ case -ECONNRESET:
+ case -ENOENT:
+ case -ESHUTDOWN:
+ /* no need to spew these errors when device
+ * removed or urb killed due to driver shutdown
+ */
+ status = -ECANCELED;
+ break;
+ default:
+ ath10k_dbg(ar, ATH10K_DBG_USB_BULK,
+ "usb recv pipe %d ep 0x%2.2x failed: %d\n",
+ pipe->logical_pipe_num,
+ pipe->ep_address, urb->status);
+ break;
+ }
+ goto cleanup_recv_urb;
+ }
+
+ if (urb->actual_length == 0)
+ goto cleanup_recv_urb;
+
+ skb = urb_context->skb;
+
+ /* we are going to pass it up */
+ urb_context->skb = NULL;
+ skb_put(skb, urb->actual_length);
+
+ /* note: queue implements a lock */
+ skb_queue_tail(&pipe->io_comp_queue, skb);
+ schedule_work(&pipe->io_complete_work);
+
+cleanup_recv_urb:
+ ath10k_usb_cleanup_recv_urb(urb_context);
+
+ if (status == 0 &&
+ pipe->urb_cnt >= pipe->urb_cnt_thresh) {
+ /* our free urbs are piling up, post more transfers */
+ ath10k_usb_post_recv_transfers(ar, pipe);
+ }
+}
+
+static void ath10k_usb_transmit_complete(struct urb *urb)
+{
+ struct ath10k_urb_context *urb_context = urb->context;
+ struct ath10k_usb_pipe *pipe = urb_context->pipe;
+ struct ath10k *ar = pipe->ar_usb->ar;
+ struct sk_buff *skb;
+
+ if (urb->status != 0) {
+ ath10k_dbg(ar, ATH10K_DBG_USB_BULK,
+ "pipe: %d, failed:%d\n",
+ pipe->logical_pipe_num, urb->status);
+ }
+
+ skb = urb_context->skb;
+ urb_context->skb = NULL;
+ ath10k_usb_free_urb_to_pipe(urb_context->pipe, urb_context);
+
+ /* note: queue implements a lock */
+ skb_queue_tail(&pipe->io_comp_queue, skb);
+ schedule_work(&pipe->io_complete_work);
+}
+
+/* pipe operations */
+static void ath10k_usb_post_recv_transfers(struct ath10k *ar,
+ struct ath10k_usb_pipe *recv_pipe)
+{
+ struct ath10k_urb_context *urb_context;
+ struct urb *urb;
+ int usb_status;
+
+ for (;;) {
+ urb_context = ath10k_usb_alloc_urb_from_pipe(recv_pipe);
+ if (!urb_context)
+ break;
+
+ urb_context->skb = dev_alloc_skb(ATH10K_USB_RX_BUFFER_SIZE);
+ if (!urb_context->skb)
+ goto err;
+
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (!urb)
+ goto err;
+
+ usb_fill_bulk_urb(urb,
+ recv_pipe->ar_usb->udev,
+ recv_pipe->usb_pipe_handle,
+ urb_context->skb->data,
+ ATH10K_USB_RX_BUFFER_SIZE,
+ ath10k_usb_recv_complete, urb_context);
+
+ ath10k_dbg(ar, ATH10K_DBG_USB_BULK,
+ "usb bulk recv submit %d 0x%x ep 0x%2.2x len %d buf 0x%pK\n",
+ recv_pipe->logical_pipe_num,
+ recv_pipe->usb_pipe_handle, recv_pipe->ep_address,
+ ATH10K_USB_RX_BUFFER_SIZE, urb_context->skb);
+
+ usb_anchor_urb(urb, &recv_pipe->urb_submitted);
+ usb_status = usb_submit_urb(urb, GFP_ATOMIC);
+
+ if (usb_status) {
+ ath10k_dbg(ar, ATH10K_DBG_USB_BULK,
+ "usb bulk recv failed: %d\n",
+ usb_status);
+ usb_unanchor_urb(urb);
+ usb_free_urb(urb);
+ goto err;
+ }
+ usb_free_urb(urb);
+ }
+
+ return;
+
+err:
+ ath10k_usb_cleanup_recv_urb(urb_context);
+}
+
+static void ath10k_usb_flush_all(struct ath10k *ar)
+{
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+ int i;
+
+ for (i = 0; i < ATH10K_USB_PIPE_MAX; i++) {
+ if (ar_usb->pipes[i].ar_usb) {
+ usb_kill_anchored_urbs(&ar_usb->pipes[i].urb_submitted);
+ cancel_work_sync(&ar_usb->pipes[i].io_complete_work);
+ }
+ }
+}
+
+static void ath10k_usb_start_recv_pipes(struct ath10k *ar)
+{
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+
+ ar_usb->pipes[ATH10K_USB_PIPE_RX_DATA].urb_cnt_thresh = 1;
+
+ ath10k_usb_post_recv_transfers(ar,
+ &ar_usb->pipes[ATH10K_USB_PIPE_RX_DATA]);
+}
+
+static void ath10k_usb_tx_complete(struct ath10k *ar, struct sk_buff *skb)
+{
+ struct ath10k_htc_hdr *htc_hdr;
+ struct ath10k_htc_ep *ep;
+
+ htc_hdr = (struct ath10k_htc_hdr *)skb->data;
+ ep = &ar->htc.endpoint[htc_hdr->eid];
+ ath10k_htc_notify_tx_completion(ep, skb);
+ /* The TX complete handler now owns the skb... */
+}
+
+static void ath10k_usb_rx_complete(struct ath10k *ar, struct sk_buff *skb)
+{
+ struct ath10k_htc *htc = &ar->htc;
+ struct ath10k_htc_hdr *htc_hdr;
+ enum ath10k_htc_ep_id eid;
+ struct ath10k_htc_ep *ep;
+ u16 payload_len;
+ u8 *trailer;
+ int ret;
+
+ htc_hdr = (struct ath10k_htc_hdr *)skb->data;
+ eid = eid_from_htc_hdr(htc_hdr);
+ ep = &ar->htc.endpoint[eid];
+
+ if (ep->service_id == 0) {
+ ath10k_warn(ar, "ep %d is not connected\n", eid);
+ goto out_free_skb;
+ }
+
+ payload_len = le16_to_cpu(htc_hdr->len);
+ if (!payload_len) {
+ ath10k_warn(ar, "zero length frame received, firmware crashed?\n");
+ goto out_free_skb;
+ }
+
+ if (payload_len < htc_hdr->trailer_len) {
+ ath10k_warn(ar, "malformed frame received, firmware crashed?\n");
+ goto out_free_skb;
+ }
+
+ if (htc_hdr->flags & ATH10K_HTC_FLAG_TRAILER_PRESENT) {
+ trailer = skb->data + sizeof(*htc_hdr) + payload_len -
+ htc_hdr->trailer_len;
+
+ ret = ath10k_htc_process_trailer(htc,
+ trailer,
+ htc_hdr->trailer_len,
+ eid,
+ NULL,
+ NULL);
+ if (ret)
+ goto out_free_skb;
+
+ if (is_trailer_only_msg(htc_hdr))
+ goto out_free_skb;
+
+ /* strip off the trailer from the skb since it should not
+ * be passed on to upper layers
+ */
+ skb_trim(skb, skb->len - htc_hdr->trailer_len);
+ }
+
+ skb_pull(skb, sizeof(*htc_hdr));
+ ep->ep_ops.ep_rx_complete(ar, skb);
+ /* The RX complete handler now owns the skb... */
+
+ return;
+
+out_free_skb:
+ dev_kfree_skb(skb);
+}
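The receive path above peels an optional HTC trailer off the end of each frame. A sketch of the layout the offset arithmetic assumes (illustration, derived directly from the code above):

    /*
     * | ath10k_htc_hdr | payload data ........ | trailer |
     *                  |<---------- len ---------------->|
     *                                          |<tlr_len>|
     *
     * trailer = skb->data + sizeof(hdr) + len - trailer_len
     *
     * A frame is "trailer only" when len == trailer_len, in which
     * case nothing is passed on to the upper layers.
     */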
+
+static void ath10k_usb_io_comp_work(struct work_struct *work)
+{
+ struct ath10k_usb_pipe *pipe = container_of(work,
+ struct ath10k_usb_pipe,
+ io_complete_work);
+ struct ath10k *ar = pipe->ar_usb->ar;
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&pipe->io_comp_queue))) {
+ if (pipe->flags & ATH10K_USB_PIPE_FLAG_TX)
+ ath10k_usb_tx_complete(ar, skb);
+ else
+ ath10k_usb_rx_complete(ar, skb);
+ }
+}
+
+#define ATH10K_USB_MAX_DIAG_CMD (sizeof(struct ath10k_usb_ctrl_diag_cmd_write))
+#define ATH10K_USB_MAX_DIAG_RESP (sizeof(struct ath10k_usb_ctrl_diag_resp_read))
+
+static void ath10k_usb_destroy(struct ath10k *ar)
+{
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+
+ ath10k_usb_flush_all(ar);
+ ath10k_usb_cleanup_pipe_resources(ar);
+ usb_set_intfdata(ar_usb->interface, NULL);
+
+ kfree(ar_usb->diag_cmd_buffer);
+ kfree(ar_usb->diag_resp_buffer);
+}
+
+static int ath10k_usb_hif_start(struct ath10k *ar)
+{
+ int i;
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+
+ ath10k_usb_start_recv_pipes(ar);
+
+ /* set the TX resource avail threshold for each TX pipe */
+ for (i = ATH10K_USB_PIPE_TX_CTRL;
+ i <= ATH10K_USB_PIPE_TX_DATA_HP; i++) {
+ ar_usb->pipes[i].urb_cnt_thresh =
+ ar_usb->pipes[i].urb_alloc / 2;
+ }
+
+ return 0;
+}
+
+static int ath10k_usb_hif_tx_sg(struct ath10k *ar, u8 pipe_id,
+ struct ath10k_hif_sg_item *items, int n_items)
+{
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+ struct ath10k_usb_pipe *pipe = &ar_usb->pipes[pipe_id];
+ struct ath10k_urb_context *urb_context;
+ struct sk_buff *skb;
+ struct urb *urb;
+ int ret, i;
+
+ for (i = 0; i < n_items; i++) {
+ urb_context = ath10k_usb_alloc_urb_from_pipe(pipe);
+ if (!urb_context) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ skb = items[i].transfer_context;
+ urb_context->skb = skb;
+
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
+ if (!urb) {
+ ret = -ENOMEM;
+ goto err_free_urb_to_pipe;
+ }
+
+ usb_fill_bulk_urb(urb,
+ ar_usb->udev,
+ pipe->usb_pipe_handle,
+ skb->data,
+ skb->len,
+ ath10k_usb_transmit_complete, urb_context);
+
+ if (!(skb->len % pipe->max_packet_size)) {
+ /* hit a max packet boundary on this pipe */
+ urb->transfer_flags |= URB_ZERO_PACKET;
+ }
+
+ usb_anchor_urb(urb, &pipe->urb_submitted);
+ ret = usb_submit_urb(urb, GFP_ATOMIC);
+ if (ret) {
+ ath10k_dbg(ar, ATH10K_DBG_USB_BULK,
+ "usb bulk transmit failed: %d\n", ret);
+ usb_unanchor_urb(urb);
+ ret = -EINVAL;
+ goto err_free_urb_to_pipe;
+ }
+
+ usb_free_urb(urb);
+ }
+
+ return 0;
+
+err_free_urb_to_pipe:
+ ath10k_usb_free_urb_to_pipe(urb_context->pipe, urb_context);
+err:
+ return ret;
+}
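Bulk OUT transfers are terminated by a short packet; when skb->len is an exact multiple of the endpoint's max packet size no short packet occurs, so URB_ZERO_PACKET above asks the host controller to append a zero-length packet (ZLP) to mark the end of the transfer. A worked example, assuming a high-speed bulk endpoint with wMaxPacketSize 512:

    /* 1000-byte write -> 512 + 488: short packet terminates the transfer
     * 1024-byte write -> 512 + 512 + ZLP: needs URB_ZERO_PACKET
     */
    static bool needs_zlp(u32 len, u16 max_packet_size)
    {
            return len && !(len % max_packet_size);
    }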
+
+static void ath10k_usb_hif_stop(struct ath10k *ar)
+{
+ ath10k_usb_flush_all(ar);
+}
+
+static u16 ath10k_usb_hif_get_free_queue_number(struct ath10k *ar, u8 pipe_id)
+{
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+
+ return ar_usb->pipes[pipe_id].urb_cnt;
+}
+
+static int ath10k_usb_submit_ctrl_out(struct ath10k *ar,
+ u8 req, u16 value, u16 index, void *data,
+ u32 size)
+{
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+ u8 *buf = NULL;
+ int ret;
+
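+ /* usb_control_msg() requires a kmalloc'd, DMA-capable buffer, so the
+ * caller's data (which may live on the stack) is duplicated first.
+ */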
+ if (size > 0) {
+ buf = kmemdup(data, size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ }
+
+ /* note: if successful returns number of bytes transferred */
+ ret = usb_control_msg(ar_usb->udev,
+ usb_sndctrlpipe(ar_usb->udev, 0),
+ req,
+ USB_DIR_OUT | USB_TYPE_VENDOR |
+ USB_RECIP_DEVICE, value, index, buf,
+ size, 1000);
+
+ if (ret < 0) {
+ ath10k_warn(ar, "Failed to submit usb control message: %d\n",
+ ret);
+ kfree(buf);
+ return ret;
+ }
+
+ kfree(buf);
+
+ return 0;
+}
+
+static int ath10k_usb_submit_ctrl_in(struct ath10k *ar,
+ u8 req, u16 value, u16 index, void *data,
+ u32 size)
+{
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+ u8 *buf = NULL;
+ int ret;
+
+ if (size > 0) {
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ }
+
+ /* note: if successful returns number of bytes transferred */
+ ret = usb_control_msg(ar_usb->udev,
+ usb_rcvctrlpipe(ar_usb->udev, 0),
+ req,
+ USB_DIR_IN | USB_TYPE_VENDOR |
+ USB_RECIP_DEVICE, value, index, buf,
+ size, 2 * HZ);
+
+ if (ret < 0) {
+ ath10k_warn(ar, "Failed to read usb control message: %d\n",
+ ret);
+ kfree(buf);
+ return ret;
+ }
+
+ memcpy((u8 *)data, buf, size);
+
+ kfree(buf);
+
+ return 0;
+}
+
+static int ath10k_usb_ctrl_msg_exchange(struct ath10k *ar,
+ u8 req_val, u8 *req_buf, u32 req_len,
+ u8 resp_val, u8 *resp_buf,
+ u32 *resp_len)
+{
+ int ret;
+
+ /* send command */
+ ret = ath10k_usb_submit_ctrl_out(ar, req_val, 0, 0,
+ req_buf, req_len);
+ if (ret)
+ goto err;
+
+ /* get response */
+ if (resp_buf) {
+ ret = ath10k_usb_submit_ctrl_in(ar, resp_val, 0, 0,
+ resp_buf, *resp_len);
+ if (ret)
+ goto err;
+ }
+
+ return 0;
+err:
+ return ret;
+}
+
+static int ath10k_usb_hif_diag_read(struct ath10k *ar, u32 address, void *buf,
+ size_t buf_len)
+{
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+ struct ath10k_usb_ctrl_diag_cmd_read *cmd;
+ u32 resp_len;
+ int ret;
+
+ if (buf_len < sizeof(struct ath10k_usb_ctrl_diag_resp_read))
+ return -EINVAL;
+
+ cmd = (struct ath10k_usb_ctrl_diag_cmd_read *)ar_usb->diag_cmd_buffer;
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->cmd = cpu_to_le32(ATH10K_USB_CTRL_DIAG_CC_READ);
+ cmd->address = cpu_to_le32(address);
+ resp_len = sizeof(struct ath10k_usb_ctrl_diag_resp_read);
+
+ ret = ath10k_usb_ctrl_msg_exchange(ar,
+ ATH10K_USB_CONTROL_REQ_DIAG_CMD,
+ (u8 *)cmd,
+ sizeof(*cmd),
+ ATH10K_USB_CONTROL_REQ_DIAG_RESP,
+ ar_usb->diag_resp_buffer, &resp_len);
+ if (ret)
+ return ret;
+
+ if (resp_len != sizeof(struct ath10k_usb_ctrl_diag_resp_read))
+ return -EMSGSIZE;
+
+ memcpy(buf, ar_usb->diag_resp_buffer,
+ sizeof(struct ath10k_usb_ctrl_diag_resp_read));
+
+ return 0;
+}
+
+static int ath10k_usb_hif_diag_write(struct ath10k *ar, u32 address,
+ const void *data, int nbytes)
+{
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+ struct ath10k_usb_ctrl_diag_cmd_write *cmd;
+ int ret;
+
+ if (nbytes != sizeof(cmd->value))
+ return -EINVAL;
+
+ cmd = (struct ath10k_usb_ctrl_diag_cmd_write *)ar_usb->diag_cmd_buffer;
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->cmd = cpu_to_le32(ATH10K_USB_CTRL_DIAG_CC_WRITE);
+ cmd->address = cpu_to_le32(address);
+ memcpy(&cmd->value, data, nbytes);
+
+ ret = ath10k_usb_ctrl_msg_exchange(ar,
+ ATH10K_USB_CONTROL_REQ_DIAG_CMD,
+ (u8 *)cmd,
+ sizeof(*cmd),
+ 0, NULL, NULL);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int ath10k_usb_bmi_exchange_msg(struct ath10k *ar,
+ void *req, u32 req_len,
+ void *resp, u32 *resp_len)
+{
+ int ret;
+
+ if (req) {
+ ret = ath10k_usb_submit_ctrl_out(ar,
+ ATH10K_USB_CONTROL_REQ_SEND_BMI_CMD,
+ 0, 0, req, req_len);
+ if (ret) {
+ ath10k_warn(ar,
+ "unable to send the bmi data to the device: %d\n",
+ ret);
+ return ret;
+ }
+ }
+
+ if (resp) {
+ ret = ath10k_usb_submit_ctrl_in(ar,
+ ATH10K_USB_CONTROL_REQ_RECV_BMI_RESP,
+ 0, 0, resp, *resp_len);
+ if (ret) {
+ ath10k_warn(ar,
+ "Unable to read the bmi data from the device: %d\n",
+ ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void ath10k_usb_hif_get_default_pipe(struct ath10k *ar,
+ u8 *ul_pipe, u8 *dl_pipe)
+{
+ *ul_pipe = ATH10K_USB_PIPE_TX_CTRL;
+ *dl_pipe = ATH10K_USB_PIPE_RX_CTRL;
+}
+
+static int ath10k_usb_hif_map_service_to_pipe(struct ath10k *ar, u16 svc_id,
+ u8 *ul_pipe, u8 *dl_pipe)
+{
+ switch (svc_id) {
+ case ATH10K_HTC_SVC_ID_RSVD_CTRL:
+ case ATH10K_HTC_SVC_ID_WMI_CONTROL:
+ *ul_pipe = ATH10K_USB_PIPE_TX_CTRL;
+ /* due to large control packets, shift to data pipe */
+ *dl_pipe = ATH10K_USB_PIPE_RX_DATA;
+ break;
+ case ATH10K_HTC_SVC_ID_HTT_DATA_MSG:
+ *ul_pipe = ATH10K_USB_PIPE_TX_DATA_LP;
+ /* Disable rxdata2 directly; it will be enabled
+ * if the FW enables rxdata2
+ */
+ *dl_pipe = ATH10K_USB_PIPE_RX_DATA;
+ break;
+ default:
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+/* This op is currently only used by htc_wait_target if the HTC ready
+ * message times out. It is not applicable for USB since there is nothing
+ * we can do if the HTC ready message does not arrive in time.
+ * TODO: Make this op non-mandatory by introducing a NULL check in the
+ * hif op wrapper.
+ */
+static void ath10k_usb_hif_send_complete_check(struct ath10k *ar,
+ u8 pipe, int force)
+{
+}
+
+static int ath10k_usb_hif_power_up(struct ath10k *ar)
+{
+ return 0;
+}
+
+static void ath10k_usb_hif_power_down(struct ath10k *ar)
+{
+ ath10k_usb_flush_all(ar);
+}
+
+#ifdef CONFIG_PM
+
+static int ath10k_usb_hif_suspend(struct ath10k *ar)
+{
+ return -EOPNOTSUPP;
+}
+
+static int ath10k_usb_hif_resume(struct ath10k *ar)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+static const struct ath10k_hif_ops ath10k_usb_hif_ops = {
+ .tx_sg = ath10k_usb_hif_tx_sg,
+ .diag_read = ath10k_usb_hif_diag_read,
+ .diag_write = ath10k_usb_hif_diag_write,
+ .exchange_bmi_msg = ath10k_usb_bmi_exchange_msg,
+ .start = ath10k_usb_hif_start,
+ .stop = ath10k_usb_hif_stop,
+ .map_service_to_pipe = ath10k_usb_hif_map_service_to_pipe,
+ .get_default_pipe = ath10k_usb_hif_get_default_pipe,
+ .send_complete_check = ath10k_usb_hif_send_complete_check,
+ .get_free_queue_number = ath10k_usb_hif_get_free_queue_number,
+ .power_up = ath10k_usb_hif_power_up,
+ .power_down = ath10k_usb_hif_power_down,
+#ifdef CONFIG_PM
+ .suspend = ath10k_usb_hif_suspend,
+ .resume = ath10k_usb_hif_resume,
+#endif
+};
+
+static u8 ath10k_usb_get_logical_pipe_num(u8 ep_address, int *urb_count)
+{
+ u8 pipe_num = ATH10K_USB_PIPE_INVALID;
+
+ switch (ep_address) {
+ case ATH10K_USB_EP_ADDR_APP_CTRL_IN:
+ pipe_num = ATH10K_USB_PIPE_RX_CTRL;
+ *urb_count = RX_URB_COUNT;
+ break;
+ case ATH10K_USB_EP_ADDR_APP_DATA_IN:
+ pipe_num = ATH10K_USB_PIPE_RX_DATA;
+ *urb_count = RX_URB_COUNT;
+ break;
+ case ATH10K_USB_EP_ADDR_APP_INT_IN:
+ pipe_num = ATH10K_USB_PIPE_RX_INT;
+ *urb_count = RX_URB_COUNT;
+ break;
+ case ATH10K_USB_EP_ADDR_APP_DATA2_IN:
+ pipe_num = ATH10K_USB_PIPE_RX_DATA2;
+ *urb_count = RX_URB_COUNT;
+ break;
+ case ATH10K_USB_EP_ADDR_APP_CTRL_OUT:
+ pipe_num = ATH10K_USB_PIPE_TX_CTRL;
+ *urb_count = TX_URB_COUNT;
+ break;
+ case ATH10K_USB_EP_ADDR_APP_DATA_LP_OUT:
+ pipe_num = ATH10K_USB_PIPE_TX_DATA_LP;
+ *urb_count = TX_URB_COUNT;
+ break;
+ case ATH10K_USB_EP_ADDR_APP_DATA_MP_OUT:
+ pipe_num = ATH10K_USB_PIPE_TX_DATA_MP;
+ *urb_count = TX_URB_COUNT;
+ break;
+ case ATH10K_USB_EP_ADDR_APP_DATA_HP_OUT:
+ pipe_num = ATH10K_USB_PIPE_TX_DATA_HP;
+ *urb_count = TX_URB_COUNT;
+ break;
+ default:
+ /* note: there may be endpoints not currently used */
+ break;
+ }
+
+ return pipe_num;
+}
+
+static int ath10k_usb_alloc_pipe_resources(struct ath10k *ar,
+ struct ath10k_usb_pipe *pipe,
+ int urb_cnt)
+{
+ struct ath10k_urb_context *urb_context;
+ int i;
+
+ INIT_LIST_HEAD(&pipe->urb_list_head);
+ init_usb_anchor(&pipe->urb_submitted);
+
+ for (i = 0; i < urb_cnt; i++) {
+ urb_context = kzalloc(sizeof(*urb_context), GFP_KERNEL);
+ if (!urb_context)
+ return -ENOMEM;
+
+ urb_context->pipe = pipe;
+
+ /* we only allocate the urb contexts here; the actual URBs
+ * are allocated from the kernel as needed to do a transaction
+ */
+ pipe->urb_alloc++;
+ ath10k_usb_free_urb_to_pipe(pipe, urb_context);
+ }
+
+ ath10k_dbg(ar, ATH10K_DBG_USB,
+ "usb alloc resources lpipe %d hpipe 0x%x urbs %d\n",
+ pipe->logical_pipe_num, pipe->usb_pipe_handle,
+ pipe->urb_alloc);
+
+ return 0;
+}
+
+static int ath10k_usb_setup_pipe_resources(struct ath10k *ar,
+ struct usb_interface *interface)
+{
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+ struct usb_host_interface *iface_desc = interface->cur_altsetting;
+ struct usb_endpoint_descriptor *endpoint;
+ struct ath10k_usb_pipe *pipe;
+ int ret, i, urbcount;
+ u8 pipe_num;
+
+ ath10k_dbg(ar, ATH10K_DBG_USB, "usb setting up pipes using interface\n");
+
+ /* walk descriptors and set up pipes */
+ for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
+ endpoint = &iface_desc->endpoint[i].desc;
+
+ if (ATH10K_USB_IS_BULK_EP(endpoint->bmAttributes)) {
+ ath10k_dbg(ar, ATH10K_DBG_USB,
+ "usb %s bulk ep 0x%2.2x maxpktsz %d\n",
+ ATH10K_USB_IS_DIR_IN
+ (endpoint->bEndpointAddress) ?
+ "rx" : "tx", endpoint->bEndpointAddress,
+ le16_to_cpu(endpoint->wMaxPacketSize));
+ } else if (ATH10K_USB_IS_INT_EP(endpoint->bmAttributes)) {
+ ath10k_dbg(ar, ATH10K_DBG_USB,
+ "usb %s int ep 0x%2.2x maxpktsz %d interval %d\n",
+ ATH10K_USB_IS_DIR_IN
+ (endpoint->bEndpointAddress) ?
+ "rx" : "tx", endpoint->bEndpointAddress,
+ le16_to_cpu(endpoint->wMaxPacketSize),
+ endpoint->bInterval);
+ } else if (ATH10K_USB_IS_ISOC_EP(endpoint->bmAttributes)) {
+ /* TODO for ISO */
+ ath10k_dbg(ar, ATH10K_DBG_USB,
+ "usb %s isoc ep 0x%2.2x maxpktsz %d interval %d\n",
+ ATH10K_USB_IS_DIR_IN
+ (endpoint->bEndpointAddress) ?
+ "rx" : "tx", endpoint->bEndpointAddress,
+ le16_to_cpu(endpoint->wMaxPacketSize),
+ endpoint->bInterval);
+ }
+ urbcount = 0;
+
+ pipe_num =
+ ath10k_usb_get_logical_pipe_num(endpoint->bEndpointAddress,
+ &urbcount);
+ if (pipe_num == ATH10K_USB_PIPE_INVALID)
+ continue;
+
+ pipe = &ar_usb->pipes[pipe_num];
+ if (pipe->ar_usb)
+ /* pipe was already set up */
+ continue;
+
+ pipe->ar_usb = ar_usb;
+ pipe->logical_pipe_num = pipe_num;
+ pipe->ep_address = endpoint->bEndpointAddress;
+ pipe->max_packet_size = le16_to_cpu(endpoint->wMaxPacketSize);
+
+ if (ATH10K_USB_IS_BULK_EP(endpoint->bmAttributes)) {
+ if (ATH10K_USB_IS_DIR_IN(pipe->ep_address)) {
+ pipe->usb_pipe_handle =
+ usb_rcvbulkpipe(ar_usb->udev,
+ pipe->ep_address);
+ } else {
+ pipe->usb_pipe_handle =
+ usb_sndbulkpipe(ar_usb->udev,
+ pipe->ep_address);
+ }
+ } else if (ATH10K_USB_IS_INT_EP(endpoint->bmAttributes)) {
+ if (ATH10K_USB_IS_DIR_IN(pipe->ep_address)) {
+ pipe->usb_pipe_handle =
+ usb_rcvintpipe(ar_usb->udev,
+ pipe->ep_address);
+ } else {
+ pipe->usb_pipe_handle =
+ usb_sndintpipe(ar_usb->udev,
+ pipe->ep_address);
+ }
+ } else if (ATH10K_USB_IS_ISOC_EP(endpoint->bmAttributes)) {
+ /* TODO for ISO */
+ if (ATH10K_USB_IS_DIR_IN(pipe->ep_address)) {
+ pipe->usb_pipe_handle =
+ usb_rcvisocpipe(ar_usb->udev,
+ pipe->ep_address);
+ } else {
+ pipe->usb_pipe_handle =
+ usb_sndisocpipe(ar_usb->udev,
+ pipe->ep_address);
+ }
+ }
+
+ pipe->ep_desc = endpoint;
+
+ if (!ATH10K_USB_IS_DIR_IN(pipe->ep_address))
+ pipe->flags |= ATH10K_USB_PIPE_FLAG_TX;
+
+ ret = ath10k_usb_alloc_pipe_resources(ar, pipe, urbcount);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int ath10k_usb_create(struct ath10k *ar,
+ struct usb_interface *interface)
+{
+ struct ath10k_usb *ar_usb = ath10k_usb_priv(ar);
+ struct usb_device *dev = interface_to_usbdev(interface);
+ struct ath10k_usb_pipe *pipe;
+ int ret, i;
+
+ usb_set_intfdata(interface, ar_usb);
+ spin_lock_init(&ar_usb->cs_lock);
+ ar_usb->udev = dev;
+ ar_usb->interface = interface;
+
+ for (i = 0; i < ATH10K_USB_PIPE_MAX; i++) {
+ pipe = &ar_usb->pipes[i];
+ INIT_WORK(&pipe->io_complete_work,
+ ath10k_usb_io_comp_work);
+ skb_queue_head_init(&pipe->io_comp_queue);
+ }
+
+ ar_usb->diag_cmd_buffer = kzalloc(ATH10K_USB_MAX_DIAG_CMD, GFP_KERNEL);
+ if (!ar_usb->diag_cmd_buffer) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ar_usb->diag_resp_buffer = kzalloc(ATH10K_USB_MAX_DIAG_RESP,
+ GFP_KERNEL);
+ if (!ar_usb->diag_resp_buffer) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = ath10k_usb_setup_pipe_resources(ar, interface);
+ if (ret)
+ goto err;
+
+ return 0;
+
+err:
+ ath10k_usb_destroy(ar);
+ return ret;
+}
+
+/* ath10k usb driver registered functions */
+static int ath10k_usb_probe(struct usb_interface *interface,
+ const struct usb_device_id *id)
+{
+ struct ath10k *ar;
+ struct ath10k_usb *ar_usb;
+ struct usb_device *dev = interface_to_usbdev(interface);
+ int ret, vendor_id, product_id;
+ enum ath10k_hw_rev hw_rev;
+ u32 chip_id;
+
+ /* Assumption: All USB based chipsets (so far) are QCA9377 based.
+ * If newer chipsets appear that do not use the hw reg setup as
+ * defined in qca6174_regs and qca6174_values, this assumption is
+ * no longer valid and hw_rev must be set up differently depending
+ * on the chipset.
+ */
+ hw_rev = ATH10K_HW_QCA9377;
+
+ ar = ath10k_core_create(sizeof(*ar_usb), &dev->dev, ATH10K_BUS_USB,
+ hw_rev, &ath10k_usb_hif_ops);
+ if (!ar) {
+ dev_err(&dev->dev, "failed to allocate core\n");
+ return -ENOMEM;
+ }
+
+ usb_get_dev(dev);
+ vendor_id = le16_to_cpu(dev->descriptor.idVendor);
+ product_id = le16_to_cpu(dev->descriptor.idProduct);
+
+ ath10k_dbg(ar, ATH10K_DBG_BOOT,
+ "usb new func vendor 0x%04x product 0x%04x\n",
+ vendor_id, product_id);
+
+ ar_usb = ath10k_usb_priv(ar);
+ ret = ath10k_usb_create(ar, interface);
+ if (ret)
+ goto err;
+ ar_usb->ar = ar;
+
+ ar->dev_id = product_id;
+ ar->id.vendor = vendor_id;
+ ar->id.device = product_id;
+
+ /* TODO: don't know yet how to get chip_id with USB */
+ chip_id = 0;
+ ret = ath10k_core_register(ar, chip_id);
+ if (ret) {
+ ath10k_warn(ar, "failed to register driver core: %d\n", ret);
+ goto err;
+ }
+
+ /* TODO: remove this once USB support is fully implemented */
+ ath10k_warn(ar, "WARNING: ath10k USB support is incomplete, don't expect anything to work!\n");
+
+ return 0;
+
+err:
+ ath10k_core_destroy(ar);
+
+ usb_put_dev(dev);
+
+ return ret;
+}
+
+static void ath10k_usb_remove(struct usb_interface *interface)
+{
+ struct ath10k_usb *ar_usb;
+
+ ar_usb = usb_get_intfdata(interface);
+ if (!ar_usb)
+ return;
+
+ ath10k_core_unregister(ar_usb->ar);
+ ath10k_usb_destroy(ar_usb->ar);
+ usb_put_dev(interface_to_usbdev(interface));
+ ath10k_core_destroy(ar_usb->ar);
+}
+
+#ifdef CONFIG_PM
+
+static int ath10k_usb_pm_suspend(struct usb_interface *interface,
+ pm_message_t message)
+{
+ struct ath10k_usb *ar_usb = usb_get_intfdata(interface);
+
+ ath10k_usb_flush_all(ar_usb->ar);
+ return 0;
+}
+
+static int ath10k_usb_pm_resume(struct usb_interface *interface)
+{
+ struct ath10k_usb *ar_usb = usb_get_intfdata(interface);
+ struct ath10k *ar = ar_usb->ar;
+
+ ath10k_usb_post_recv_transfers(ar,
+ &ar_usb->pipes[ATH10K_USB_PIPE_RX_DATA]);
+
+ return 0;
+}
+
+#else
+
+#define ath10k_usb_pm_suspend NULL
+#define ath10k_usb_pm_resume NULL
+
+#endif
+
+/* table of devices that work with this driver */
+static const struct usb_device_id ath10k_usb_ids[] = {
+ {USB_DEVICE(0x13b1, 0x0042)}, /* Linksys WUSB6100M */
+ { /* Terminating entry */ },
+};
+
+MODULE_DEVICE_TABLE(usb, ath10k_usb_ids);
+
+static struct usb_driver ath10k_usb_driver = {
+ .name = "ath10k_usb",
+ .probe = ath10k_usb_probe,
+ .suspend = ath10k_usb_pm_suspend,
+ .resume = ath10k_usb_pm_resume,
+ .disconnect = ath10k_usb_remove,
+ .id_table = ath10k_usb_ids,
+ .supports_autosuspend = true,
+ .disable_hub_initiated_lpm = 1,
+};
+
+module_usb_driver(ath10k_usb_driver);
+
+MODULE_AUTHOR("Atheros Communications, Inc.");
+MODULE_DESCRIPTION("Driver support for Qualcomm Atheros 802.11ac WLAN USB devices");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/ath/ath10k/usb.h b/drivers/net/wireless/ath/ath10k/usb.h
new file mode 100644
index 0000000..f60a3cc
--- /dev/null
+++ b/drivers/net/wireless/ath/ath10k/usb.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2004-2011 Atheros Communications Inc.
+ * Copyright (c) 2011-2012 Qualcomm Atheros, Inc.
+ * Copyright (c) 2016-2017 Erik Stromdahl <erik.stromdahl@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _USB_H_
+#define _USB_H_
+
+/* constants */
+#define TX_URB_COUNT 32
+#define RX_URB_COUNT 32
+#define ATH10K_USB_RX_BUFFER_SIZE 4096
+
+#define ATH10K_USB_PIPE_INVALID ATH10K_USB_PIPE_MAX
+
+/* USB endpoint definitions */
+#define ATH10K_USB_EP_ADDR_APP_CTRL_IN 0x81
+#define ATH10K_USB_EP_ADDR_APP_DATA_IN 0x82
+#define ATH10K_USB_EP_ADDR_APP_DATA2_IN 0x83
+#define ATH10K_USB_EP_ADDR_APP_INT_IN 0x84
+
+#define ATH10K_USB_EP_ADDR_APP_CTRL_OUT 0x01
+#define ATH10K_USB_EP_ADDR_APP_DATA_LP_OUT 0x02
+#define ATH10K_USB_EP_ADDR_APP_DATA_MP_OUT 0x03
+#define ATH10K_USB_EP_ADDR_APP_DATA_HP_OUT 0x04
+
+/* diagnostic command definitions */
+#define ATH10K_USB_CONTROL_REQ_SEND_BMI_CMD 1
+#define ATH10K_USB_CONTROL_REQ_RECV_BMI_RESP 2
+#define ATH10K_USB_CONTROL_REQ_DIAG_CMD 3
+#define ATH10K_USB_CONTROL_REQ_DIAG_RESP 4
+
+#define ATH10K_USB_CTRL_DIAG_CC_READ 0
+#define ATH10K_USB_CTRL_DIAG_CC_WRITE 1
+
+#define ATH10K_USB_IS_BULK_EP(attr) (((attr) & 3) == 0x02)
+#define ATH10K_USB_IS_INT_EP(attr) (((attr) & 3) == 0x03)
+#define ATH10K_USB_IS_ISOC_EP(attr) (((attr) & 3) == 0x01)
+#define ATH10K_USB_IS_DIR_IN(addr) ((addr) & 0x80)
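The endpoint classification macros above open-code the standard USB encodings: the transfer-type field in bmAttributes (1 = isochronous, 2 = bulk, 3 = interrupt) and the direction bit (bit 7) of bEndpointAddress. For reference, a sketch using the USB core's generic helpers, which test the same bits (illustration only; the driver keeps its own macros):

    #include <linux/usb.h>

    static bool ep_is_bulk_in(const struct usb_endpoint_descriptor *epd)
    {
            /* same test as ATH10K_USB_IS_BULK_EP() && ATH10K_USB_IS_DIR_IN() */
            return usb_endpoint_xfer_bulk(epd) && usb_endpoint_dir_in(epd);
    }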
+
+struct ath10k_usb_ctrl_diag_cmd_write {
+ __le32 cmd;
+ __le32 address;
+ __le32 value;
+ __le32 padding;
+} __packed;
+
+struct ath10k_usb_ctrl_diag_cmd_read {
+ __le32 cmd;
+ __le32 address;
+} __packed;
+
+struct ath10k_usb_ctrl_diag_resp_read {
+ u8 value[4];
+} __packed;
+
+/* tx/rx pipes for usb */
+enum ath10k_usb_pipe_id {
+ ATH10K_USB_PIPE_TX_CTRL = 0,
+ ATH10K_USB_PIPE_TX_DATA_LP,
+ ATH10K_USB_PIPE_TX_DATA_MP,
+ ATH10K_USB_PIPE_TX_DATA_HP,
+ ATH10K_USB_PIPE_RX_CTRL,
+ ATH10K_USB_PIPE_RX_DATA,
+ ATH10K_USB_PIPE_RX_DATA2,
+ ATH10K_USB_PIPE_RX_INT,
+ ATH10K_USB_PIPE_MAX
+};
+
+struct ath10k_usb_pipe {
+ struct list_head urb_list_head;
+ struct usb_anchor urb_submitted;
+ u32 urb_alloc;
+ u32 urb_cnt;
+ u32 urb_cnt_thresh;
+ unsigned int usb_pipe_handle;
+ u32 flags;
+ u8 ep_address;
+ u8 logical_pipe_num;
+ struct ath10k_usb *ar_usb;
+ u16 max_packet_size;
+ struct work_struct io_complete_work;
+ struct sk_buff_head io_comp_queue;
+ struct usb_endpoint_descriptor *ep_desc;
+};
+
+#define ATH10K_USB_PIPE_FLAG_TX BIT(0)
+
+/* usb device object */
+struct ath10k_usb {
+ /* protects pipe->urb_list_head and pipe->urb_cnt */
+ spinlock_t cs_lock;
+
+ struct usb_device *udev;
+ struct usb_interface *interface;
+ struct ath10k_usb_pipe pipes[ATH10K_USB_PIPE_MAX];
+ u8 *diag_cmd_buffer;
+ u8 *diag_resp_buffer;
+ struct ath10k *ar;
+};
+
+/* usb urb object */
+struct ath10k_urb_context {
+ struct list_head link;
+ struct ath10k_usb_pipe *pipe;
+ struct sk_buff *skb;
+ struct ath10k *ar;
+};
+
+static inline struct ath10k_usb *ath10k_usb_priv(struct ath10k *ar)
+{
+ return (struct ath10k_usb *)ar->drv_priv;
+}
+
+#endif
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 3efb404..38a9708 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -651,8 +651,6 @@ static struct wmi_cmd_map wmi_10_4_cmd_map = {
.gpio_output_cmdid = WMI_10_4_GPIO_OUTPUT_CMDID,
.pdev_get_temperature_cmdid = WMI_10_4_PDEV_GET_TEMPERATURE_CMDID,
.vdev_set_wmm_params_cmdid = WMI_CMD_UNSUPPORTED,
- .tdls_set_state_cmdid = WMI_CMD_UNSUPPORTED,
- .tdls_peer_update_cmdid = WMI_CMD_UNSUPPORTED,
.adaptive_qcs_cmdid = WMI_CMD_UNSUPPORTED,
.scan_update_request_cmdid = WMI_10_4_SCAN_UPDATE_REQUEST_CMDID,
.vdev_standby_response_cmdid = WMI_10_4_VDEV_STANDBY_RESPONSE_CMDID,
@@ -711,6 +709,33 @@ static struct wmi_cmd_map wmi_10_4_cmd_map = {
.pdev_bss_chan_info_request_cmdid =
WMI_10_4_PDEV_BSS_CHAN_INFO_REQUEST_CMDID,
.ext_resource_cfg_cmdid = WMI_10_4_EXT_RESOURCE_CFG_CMDID,
+ .vdev_set_ie_cmdid = WMI_10_4_VDEV_SET_IE_CMDID,
+ .set_lteu_config_cmdid = WMI_10_4_SET_LTEU_CONFIG_CMDID,
+ .atf_ssid_grouping_request_cmdid =
+ WMI_10_4_ATF_SSID_GROUPING_REQUEST_CMDID,
+ .peer_atf_ext_request_cmdid = WMI_10_4_PEER_ATF_EXT_REQUEST_CMDID,
+ .set_periodic_channel_stats_cfg_cmdid =
+ WMI_10_4_SET_PERIODIC_CHANNEL_STATS_CONFIG,
+ .peer_bwf_request_cmdid = WMI_10_4_PEER_BWF_REQUEST_CMDID,
+ .btcoex_cfg_cmdid = WMI_10_4_BTCOEX_CFG_CMDID,
+ .peer_tx_mu_txmit_count_cmdid = WMI_10_4_PEER_TX_MU_TXMIT_COUNT_CMDID,
+ .peer_tx_mu_txmit_rstcnt_cmdid = WMI_10_4_PEER_TX_MU_TXMIT_RSTCNT_CMDID,
+ .peer_gid_userpos_list_cmdid = WMI_10_4_PEER_GID_USERPOS_LIST_CMDID,
+ .pdev_check_cal_version_cmdid = WMI_10_4_PDEV_CHECK_CAL_VERSION_CMDID,
+ .coex_version_cfg_cmid = WMI_10_4_COEX_VERSION_CFG_CMID,
+ .pdev_get_rx_filter_cmdid = WMI_10_4_PDEV_GET_RX_FILTER_CMDID,
+ .pdev_extended_nss_cfg_cmdid = WMI_10_4_PDEV_EXTENDED_NSS_CFG_CMDID,
+ .vdev_set_scan_nac_rssi_cmdid = WMI_10_4_VDEV_SET_SCAN_NAC_RSSI_CMDID,
+ .prog_gpio_band_select_cmdid = WMI_10_4_PROG_GPIO_BAND_SELECT_CMDID,
+ .config_smart_logging_cmdid = WMI_10_4_CONFIG_SMART_LOGGING_CMDID,
+ .debug_fatal_condition_cmdid = WMI_10_4_DEBUG_FATAL_CONDITION_CMDID,
+ .get_tsf_timer_cmdid = WMI_10_4_GET_TSF_TIMER_CMDID,
+ .pdev_get_tpc_table_cmdid = WMI_10_4_PDEV_GET_TPC_TABLE_CMDID,
+ .vdev_sifs_trigger_time_cmdid = WMI_10_4_VDEV_SIFS_TRIGGER_TIME_CMDID,
+ .pdev_wds_entry_list_cmdid = WMI_10_4_PDEV_WDS_ENTRY_LIST_CMDID,
+ .tdls_set_state_cmdid = WMI_10_4_TDLS_SET_STATE_CMDID,
+ .tdls_peer_update_cmdid = WMI_10_4_TDLS_PEER_UPDATE_CMDID,
+ .tdls_set_offchan_mode_cmdid = WMI_10_4_TDLS_SET_OFFCHAN_MODE_CMDID,
};
/* MAIN WMI VDEV param map */
@@ -3305,7 +3330,7 @@ static void ath10k_wmi_update_noa(struct ath10k *ar, struct ath10k_vif *arvif,
if (arvif->u.ap.noa_data)
if (!pskb_expand_head(bcn, 0, arvif->u.ap.noa_len, GFP_ATOMIC))
skb_put_data(bcn, arvif->u.ap.noa_data,
- arvif->u.ap.noa_len);
+ arvif->u.ap.noa_len);
}
static int ath10k_wmi_op_pull_swba_ev(struct ath10k *ar, struct sk_buff *skb,
@@ -6473,6 +6498,7 @@ ath10k_wmi_op_gen_peer_create(struct ath10k *ar, u32 vdev_id,
cmd = (struct wmi_peer_create_cmd *)skb->data;
cmd->vdev_id = __cpu_to_le32(vdev_id);
ether_addr_copy(cmd->peer_macaddr.addr, peer_addr);
+ cmd->peer_type = __cpu_to_le32(peer_type);
ath10k_dbg(ar, ATH10K_DBG_WMI,
"wmi peer create vdev_id %d peer_addr %pM\n",
@@ -7803,14 +7829,28 @@ ath10k_wmi_10_4_ext_resource_config(struct ath10k *ar,
{
struct wmi_ext_resource_config_10_4_cmd *cmd;
struct sk_buff *skb;
+ u32 num_tdls_sleep_sta = 0;
skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd));
if (!skb)
return ERR_PTR(-ENOMEM);
+ if (test_bit(WMI_SERVICE_TDLS_UAPSD_SLEEP_STA, ar->wmi.svc_map))
+ num_tdls_sleep_sta = TARGET_10_4_NUM_TDLS_SLEEP_STA;
+
cmd = (struct wmi_ext_resource_config_10_4_cmd *)skb->data;
cmd->host_platform_config = __cpu_to_le32(type);
cmd->fw_feature_bitmap = __cpu_to_le32(fw_feature_bitmap);
+ cmd->wlan_gpio_priority = __cpu_to_le32(-1);
+ cmd->coex_version = __cpu_to_le32(WMI_NO_COEX_VERSION_SUPPORT);
+ cmd->coex_gpio_pin1 = __cpu_to_le32(-1);
+ cmd->coex_gpio_pin2 = __cpu_to_le32(-1);
+ cmd->coex_gpio_pin3 = __cpu_to_le32(-1);
+ cmd->num_tdls_vdevs = __cpu_to_le32(TARGET_10_4_NUM_TDLS_VDEVS);
+ cmd->num_tdls_conn_table_entries = __cpu_to_le32(20);
+ cmd->max_tdls_concurrent_sleep_sta = __cpu_to_le32(num_tdls_sleep_sta);
+ cmd->max_tdls_concurrent_buffer_sta =
+ __cpu_to_le32(TARGET_10_4_NUM_TDLS_BUFFER_STA);
ath10k_dbg(ar, ATH10K_DBG_WMI,
"wmi ext resource config host type %d firmware feature bitmap %08x\n",
@@ -7819,6 +7859,124 @@ ath10k_wmi_10_4_ext_resource_config(struct ath10k *ar,
}
static struct sk_buff *
+ath10k_wmi_10_4_gen_update_fw_tdls_state(struct ath10k *ar, u32 vdev_id,
+ enum wmi_tdls_state state)
+{
+ struct wmi_10_4_tdls_set_state_cmd *cmd;
+ struct sk_buff *skb;
+ u32 options = 0;
+
+ skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd));
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ if (test_bit(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY, ar->wmi.svc_map))
+ state = WMI_TDLS_ENABLE_PASSIVE;
+
+ if (test_bit(WMI_SERVICE_TDLS_UAPSD_BUFFER_STA, ar->wmi.svc_map))
+ options |= WMI_TDLS_BUFFER_STA_EN;
+
+ cmd = (struct wmi_10_4_tdls_set_state_cmd *)skb->data;
+ cmd->vdev_id = __cpu_to_le32(vdev_id);
+ cmd->state = __cpu_to_le32(state);
+ cmd->notification_interval_ms = __cpu_to_le32(5000);
+ cmd->tx_discovery_threshold = __cpu_to_le32(100);
+ cmd->tx_teardown_threshold = __cpu_to_le32(5);
+ cmd->rssi_teardown_threshold = __cpu_to_le32(-75);
+ cmd->rssi_delta = __cpu_to_le32(-20);
+ cmd->tdls_options = __cpu_to_le32(options);
+ cmd->tdls_peer_traffic_ind_window = __cpu_to_le32(2);
+ cmd->tdls_peer_traffic_response_timeout_ms = __cpu_to_le32(5000);
+ cmd->tdls_puapsd_mask = __cpu_to_le32(0xf);
+ cmd->tdls_puapsd_inactivity_time_ms = __cpu_to_le32(0);
+ cmd->tdls_puapsd_rx_frame_threshold = __cpu_to_le32(10);
+ cmd->teardown_notification_ms = __cpu_to_le32(10);
+ cmd->tdls_peer_kickout_threshold = __cpu_to_le32(96);
+
+ ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi update fw tdls state %d for vdev %i\n",
+ state, vdev_id);
+ return skb;
+}
+
+static u32 ath10k_wmi_prepare_peer_qos(u8 uapsd_queues, u8 sp)
+{
+ u32 peer_qos = 0;
+
+ if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO)
+ peer_qos |= WMI_TDLS_PEER_QOS_AC_VO;
+ if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI)
+ peer_qos |= WMI_TDLS_PEER_QOS_AC_VI;
+ if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK)
+ peer_qos |= WMI_TDLS_PEER_QOS_AC_BK;
+ if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE)
+ peer_qos |= WMI_TDLS_PEER_QOS_AC_BE;
+
+ peer_qos |= SM(sp, WMI_TDLS_PEER_SP);
+
+ return peer_qos;
+}
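A worked example of the bitmap built above, assuming ath10k's usual SM() field macro ((val << FIELD_LSB) & FIELD_MASK) and the WMI_TDLS_PEER_* definitions added to wmi.h below:

    /* all four ACs U-APSD enabled, max service period sp = 3:
     *   AC bits VO|VI|BK|BE     = 0x0f
     *   SM(3, WMI_TDLS_PEER_SP) = (3 << 5) & 0x60 = 0x60
     *   peer_qos                = 0x6f
     */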
+
+static struct sk_buff *
+ath10k_wmi_10_4_gen_tdls_peer_update(struct ath10k *ar,
+ const struct wmi_tdls_peer_update_cmd_arg *arg,
+ const struct wmi_tdls_peer_capab_arg *cap,
+ const struct wmi_channel_arg *chan_arg)
+{
+ struct wmi_10_4_tdls_peer_update_cmd *cmd;
+ struct wmi_tdls_peer_capabilities *peer_cap;
+ struct wmi_channel *chan;
+ struct sk_buff *skb;
+ u32 peer_qos;
+ int len, chan_len;
+ int i;
+
+ /* tdls peer update cmd has a placeholder for one channel */
+ chan_len = cap->peer_chan_len ? (cap->peer_chan_len - 1) : 0;
+
+ len = sizeof(*cmd) + chan_len * sizeof(*chan);
+
+ skb = ath10k_wmi_alloc_skb(ar, len);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ memset(skb->data, 0, sizeof(*cmd));
+
+ cmd = (struct wmi_10_4_tdls_peer_update_cmd *)skb->data;
+ cmd->vdev_id = __cpu_to_le32(arg->vdev_id);
+ ether_addr_copy(cmd->peer_macaddr.addr, arg->addr);
+ cmd->peer_state = __cpu_to_le32(arg->peer_state);
+
+ peer_qos = ath10k_wmi_prepare_peer_qos(cap->peer_uapsd_queues,
+ cap->peer_max_sp);
+
+ peer_cap = &cmd->peer_capab;
+ peer_cap->peer_qos = __cpu_to_le32(peer_qos);
+ peer_cap->buff_sta_support = __cpu_to_le32(cap->buff_sta_support);
+ peer_cap->off_chan_support = __cpu_to_le32(cap->off_chan_support);
+ peer_cap->peer_curr_operclass = __cpu_to_le32(cap->peer_curr_operclass);
+ peer_cap->self_curr_operclass = __cpu_to_le32(cap->self_curr_operclass);
+ peer_cap->peer_chan_len = __cpu_to_le32(cap->peer_chan_len);
+ peer_cap->peer_operclass_len = __cpu_to_le32(cap->peer_operclass_len);
+
+ for (i = 0; i < WMI_TDLS_MAX_SUPP_OPER_CLASSES; i++)
+ peer_cap->peer_operclass[i] = cap->peer_operclass[i];
+
+ peer_cap->is_peer_responder = __cpu_to_le32(cap->is_peer_responder);
+ peer_cap->pref_offchan_num = __cpu_to_le32(cap->pref_offchan_num);
+ peer_cap->pref_offchan_bw = __cpu_to_le32(cap->pref_offchan_bw);
+
+ for (i = 0; i < cap->peer_chan_len; i++) {
+ chan = (struct wmi_channel *)&peer_cap->peer_chan_list[i];
+ ath10k_wmi_put_wmi_channel(chan, &chan_arg[i]);
+ }
+
+ ath10k_dbg(ar, ATH10K_DBG_WMI,
+ "wmi tdls peer update vdev %i state %d n_chans %u\n",
+ arg->vdev_id, arg->peer_state, cap->peer_chan_len);
+ return skb;
+}
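Because struct wmi_tdls_peer_capabilities already embeds one wmi_channel slot (peer_chan_list[1]), the length computation above only adds peer_chan_len - 1 extra entries. Worked sizing examples, derived from the code above:

    /* peer_chan_len = 0 -> len = sizeof(*cmd) (embedded slot unused)
     * peer_chan_len = 1 -> len = sizeof(*cmd) (fits the embedded slot)
     * peer_chan_len = 3 -> len = sizeof(*cmd) + 2 * sizeof(struct wmi_channel)
     */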
+
+static struct sk_buff *
ath10k_wmi_op_gen_echo(struct ath10k *ar, u32 value)
{
struct wmi_echo_cmd *cmd;
@@ -8197,6 +8355,8 @@ static const struct wmi_ops wmi_10_4_ops = {
.gen_delba_send = ath10k_wmi_op_gen_delba_send,
.fw_stats_fill = ath10k_wmi_10_4_op_fw_stats_fill,
.ext_resource_config = ath10k_wmi_10_4_ext_resource_config,
+ .gen_update_fw_tdls_state = ath10k_wmi_10_4_gen_update_fw_tdls_state,
+ .gen_tdls_peer_update = ath10k_wmi_10_4_gen_tdls_peer_update,
/* shared with 10.2 */
.pull_echo_ev = ath10k_wmi_op_pull_echo_ev,
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index baa38c8..7a3606d 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -184,6 +184,17 @@ enum wmi_service {
WMI_SERVICE_TX_MODE_PUSH_ONLY,
WMI_SERVICE_TX_MODE_PUSH_PULL,
WMI_SERVICE_TX_MODE_DYNAMIC,
+ WMI_SERVICE_VDEV_RX_FILTER,
+ WMI_SERVICE_BTCOEX,
+ WMI_SERVICE_CHECK_CAL_VERSION,
+ WMI_SERVICE_DBGLOG_WARN2,
+ WMI_SERVICE_BTCOEX_DUTY_CYCLE,
+ WMI_SERVICE_4_WIRE_COEX_SUPPORT,
+ WMI_SERVICE_EXTENDED_NSS_SUPPORT,
+ WMI_SERVICE_PROG_GPIO_BAND_SELECT,
+ WMI_SERVICE_SMART_LOGGING_SUPPORT,
+ WMI_SERVICE_TDLS_CONN_TRACKER_IN_HOST_MODE,
+ WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY,
/* keep last */
WMI_SERVICE_MAX,
@@ -310,6 +321,21 @@ enum wmi_10_4_service {
WMI_10_4_SERVICE_TX_MODE_PUSH_ONLY,
WMI_10_4_SERVICE_TX_MODE_PUSH_PULL,
WMI_10_4_SERVICE_TX_MODE_DYNAMIC,
+ WMI_10_4_SERVICE_VDEV_RX_FILTER,
+ WMI_10_4_SERVICE_BTCOEX,
+ WMI_10_4_SERVICE_CHECK_CAL_VERSION,
+ WMI_10_4_SERVICE_DBGLOG_WARN2,
+ WMI_10_4_SERVICE_BTCOEX_DUTY_CYCLE,
+ WMI_10_4_SERVICE_4_WIRE_COEX_SUPPORT,
+ WMI_10_4_SERVICE_EXTENDED_NSS_SUPPORT,
+ WMI_10_4_SERVICE_PROG_GPIO_BAND_SELECT,
+ WMI_10_4_SERVICE_SMART_LOGGING_SUPPORT,
+ WMI_10_4_SERVICE_TDLS,
+ WMI_10_4_SERVICE_TDLS_OFFCHAN,
+ WMI_10_4_SERVICE_TDLS_UAPSD_BUFFER_STA,
+ WMI_10_4_SERVICE_TDLS_UAPSD_SLEEP_STA,
+ WMI_10_4_SERVICE_TDLS_CONN_TRACKER_IN_HOST_MODE,
+ WMI_10_4_SERVICE_TDLS_EXPLICIT_MODE_ONLY,
};
static inline char *wmi_service_name(int service_id)
@@ -408,6 +434,16 @@ static inline char *wmi_service_name(int service_id)
SVCSTR(WMI_SERVICE_TX_MODE_PUSH_ONLY);
SVCSTR(WMI_SERVICE_TX_MODE_PUSH_PULL);
SVCSTR(WMI_SERVICE_TX_MODE_DYNAMIC);
+ SVCSTR(WMI_SERVICE_VDEV_RX_FILTER);
+ SVCSTR(WMI_SERVICE_CHECK_CAL_VERSION);
+ SVCSTR(WMI_SERVICE_DBGLOG_WARN2);
+ SVCSTR(WMI_SERVICE_BTCOEX_DUTY_CYCLE);
+ SVCSTR(WMI_SERVICE_4_WIRE_COEX_SUPPORT);
+ SVCSTR(WMI_SERVICE_EXTENDED_NSS_SUPPORT);
+ SVCSTR(WMI_SERVICE_PROG_GPIO_BAND_SELECT);
+ SVCSTR(WMI_SERVICE_SMART_LOGGING_SUPPORT);
+ SVCSTR(WMI_SERVICE_TDLS_CONN_TRACKER_IN_HOST_MODE);
+ SVCSTR(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY);
default:
return NULL;
}
@@ -420,9 +456,20 @@ static inline char *wmi_service_name(int service_id)
__le32_to_cpu((wmi_svc_bmap)[(svc_id) / (sizeof(u32))]) & \
BIT((svc_id) % (sizeof(u32))))
+/* This extension is required to accommodate new services: the current
+ * limit for wmi_services is 64, as the target uses only 4 bits of each
+ * 32-bit wmi_service word. Extend the lookup to use the remaining
+ * unused bits (28 per word) for new services.
+ */
+#define WMI_EXT_SERVICE_IS_ENABLED(wmi_svc_bmap, svc_id, len) \
+ ((svc_id) >= (len) && \
+ __le32_to_cpu((wmi_svc_bmap)[((svc_id) - (len)) / 28]) & \
+ BIT(((((svc_id) - (len)) % 28) & 0x1f) + 4))
+
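The legacy WMI_SERVICE_IS_ENABLED() macro consumes only bits 0-3 of each 32-bit bitmap word (BIT(svc_id % sizeof(u32))), which is what caps the map at 64 services; the extension above reuses the idle bits 4-31, 28 per word. Worked lookups, assuming len = 64 as stated in the comment above:

    /* svc_id = 70  -> ext id 6  -> word 6 / 28 = 0,  bit (6 % 28) + 4 = 10
     * svc_id = 100 -> ext id 36 -> word 36 / 28 = 1, bit (36 % 28) + 4 = 12
     */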
#define SVCMAP(x, y, len) \
do { \
- if (WMI_SERVICE_IS_ENABLED((in), (x), (len))) \
+ if ((WMI_SERVICE_IS_ENABLED((in), (x), (len))) || \
+ (WMI_EXT_SERVICE_IS_ENABLED((in), (x), (len)))) \
__set_bit(y, out); \
} while (0)
@@ -663,6 +710,36 @@ static inline void wmi_10_4_svc_map(const __le32 *in, unsigned long *out,
WMI_SERVICE_TX_MODE_PUSH_PULL, len);
SVCMAP(WMI_10_4_SERVICE_TX_MODE_DYNAMIC,
WMI_SERVICE_TX_MODE_DYNAMIC, len);
+ SVCMAP(WMI_10_4_SERVICE_VDEV_RX_FILTER,
+ WMI_SERVICE_VDEV_RX_FILTER, len);
+ SVCMAP(WMI_10_4_SERVICE_BTCOEX,
+ WMI_SERVICE_BTCOEX, len);
+ SVCMAP(WMI_10_4_SERVICE_CHECK_CAL_VERSION,
+ WMI_SERVICE_CHECK_CAL_VERSION, len);
+ SVCMAP(WMI_10_4_SERVICE_DBGLOG_WARN2,
+ WMI_SERVICE_DBGLOG_WARN2, len);
+ SVCMAP(WMI_10_4_SERVICE_BTCOEX_DUTY_CYCLE,
+ WMI_SERVICE_BTCOEX_DUTY_CYCLE, len);
+ SVCMAP(WMI_10_4_SERVICE_4_WIRE_COEX_SUPPORT,
+ WMI_SERVICE_4_WIRE_COEX_SUPPORT, len);
+ SVCMAP(WMI_10_4_SERVICE_EXTENDED_NSS_SUPPORT,
+ WMI_SERVICE_EXTENDED_NSS_SUPPORT, len);
+ SVCMAP(WMI_10_4_SERVICE_PROG_GPIO_BAND_SELECT,
+ WMI_SERVICE_PROG_GPIO_BAND_SELECT, len);
+ SVCMAP(WMI_10_4_SERVICE_SMART_LOGGING_SUPPORT,
+ WMI_SERVICE_SMART_LOGGING_SUPPORT, len);
+ SVCMAP(WMI_10_4_SERVICE_TDLS,
+ WMI_SERVICE_TDLS, len);
+ SVCMAP(WMI_10_4_SERVICE_TDLS_OFFCHAN,
+ WMI_SERVICE_TDLS_OFFCHAN, len);
+ SVCMAP(WMI_10_4_SERVICE_TDLS_UAPSD_BUFFER_STA,
+ WMI_SERVICE_TDLS_UAPSD_BUFFER_STA, len);
+ SVCMAP(WMI_10_4_SERVICE_TDLS_UAPSD_SLEEP_STA,
+ WMI_SERVICE_TDLS_UAPSD_SLEEP_STA, len);
+ SVCMAP(WMI_10_4_SERVICE_TDLS_CONN_TRACKER_IN_HOST_MODE,
+ WMI_SERVICE_TDLS_CONN_TRACKER_IN_HOST_MODE, len);
+ SVCMAP(WMI_10_4_SERVICE_TDLS_EXPLICIT_MODE_ONLY,
+ WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY, len);
}
#undef SVCMAP
@@ -837,6 +914,29 @@ struct wmi_cmd_map {
u32 pdev_bss_chan_info_request_cmdid;
u32 pdev_enable_adaptive_cca_cmdid;
u32 ext_resource_cfg_cmdid;
+ u32 vdev_set_ie_cmdid;
+ u32 set_lteu_config_cmdid;
+ u32 atf_ssid_grouping_request_cmdid;
+ u32 peer_atf_ext_request_cmdid;
+ u32 set_periodic_channel_stats_cfg_cmdid;
+ u32 peer_bwf_request_cmdid;
+ u32 btcoex_cfg_cmdid;
+ u32 peer_tx_mu_txmit_count_cmdid;
+ u32 peer_tx_mu_txmit_rstcnt_cmdid;
+ u32 peer_gid_userpos_list_cmdid;
+ u32 pdev_check_cal_version_cmdid;
+ u32 coex_version_cfg_cmid;
+ u32 pdev_get_rx_filter_cmdid;
+ u32 pdev_extended_nss_cfg_cmdid;
+ u32 vdev_set_scan_nac_rssi_cmdid;
+ u32 prog_gpio_band_select_cmdid;
+ u32 config_smart_logging_cmdid;
+ u32 debug_fatal_condition_cmdid;
+ u32 get_tsf_timer_cmdid;
+ u32 pdev_get_tpc_table_cmdid;
+ u32 vdev_sifs_trigger_time_cmdid;
+ u32 pdev_wds_entry_list_cmdid;
+ u32 tdls_set_offchan_mode_cmdid;
};
/*
@@ -1647,6 +1747,29 @@ enum wmi_10_4_cmd_id {
WMI_10_4_EXT_RESOURCE_CFG_CMDID,
WMI_10_4_VDEV_SET_IE_CMDID,
WMI_10_4_SET_LTEU_CONFIG_CMDID,
+ WMI_10_4_ATF_SSID_GROUPING_REQUEST_CMDID,
+ WMI_10_4_PEER_ATF_EXT_REQUEST_CMDID,
+ WMI_10_4_SET_PERIODIC_CHANNEL_STATS_CONFIG,
+ WMI_10_4_PEER_BWF_REQUEST_CMDID,
+ WMI_10_4_BTCOEX_CFG_CMDID,
+ WMI_10_4_PEER_TX_MU_TXMIT_COUNT_CMDID,
+ WMI_10_4_PEER_TX_MU_TXMIT_RSTCNT_CMDID,
+ WMI_10_4_PEER_GID_USERPOS_LIST_CMDID,
+ WMI_10_4_PDEV_CHECK_CAL_VERSION_CMDID,
+ WMI_10_4_COEX_VERSION_CFG_CMID,
+ WMI_10_4_PDEV_GET_RX_FILTER_CMDID,
+ WMI_10_4_PDEV_EXTENDED_NSS_CFG_CMDID,
+ WMI_10_4_VDEV_SET_SCAN_NAC_RSSI_CMDID,
+ WMI_10_4_PROG_GPIO_BAND_SELECT_CMDID,
+ WMI_10_4_CONFIG_SMART_LOGGING_CMDID,
+ WMI_10_4_DEBUG_FATAL_CONDITION_CMDID,
+ WMI_10_4_GET_TSF_TIMER_CMDID,
+ WMI_10_4_PDEV_GET_TPC_TABLE_CMDID,
+ WMI_10_4_VDEV_SIFS_TRIGGER_TIME_CMDID,
+ WMI_10_4_PDEV_WDS_ENTRY_LIST_CMDID,
+ WMI_10_4_TDLS_SET_STATE_CMDID,
+ WMI_10_4_TDLS_PEER_UPDATE_CMDID,
+ WMI_10_4_TDLS_SET_OFFCHAN_MODE_CMDID,
WMI_10_4_PDEV_UTF_CMDID = WMI_10_4_END_CMDID - 1,
};
@@ -1710,6 +1833,18 @@ enum wmi_10_4_event_id {
WMI_10_4_PDEV_NFCAL_POWER_ALL_CHANNELS_EVENTID,
WMI_10_4_PDEV_BSS_CHAN_INFO_EVENTID,
WMI_10_4_MU_REPORT_EVENTID,
+ WMI_10_4_TX_DATA_TRAFFIC_CTRL_EVENTID,
+ WMI_10_4_PEER_TX_MU_TXMIT_COUNT_EVENTID,
+ WMI_10_4_PEER_GID_USERPOS_LIST_EVENTID,
+ WMI_10_4_PDEV_CHECK_CAL_VERSION_EVENTID,
+ WMI_10_4_ATF_PEER_STATS_EVENTID,
+ WMI_10_4_PDEV_GET_RX_FILTER_EVENTID,
+ WMI_10_4_NAC_RSSI_EVENTID,
+ WMI_10_4_DEBUG_FATAL_CONDITION_EVENTID,
+ WMI_10_4_GET_TSF_TIMER_RESP_EVENTID,
+ WMI_10_4_PDEV_TPC_TABLE_EVENTID,
+ WMI_10_4_PDEV_WDS_ENTRY_LIST_EVENTID,
+ WMI_10_4_TDLS_PEER_EVENTID,
WMI_10_4_PDEV_UTF_EVENTID = WMI_10_4_END_EVENTID - 1,
};
@@ -2718,6 +2853,18 @@ struct wmi_resource_config_10_4 {
__le32 qwrap_config;
} __packed;
+enum wmi_coex_version {
+ WMI_NO_COEX_VERSION_SUPPORT = 0,
+ /* 3 wire coex support */
+ WMI_COEX_VERSION_1 = 1,
+ /* 2.5 wire coex support */
+ WMI_COEX_VERSION_2 = 2,
+ /* 2.5 wire coex with duty cycle support */
+ WMI_COEX_VERSION_3 = 3,
+ /* 4 wire coex support */
+ WMI_COEX_VERSION_4 = 4,
+};
+
/**
* enum wmi_10_4_feature_mask - WMI 10.4 feature enable/disable flags
* @WMI_10_4_LTEU_SUPPORT: LTEU config
@@ -2726,6 +2873,14 @@ struct wmi_resource_config_10_4 {
* @WMI_10_4_AUX_RADIO_CHAN_LOAD_INTF: AUX Radio Enhancement for chan load scan
* @WMI_10_4_BSS_CHANNEL_INFO_64: BSS channel info stats
* @WMI_10_4_PEER_STATS: Per station stats
+ * @WMI_10_4_VDEV_STATS: Per vdev stats
+ * @WMI_10_4_TDLS: Implicit TDLS support in firmware enable/disable
+ * @WMI_10_4_TDLS_OFFCHAN: TDLS offchannel support enable/disable
+ * @WMI_10_4_TDLS_UAPSD_BUFFER_STA: TDLS buffer sta support enable/disable
+ * @WMI_10_4_TDLS_UAPSD_SLEEP_STA: TDLS sleep sta support enable/disable
+ * @WMI_10_4_TDLS_CONN_TRACKER_IN_HOST_MODE: TDLS connection tracker in host
+ * enable/disable
+ * @WMI_10_4_TDLS_EXPLICIT_MODE_ONLY: Explicit TDLS mode enable/disable
*/
enum wmi_10_4_feature_mask {
WMI_10_4_LTEU_SUPPORT = BIT(0),
@@ -2734,6 +2889,14 @@ enum wmi_10_4_feature_mask {
WMI_10_4_AUX_RADIO_CHAN_LOAD_INTF = BIT(3),
WMI_10_4_BSS_CHANNEL_INFO_64 = BIT(4),
WMI_10_4_PEER_STATS = BIT(5),
+ WMI_10_4_VDEV_STATS = BIT(6),
+ WMI_10_4_TDLS = BIT(7),
+ WMI_10_4_TDLS_OFFCHAN = BIT(8),
+ WMI_10_4_TDLS_UAPSD_BUFFER_STA = BIT(9),
+ WMI_10_4_TDLS_UAPSD_SLEEP_STA = BIT(10),
+ WMI_10_4_TDLS_CONN_TRACKER_IN_HOST_MODE = BIT(11),
+ WMI_10_4_TDLS_EXPLICIT_MODE_ONLY = BIT(12),
};
struct wmi_ext_resource_config_10_4_cmd {
@@ -2741,6 +2904,22 @@ struct wmi_ext_resource_config_10_4_cmd {
__le32 host_platform_config;
/* see enum wmi_10_4_feature_mask */
__le32 fw_feature_bitmap;
+ /* WLAN priority GPIO number */
+ __le32 wlan_gpio_priority;
+ /* see enum wmi_coex_version */
+ __le32 coex_version;
+ /* COEX GPIO config */
+ __le32 coex_gpio_pin1;
+ __le32 coex_gpio_pin2;
+ __le32 coex_gpio_pin3;
+ /* number of vdevs allowed to perform tdls */
+ __le32 num_tdls_vdevs;
+ /* number of peers to track per TDLS vdev */
+ __le32 num_tdls_conn_table_entries;
+ /* number of tdls sleep sta supported */
+ __le32 max_tdls_concurrent_sleep_sta;
+ /* number of tdls buffer sta supported */
+ __le32 max_tdls_concurrent_buffer_sta;
};
/* structure describing host memory chunk. */
@@ -5698,6 +5877,7 @@ struct wmi_tbtt_offset_event {
struct wmi_peer_create_cmd {
__le32 vdev_id;
struct wmi_mac_addr peer_macaddr;
+ __le32 peer_type;
} __packed;
enum wmi_peer_type {
@@ -6556,6 +6736,22 @@ struct wmi_tdls_peer_update_cmd_arg {
#define WMI_TDLS_MAX_SUPP_OPER_CLASSES 32
+#define WMI_TDLS_PEER_SP_MASK 0x60
+#define WMI_TDLS_PEER_SP_LSB 5
+
+enum wmi_tdls_options {
+ WMI_TDLS_OFFCHAN_EN = BIT(0),
+ WMI_TDLS_BUFFER_STA_EN = BIT(1),
+ WMI_TDLS_SLEEP_STA_EN = BIT(2),
+};
+
+enum {
+ WMI_TDLS_PEER_QOS_AC_VO = BIT(0),
+ WMI_TDLS_PEER_QOS_AC_VI = BIT(1),
+ WMI_TDLS_PEER_QOS_AC_BK = BIT(2),
+ WMI_TDLS_PEER_QOS_AC_BE = BIT(3),
+};
+
struct wmi_tdls_peer_capab_arg {
u8 peer_uapsd_queues;
u8 peer_max_sp;
@@ -6571,6 +6767,79 @@ struct wmi_tdls_peer_capab_arg {
u32 pref_offchan_bw;
};
+struct wmi_10_4_tdls_set_state_cmd {
+ __le32 vdev_id;
+ __le32 state;
+ __le32 notification_interval_ms;
+ __le32 tx_discovery_threshold;
+ __le32 tx_teardown_threshold;
+ __le32 rssi_teardown_threshold;
+ __le32 rssi_delta;
+ __le32 tdls_options;
+ __le32 tdls_peer_traffic_ind_window;
+ __le32 tdls_peer_traffic_response_timeout_ms;
+ __le32 tdls_puapsd_mask;
+ __le32 tdls_puapsd_inactivity_time_ms;
+ __le32 tdls_puapsd_rx_frame_threshold;
+ __le32 teardown_notification_ms;
+ __le32 tdls_peer_kickout_threshold;
+} __packed;
+
+struct wmi_tdls_peer_capabilities {
+ __le32 peer_qos;
+ __le32 buff_sta_support;
+ __le32 off_chan_support;
+ __le32 peer_curr_operclass;
+ __le32 self_curr_operclass;
+ __le32 peer_chan_len;
+ __le32 peer_operclass_len;
+ u8 peer_operclass[WMI_TDLS_MAX_SUPP_OPER_CLASSES];
+ __le32 is_peer_responder;
+ __le32 pref_offchan_num;
+ __le32 pref_offchan_bw;
+ struct wmi_channel peer_chan_list[1];
+} __packed;
+
+struct wmi_10_4_tdls_peer_update_cmd {
+ __le32 vdev_id;
+ struct wmi_mac_addr peer_macaddr;
+ __le32 peer_state;
+ __le32 reserved[4];
+ struct wmi_tdls_peer_capabilities peer_capab;
+} __packed;
+
+enum wmi_tdls_peer_reason {
+ WMI_TDLS_TEARDOWN_REASON_TX,
+ WMI_TDLS_TEARDOWN_REASON_RSSI,
+ WMI_TDLS_TEARDOWN_REASON_SCAN,
+ WMI_TDLS_DISCONNECTED_REASON_PEER_DELETE,
+ WMI_TDLS_TEARDOWN_REASON_PTR_TIMEOUT,
+ WMI_TDLS_TEARDOWN_REASON_BAD_PTR,
+ WMI_TDLS_TEARDOWN_REASON_NO_RESPONSE,
+ WMI_TDLS_ENTER_BUF_STA,
+ WMI_TDLS_EXIT_BUF_STA,
+ WMI_TDLS_ENTER_BT_BUSY_MODE,
+ WMI_TDLS_EXIT_BT_BUSY_MODE,
+ WMI_TDLS_SCAN_STARTED_EVENT,
+ WMI_TDLS_SCAN_COMPLETED_EVENT,
+};
+
+enum wmi_tdls_peer_notification {
+ WMI_TDLS_SHOULD_DISCOVER,
+ WMI_TDLS_SHOULD_TEARDOWN,
+ WMI_TDLS_PEER_DISCONNECTED,
+ WMI_TDLS_CONNECTION_TRACKER_NOTIFICATION,
+};
+
+struct wmi_tdls_peer_event {
+ struct wmi_mac_addr peer_macaddr;
+ /* see enum wmi_tdls_peer_notification */
+ __le32 peer_status;
+ /* see enum wmi_tdls_peer_reason */
+ __le32 peer_reason;
+ __le32 vdev_id;
+} __packed;
+
enum wmi_txbf_conf {
WMI_TXBF_CONF_UNSUPPORTED,
WMI_TXBF_CONF_BEFORE_ASSOC,
diff --git a/drivers/net/wireless/ath/ath10k/wow.c b/drivers/net/wireless/ath/ath10k/wow.c
index 77100d4..0d46d6d 100644
--- a/drivers/net/wireless/ath/ath10k/wow.c
+++ b/drivers/net/wireless/ath/ath10k/wow.c
@@ -277,6 +277,18 @@ exit:
return ret ? 1 : 0;
}
+void ath10k_wow_op_set_wakeup(struct ieee80211_hw *hw, bool enabled)
+{
+ struct ath10k *ar = hw->priv;
+
+ mutex_lock(&ar->conf_mutex);
+ if (test_bit(ATH10K_FW_FEATURE_WOWLAN_SUPPORT,
+ ar->running_fw->fw_file.fw_features)) {
+ device_set_wakeup_enable(ar->dev, enabled);
+ }
+ mutex_unlock(&ar->conf_mutex);
+}
+
int ath10k_wow_op_resume(struct ieee80211_hw *hw)
{
struct ath10k *ar = hw->priv;
@@ -336,5 +348,7 @@ int ath10k_wow_init(struct ath10k *ar)
ar->wow.wowlan_support.n_patterns = ar->wow.max_num_patterns;
ar->hw->wiphy->wowlan = &ar->wow.wowlan_support;
+ device_set_wakeup_capable(ar->dev, true);
+
return 0;
}
diff --git a/drivers/net/wireless/ath/ath10k/wow.h b/drivers/net/wireless/ath/ath10k/wow.h
index abbb04b..9745b9d 100644
--- a/drivers/net/wireless/ath/ath10k/wow.h
+++ b/drivers/net/wireless/ath/ath10k/wow.h
@@ -28,6 +28,7 @@ int ath10k_wow_init(struct ath10k *ar);
int ath10k_wow_op_suspend(struct ieee80211_hw *hw,
struct cfg80211_wowlan *wowlan);
int ath10k_wow_op_resume(struct ieee80211_hw *hw);
+void ath10k_wow_op_set_wakeup(struct ieee80211_hw *hw, bool enabled);
#else
diff --git a/drivers/net/wireless/ath/ath6kl/usb.c b/drivers/net/wireless/ath/ath6kl/usb.c
index 9da3594..4defb7a 100644
--- a/drivers/net/wireless/ath/ath6kl/usb.c
+++ b/drivers/net/wireless/ath/ath6kl/usb.c
@@ -1201,7 +1201,7 @@ static int ath6kl_usb_pm_resume(struct usb_interface *interface)
#endif
/* table of devices that work with this driver */
-static struct usb_device_id ath6kl_usb_ids[] = {
+static const struct usb_device_id ath6kl_usb_ids[] = {
{USB_DEVICE(0x0cf3, 0x9375)},
{USB_DEVICE(0x0cf3, 0x9374)},
{ /* Terminating entry */ },
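Constifying the ID tables (here and in the ath9k/carl9170 hunks below) moves them into read-only memory without changing behavior; module alias generation works as before. Sketch of the full idiom, with illustrative device IDs:

static const struct usb_device_id example_usb_ids[] = {
	{ USB_DEVICE(0x0cf3, 0x9375) },
	{ /* terminating entry */ },
};
MODULE_DEVICE_TABLE(usb, example_usb_ids);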
diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c
index 2e64977..01fa301 100644
--- a/drivers/net/wireless/ath/ath9k/debug.c
+++ b/drivers/net/wireless/ath/ath9k/debug.c
@@ -1452,7 +1452,7 @@ int ath9k_init_debug(struct ath_hw *ah)
#endif
#ifdef CONFIG_ATH9K_DYNACK
- debugfs_create_file("ack_to", S_IRUSR | S_IWUSR, sc->debug.debugfs_phy,
+ debugfs_create_file("ack_to", S_IRUSR, sc->debug.debugfs_phy,
sc, &fops_ackto);
#endif
debugfs_create_file("tpc", S_IRUSR | S_IWUSR,
diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
index 0d9687a..c5f4dd8 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
@@ -20,7 +20,7 @@
MODULE_FIRMWARE(HTC_7010_MODULE_FW);
MODULE_FIRMWARE(HTC_9271_MODULE_FW);
-static struct usb_device_id ath9k_hif_usb_ids[] = {
+static const struct usb_device_id ath9k_hif_usb_ids[] = {
{ USB_DEVICE(0x0cf3, 0x9271) }, /* Atheros */
{ USB_DEVICE(0x0cf3, 0x1006) }, /* Atheros */
{ USB_DEVICE(0x0846, 0x9030) }, /* Netgear N150 */
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
index defacc6..da2164b 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
@@ -71,7 +71,7 @@ static void ath9k_htc_op_ps_restore(struct ath_common *common)
ath9k_htc_ps_restore((struct ath9k_htc_priv *) common->priv);
}
-static struct ath_ps_ops ath9k_htc_ps_ops = {
+static const struct ath_ps_ops ath9k_htc_ps_ops = {
.wakeup = ath9k_htc_op_ps_wakeup,
.restore = ath9k_htc_op_ps_restore,
};
diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index fd9a618..bb79360 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -104,7 +104,7 @@ static void ath9k_op_ps_restore(struct ath_common *common)
ath9k_ps_restore((struct ath_softc *) common->priv);
}
-static struct ath_ps_ops ath9k_ps_ops = {
+static const struct ath_ps_ops ath9k_ps_ops = {
.wakeup = ath9k_op_ps_wakeup,
.restore = ath9k_op_ps_restore,
};
diff --git a/drivers/net/wireless/ath/ath9k/pci.c b/drivers/net/wireless/ath/ath9k/pci.c
index 7b7627f..2236063 100644
--- a/drivers/net/wireless/ath/ath9k/pci.c
+++ b/drivers/net/wireless/ath/ath9k/pci.c
@@ -388,6 +388,11 @@ static const struct pci_device_id ath_pci_id_table[] = {
PCI_VENDOR_ID_DELL,
0x020B),
.driver_data = ATH9K_PCI_WOW },
+ { PCI_DEVICE_SUB(PCI_VENDOR_ID_ATHEROS,
+ 0x0034,
+ PCI_VENDOR_ID_DELL,
+ 0x0300),
+ .driver_data = ATH9K_PCI_WOW },
/* Killer Wireless (2x2) */
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_ATHEROS,
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index 99ab203..e7c3f3b 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -64,7 +64,7 @@ MODULE_ALIAS("arusb_lnx");
* http://wireless.kernel.org/en/users/Drivers/ar9170/devices ),
* whenever you add a new device.
*/
-static struct usb_device_id carl9170_usb_ids[] = {
+static const struct usb_device_id carl9170_usb_ids[] = {
/* Atheros 9170 */
{ USB_DEVICE(0x0cf3, 0x9170) },
/* Atheros TG121N */
diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.c b/drivers/net/wireless/ath/wcn36xx/dxe.c
index 87dfdaf..d5c810a 100644
--- a/drivers/net/wireless/ath/wcn36xx/dxe.c
+++ b/drivers/net/wireless/ath/wcn36xx/dxe.c
@@ -289,6 +289,11 @@ static int wcn36xx_dxe_fill_skb(struct device *dev, struct wcn36xx_dxe_ctl *ctl)
skb_tail_pointer(skb),
WCN36XX_PKT_SIZE,
DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, dxe->dst_addr_l)) {
+ dev_err(dev, "unable to map skb\n");
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
ctl->skb = skb;
return 0;
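Streaming DMA mappings can fail (IOMMU exhaustion, SWIOTLB pressure), so every dma_map_single() must be checked with dma_mapping_error() before the address reaches hardware. The fix above follows the canonical pattern:

dma_addr_t paddr;

paddr = dma_map_single(dev, skb_tail_pointer(skb),
		       WCN36XX_PKT_SIZE, DMA_FROM_DEVICE);
if (dma_mapping_error(dev, paddr)) {
	kfree_skb(skb);		/* never hand an unmapped address to HW */
	return -ENOMEM;
}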
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index 517a315..35bd50b 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -372,6 +372,8 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed)
wcn36xx_dbg(WCN36XX_DBG_MAC, "mac config changed 0x%08x\n", changed);
+ mutex_lock(&wcn->conf_mutex);
+
if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
int ch = WCN36XX_HW_CHANNEL(wcn);
wcn36xx_dbg(WCN36XX_DBG_MAC, "wcn36xx_config channel switch=%d\n",
@@ -382,6 +384,8 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed)
}
}
+ mutex_unlock(&wcn->conf_mutex);
+
return 0;
}
@@ -396,6 +400,8 @@ static void wcn36xx_configure_filter(struct ieee80211_hw *hw,
wcn36xx_dbg(WCN36XX_DBG_MAC, "mac configure filter\n");
+ mutex_lock(&wcn->conf_mutex);
+
*total &= FIF_ALLMULTI;
fp = (void *)(unsigned long)multicast;
@@ -408,6 +414,8 @@ static void wcn36xx_configure_filter(struct ieee80211_hw *hw,
else if (NL80211_IFTYPE_STATION == vif->type && tmp->sta_assoc)
wcn36xx_smd_set_mc_list(wcn, vif, fp);
}
+
+ mutex_unlock(&wcn->conf_mutex);
kfree(fp);
}
@@ -471,6 +479,8 @@ static int wcn36xx_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
key_conf->key,
key_conf->keylen);
+ mutex_lock(&wcn->conf_mutex);
+
switch (key_conf->cipher) {
case WLAN_CIPHER_SUITE_WEP40:
vif_priv->encrypt_type = WCN36XX_HAL_ED_WEP40;
@@ -565,6 +575,8 @@ static int wcn36xx_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
}
out:
+ mutex_unlock(&wcn->conf_mutex);
+
return ret;
}
@@ -725,6 +737,8 @@ static void wcn36xx_bss_info_changed(struct ieee80211_hw *hw,
wcn36xx_dbg(WCN36XX_DBG_MAC, "mac bss info changed vif %p changed 0x%08x\n",
vif, changed);
+ mutex_lock(&wcn->conf_mutex);
+
if (changed & BSS_CHANGED_BEACON_INFO) {
wcn36xx_dbg(WCN36XX_DBG_MAC,
"mac bss changed dtim period %d\n",
@@ -787,7 +801,13 @@ static void wcn36xx_bss_info_changed(struct ieee80211_hw *hw,
bss_conf->aid);
vif_priv->sta_assoc = true;
- rcu_read_lock();
+
+ /*
+ * Holding conf_mutex ensures mutual exclusion with
+ * wcn36xx_sta_remove() and thus guarantees that sta
+ * won't be freed while we're operating on it, so we
+ * do not need to hold the rcu_read_lock().
+ */
sta = ieee80211_find_sta(vif, bss_conf->bssid);
if (!sta) {
wcn36xx_err("sta %pM is not found\n",
@@ -811,7 +831,6 @@ static void wcn36xx_bss_info_changed(struct ieee80211_hw *hw,
* place where AID is available.
*/
wcn36xx_smd_config_sta(wcn, vif, sta);
- rcu_read_unlock();
} else {
wcn36xx_dbg(WCN36XX_DBG_MAC,
"disassociated bss %pM vif %pM AID=%d\n",
@@ -873,6 +892,9 @@ static void wcn36xx_bss_info_changed(struct ieee80211_hw *hw,
}
}
out:
+
+ mutex_unlock(&wcn->conf_mutex);
+
return;
}
@@ -882,7 +904,10 @@ static int wcn36xx_set_rts_threshold(struct ieee80211_hw *hw, u32 value)
struct wcn36xx *wcn = hw->priv;
wcn36xx_dbg(WCN36XX_DBG_MAC, "mac set RTS threshold %d\n", value);
+ mutex_lock(&wcn->conf_mutex);
wcn36xx_smd_update_cfg(wcn, WCN36XX_HAL_CFG_RTS_THRESHOLD, value);
+ mutex_unlock(&wcn->conf_mutex);
+
return 0;
}
@@ -893,8 +918,12 @@ static void wcn36xx_remove_interface(struct ieee80211_hw *hw,
struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
wcn36xx_dbg(WCN36XX_DBG_MAC, "mac remove interface vif %p\n", vif);
+ mutex_lock(&wcn->conf_mutex);
+
list_del(&vif_priv->list);
wcn36xx_smd_delete_sta_self(wcn, vif->addr);
+
+ mutex_unlock(&wcn->conf_mutex);
}
static int wcn36xx_add_interface(struct ieee80211_hw *hw,
@@ -915,9 +944,13 @@ static int wcn36xx_add_interface(struct ieee80211_hw *hw,
return -EOPNOTSUPP;
}
+ mutex_lock(&wcn->conf_mutex);
+
list_add(&vif_priv->list, &wcn->vif_list);
wcn36xx_smd_add_sta_self(wcn, vif);
+ mutex_unlock(&wcn->conf_mutex);
+
return 0;
}
@@ -930,6 +963,8 @@ static int wcn36xx_sta_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
wcn36xx_dbg(WCN36XX_DBG_MAC, "mac sta add vif %p sta %pM\n",
vif, sta->addr);
+ mutex_lock(&wcn->conf_mutex);
+
spin_lock_init(&sta_priv->ampdu_lock);
sta_priv->vif = vif_priv;
/*
@@ -941,6 +976,9 @@ static int wcn36xx_sta_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
sta_priv->aid = sta->aid;
wcn36xx_smd_config_sta(wcn, vif, sta);
}
+
+ mutex_unlock(&wcn->conf_mutex);
+
return 0;
}
@@ -954,8 +992,13 @@ static int wcn36xx_sta_remove(struct ieee80211_hw *hw,
wcn36xx_dbg(WCN36XX_DBG_MAC, "mac sta remove vif %p sta %pM index %d\n",
vif, sta->addr, sta_priv->sta_index);
+ mutex_lock(&wcn->conf_mutex);
+
wcn36xx_smd_delete_sta(wcn, sta_priv->sta_index);
sta_priv->vif = NULL;
+
+ mutex_unlock(&wcn->conf_mutex);
+
return 0;
}
@@ -999,6 +1042,8 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw,
wcn36xx_dbg(WCN36XX_DBG_MAC, "mac ampdu action action %d tid %d\n",
action, tid);
+ mutex_lock(&wcn->conf_mutex);
+
switch (action) {
case IEEE80211_AMPDU_RX_START:
sta_priv->tid = tid;
@@ -1038,6 +1083,8 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw,
wcn36xx_err("Unknown AMPDU action\n");
}
+ mutex_unlock(&wcn->conf_mutex);
+
return 0;
}
@@ -1216,6 +1263,7 @@ static int wcn36xx_probe(struct platform_device *pdev)
wcn = hw->priv;
wcn->hw = hw;
wcn->dev = &pdev->dev;
+ mutex_init(&wcn->conf_mutex);
mutex_init(&wcn->hal_mutex);
mutex_init(&wcn->scan_lock);
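The pattern across this file is uniform: every mac80211 callback that reconfigures the firmware brackets its SMD traffic with conf_mutex, which is also what lets wcn36xx_bss_info_changed() drop the rcu_read_lock around ieee80211_find_sta() above. Schematic form only, with the callback body elided:

static int wcn36xx_some_op(struct ieee80211_hw *hw /* , ... */)
{
	struct wcn36xx *wcn = hw->priv;

	mutex_lock(&wcn->conf_mutex);	/* serialize FW reconfiguration */
	/* ... wcn36xx_smd_*() calls ... */
	mutex_unlock(&wcn->conf_mutex);

	return 0;
}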
diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
index b52b4da9..6aefba4 100644
--- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
+++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
@@ -202,6 +202,9 @@ struct wcn36xx {
struct qcom_smem_state *tx_rings_empty_state;
unsigned tx_rings_empty_state_bit;
+ /* prevents concurrent FW reconfiguration */
+ struct mutex conf_mutex;
+
/*
* smd_buf must be protected with smd_mutex to guarantee
* that all messages are sent one after another
diff --git a/drivers/net/wireless/ath/wil6210/Kconfig b/drivers/net/wireless/ath/wil6210/Kconfig
index 6dfedc8..b448926 100644
--- a/drivers/net/wireless/ath/wil6210/Kconfig
+++ b/drivers/net/wireless/ath/wil6210/Kconfig
@@ -40,3 +40,15 @@ config WIL6210_TRACING
option if you are interested in debugging the driver.
If unsure, say Y to make it easier to debug problems.
+
+config WIL6210_DEBUGFS
+ bool "wil6210 debugfs support"
+ depends on WIL6210
+ depends on DEBUG_FS
+ default y
+ ---help---
+ Say Y here to enable wil6210 debugfs support, using the
+ kernel debugfs infrastructure. Select this
+ option if you are interested in debugging the driver.
+
+ If unsure, say Y to make it easier to debug problems.
diff --git a/drivers/net/wireless/ath/wil6210/Makefile b/drivers/net/wireless/ath/wil6210/Makefile
index 4ae21da..d27efe8 100644
--- a/drivers/net/wireless/ath/wil6210/Makefile
+++ b/drivers/net/wireless/ath/wil6210/Makefile
@@ -4,7 +4,7 @@ wil6210-y := main.o
wil6210-y += netdev.o
wil6210-y += cfg80211.o
wil6210-y += pcie_bus.o
-wil6210-y += debugfs.o
+wil6210-$(CONFIG_WIL6210_DEBUGFS) += debugfs.o
wil6210-y += wmi.o
wil6210-y += interrupt.o
wil6210-y += txrx.o
diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 0b5383a..85d5c04 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -26,6 +26,12 @@ bool disable_ap_sme;
module_param(disable_ap_sme, bool, 0444);
MODULE_PARM_DESC(disable_ap_sme, " let user space handle AP mode SME");
+#ifdef CONFIG_PM
+static struct wiphy_wowlan_support wil_wowlan_support = {
+ .flags = WIPHY_WOWLAN_ANY | WIPHY_WOWLAN_DISCONNECT,
+};
+#endif
+
#define CHAN60G(_channel, _flags) { \
.band = NL80211_BAND_60GHZ, \
.center_freq = 56160 + (2160 * (_channel)), \
@@ -273,12 +279,12 @@ int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid,
wil_dbg_wmi(wil, "Link status for CID %d: {\n"
" MCS %d TSF 0x%016llx\n"
- " BF status 0x%08x SNR 0x%08x SQI %d%%\n"
+ " BF status 0x%08x RSSI %d SQI %d%%\n"
" Tx Tpt %d goodput %d Rx goodput %d\n"
" Sectors(rx:tx) my %d:%d peer %d:%d\n""}\n",
cid, le16_to_cpu(reply.evt.bf_mcs),
le64_to_cpu(reply.evt.tsf), reply.evt.status,
- le32_to_cpu(reply.evt.snr_val),
+ reply.evt.rssi,
reply.evt.sqi,
le32_to_cpu(reply.evt.tx_tpt),
le32_to_cpu(reply.evt.tx_goodput),
@@ -311,7 +317,11 @@ int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid,
if (test_bit(wil_status_fwconnected, wil->status)) {
sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
- sinfo->signal = reply.evt.sqi;
+ if (test_bit(WMI_FW_CAPABILITY_RSSI_REPORTING,
+ wil->fw_capabilities))
+ sinfo->signal = reply.evt.rssi;
+ else
+ sinfo->signal = reply.evt.sqi;
}
return rc;
@@ -372,6 +382,34 @@ static int wil_cfg80211_dump_station(struct wiphy *wiphy,
return rc;
}
+static int wil_cfg80211_start_p2p_device(struct wiphy *wiphy,
+ struct wireless_dev *wdev)
+{
+ struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+
+ wil_dbg_misc(wil, "start_p2p_device: entered\n");
+ wil->p2p.p2p_dev_started = 1;
+ return 0;
+}
+
+static void wil_cfg80211_stop_p2p_device(struct wiphy *wiphy,
+ struct wireless_dev *wdev)
+{
+ struct wil6210_priv *wil = wiphy_to_wil(wiphy);
+ struct wil_p2p_info *p2p = &wil->p2p;
+
+ if (!p2p->p2p_dev_started)
+ return;
+
+ wil_dbg_misc(wil, "stop_p2p_device: entered\n");
+ mutex_lock(&wil->mutex);
+ mutex_lock(&wil->p2p_wdev_mutex);
+ wil_p2p_stop_radio_operations(wil);
+ p2p->p2p_dev_started = 0;
+ mutex_unlock(&wil->p2p_wdev_mutex);
+ mutex_unlock(&wil->mutex);
+}
+
static struct wireless_dev *
wil_cfg80211_add_iface(struct wiphy *wiphy, const char *name,
unsigned char name_assign_type,
@@ -420,6 +458,7 @@ static int wil_cfg80211_del_iface(struct wiphy *wiphy,
return -EINVAL;
}
+ wil_cfg80211_stop_p2p_device(wiphy, wdev);
wil_p2p_wdev_free(wil);
return 0;
@@ -801,7 +840,7 @@ static int wil_cfg80211_connect(struct wiphy *wiphy,
wil->bss = bss;
/* Connect can take lots of time */
mod_timer(&wil->connect_timer,
- jiffies + msecs_to_jiffies(2000));
+ jiffies + msecs_to_jiffies(5000));
} else {
clear_bit(wil_status_fwconnecting, wil->status);
}
@@ -884,6 +923,9 @@ int wil_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
wil_hex_dump_misc("mgmt tx frame ", DUMP_PREFIX_OFFSET, 16, 1, buf,
len, true);
+ if (len < sizeof(struct ieee80211_hdr_3addr))
+ return -EINVAL;
+
cmd = kmalloc(sizeof(*cmd) + len, GFP_KERNEL);
if (!cmd) {
rc = -ENOMEM;
@@ -1648,34 +1690,6 @@ static int wil_cfg80211_change_bss(struct wiphy *wiphy,
return 0;
}
-static int wil_cfg80211_start_p2p_device(struct wiphy *wiphy,
- struct wireless_dev *wdev)
-{
- struct wil6210_priv *wil = wiphy_to_wil(wiphy);
-
- wil_dbg_misc(wil, "start_p2p_device: entered\n");
- wil->p2p.p2p_dev_started = 1;
- return 0;
-}
-
-static void wil_cfg80211_stop_p2p_device(struct wiphy *wiphy,
- struct wireless_dev *wdev)
-{
- struct wil6210_priv *wil = wiphy_to_wil(wiphy);
- struct wil_p2p_info *p2p = &wil->p2p;
-
- if (!p2p->p2p_dev_started)
- return;
-
- wil_dbg_misc(wil, "stop_p2p_device: entered\n");
- mutex_lock(&wil->mutex);
- mutex_lock(&wil->p2p_wdev_mutex);
- wil_p2p_stop_radio_operations(wil);
- p2p->p2p_dev_started = 0;
- mutex_unlock(&wil->p2p_wdev_mutex);
- mutex_unlock(&wil->mutex);
-}
-
static int wil_cfg80211_set_power_mgmt(struct wiphy *wiphy,
struct net_device *dev,
bool enabled, int timeout)
@@ -1791,7 +1805,7 @@ static void wil_wiphy_init(struct wiphy *wiphy)
wiphy->bands[NL80211_BAND_60GHZ] = &wil_band_60ghz;
- /* TODO: figure this out */
+ /* may change after reading FW capabilities */
wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC;
wiphy->cipher_suites = wil_cipher_suites;
@@ -1801,6 +1815,10 @@ static void wil_wiphy_init(struct wiphy *wiphy)
wiphy->n_vendor_commands = ARRAY_SIZE(wil_nl80211_vendor_commands);
wiphy->vendor_commands = wil_nl80211_vendor_commands;
+
+#ifdef CONFIG_PM
+ wiphy->wowlan = &wil_wowlan_support;
+#endif
}
struct wireless_dev *wil_cfg80211_init(struct device *dev)
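Assigning wiphy->wowlan is what exposes the WoWLAN triggers to nl80211: WIPHY_WOWLAN_ANY advertises wake on any packet and WIPHY_WOWLAN_DISCONNECT wake on link loss. Condensed sketch of the two pieces added above:

#ifdef CONFIG_PM
static struct wiphy_wowlan_support wil_wowlan_support = {
	.flags = WIPHY_WOWLAN_ANY | WIPHY_WOWLAN_DISCONNECT,
};
#endif

	/* in wil_wiphy_init() */
#ifdef CONFIG_PM
	wiphy->wowlan = &wil_wowlan_support;
#endif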
diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c
index f82506d..6db00c1 100644
--- a/drivers/net/wireless/ath/wil6210/debugfs.c
+++ b/drivers/net/wireless/ath/wil6210/debugfs.c
@@ -20,7 +20,6 @@
#include <linux/pci.h>
#include <linux/rtnetlink.h>
#include <linux/power_supply.h>
-
#include "wil6210.h"
#include "wmi.h"
#include "txrx.h"
@@ -30,7 +29,6 @@
static u32 mem_addr;
static u32 dbg_txdesc_index;
static u32 dbg_vring_index; /* 24+ for Rx, 0..23 for Tx */
-u32 vring_idle_trsh = 16; /* HW fetches up to 16 descriptors at once */
enum dbg_off_type {
doff_u32 = 0,
@@ -801,6 +799,9 @@ static ssize_t wil_write_file_txmgmt(struct file *file, const char __user *buf,
int rc;
void *frame;
+ if (!len)
+ return -EINVAL;
+
frame = memdup_user(buf, len);
if (IS_ERR(frame))
return PTR_ERR(frame);
@@ -1013,6 +1014,7 @@ static int wil_bf_debugfs_show(struct seq_file *s, void *data)
" TSF = 0x%016llx\n"
" TxMCS = %2d TxTpt = %4d\n"
" SQI = %4d\n"
+ " RSSI = %4d\n"
" Status = 0x%08x %s\n"
" Sectors(rx:tx) my %2d:%2d peer %2d:%2d\n"
" Goodput(rx:tx) %4d:%4d\n"
@@ -1022,6 +1024,7 @@ static int wil_bf_debugfs_show(struct seq_file *s, void *data)
le16_to_cpu(reply.evt.bf_mcs),
le32_to_cpu(reply.evt.tx_tpt),
reply.evt.sqi,
+ reply.evt.rssi,
status, wil_bfstatus_str(status),
le16_to_cpu(reply.evt.my_rx_sector),
le16_to_cpu(reply.evt.my_tx_sector),
@@ -1612,6 +1615,8 @@ static ssize_t wil_write_suspend_stats(struct file *file,
struct wil6210_priv *wil = file->private_data;
memset(&wil->suspend_stats, 0, sizeof(wil->suspend_stats));
+ wil->suspend_stats.min_suspend_time = ULONG_MAX;
+ wil->suspend_stats.collection_start = ktime_get();
return len;
}
@@ -1623,18 +1628,27 @@ static ssize_t wil_read_suspend_stats(struct file *file,
struct wil6210_priv *wil = file->private_data;
static char text[400];
int n;
+ unsigned long long stats_collection_time =
+ ktime_to_us(ktime_sub(ktime_get(),
+ wil->suspend_stats.collection_start));
n = snprintf(text, sizeof(text),
"Suspend statistics:\n"
"successful suspends:%ld failed suspends:%ld\n"
"successful resumes:%ld failed resumes:%ld\n"
- "rejected by host:%ld rejected by device:%ld\n",
+ "rejected by host:%ld rejected by device:%ld\n"
+ "total suspend time:%lld min suspend time:%lld\n"
+ "max suspend time:%lld stats collection time: %lld\n",
wil->suspend_stats.successful_suspends,
wil->suspend_stats.failed_suspends,
wil->suspend_stats.successful_resumes,
wil->suspend_stats.failed_resumes,
wil->suspend_stats.rejected_by_host,
- wil->suspend_stats.rejected_by_device);
+ wil->suspend_stats.rejected_by_device,
+ wil->suspend_stats.total_suspend_time,
+ wil->suspend_stats.min_suspend_time,
+ wil->suspend_stats.max_suspend_time,
+ stats_collection_time);
n = min_t(int, n, sizeof(text));
@@ -1747,6 +1761,7 @@ static const struct dbg_off dbg_wil_off[] = {
WIL_FIELD(chip_revision, 0444, doff_u8),
WIL_FIELD(abft_len, 0644, doff_u8),
WIL_FIELD(wakeup_trigger, 0644, doff_u8),
+ WIL_FIELD(vring_idle_trsh, 0644, doff_u32),
{},
};
@@ -1762,8 +1777,6 @@ static const struct dbg_off dbg_statics[] = {
{"desc_index", 0644, (ulong)&dbg_txdesc_index, doff_u32},
{"vring_index", 0644, (ulong)&dbg_vring_index, doff_u32},
{"mem_addr", 0644, (ulong)&mem_addr, doff_u32},
- {"vring_idle_trsh", 0644, (ulong)&vring_idle_trsh,
- doff_u32},
{"led_polarity", 0644, (ulong)&led_polarity, doff_u8},
{},
};
@@ -1790,6 +1803,8 @@ int wil6210_debugfs_init(struct wil6210_priv *wil)
wil6210_debugfs_create_ITR_CNT(wil, dbg);
+ wil->suspend_stats.collection_start = ktime_get();
+
return 0;
}
diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c
index cad8a95..59def4f 100644
--- a/drivers/net/wireless/ath/wil6210/interrupt.c
+++ b/drivers/net/wireless/ath/wil6210/interrupt.c
@@ -244,7 +244,7 @@ static irqreturn_t wil6210_irq_rx(int irq, void *cookie)
wil_dbg_irq(wil, "ISR RX 0x%08x\n", isr);
if (unlikely(!isr)) {
- wil_err(wil, "spurious IRQ: RX\n");
+ wil_err_ratelimited(wil, "spurious IRQ: RX\n");
return IRQ_NONE;
}
@@ -269,11 +269,12 @@ static irqreturn_t wil6210_irq_rx(int irq, void *cookie)
need_unmask = false;
napi_schedule(&wil->napi_rx);
} else {
- wil_err(wil,
+ wil_err_ratelimited(
+ wil,
"Got Rx interrupt while stopping interface\n");
}
} else {
- wil_err(wil, "Got Rx interrupt while in reset\n");
+ wil_err_ratelimited(wil, "Got Rx interrupt while in reset\n");
}
}
@@ -302,7 +303,7 @@ static irqreturn_t wil6210_irq_tx(int irq, void *cookie)
wil_dbg_irq(wil, "ISR TX 0x%08x\n", isr);
if (unlikely(!isr)) {
- wil_err(wil, "spurious IRQ: TX\n");
+ wil_err_ratelimited(wil, "spurious IRQ: TX\n");
return IRQ_NONE;
}
@@ -318,12 +319,13 @@ static irqreturn_t wil6210_irq_tx(int irq, void *cookie)
need_unmask = false;
napi_schedule(&wil->napi_tx);
} else {
- wil_err(wil, "Got Tx interrupt while in reset\n");
+ wil_err_ratelimited(wil, "Got Tx interrupt while in reset\n");
}
}
if (unlikely(isr))
- wil_err(wil, "un-handled TX ISR bits 0x%08x\n", isr);
+ wil_err_ratelimited(wil, "un-handled TX ISR bits 0x%08x\n",
+ isr);
/* Tx IRQ will be enabled when NAPI processing finished */
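Interrupt paths are the classic place for message storms, so the conversions above switch to ratelimited logging; a spurious-IRQ loop can otherwise livelock the console. The generic kernel form, which wil_err_ratelimited() wraps with driver-specific prefixes, looks like:

	if (unlikely(!isr)) {
		dev_err_ratelimited(dev, "spurious IRQ: RX\n");
		return IRQ_NONE;
	}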
diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c
index daf944a..bac829a 100644
--- a/drivers/net/wireless/ath/wil6210/main.c
+++ b/drivers/net/wireless/ath/wil6210/main.c
@@ -394,10 +394,11 @@ static void wil_fw_error_worker(struct work_struct *work)
struct wil6210_priv *wil = container_of(work, struct wil6210_priv,
fw_error_worker);
struct wireless_dev *wdev = wil->wdev;
+ struct net_device *ndev = wil_to_ndev(wil);
wil_dbg_misc(wil, "fw error worker\n");
- if (!netif_running(wil_to_ndev(wil))) {
+ if (!(ndev->flags & IFF_UP)) {
wil_info(wil, "No recovery - interface is down\n");
return;
}
@@ -578,6 +579,9 @@ int wil_priv_init(struct wil6210_priv *wil)
wil->wakeup_trigger = WMI_WAKEUP_TRIGGER_UCAST |
WMI_WAKEUP_TRIGGER_BCAST;
+ memset(&wil->suspend_stats, 0, sizeof(wil->suspend_stats));
+ wil->suspend_stats.min_suspend_time = ULONG_MAX;
+ wil->vring_idle_trsh = 16;
return 0;
@@ -926,6 +930,29 @@ int wil_ps_update(struct wil6210_priv *wil, enum wmi_ps_profile_type ps_profile)
return rc;
}
+static void wil_pre_fw_config(struct wil6210_priv *wil)
+{
+ /* Mark FW as loaded from host */
+ wil_s(wil, RGF_USER_USAGE_6, 1);
+
+ /* clear any interrupts which on-card-firmware
+ * may have set
+ */
+ wil6210_clear_irq(wil);
+ /* CAF_ICR - clear and mask */
+ /* it is W1C, clear by writing back same value */
+ wil_s(wil, RGF_CAF_ICR + offsetof(struct RGF_ICR, ICR), 0);
+ wil_w(wil, RGF_CAF_ICR + offsetof(struct RGF_ICR, IMV), ~0);
+ /* clear PAL_UNIT_ICR (potential D0->D3 leftover) */
+ wil_s(wil, RGF_PAL_UNIT_ICR + offsetof(struct RGF_ICR, ICR), 0);
+
+ if (wil->fw_calib_result > 0) {
+ __le32 val = cpu_to_le32(wil->fw_calib_result |
+ (CALIB_RESULT_SIGNATURE << 8));
+ wil_w(wil, RGF_USER_FW_CALIB_RESULT, (u32 __force)val);
+ }
+}
+
/*
* We reset all the structures, and we reset the UMAC.
* After calling this routine, you're expected to reload
@@ -1019,18 +1046,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw)
if (rc)
return rc;
- /* Mark FW as loaded from host */
- wil_s(wil, RGF_USER_USAGE_6, 1);
-
- /* clear any interrupts which on-card-firmware
- * may have set
- */
- wil6210_clear_irq(wil);
- /* CAF_ICR - clear and mask */
- /* it is W1C, clear by writing back same value */
- wil_s(wil, RGF_CAF_ICR + offsetof(struct RGF_ICR, ICR), 0);
- wil_w(wil, RGF_CAF_ICR + offsetof(struct RGF_ICR, IMV), ~0);
-
+ wil_pre_fw_config(wil);
wil_release_cpu(wil);
}
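On the W1C ("write one to clear") registers touched here, wil_s(wil, reg, 0) works because wil_s() is a read-OR-write helper: it reads the ICR, ORs in nothing, and writes the pending bits straight back, which acks exactly those bits. Expanded sketch, assuming wil_s() keeps that read-OR-write semantics:

	u32 icr = wil_r(wil, RGF_CAF_ICR + offsetof(struct RGF_ICR, ICR));

	/* writing the pending bits back clears them all (W1C) */
	wil_w(wil, RGF_CAF_ICR + offsetof(struct RGF_ICR, ICR), icr);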
diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c
index d571feb..6a3ab4b 100644
--- a/drivers/net/wireless/ath/wil6210/pcie_bus.c
+++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c
@@ -84,6 +84,9 @@ void wil_set_capabilities(struct wil6210_priv *wil)
/* extract FW capabilities from file without loading the FW */
wil_request_firmware(wil, wil->wil_fw_name, false);
+
+ if (test_bit(WMI_FW_CAPABILITY_RSSI_REPORTING, wil->fw_capabilities))
+ wil_to_wiphy(wil)->signal_type = CFG80211_SIGNAL_TYPE_MBM;
}
void wil_disable_irq(struct wil6210_priv *wil)
diff --git a/drivers/net/wireless/ath/wil6210/pm.c b/drivers/net/wireless/ath/wil6210/pm.c
index ce1f384..8f5d1b44 100644
--- a/drivers/net/wireless/ath/wil6210/pm.c
+++ b/drivers/net/wireless/ath/wil6210/pm.c
@@ -21,10 +21,11 @@ int wil_can_suspend(struct wil6210_priv *wil, bool is_runtime)
{
int rc = 0;
struct wireless_dev *wdev = wil->wdev;
+ struct net_device *ndev = wil_to_ndev(wil);
wil_dbg_pm(wil, "can_suspend: %s\n", is_runtime ? "runtime" : "system");
- if (!netif_running(wil_to_ndev(wil))) {
+ if (!(ndev->flags & IFF_UP)) {
/* can always sleep when down */
wil_dbg_pm(wil, "Interface is down\n");
goto out;
@@ -85,7 +86,9 @@ static int wil_resume_keep_radio_on(struct wil6210_priv *wil)
/* Send WMI resume request to the device */
rc = wmi_resume(wil);
if (rc) {
- wil_err(wil, "device failed to resume (%d), resetting\n", rc);
+ wil_err(wil, "device failed to resume (%d)\n", rc);
+ if (no_fw_recovery)
+ goto out;
rc = wil_down(wil);
if (rc) {
wil_err(wil, "wil_down failed (%d)\n", rc);
@@ -298,6 +301,9 @@ int wil_suspend(struct wil6210_priv *wil, bool is_runtime)
wil_dbg_pm(wil, "suspend: %s => %d\n",
is_runtime ? "runtime" : "system", rc);
+ if (!rc)
+ wil->suspend_stats.suspend_start_time = ktime_get();
+
return rc;
}
@@ -307,6 +313,7 @@ int wil_resume(struct wil6210_priv *wil, bool is_runtime)
struct net_device *ndev = wil_to_ndev(wil);
bool keep_radio_on = ndev->flags & IFF_UP &&
wil->keep_radio_on_during_sleep;
+ unsigned long long suspend_time_usec = 0;
wil_dbg_pm(wil, "resume: %s\n", is_runtime ? "runtime" : "system");
@@ -324,8 +331,20 @@ int wil_resume(struct wil6210_priv *wil, bool is_runtime)
else
rc = wil_resume_radio_off(wil);
+ if (rc)
+ goto out;
+
+ suspend_time_usec =
+ ktime_to_us(ktime_sub(ktime_get(),
+ wil->suspend_stats.suspend_start_time));
+ wil->suspend_stats.total_suspend_time += suspend_time_usec;
+ if (suspend_time_usec < wil->suspend_stats.min_suspend_time)
+ wil->suspend_stats.min_suspend_time = suspend_time_usec;
+ if (suspend_time_usec > wil->suspend_stats.max_suspend_time)
+ wil->suspend_stats.max_suspend_time = suspend_time_usec;
+
out:
- wil_dbg_pm(wil, "resume: %s => %d\n",
- is_runtime ? "runtime" : "system", rc);
+ wil_dbg_pm(wil, "resume: %s => %d, suspend time %lld usec\n",
+ is_runtime ? "runtime" : "system", rc, suspend_time_usec);
return rc;
}
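The accounting above uses ktime_get(), i.e. CLOCK_MONOTONIC. One caveat worth noting: CLOCK_MONOTONIC does not advance while the host itself is in system suspend, so these numbers are most meaningful on the keep-radio-on/runtime path; ktime_get_boottime() would be the variant that includes suspended time. Sketch of the min/max/total bookkeeping, with names following struct wil_suspend_stats:

	u64 delta_us = ktime_to_us(ktime_sub(ktime_get(),
					     stats->suspend_start_time));

	stats->total_suspend_time += delta_us;
	stats->min_suspend_time = min(stats->min_suspend_time, delta_us);
	stats->max_suspend_time = max(stats->max_suspend_time, delta_us);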
diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c
index ec57bcc..389c718 100644
--- a/drivers/net/wireless/ath/wil6210/txrx.c
+++ b/drivers/net/wireless/ath/wil6210/txrx.c
@@ -1666,7 +1666,7 @@ static int __wil_tx_vring_tso(struct wil6210_priv *wil, struct vring *vring,
/* performance monitoring */
used = wil_vring_used_tx(vring);
- if (wil_val_in_range(vring_idle_trsh,
+ if (wil_val_in_range(wil->vring_idle_trsh,
used, used + descs_used)) {
txdata->idle += get_cycles() - txdata->last_idle;
wil_dbg_txrx(wil, "Ring[%2d] not idle %d -> %d\n",
@@ -1813,7 +1813,7 @@ static int __wil_tx_vring(struct wil6210_priv *wil, struct vring *vring,
/* performance monitoring */
used = wil_vring_used_tx(vring);
- if (wil_val_in_range(vring_idle_trsh,
+ if (wil_val_in_range(wil->vring_idle_trsh,
used, used + nr_frags + 1)) {
txdata->idle += get_cycles() - txdata->last_idle;
wil_dbg_txrx(wil, "Ring[%2d] not idle %d -> %d\n",
@@ -2175,7 +2175,7 @@ int wil_tx_complete(struct wil6210_priv *wil, int ringid)
/* performance monitoring */
used_new = wil_vring_used_tx(vring);
- if (wil_val_in_range(vring_idle_trsh,
+ if (wil_val_in_range(wil->vring_idle_trsh,
used_new, used_before_complete)) {
wil_dbg_txrx(wil, "Ring[%2d] idle %d -> %d\n",
ringid, used_before_complete, used_new);
diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h
index d085ccf..315ec8b 100644
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h
@@ -30,7 +30,6 @@ extern bool no_fw_recovery;
extern unsigned int mtu_max;
extern unsigned short rx_ring_overflow_thrsh;
extern int agg_wsize;
-extern u32 vring_idle_trsh;
extern bool rx_align_2;
extern bool rx_large_buf;
extern bool debug_fw;
@@ -90,6 +89,11 @@ struct wil_suspend_stats {
unsigned long failed_resumes;
unsigned long rejected_by_device;
unsigned long rejected_by_host;
+ unsigned long long total_suspend_time;
+ unsigned long long min_suspend_time;
+ unsigned long long max_suspend_time;
+ ktime_t collection_start;
+ ktime_t suspend_start_time;
};
/* Calculate MAC buffer size for the firmware. It includes all overhead,
@@ -166,6 +170,10 @@ struct RGF_ICR {
#define RGF_USER_USER_SCRATCH_PAD (0x8802bc)
#define RGF_USER_BL (0x880A3C) /* Boot Loader */
#define RGF_USER_FW_REV_ID (0x880a8c) /* chip revision */
+#define RGF_USER_FW_CALIB_RESULT (0x880a90) /* b0-7:result
+ * b8-15:signature
+ */
+ #define CALIB_RESULT_SIGNATURE (0x11)
#define RGF_USER_CLKS_CTL_0 (0x880abc)
#define BIT_USER_CLKS_CAR_AHB_SW_SEL BIT(1) /* ref clk/PLL */
#define BIT_USER_CLKS_RST_PWGD BIT(11) /* reset on "power good" */
@@ -260,6 +268,7 @@ struct RGF_ICR {
#define BIT_DMA_PSEUDO_CAUSE_MISC BIT(2)
#define RGF_HP_CTRL (0x88265c)
+#define RGF_PAL_UNIT_ICR (0x88266c) /* struct RGF_ICR */
#define RGF_PCIE_LOS_COUNTER_CTL (0x882dc4)
/* MAC timer, usec, for packet lifetime */
@@ -684,6 +693,7 @@ struct wil6210_priv {
u8 vring2cid_tid[WIL6210_MAX_TX_RINGS][2]; /* [0] - CID, [1] - TID */
struct wil_sta_info sta[WIL6210_MAX_CID];
int bcast_vring;
+ u32 vring_idle_trsh; /* HW fetches up to 16 descriptors at once */
bool use_extended_dma_addr; /* indicates whether we are using 48 bits */
/* scan */
struct cfg80211_scan_request *scan_request;
@@ -719,6 +729,8 @@ struct wil6210_priv {
enum wmi_ps_profile_type ps_profile;
+ int fw_calib_result;
+
#ifdef CONFIG_PM
#ifdef CONFIG_PM_SLEEP
struct notifier_block pm_notify;
@@ -929,8 +941,14 @@ int wil_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
struct cfg80211_mgmt_tx_params *params,
u64 *cookie);
+#if defined(CONFIG_WIL6210_DEBUGFS)
int wil6210_debugfs_init(struct wil6210_priv *wil);
void wil6210_debugfs_remove(struct wil6210_priv *wil);
+#else
+static inline int wil6210_debugfs_init(struct wil6210_priv *wil) { return 0; }
+static inline void wil6210_debugfs_remove(struct wil6210_priv *wil) {}
+#endif
+
int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid,
struct station_info *sinfo);
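With the CONFIG_WIL6210_DEBUGFS=n stubs above, call sites need no #ifdef at all: the static inlines compile away and debugfs.o is never built (see the Makefile hunk earlier). Usage stays unconditional:

	rc = wil6210_debugfs_init(wil);	/* returns 0 when compiled out */
	/* ... */
	wil6210_debugfs_remove(wil);	/* no-op when compiled out */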
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index 65ef673..ffdd2fa 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -344,6 +344,11 @@ static void wmi_evt_ready(struct wil6210_priv *wil, int id, void *d, int len)
strlcpy(wdev->wiphy->fw_version, wil->fw_version,
sizeof(wdev->wiphy->fw_version));
+ if (len > offsetof(struct wmi_ready_event, rfc_read_calib_result)) {
+ wil_dbg_wmi(wil, "rfc calibration result %d\n",
+ evt->rfc_read_calib_result);
+ wil->fw_calib_result = evt->rfc_read_calib_result;
+ }
wil_set_recovery_state(wil, fw_recovery_idle);
set_bit(wil_status_fwready, wil->status);
/* let the reset sequence continue */
@@ -381,12 +386,15 @@ static void wmi_evt_rx_mgmt(struct wil6210_priv *wil, int id, void *d, int len)
ch_no = data->info.channel + 1;
freq = ieee80211_channel_to_frequency(ch_no, NL80211_BAND_60GHZ);
channel = ieee80211_get_channel(wiphy, freq);
- signal = data->info.sqi;
+ if (test_bit(WMI_FW_CAPABILITY_RSSI_REPORTING, wil->fw_capabilities))
+ signal = 100 * data->info.rssi;
+ else
+ signal = data->info.sqi;
d_status = le16_to_cpu(data->info.status);
fc = rx_mgmt_frame->frame_control;
- wil_dbg_wmi(wil, "MGMT Rx: channel %d MCS %d SNR %d SQI %d%%\n",
- data->info.channel, data->info.mcs, data->info.snr,
+ wil_dbg_wmi(wil, "MGMT Rx: channel %d MCS %d RSSI %d SQI %d%%\n",
+ data->info.channel, data->info.mcs, data->info.rssi,
data->info.sqi);
wil_dbg_wmi(wil, "status 0x%04x len %d fc 0x%04x\n", d_status, d_len,
le16_to_cpu(fc));
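The factor of 100 exists because the wiphy now advertises CFG80211_SIGNAL_TYPE_MBM (see the pcie_bus.c hunk) and mBm is 1/100 of a dBm. cfg80211 provides a macro expressing the same conversion, so an equivalent spelling would be:

#include <net/cfg80211.h>

	signal = DBM_TO_MBM(data->info.rssi);	/* expands to rssi * 100 */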
diff --git a/drivers/net/wireless/ath/wil6210/wmi.h b/drivers/net/wireless/ath/wil6210/wmi.h
index 256f63c..5263ee7 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.h
+++ b/drivers/net/wireless/ath/wil6210/wmi.h
@@ -36,6 +36,11 @@
#define WMI_PROX_RANGE_NUM (3)
#define WMI_MAX_LOSS_DMG_BEACONS (20)
#define MAX_NUM_OF_SECTORS (128)
+#define WMI_SCHED_MAX_ALLOCS_PER_CMD (4)
+#define WMI_RF_DTYPE_LENGTH (3)
+#define WMI_RF_ETYPE_LENGTH (3)
+#define WMI_RF_RX2TX_LENGTH (3)
+#define WMI_RF_ETYPE_VAL_PER_RANGE (5)
/* Mailbox interface
* used for commands and events
@@ -52,14 +57,20 @@ enum wmi_mid {
* the host
*/
enum wmi_fw_capability {
- WMI_FW_CAPABILITY_FTM = 0,
- WMI_FW_CAPABILITY_PS_CONFIG = 1,
- WMI_FW_CAPABILITY_RF_SECTORS = 2,
- WMI_FW_CAPABILITY_MGMT_RETRY_LIMIT = 3,
- WMI_FW_CAPABILITY_DISABLE_AP_SME = 4,
- WMI_FW_CAPABILITY_WMI_ONLY = 5,
- WMI_FW_CAPABILITY_THERMAL_THROTTLING = 7,
- WMI_FW_CAPABILITY_D3_SUSPEND = 8,
+ WMI_FW_CAPABILITY_FTM = 0,
+ WMI_FW_CAPABILITY_PS_CONFIG = 1,
+ WMI_FW_CAPABILITY_RF_SECTORS = 2,
+ WMI_FW_CAPABILITY_MGMT_RETRY_LIMIT = 3,
+ WMI_FW_CAPABILITY_DISABLE_AP_SME = 4,
+ WMI_FW_CAPABILITY_WMI_ONLY = 5,
+ WMI_FW_CAPABILITY_THERMAL_THROTTLING = 7,
+ WMI_FW_CAPABILITY_D3_SUSPEND = 8,
+ WMI_FW_CAPABILITY_LONG_RANGE = 9,
+ WMI_FW_CAPABILITY_FIXED_SCHEDULING = 10,
+ WMI_FW_CAPABILITY_MULTI_DIRECTED_OMNIS = 11,
+ WMI_FW_CAPABILITY_RSSI_REPORTING = 12,
+ WMI_FW_CAPABILITY_SET_SILENT_RSSI_TABLE = 13,
+ WMI_FW_CAPABILITY_LO_POWER_CALIB_FROM_OTP = 14,
WMI_FW_CAPABILITY_MAX,
};
@@ -79,6 +90,7 @@ enum wmi_command_id {
WMI_START_SCAN_CMDID = 0x07,
WMI_SET_BSS_FILTER_CMDID = 0x09,
WMI_SET_PROBED_SSID_CMDID = 0x0A,
+ /* deprecated */
WMI_SET_LISTEN_INT_CMDID = 0x0B,
WMI_BCON_CTRL_CMDID = 0x0F,
WMI_ADD_CIPHER_KEY_CMDID = 0x16,
@@ -93,26 +105,28 @@ enum wmi_command_id {
WMI_ECHO_CMDID = 0x803,
WMI_DEEP_ECHO_CMDID = 0x804,
WMI_CONFIG_MAC_CMDID = 0x805,
+ /* deprecated */
WMI_CONFIG_PHY_DEBUG_CMDID = 0x806,
WMI_ADD_DEBUG_TX_PCKT_CMDID = 0x808,
WMI_PHY_GET_STATISTICS_CMDID = 0x809,
+ /* deprecated */
WMI_FS_TUNE_CMDID = 0x80A,
+ /* deprecated */
WMI_CORR_MEASURE_CMDID = 0x80B,
WMI_READ_RSSI_CMDID = 0x80C,
WMI_TEMP_SENSE_CMDID = 0x80E,
WMI_DC_CALIB_CMDID = 0x80F,
+ /* deprecated */
WMI_SEND_TONE_CMDID = 0x810,
+ /* deprecated */
WMI_IQ_TX_CALIB_CMDID = 0x811,
+ /* deprecated */
WMI_IQ_RX_CALIB_CMDID = 0x812,
- WMI_SET_UCODE_IDLE_CMDID = 0x813,
WMI_SET_WORK_MODE_CMDID = 0x815,
WMI_LO_LEAKAGE_CALIB_CMDID = 0x816,
- WMI_MARLON_R_READ_CMDID = 0x818,
- WMI_MARLON_R_WRITE_CMDID = 0x819,
- WMI_MARLON_R_TXRX_SEL_CMDID = 0x81A,
- MAC_IO_STATIC_PARAMS_CMDID = 0x81B,
- MAC_IO_DYNAMIC_PARAMS_CMDID = 0x81C,
+ WMI_LO_POWER_CALIB_FROM_OTP_CMDID = 0x817,
WMI_SILENT_RSSI_CALIB_CMDID = 0x81D,
+ /* deprecated */
WMI_RF_RX_TEST_CMDID = 0x81E,
WMI_CFG_RX_CHAIN_CMDID = 0x820,
WMI_VRING_CFG_CMDID = 0x821,
@@ -126,11 +140,6 @@ enum wmi_command_id {
WMI_SET_PCP_CHANNEL_CMDID = 0x829,
WMI_GET_PCP_CHANNEL_CMDID = 0x82A,
WMI_SW_TX_REQ_CMDID = 0x82B,
- WMI_READ_MAC_RXQ_CMDID = 0x830,
- WMI_READ_MAC_TXQ_CMDID = 0x831,
- WMI_WRITE_MAC_RXQ_CMDID = 0x832,
- WMI_WRITE_MAC_TXQ_CMDID = 0x833,
- WMI_WRITE_MAC_XQ_FIELD_CMDID = 0x834,
WMI_MLME_PUSH_CMDID = 0x835,
WMI_BEAMFORMING_MGMT_CMDID = 0x836,
WMI_BF_TXSS_MGMT_CMDID = 0x837,
@@ -144,9 +153,13 @@ enum wmi_command_id {
WMI_MAINTAIN_RESUME_CMDID = 0x851,
WMI_RS_MGMT_CMDID = 0x852,
WMI_RF_MGMT_CMDID = 0x853,
- WMI_OTP_READ_CMDID = 0x856,
- WMI_OTP_WRITE_CMDID = 0x857,
+ WMI_RF_XPM_READ_CMDID = 0x856,
+ WMI_RF_XPM_WRITE_CMDID = 0x857,
WMI_LED_CFG_CMDID = 0x858,
+ WMI_SET_CONNECT_SNR_THR_CMDID = 0x85B,
+ WMI_SET_ACTIVE_SILENT_RSSI_TABLE_CMDID = 0x85C,
+ WMI_RF_PWR_ON_DELAY_CMDID = 0x85D,
+ WMI_SET_HIGH_POWER_TABLE_PARAMS_CMDID = 0x85E,
/* Performance monitoring commands */
WMI_BF_CTRL_CMDID = 0x862,
WMI_NOTIFY_REQ_CMDID = 0x863,
@@ -154,7 +167,6 @@ enum wmi_command_id {
WMI_GET_RF_STATUS_CMDID = 0x866,
WMI_GET_BASEBAND_TYPE_CMDID = 0x867,
WMI_UNIT_TEST_CMDID = 0x900,
- WMI_HICCUP_CMDID = 0x901,
WMI_FLASH_READ_CMDID = 0x902,
WMI_FLASH_WRITE_CMDID = 0x903,
/* Power management */
@@ -174,16 +186,6 @@ enum wmi_command_id {
WMI_GET_PCP_FACTOR_CMDID = 0x91B,
/* Power Save Configuration Commands */
WMI_PS_DEV_PROFILE_CFG_CMDID = 0x91C,
- /* Not supported yet */
- WMI_PS_DEV_CFG_CMDID = 0x91D,
- /* Not supported yet */
- WMI_PS_DEV_CFG_READ_CMDID = 0x91E,
- /* Per MAC Power Save Configuration commands
- * Not supported yet
- */
- WMI_PS_MID_CFG_CMDID = 0x91F,
- /* Not supported yet */
- WMI_PS_MID_CFG_READ_CMDID = 0x920,
WMI_RS_CFG_CMDID = 0x921,
WMI_GET_DETAILED_RS_RES_CMDID = 0x922,
WMI_AOA_MEAS_CMDID = 0x923,
@@ -194,13 +196,16 @@ enum wmi_command_id {
WMI_DEL_STA_CMDID = 0x936,
WMI_SET_THERMAL_THROTTLING_CFG_CMDID = 0x940,
WMI_GET_THERMAL_THROTTLING_CFG_CMDID = 0x941,
+ /* Read Power Save profile type */
+ WMI_PS_DEV_PROFILE_CFG_READ_CMDID = 0x942,
WMI_TOF_SESSION_START_CMDID = 0x991,
WMI_TOF_GET_CAPABILITIES_CMDID = 0x992,
WMI_TOF_SET_LCR_CMDID = 0x993,
WMI_TOF_SET_LCI_CMDID = 0x994,
- WMI_TOF_CHANNEL_INFO_CMDID = 0x995,
+ WMI_TOF_CFG_RESPONDER_CMDID = 0x996,
WMI_TOF_SET_TX_RX_OFFSET_CMDID = 0x997,
WMI_TOF_GET_TX_RX_OFFSET_CMDID = 0x998,
+ WMI_TOF_CHANNEL_INFO_CMDID = 0x999,
WMI_GET_RF_SECTOR_PARAMS_CMDID = 0x9A0,
WMI_SET_RF_SECTOR_PARAMS_CMDID = 0x9A1,
WMI_GET_SELECTED_RF_SECTOR_INDEX_CMDID = 0x9A2,
@@ -209,12 +214,20 @@ enum wmi_command_id {
WMI_PRIO_TX_SECTORS_ORDER_CMDID = 0x9A5,
WMI_PRIO_TX_SECTORS_NUMBER_CMDID = 0x9A6,
WMI_PRIO_TX_SECTORS_SET_DEFAULT_CFG_CMDID = 0x9A7,
+ WMI_SCHEDULING_SCHEME_CMDID = 0xA01,
+ WMI_FIXED_SCHEDULING_CONFIG_CMDID = 0xA02,
+ WMI_ENABLE_FIXED_SCHEDULING_CMDID = 0xA03,
+ WMI_SET_MULTI_DIRECTED_OMNIS_CONFIG_CMDID = 0xA04,
+ WMI_SET_LONG_RANGE_CONFIG_CMDID = 0xA05,
WMI_SET_MAC_ADDRESS_CMDID = 0xF003,
WMI_ABORT_SCAN_CMDID = 0xF007,
WMI_SET_PROMISCUOUS_MODE_CMDID = 0xF041,
+ /* deprecated */
WMI_GET_PMK_CMDID = 0xF048,
WMI_SET_PASSPHRASE_CMDID = 0xF049,
+ /* deprecated */
WMI_SEND_ASSOC_RES_CMDID = 0xF04A,
+ /* deprecated */
WMI_SET_ASSOC_REQ_RELAY_CMDID = 0xF04B,
WMI_MAC_ADDR_REQ_CMDID = 0xF04D,
WMI_FW_VER_CMDID = 0xF04E,
@@ -440,11 +453,6 @@ struct wmi_rf_mgmt_cmd {
__le32 rf_mgmt_type;
} __packed;
-/* WMI_RF_RX_TEST_CMDID */
-struct wmi_rf_rx_test_cmd {
- __le32 sector;
-} __packed;
-
/* WMI_CORR_MEASURE_CMDID */
struct wmi_corr_measure_cmd {
__le32 freq_mhz;
@@ -657,6 +665,20 @@ struct wmi_bcast_vring_cfg_cmd {
struct wmi_bcast_vring_cfg vring_cfg;
} __packed;
+/* WMI_LO_POWER_CALIB_FROM_OTP_CMDID */
+struct wmi_lo_power_calib_from_otp_cmd {
+ /* index to read from OTP. zero based */
+ u8 index;
+ u8 reserved[3];
+} __packed;
+
+/* WMI_LO_POWER_CALIB_FROM_OTP_EVENTID */
+struct wmi_lo_power_calib_from_otp_event {
+ /* wmi_fw_status */
+ u8 status;
+ u8 reserved[3];
+} __packed;
+
/* WMI_VRING_BA_EN_CMDID */
struct wmi_vring_ba_en_cmd {
u8 ringid;
@@ -692,6 +714,24 @@ enum wmi_sniffer_cfg_mode {
WMI_SNIFFER_ON = 0x01,
};
+/* WMI_SILENT_RSSI_TABLE */
+enum wmi_silent_rssi_table {
+ RF_TEMPERATURE_CALIB_DEFAULT_DB = 0x00,
+ RF_TEMPERATURE_CALIB_HIGH_POWER_DB = 0x01,
+};
+
+/* WMI_SILENT_RSSI_STATUS */
+enum wmi_silent_rssi_status {
+ SILENT_RSSI_SUCCESS = 0x00,
+ SILENT_RSSI_FAILURE = 0x01,
+};
+
+/* WMI_SET_ACTIVE_SILENT_RSSI_TABLE_CMDID */
+struct wmi_set_active_silent_rssi_table_cmd {
+ /* enum wmi_silent_rssi_table */
+ __le32 table;
+} __packed;
+
enum wmi_sniffer_cfg_phy_info_mode {
WMI_SNIFFER_PHY_INFO_DISABLED = 0x00,
WMI_SNIFFER_PHY_INFO_ENABLED = 0x01,
@@ -835,18 +875,85 @@ struct wmi_echo_cmd {
__le32 value;
} __packed;
-/* WMI_OTP_READ_CMDID */
-struct wmi_otp_read_cmd {
- __le32 addr;
- __le32 size;
- __le32 values;
+/* WMI_RF_PWR_ON_DELAY_CMDID
+ * set the FW delay parameters used when resetting the RF.
+ * An RF reset consists of bringing the RF power down for a period of
+ * time and then bringing the power back up.
+ * Returned event: WMI_RF_PWR_ON_DELAY_RSP_EVENTID
+ */
+struct wmi_rf_pwr_on_delay_cmd {
+ /* time in usec the FW waits after bringing the RF PWR down,
+ * set 0 for default
+ */
+ __le16 down_delay_usec;
+ /* time in usec the FW waits after bringing the RF PWR up,
+ * set 0 for default
+ */
+ __le16 up_delay_usec;
+} __packed;
+
+/* WMI_SET_HIGH_POWER_TABLE_PARAMS_CMDID
+ * This API controls the Tx and Rx gain over temperature.
+ * It controls the Tx D-type, Rx D-type and Rx E-type amplifiers.
+ * It also controls the Tx gain index, by controlling the Rx to Tx gain index
+ * offset.
+ * Three temperature values split the control into 4 temperature ranges;
+ * each parameter uses its own set of temperature values.
+ * Returned event: WMI_SET_HIGH_POWER_TABLE_PARAMS_EVENTID
+ */
+struct wmi_set_high_power_table_params_cmd {
+ /* Temperature range for Tx D-type parameters */
+ u8 tx_dtype_temp[WMI_RF_DTYPE_LENGTH];
+ u8 reserved0;
+ /* Tx D-type values to be used for each temperature range */
+ __le32 tx_dtype_conf[WMI_RF_DTYPE_LENGTH + 1];
+ /* Temperature range for Rx D-type parameters */
+ u8 rx_dtype_temp[WMI_RF_DTYPE_LENGTH];
+ u8 reserved1;
+ /* Rx D-type values to be used for each temperature range */
+ __le32 rx_dtype_conf[WMI_RF_DTYPE_LENGTH + 1];
+ /* Temperature range for Rx E-type parameters */
+ u8 rx_etype_temp[WMI_RF_ETYPE_LENGTH];
+ u8 reserved2;
+ /* Rx E-type values to be used for each temperature range.
+ * The last 4 values of any range are the first 4 values of the next
+ * range and so on
+ */
+ __le32 rx_etype_conf[WMI_RF_ETYPE_VAL_PER_RANGE + WMI_RF_ETYPE_LENGTH];
+ /* Temperature range for rx_2_tx_offs parameters */
+ u8 rx_2_tx_temp[WMI_RF_RX2TX_LENGTH];
+ u8 reserved3;
+ /* Rx to Tx gain index offset */
+ s8 rx_2_tx_offs[WMI_RF_RX2TX_LENGTH + 1];
+} __packed;
+
+/* CMD: WMI_RF_XPM_READ_CMDID */
+struct wmi_rf_xpm_read_cmd {
+ u8 rf_id;
+ u8 reserved[3];
+ /* XPM bit start address in range [0,8191]bits - rounded by FW to
+ * multiple of 8bits
+ */
+ __le32 xpm_bit_address;
+ __le32 num_bytes;
} __packed;
-/* WMI_OTP_WRITE_CMDID */
-struct wmi_otp_write_cmd {
- __le32 addr;
- __le32 size;
- __le32 values;
+/* CMD: WMI_RF_XPM_WRITE_CMDID */
+struct wmi_rf_xpm_write_cmd {
+ u8 rf_id;
+ u8 reserved0[3];
+ /* XPM bit start address in range [0,8191]bits - rounded by FW to
+ * multiple of 8bits
+ */
+ __le32 xpm_bit_address;
+ __le32 num_bytes;
+ /* boolean flag indicating whether FW should verify the write
+ * operation
+ */
+ u8 verify;
+ u8 reserved1[3];
+ /* actual size=num_bytes */
+ u8 data_bytes[0];
} __packed;
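The zero-length data_bytes[] tail makes the XPM write command variable length; a sketch of how a sender would allocate and fill it, with illustrative variable names:

	struct wmi_rf_xpm_write_cmd *cmd;
	size_t len = sizeof(*cmd) + num_bytes;

	cmd = kzalloc(len, GFP_KERNEL);
	if (!cmd)
		return -ENOMEM;
	cmd->num_bytes = cpu_to_le32(num_bytes);
	memcpy(cmd->data_bytes, payload, num_bytes);
	/* ... wmi_send(wil, WMI_RF_XPM_WRITE_CMDID, cmd, len); kfree(cmd); */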
/* WMI_TEMP_SENSE_CMDID
@@ -989,19 +1096,26 @@ struct wmi_ftm_dest_info {
*/
__le16 burst_period;
u8 dst_mac[WMI_MAC_LEN];
- __le16 reserved;
+ u8 reserved;
+ u8 num_burst_per_aoa_meas;
} __packed;
/* WMI_TOF_SESSION_START_CMDID */
struct wmi_tof_session_start_cmd {
__le32 session_id;
- u8 num_of_aoa_measures;
+ u8 reserved1;
u8 aoa_type;
__le16 num_of_dest;
u8 reserved[4];
struct wmi_ftm_dest_info ftm_dest_info[0];
} __packed;
+/* WMI_TOF_CFG_RESPONDER_CMDID */
+struct wmi_tof_cfg_responder_cmd {
+ u8 enable;
+ u8 reserved[3];
+} __packed;
+
enum wmi_tof_channel_info_report_type {
WMI_TOF_CHANNEL_INFO_TYPE_CIR = 0x1,
WMI_TOF_CHANNEL_INFO_TYPE_RSSI = 0x2,
@@ -1022,7 +1136,99 @@ struct wmi_tof_set_tx_rx_offset_cmd {
__le32 tx_offset;
/* RX delay offset */
__le32 rx_offset;
- __le32 reserved[2];
+ /* Mask to define which RFs to configure. 0 means all RFs */
+ __le32 rf_mask;
+ /* Offset to strongest tap of CIR */
+ __le32 precursor;
+} __packed;
+
+/* WMI_TOF_GET_TX_RX_OFFSET_CMDID */
+struct wmi_tof_get_tx_rx_offset_cmd {
+ /* rf index to read offsets from */
+ u8 rf_index;
+ u8 reserved[3];
+} __packed;
+
+/* WMI_FIXED_SCHEDULING_CONFIG_CMDID */
+struct wmi_map_mcs_to_schd_params {
+ u8 mcs;
+ /* time in usec from start slot to start tx flow - default 15 */
+ u8 time_in_usec_before_initiate_tx;
+ /* RD enable - if yes consider RD according to STA mcs */
+ u8 rd_enabled;
+ u8 reserved;
+ /* time in usec from start slot to stop vring */
+ __le16 time_in_usec_to_stop_vring;
+ /* timeout to force flush from start of slot */
+ __le16 flush_to_in_usec;
+ /* per mcs the mac buffer limit size in bytes */
+ __le32 mac_buff_size_in_bytes;
+} __packed;
+
+/* WMI_FIXED_SCHEDULING_CONFIG_COMPLETE_EVENTID */
+struct wmi_fixed_scheduling_config_complete_event {
+ /* wmi_fw_status */
+ u8 status;
+ u8 reserved[3];
+} __packed;
+
+#define WMI_NUM_MCS (13)
+
+/* WMI_FIXED_SCHEDULING_CONFIG_CMDID */
+struct wmi_fixed_scheduling_config_cmd {
+ /* defaults in the SAS table */
+ struct wmi_map_mcs_to_schd_params mcs_to_schd_params_map[WMI_NUM_MCS];
+ /* default 150 uSec */
+ __le16 max_sta_rd_ppdu_duration_in_usec;
+ /* default 300 uSec */
+ __le16 max_sta_grant_ppdu_duration_in_usec;
+ /* default 1000 uSec */
+ __le16 assoc_slot_duration_in_usec;
+ /* default 360 uSec */
+ __le16 virtual_slot_duration_in_usec;
+ /* each this field value slots start with grant frame to the station
+ * - default 2
+ */
+ u8 number_of_ap_slots_for_initiate_grant;
+ u8 reserved[3];
+} __packed;
+
+/* WMI_ENABLE_FIXED_SCHEDULING_CMDID */
+struct wmi_enable_fixed_scheduling_cmd {
+ __le32 reserved;
+} __packed;
+
+/* WMI_ENABLE_FIXED_SCHEDULING_COMPLETE_EVENTID */
+struct wmi_enable_fixed_scheduling_complete_event {
+ /* wmi_fw_status */
+ u8 status;
+ u8 reserved[3];
+} __packed;
+
+/* WMI_SET_MULTI_DIRECTED_OMNIS_CONFIG_CMDID */
+struct wmi_set_multi_directed_omnis_config_cmd {
+ /* number of directed omnis at destination AP */
+ u8 dest_ap_num_directed_omnis;
+ u8 reserved[3];
+} __packed;
+
+/* WMI_SET_MULTI_DIRECTED_OMNIS_CONFIG_EVENTID */
+struct wmi_set_multi_directed_omnis_config_event {
+ /* wmi_fw_status */
+ u8 status;
+ u8 reserved[3];
+} __packed;
+
+/* WMI_SET_LONG_RANGE_CONFIG_CMDID */
+struct wmi_set_long_range_config_cmd {
+ __le32 reserved;
+} __packed;
+
+/* WMI_SET_LONG_RANGE_CONFIG_COMPLETE_EVENTID */
+struct wmi_set_long_range_config_complete_event {
+ /* wmi_fw_status */
+ u8 status;
+ u8 reserved[3];
} __packed;
/* WMI Events
@@ -1038,19 +1244,22 @@ enum wmi_event_id {
WMI_FW_READY_EVENTID = 0x1801,
WMI_EXIT_FAST_MEM_ACC_MODE_EVENTID = 0x200,
WMI_ECHO_RSP_EVENTID = 0x1803,
+ /* deprecated */
WMI_FS_TUNE_DONE_EVENTID = 0x180A,
+ /* deprecated */
WMI_CORR_MEASURE_EVENTID = 0x180B,
WMI_READ_RSSI_EVENTID = 0x180C,
WMI_TEMP_SENSE_DONE_EVENTID = 0x180E,
WMI_DC_CALIB_DONE_EVENTID = 0x180F,
+ /* deprecated */
WMI_IQ_TX_CALIB_DONE_EVENTID = 0x1811,
+ /* deprecated */
WMI_IQ_RX_CALIB_DONE_EVENTID = 0x1812,
WMI_SET_WORK_MODE_DONE_EVENTID = 0x1815,
WMI_LO_LEAKAGE_CALIB_DONE_EVENTID = 0x1816,
- WMI_MARLON_R_READ_DONE_EVENTID = 0x1818,
- WMI_MARLON_R_WRITE_DONE_EVENTID = 0x1819,
- WMI_MARLON_R_TXRX_SEL_DONE_EVENTID = 0x181A,
+ WMI_LO_POWER_CALIB_FROM_OTP_EVENTID = 0x1817,
WMI_SILENT_RSSI_CALIB_DONE_EVENTID = 0x181D,
+ /* deprecated */
WMI_RF_RX_TEST_DONE_EVENTID = 0x181E,
WMI_CFG_RX_CHAIN_DONE_EVENTID = 0x1820,
WMI_VRING_CFG_DONE_EVENTID = 0x1821,
@@ -1061,11 +1270,6 @@ enum wmi_event_id {
WMI_GET_SSID_EVENTID = 0x1828,
WMI_GET_PCP_CHANNEL_EVENTID = 0x182A,
WMI_SW_TX_COMPLETE_EVENTID = 0x182B,
- WMI_READ_MAC_RXQ_EVENTID = 0x1830,
- WMI_READ_MAC_TXQ_EVENTID = 0x1831,
- WMI_WRITE_MAC_RXQ_EVENTID = 0x1832,
- WMI_WRITE_MAC_TXQ_EVENTID = 0x1833,
- WMI_WRITE_MAC_XQ_FIELD_EVENTID = 0x1834,
WMI_BEAMFORMING_MGMT_DONE_EVENTID = 0x1836,
WMI_BF_TXSS_MGMT_DONE_EVENTID = 0x1837,
WMI_BF_RXSS_MGMT_DONE_EVENTID = 0x1839,
@@ -1076,8 +1280,12 @@ enum wmi_event_id {
WMI_TX_MGMT_PACKET_EVENTID = 0x1841,
WMI_LINK_MAINTAIN_CFG_WRITE_DONE_EVENTID = 0x1842,
WMI_LINK_MAINTAIN_CFG_READ_DONE_EVENTID = 0x1843,
- WMI_OTP_READ_RESULT_EVENTID = 0x1856,
+ WMI_RF_XPM_READ_RESULT_EVENTID = 0x1856,
+ WMI_RF_XPM_WRITE_RESULT_EVENTID = 0x1857,
WMI_LED_CFG_DONE_EVENTID = 0x1858,
+ WMI_SET_SILENT_RSSI_TABLE_DONE_EVENTID = 0x185C,
+ WMI_RF_PWR_ON_DELAY_RSP_EVENTID = 0x185D,
+ WMI_SET_HIGH_POWER_TABLE_PARAMS_EVENTID = 0x185E,
/* Performance monitoring events */
WMI_DATA_PORT_OPEN_EVENTID = 0x1860,
WMI_WBE_LINK_DOWN_EVENTID = 0x1861,
@@ -1106,14 +1314,6 @@ enum wmi_event_id {
WMI_PCP_FACTOR_EVENTID = 0x191A,
/* Power Save Configuration Events */
WMI_PS_DEV_PROFILE_CFG_EVENTID = 0x191C,
- /* Not supported yet */
- WMI_PS_DEV_CFG_EVENTID = 0x191D,
- /* Not supported yet */
- WMI_PS_DEV_CFG_READ_EVENTID = 0x191E,
- /* Not supported yet */
- WMI_PS_MID_CFG_EVENTID = 0x191F,
- /* Not supported yet */
- WMI_PS_MID_CFG_READ_EVENTID = 0x1920,
WMI_RS_CFG_DONE_EVENTID = 0x1921,
WMI_GET_DETAILED_RS_RES_EVENTID = 0x1922,
WMI_AOA_MEAS_EVENTID = 0x1923,
@@ -1122,14 +1322,17 @@ enum wmi_event_id {
WMI_GET_MGMT_RETRY_LIMIT_EVENTID = 0x1931,
WMI_SET_THERMAL_THROTTLING_CFG_EVENTID = 0x1940,
WMI_GET_THERMAL_THROTTLING_CFG_EVENTID = 0x1941,
+ /* return the Power Save profile */
+ WMI_PS_DEV_PROFILE_CFG_READ_EVENTID = 0x1942,
WMI_TOF_SESSION_END_EVENTID = 0x1991,
WMI_TOF_GET_CAPABILITIES_EVENTID = 0x1992,
WMI_TOF_SET_LCR_EVENTID = 0x1993,
WMI_TOF_SET_LCI_EVENTID = 0x1994,
WMI_TOF_FTM_PER_DEST_RES_EVENTID = 0x1995,
- WMI_TOF_CHANNEL_INFO_EVENTID = 0x1996,
+ WMI_TOF_CFG_RESPONDER_EVENTID = 0x1996,
WMI_TOF_SET_TX_RX_OFFSET_EVENTID = 0x1997,
WMI_TOF_GET_TX_RX_OFFSET_EVENTID = 0x1998,
+ WMI_TOF_CHANNEL_INFO_EVENTID = 0x1999,
WMI_GET_RF_SECTOR_PARAMS_DONE_EVENTID = 0x19A0,
WMI_SET_RF_SECTOR_PARAMS_DONE_EVENTID = 0x19A1,
WMI_GET_SELECTED_RF_SECTOR_INDEX_DONE_EVENTID = 0x19A2,
@@ -1138,12 +1341,18 @@ enum wmi_event_id {
WMI_PRIO_TX_SECTORS_ORDER_EVENTID = 0x19A5,
WMI_PRIO_TX_SECTORS_NUMBER_EVENTID = 0x19A6,
WMI_PRIO_TX_SECTORS_SET_DEFAULT_CFG_EVENTID = 0x19A7,
+ WMI_SCHEDULING_SCHEME_EVENTID = 0x1A01,
+ WMI_FIXED_SCHEDULING_CONFIG_COMPLETE_EVENTID = 0x1A02,
+ WMI_ENABLE_FIXED_SCHEDULING_COMPLETE_EVENTID = 0x1A03,
+ WMI_SET_MULTI_DIRECTED_OMNIS_CONFIG_EVENTID = 0x1A04,
+ WMI_SET_LONG_RANGE_CONFIG_COMPLETE_EVENTID = 0x1A05,
WMI_SET_CHANNEL_EVENTID = 0x9000,
WMI_ASSOC_REQ_EVENTID = 0x9001,
WMI_EAPOL_RX_EVENTID = 0x9002,
WMI_MAC_ADDR_RESP_EVENTID = 0x9003,
WMI_FW_VER_EVENTID = 0x9004,
WMI_ACS_PASSIVE_SCAN_COMPLETE_EVENTID = 0x9005,
+ WMI_COMMAND_NOT_SUPPORTED_EVENTID = 0xFFFF,
};
/* Events data structures */
@@ -1200,7 +1409,7 @@ struct wmi_fw_ver_event {
__le32 bl_minor;
__le32 bl_subminor;
__le32 bl_build;
- /* The number of entries in the FW capabilies array */
+ /* The number of entries in the FW capabilities array */
u8 fw_capabilities_len;
u8 reserved[3];
/* FW capabilities info
@@ -1245,7 +1454,9 @@ struct wmi_get_rf_status_event {
__le32 board_file_platform_type;
/* board file version */
__le32 board_file_version;
- __le32 reserved[2];
+ /* enabled XIFs bit vector */
+ __le32 enabled_xif_vector;
+ __le32 reserved;
} __packed;
/* WMI_GET_BASEBAND_TYPE_EVENTID */
@@ -1299,6 +1510,9 @@ struct wmi_ready_event {
/* enum wmi_phy_capability */
u8 phy_capability;
u8 numof_additional_mids;
+ /* rfc read calibration result. 5..15 */
+ u8 rfc_read_calib_result;
+ u8 reserved[3];
} __packed;
/* WMI_NOTIFY_REQ_DONE_EVENTID */
@@ -1306,7 +1520,8 @@ struct wmi_notify_req_done_event {
/* beamforming status, 0: fail; 1: OK; 2: retrying */
__le32 status;
__le64 tsf;
- __le32 snr_val;
+ s8 rssi;
+ u8 reserved0[3];
__le32 tx_tpt;
__le32 tx_goodput;
__le32 rx_goodput;
@@ -1576,7 +1791,7 @@ struct wmi_sw_tx_complete_event {
u8 reserved[3];
} __packed;
-/* WMI_CORR_MEASURE_EVENTID */
+/* WMI_CORR_MEASURE_EVENTID - deprecated */
struct wmi_corr_measure_event {
/* signed */
__le32 i;
@@ -1602,31 +1817,35 @@ struct wmi_get_ssid_event {
/* wmi_rx_mgmt_info */
struct wmi_rx_mgmt_info {
u8 mcs;
- s8 snr;
+ s8 rssi;
u8 range;
u8 sqi;
__le16 stype;
__le16 status;
__le32 len;
- /* Not resolved when == 0xFFFFFFFF ==> Broadcast to all MIDS */
+ /* Not resolved when == 0xFFFFFFFF == > Broadcast to all MIDS */
u8 qid;
- /* Not resolved when == 0xFFFFFFFF ==> Broadcast to all MIDS */
+ /* Not resolved when == 0xFFFFFFFF == > Broadcast to all MIDS */
u8 mid;
u8 cid;
/* From Radio MNGR */
u8 channel;
} __packed;
-/* wmi_otp_read_write_cmd */
-struct wmi_otp_read_write_cmd {
- __le32 addr;
- __le32 size;
- u8 values[0];
+/* EVENT: WMI_RF_XPM_READ_RESULT_EVENTID */
+struct wmi_rf_xpm_read_result_event {
+ /* enum wmi_fw_status_e - success=0 or fail=1 */
+ u8 status;
+ u8 reserved[3];
+ /* requested num_bytes of data */
+ u8 data_bytes[0];
} __packed;
-/* WMI_OTP_READ_RESULT_EVENTID */
-struct wmi_otp_read_result_event {
- u8 payload[0];
+/* EVENT: WMI_RF_XPM_WRITE_RESULT_EVENTID */
+struct wmi_rf_xpm_write_result_event {
+ /* enum wmi_fw_status_e - success=0 or fail=1 */
+ u8 status;
+ u8 reserved[3];
} __packed;
/* WMI_TX_MGMT_PACKET_EVENTID */
@@ -1645,6 +1864,20 @@ struct wmi_echo_rsp_event {
__le32 echoed_value;
} __packed;
+/* WMI_RF_PWR_ON_DELAY_RSP_EVENTID */
+struct wmi_rf_pwr_on_delay_rsp_event {
+ /* wmi_fw_status */
+ u8 status;
+ u8 reserved[3];
+} __packed;
+
+/* WMI_SET_HIGH_POWER_TABLE_PARAMS_EVENTID */
+struct wmi_set_high_power_table_params_event {
+ /* wmi_fw_status */
+ u8 status;
+ u8 reserved[3];
+} __packed;
+
/* WMI_TEMP_SENSE_DONE_EVENTID
*
* Measure MAC and radio temperatures
@@ -1722,14 +1955,22 @@ struct wmi_led_cfg_cmd {
u8 reserved;
} __packed;
+/* WMI_SET_CONNECT_SNR_THR_CMDID */
+struct wmi_set_connect_snr_thr_cmd {
+ u8 enable;
+ u8 reserved;
+ /* 1/4 dB units */
+ __le16 omni_snr_thr;
+ /* 1/4 dB units */
+ __le16 direct_snr_thr;
+} __packed;
+
/* WMI_LED_CFG_DONE_EVENTID */
struct wmi_led_cfg_done_event {
/* led config status */
__le32 status;
} __packed;
-#define WMI_NUM_MCS (13)
-
/* Rate search parameters configuration per connection */
struct wmi_rs_cfg {
/* The maximal allowed PER for each MCS
@@ -1754,6 +1995,98 @@ struct wmi_rs_cfg {
__le32 mcs_en_vec;
} __packed;
+/* Slot types */
+enum wmi_sched_scheme_slot_type {
+ WMI_SCHED_SLOT_SP = 0x0,
+ WMI_SCHED_SLOT_CBAP = 0x1,
+ WMI_SCHED_SLOT_IDLE = 0x2,
+ WMI_SCHED_SLOT_ANNOUNCE_NO_ACK = 0x3,
+ WMI_SCHED_SLOT_DISCOVERY = 0x4,
+};
+
+enum wmi_sched_scheme_slot_flags {
+ WMI_SCHED_SCHEME_SLOT_PERIODIC = 0x1,
+};
+
+struct wmi_sched_scheme_slot {
+ /* in microsecond */
+ __le32 tbtt_offset;
+ /* wmi_sched_scheme_slot_flags */
+ u8 flags;
+ /* wmi_sched_scheme_slot_type */
+ u8 type;
+ /* in microsecond */
+ __le16 duration;
+ /* frame_exchange_sequence_duration */
+ __le16 tx_op;
+ /* time in microseconds between two consecutive slots
+ * relevant only if flag WMI_SCHED_SCHEME_SLOT_PERIODIC set
+ */
+ __le16 period;
+ /* relevant only if flag WMI_SCHED_SCHEME_SLOT_PERIODIC set
+ * number of times to repeat allocation
+ */
+ u8 num_of_blocks;
+ /* relevant only if flag WMI_SCHED_SCHEME_SLOT_PERIODIC set
+ * every idle_period allocation will be idle
+ */
+ u8 idle_period;
+ u8 src_aid;
+ u8 dest_aid;
+ __le32 reserved;
+} __packed;
+
+enum wmi_sched_scheme_flags {
+ /* should not be set when clearing scheduling scheme */
+ WMI_SCHED_SCHEME_ENABLE = 0x01,
+ WMI_SCHED_PROTECTED_SP = 0x02,
+ /* should be set only on first WMI fragment of scheme */
+ WMI_SCHED_FIRST = 0x04,
+ /* should be set only on last WMI fragment of scheme */
+ WMI_SCHED_LAST = 0x08,
+ WMI_SCHED_IMMEDIATE_START = 0x10,
+};
+
+enum wmi_sched_scheme_advertisment {
+ /* ESE is not advertised at all, STA has to be configured with WMI
+ * also
+ */
+ WMI_ADVERTISE_ESE_DISABLED = 0x0,
+ WMI_ADVERTISE_ESE_IN_BEACON = 0x1,
+ WMI_ADVERTISE_ESE_IN_ANNOUNCE_FRAME = 0x2,
+};
+
+/* WMI_SCHEDULING_SCHEME_CMD */
+struct wmi_scheduling_scheme_cmd {
+ u8 serial_num;
+ /* wmi_sched_scheme_advertisment */
+ u8 ese_advertisment;
+ /* wmi_sched_scheme_flags */
+ __le16 flags;
+ u8 num_allocs;
+ u8 reserved[3];
+ __le64 start_tbtt;
+ /* allocations list */
+ struct wmi_sched_scheme_slot allocs[WMI_SCHED_MAX_ALLOCS_PER_CMD];
+} __packed;
+
+enum wmi_sched_scheme_failure_type {
+ WMI_SCHED_SCHEME_FAILURE_NO_ERROR = 0x00,
+ WMI_SCHED_SCHEME_FAILURE_OLD_START_TSF_ERR = 0x01,
+};
+
+/* WMI_SCHEDULING_SCHEME_EVENTID */
+struct wmi_scheduling_scheme_event {
+ /* wmi_fw_status_e */
+ u8 status;
+ /* serial number given in command */
+ u8 serial_num;
+ /* wmi_sched_scheme_failure_type */
+ u8 failure_type;
+ /* alignment to 32b */
+ u8 reserved[1];
+} __packed;
+
/* WMI_RS_CFG_CMDID */
struct wmi_rs_cfg_cmd {
/* connection id */
@@ -1971,6 +2304,19 @@ enum wmi_ps_profile_type {
WMI_PS_PROFILE_TYPE_LOW_LATENCY_PS = 0x03,
};
+/* WMI_PS_DEV_PROFILE_CFG_READ_CMDID */
+struct wmi_ps_dev_profile_cfg_read_cmd {
+ /* reserved */
+ __le32 reserved;
+} __packed;
+
+/* WMI_PS_DEV_PROFILE_CFG_READ_EVENTID */
+struct wmi_ps_dev_profile_cfg_read_event {
+ /* wmi_ps_profile_type_e */
+ u8 ps_profile;
+ u8 reserved[3];
+} __packed;
+
/* WMI_PS_DEV_PROFILE_CFG_CMDID
*
* Power save profile to be used by the device
@@ -2019,157 +2365,6 @@ enum wmi_ps_d3_resp_policy {
WMI_PS_D3_RESP_POLICY_APPROVED = 0x02,
};
-/* Device common power save configurations */
-struct wmi_ps_dev_cfg {
- /* lowest level of PS allowed while unassociated, enum wmi_ps_level_e
- */
- u8 ps_unassoc_min_level;
- /* lowest deep sleep clock level while nonassoc, enum
- * wmi_ps_deep_sleep_clk_level_e
- */
- u8 ps_unassoc_deep_sleep_min_level;
- /* lowest level of PS allowed while associated, enum wmi_ps_level_e */
- u8 ps_assoc_min_level;
- /* lowest deep sleep clock level while assoc, enum
- * wmi_ps_deep_sleep_clk_level_e
- */
- u8 ps_assoc_deep_sleep_min_level;
- /* enum wmi_ps_deep_sleep_clk_level_e */
- u8 ps_assoc_low_latency_ds_min_level;
- /* enum wmi_ps_d3_resp_policy_e */
- u8 ps_D3_response_policy;
- /* BOOL */
- u8 ps_D3_pm_pme_enabled;
- /* BOOL */
- u8 ps_halp_enable;
- u8 ps_deep_sleep_enter_thresh_msec;
- /* BOOL */
- u8 ps_voltage_scaling_en;
-} __packed;
-
-/* WMI_PS_DEV_CFG_CMDID
- *
- * Configure common Power Save parameters of the device and all MIDs.
- *
- * Returned event:
- * - WMI_PS_DEV_CFG_EVENTID
- */
-struct wmi_ps_dev_cfg_cmd {
- /* Device Power Save configuration to be applied */
- struct wmi_ps_dev_cfg ps_dev_cfg;
- /* alignment to 32b */
- u8 reserved[2];
-} __packed;
-
-/* WMI_PS_DEV_CFG_EVENTID */
-struct wmi_ps_dev_cfg_event {
- /* wmi_ps_cfg_cmd_status_e */
- __le32 status;
-} __packed;
-
-/* WMI_PS_DEV_CFG_READ_CMDID
- *
- * request to retrieve device Power Save configuration
- * (WMI_PS_DEV_CFG_CMD params)
- *
- * Returned event:
- * - WMI_PS_DEV_CFG_READ_EVENTID
- */
-struct wmi_ps_dev_cfg_read_cmd {
- __le32 reserved;
-} __packed;
-
-/* WMI_PS_DEV_CFG_READ_EVENTID */
-struct wmi_ps_dev_cfg_read_event {
- /* wmi_ps_cfg_cmd_status_e */
- __le32 status;
- /* Retrieved device Power Save configuration (WMI_PS_DEV_CFG_CMD
- * params)
- */
- struct wmi_ps_dev_cfg dev_ps_cfg;
- /* alignment to 32b */
- u8 reserved[2];
-} __packed;
-
-/* Per Mac Power Save configurations */
-struct wmi_ps_mid_cfg {
- /* Low power RX in BTI is enabled, BOOL */
- u8 beacon_lprx_enable;
- /* Sync to sector ID enabled, BOOL */
- u8 beacon_sync_to_sectorId_enable;
- /* Low power RX in DTI is enabled, BOOL */
- u8 frame_exchange_lprx_enable;
- /* Sleep Cycle while in scheduled PS, 1-31 */
- u8 scheduled_sleep_cycle_pow2;
- /* Stay Awake for k BIs every (sleep_cycle - k) BIs, 1-31 */
- u8 scheduled_num_of_awake_bis;
- u8 am_to_traffic_load_thresh_mbp;
- u8 traffic_to_am_load_thresh_mbps;
- u8 traffic_to_am_num_of_no_traffic_bis;
- /* BOOL */
- u8 continuous_traffic_psm;
- __le16 no_traffic_to_min_usec;
- __le16 no_traffic_to_max_usec;
- __le16 snoozing_sleep_interval_milisec;
- u8 max_no_data_awake_events;
- /* Trigger WEB after k failed beacons */
- u8 num_of_failed_beacons_rx_to_trigger_web;
- /* Trigger BF after k failed beacons */
- u8 num_of_failed_beacons_rx_to_trigger_bf;
- /* Trigger SOB after k successful beacons */
- u8 num_of_successful_beacons_rx_to_trigger_sob;
-} __packed;
-
-/* WMI_PS_MID_CFG_CMDID
- *
- * Configure Power Save parameters of a specific MID.
- * These parameters are relevant for the specific BSS this MID belongs to.
- *
- * Returned event:
- * - WMI_PS_MID_CFG_EVENTID
- */
-struct wmi_ps_mid_cfg_cmd {
- /* MAC ID */
- u8 mid;
- /* mid PS configuration to be applied */
- struct wmi_ps_mid_cfg ps_mid_cfg;
-} __packed;
-
-/* WMI_PS_MID_CFG_EVENTID */
-struct wmi_ps_mid_cfg_event {
- /* MAC ID */
- u8 mid;
- /* alignment to 32b */
- u8 reserved[3];
- /* wmi_ps_cfg_cmd_status_e */
- __le32 status;
-} __packed;
-
-/* WMI_PS_MID_CFG_READ_CMDID
- *
- * request to retrieve Power Save configuration of mid
- * (WMI_PS_MID_CFG_CMD params)
- *
- * Returned event:
- * - WMI_PS_MID_CFG_READ_EVENTID
- */
-struct wmi_ps_mid_cfg_read_cmd {
- /* MAC ID */
- u8 mid;
- /* alignment to 32b */
- u8 reserved[3];
-} __packed;
-
-/* WMI_PS_MID_CFG_READ_EVENTID */
-struct wmi_ps_mid_cfg_read_event {
- /* MAC ID */
- u8 mid;
- /* Retrieved MID Power Save configuration(WMI_PS_MID_CFG_CMD params) */
- struct wmi_ps_mid_cfg mid_ps_cfg;
- /* wmi_ps_cfg_cmd_status_e */
- __le32 status;
-} __packed;
-
#define WMI_AOA_MAX_DATA_SIZE (128)
enum wmi_aoa_meas_status {
@@ -2260,6 +2455,20 @@ struct wmi_tof_session_end_event {
u8 reserved[3];
} __packed;
+/* WMI_TOF_SET_LCI_EVENTID */
+struct wmi_tof_set_lci_event {
+ /* enum wmi_fw_status */
+ u8 status;
+ u8 reserved[3];
+} __packed;
+
+/* WMI_TOF_SET_LCR_EVENTID */
+struct wmi_tof_set_lcr_event {
+ /* enum wmi_fw_status */
+ u8 status;
+ u8 reserved[3];
+} __packed;
+
/* Responder FTM Results */
struct wmi_responder_ftm_res {
u8 t1[6];
@@ -2313,10 +2522,19 @@ struct wmi_tof_ftm_per_dest_res_event {
__le32 tsf_sync;
/* actual received ftm per burst */
u8 actual_ftm_per_burst;
- u8 reserved0[7];
+ /* Measurements are from the RFs defined by the mask */
+ __le32 meas_rf_mask;
+ u8 reserved0[3];
struct wmi_responder_ftm_res responder_ftm_res[0];
} __packed;
+/* WMI_TOF_CFG_RESPONDER_EVENTID */
+struct wmi_tof_cfg_responder_event {
+ /* enum wmi_fw_status */
+ u8 status;
+ u8 reserved[3];
+} __packed;
+
enum wmi_tof_channel_info_type {
WMI_TOF_CHANNEL_INFO_AOA = 0x00,
WMI_TOF_CHANNEL_INFO_LCI = 0x01,
@@ -2353,12 +2571,15 @@ struct wmi_tof_set_tx_rx_offset_event {
struct wmi_tof_get_tx_rx_offset_event {
/* enum wmi_fw_status */
u8 status;
- u8 reserved1[3];
+ /* RF index used to read the offsets */
+ u8 rf_index;
+ u8 reserved1[2];
/* TX delay offset */
__le32 tx_offset;
/* RX delay offset */
__le32 rx_offset;
- __le32 reserved2[2];
+ /* Offset to strongest tap of CIR */
+ __le32 precursor;
} __packed;
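
A minimal consumer of the extended event might look like this (the zero-means-success check is an assumption; the real driver compares against its wmi_fw_status values):

/* Hypothetical handler: the new rf_index and precursor fields occupy
 * formerly reserved space, so the event size is unchanged.
 */
static void wil_tof_offset_event(struct wmi_tof_get_tx_rx_offset_event *evt)
{
	if (evt->status)	/* non-zero = failure, assumed */
		return;

	pr_debug("rf %u: tx %u rx %u precursor %u\n",
		 evt->rf_index,
		 le32_to_cpu(evt->tx_offset),
		 le32_to_cpu(evt->rx_offset),
		 le32_to_cpu(evt->precursor));
}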
/* Result status codes for WMI commands */
@@ -2621,4 +2842,23 @@ struct wmi_prio_tx_sectors_set_default_cfg_event {
u8 reserved[3];
} __packed;
+/* WMI_SET_SILENT_RSSI_TABLE_DONE_EVENTID */
+struct wmi_set_silent_rssi_table_done_event {
+ /* enum wmi_silent_rssi_status */
+ __le32 status;
+ /* enum wmi_silent_rssi_table */
+ __le32 table;
+} __packed;
+
+/* WMI_COMMAND_NOT_SUPPORTED_EVENTID */
+struct wmi_command_not_supported_event {
+ /* device id */
+ u8 mid;
+ u8 reserved0;
+ __le16 command_id;
+ /* for UT command only, otherwise reserved */
+ __le16 command_subtype;
+ __le16 reserved1;
+} __packed;
+
#endif /* __WILOCITY_WMI_H__ */
diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c
index 09defbc..94bf01f 100644
--- a/drivers/net/wireless/atmel/at76c50x-usb.c
+++ b/drivers/net/wireless/atmel/at76c50x-usb.c
@@ -130,7 +130,7 @@ MODULE_FIRMWARE("atmel_at76c505amx-rfmd.bin");
#define USB_DEVICE_DATA(__ops) .driver_info = (kernel_ulong_t)(__ops)
-static struct usb_device_id dev_table[] = {
+static const struct usb_device_id dev_table[] = {
/*
* at76c503-i3861
*/
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index 984c1d0..cd58732 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -1105,6 +1105,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = {
BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43455),
BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4354),
BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4356),
+ BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_CYPRESS_4373),
{ /* end: all zeroes */ }
};
MODULE_DEVICE_TABLE(sdio, brcmf_sdmmc_ids);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 7e689c8..aaed4ab 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -3940,6 +3940,7 @@ brcmf_cfg80211_flush_pmksa(struct wiphy *wiphy, struct net_device *ndev)
static s32 brcmf_configure_opensecurity(struct brcmf_if *ifp)
{
s32 err;
+ s32 wpa_val;
/* set auth */
err = brcmf_fil_bsscfg_int_set(ifp, "auth", 0);
@@ -3954,7 +3955,11 @@ static s32 brcmf_configure_opensecurity(struct brcmf_if *ifp)
return err;
}
/* set upper-layer auth */
- err = brcmf_fil_bsscfg_int_set(ifp, "wpa_auth", WPA_AUTH_NONE);
+ if (brcmf_is_ibssmode(ifp->vif))
+ wpa_val = WPA_AUTH_NONE;
+ else
+ wpa_val = WPA_AUTH_DISABLED;
+ err = brcmf_fil_bsscfg_int_set(ifp, "wpa_auth", wpa_val);
if (err < 0) {
brcmf_err("wpa_auth error %d\n", err);
return err;
@@ -5693,10 +5698,13 @@ brcmf_notify_roaming_status(struct brcmf_if *ifp,
u32 status = e->status;
if (event == BRCMF_E_ROAM && status == BRCMF_E_STATUS_SUCCESS) {
- if (test_bit(BRCMF_VIF_STATUS_CONNECTED, &ifp->vif->sme_state))
+ if (test_bit(BRCMF_VIF_STATUS_CONNECTED,
+ &ifp->vif->sme_state)) {
brcmf_bss_roaming_done(cfg, ifp->ndev, e);
- else
+ } else {
brcmf_bss_connect_done(cfg, ifp->ndev, e, true);
+ brcmf_net_setcarrier(ifp, true);
+ }
}
return 0;
@@ -6456,6 +6464,8 @@ static int brcmf_setup_ifmodes(struct wiphy *wiphy, struct brcmf_if *ifp)
if (p2p) {
if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_MCHAN))
combo[c].num_different_channels = 2;
+ else
+ combo[c].num_different_channels = 1;
wiphy->interface_modes |= BIT(NL80211_IFTYPE_P2P_CLIENT) |
BIT(NL80211_IFTYPE_P2P_GO) |
BIT(NL80211_IFTYPE_P2P_DEVICE);
@@ -6465,10 +6475,10 @@ static int brcmf_setup_ifmodes(struct wiphy *wiphy, struct brcmf_if *ifp)
c0_limits[i++].types = BIT(NL80211_IFTYPE_P2P_CLIENT) |
BIT(NL80211_IFTYPE_P2P_GO);
} else {
+ combo[c].num_different_channels = 1;
c0_limits[i].max = 1;
c0_limits[i++].types = BIT(NL80211_IFTYPE_AP);
}
- combo[c].num_different_channels = 1;
combo[c].max_interfaces = i;
combo[c].n_limits = i;
combo[c].limits = c0_limits;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
index 05f22ff..c5d1a1c 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
@@ -690,6 +690,8 @@ static u32 brcmf_chip_tcm_rambase(struct brcmf_chip_priv *ci)
case BRCM_CC_4365_CHIP_ID:
case BRCM_CC_4366_CHIP_ID:
return 0x200000;
+ case CY_CC_4373_CHIP_ID:
+ return 0x160000;
default:
brcmf_err("unknown chip: %s\n", ci->pub.name);
break;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c
index 1447a83..2d3e5e2 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c
@@ -78,10 +78,7 @@ int brcmf_debug_attach(struct brcmf_pub *drvr)
return -ENODEV;
drvr->dbgfs_dir = debugfs_create_dir(dev_name(dev), root_folder);
- if (IS_ERR(drvr->dbgfs_dir))
- return PTR_ERR(drvr->dbgfs_dir);
-
- return 0;
+ return PTR_ERR_OR_ZERO(drvr->dbgfs_dir);
}
void brcmf_debug_detach(struct brcmf_pub *drvr)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
index d231042..091b529 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
@@ -601,6 +601,9 @@ int brcmf_fw_map_chip_to_name(u32 chip, u32 chiprev,
if ((nvram_name) && (mapping_table[i].nvram))
strlcat(nvram_name, mapping_table[i].nvram, BRCMF_FW_NAME_LEN);
+ brcmf_info("using %s for chip %#08x(%d) rev %#08x\n",
+ fw_name, chip, chip, chiprev);
+
return 0;
}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index f878706..e6e9b00 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -1951,7 +1951,7 @@ static const struct dev_pm_ops brcmf_pciedrvr_pm = {
BRCM_PCIE_VENDOR_ID_BROADCOM, dev_id,\
subvend, subdev, PCI_CLASS_NETWORK_OTHER << 8, 0xffff00, 0 }
-static struct pci_device_id brcmf_pcie_devid_table[] = {
+static const struct pci_device_id brcmf_pcie_devid_table[] = {
BRCMF_PCIE_DEVICE(BRCM_PCIE_4350_DEVICE_ID),
BRCMF_PCIE_DEVICE(BRCM_PCIE_4356_DEVICE_ID),
BRCMF_PCIE_DEVICE(BRCM_PCIE_43567_DEVICE_ID),
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index f355612..613caca 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -618,6 +618,7 @@ BRCMF_FW_NVRAM_DEF(43430A1, "brcmfmac43430-sdio.bin", "brcmfmac43430-sdio.txt");
BRCMF_FW_NVRAM_DEF(43455, "brcmfmac43455-sdio.bin", "brcmfmac43455-sdio.txt");
BRCMF_FW_NVRAM_DEF(4354, "brcmfmac4354-sdio.bin", "brcmfmac4354-sdio.txt");
BRCMF_FW_NVRAM_DEF(4356, "brcmfmac4356-sdio.bin", "brcmfmac4356-sdio.txt");
+BRCMF_FW_NVRAM_DEF(4373, "brcmfmac4373-sdio.bin", "brcmfmac4373-sdio.txt");
static struct brcmf_firmware_mapping brcmf_sdio_fwnames[] = {
BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43143_CHIP_ID, 0xFFFFFFFF, 43143),
@@ -636,7 +637,8 @@ static struct brcmf_firmware_mapping brcmf_sdio_fwnames[] = {
BRCMF_FW_NVRAM_ENTRY(BRCM_CC_43430_CHIP_ID, 0xFFFFFFFE, 43430A1),
BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4345_CHIP_ID, 0xFFFFFFC0, 43455),
BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4354_CHIP_ID, 0xFFFFFFFF, 4354),
- BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4356_CHIP_ID, 0xFFFFFFFF, 4356)
+ BRCMF_FW_NVRAM_ENTRY(BRCM_CC_4356_CHIP_ID, 0xFFFFFFFF, 4356),
+ BRCMF_FW_NVRAM_ENTRY(CY_CC_4373_CHIP_ID, 0xFFFFFFFF, 4373)
};
static void pkt_align(struct sk_buff *p, int len, int align)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index 0eea48e..11ffaa0 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -50,6 +50,7 @@ BRCMF_FW_DEF(43143, "brcmfmac43143.bin");
BRCMF_FW_DEF(43236B, "brcmfmac43236b.bin");
BRCMF_FW_DEF(43242A, "brcmfmac43242a.bin");
BRCMF_FW_DEF(43569, "brcmfmac43569.bin");
+BRCMF_FW_DEF(4373, "brcmfmac4373.bin");
static struct brcmf_firmware_mapping brcmf_usb_fwnames[] = {
BRCMF_FW_ENTRY(BRCM_CC_43143_CHIP_ID, 0xFFFFFFFF, 43143),
@@ -58,7 +59,8 @@ static struct brcmf_firmware_mapping brcmf_usb_fwnames[] = {
BRCMF_FW_ENTRY(BRCM_CC_43238_CHIP_ID, 0x00000008, 43236B),
BRCMF_FW_ENTRY(BRCM_CC_43242_CHIP_ID, 0xFFFFFFFF, 43242A),
BRCMF_FW_ENTRY(BRCM_CC_43566_CHIP_ID, 0xFFFFFFFF, 43569),
- BRCMF_FW_ENTRY(BRCM_CC_43569_CHIP_ID, 0xFFFFFFFF, 43569)
+ BRCMF_FW_ENTRY(BRCM_CC_43569_CHIP_ID, 0xFFFFFFFF, 43569),
+ BRCMF_FW_ENTRY(CY_CC_4373_CHIP_ID, 0xFFFFFFFF, 4373)
};
#define TRX_MAGIC 0x30524448 /* "HDR0" */
@@ -1463,15 +1465,20 @@ static int brcmf_usb_reset_resume(struct usb_interface *intf)
#define LINKSYS_USB_DEVICE(dev_id) \
{ USB_DEVICE(BRCM_USB_VENDOR_ID_LINKSYS, dev_id) }
-static struct usb_device_id brcmf_usb_devid_table[] = {
+#define CYPRESS_USB_DEVICE(dev_id) \
+ { USB_DEVICE(CY_USB_VENDOR_ID_CYPRESS, dev_id) }
+
+static const struct usb_device_id brcmf_usb_devid_table[] = {
BRCMF_USB_DEVICE(BRCM_USB_43143_DEVICE_ID),
BRCMF_USB_DEVICE(BRCM_USB_43236_DEVICE_ID),
BRCMF_USB_DEVICE(BRCM_USB_43242_DEVICE_ID),
BRCMF_USB_DEVICE(BRCM_USB_43569_DEVICE_ID),
LINKSYS_USB_DEVICE(BRCM_USB_43235_LINKSYS_DEVICE_ID),
+ CYPRESS_USB_DEVICE(CY_USB_4373_DEVICE_ID),
{ USB_DEVICE(BRCM_USB_VENDOR_ID_LG, BRCM_USB_43242_LG_DEVICE_ID) },
/* special entry for device with firmware loaded and running */
BRCMF_USB_DEVICE(BRCM_USB_BCMFW_DEVICE_ID),
+ CYPRESS_USB_DEVICE(BRCM_USB_BCMFW_DEVICE_ID),
{ /* end: all zeroes */ }
};
diff --git a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
index f1fb8a3..57544a3 100644
--- a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
+++ b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
@@ -23,6 +23,7 @@
#define BRCM_USB_VENDOR_ID_BROADCOM 0x0a5c
#define BRCM_USB_VENDOR_ID_LG 0x043e
#define BRCM_USB_VENDOR_ID_LINKSYS 0x13b1
+#define CY_USB_VENDOR_ID_CYPRESS 0x04b4
#define BRCM_PCIE_VENDOR_ID_BROADCOM PCI_VENDOR_ID_BROADCOM
/* Chipcommon Core Chip IDs */
@@ -57,6 +58,7 @@
#define BRCM_CC_4365_CHIP_ID 0x4365
#define BRCM_CC_4366_CHIP_ID 0x4366
#define BRCM_CC_4371_CHIP_ID 0x4371
+#define CY_CC_4373_CHIP_ID 0x4373
/* USB Device IDs */
#define BRCM_USB_43143_DEVICE_ID 0xbd1e
@@ -66,6 +68,7 @@
#define BRCM_USB_43242_LG_DEVICE_ID 0x3101
#define BRCM_USB_43569_DEVICE_ID 0xbd27
#define BRCM_USB_BCMFW_DEVICE_ID 0x0bdc
+#define CY_USB_4373_DEVICE_ID 0xbd29
/* PCIE Device IDs */
#define BRCM_PCIE_4350_DEVICE_ID 0x43a3
diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 84143a0..54201c0 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -7837,7 +7837,7 @@ static int writerids(struct net_device *dev, aironet_ioctl *comp) {
struct airo_info *ai = dev->ml_priv;
int ridcode;
int enabled;
- static int (* writer)(struct airo_info *, u16 rid, const void *, int, int);
+ int (*writer)(struct airo_info *, u16 rid, const void *, int, int);
unsigned char *iobuf;
/* Only super-user can write RIDs */
diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
index aaaca4d..19c442c 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
@@ -340,7 +340,7 @@ static int ipw2100_ucode_download(struct ipw2100_priv *priv,
struct ipw2100_fw *fw);
static void ipw2100_wx_event_work(struct work_struct *work);
static struct iw_statistics *ipw2100_wx_wireless_stats(struct net_device *dev);
-static struct iw_handler_def ipw2100_wx_handler_def;
+static const struct iw_handler_def ipw2100_wx_handler_def;
static inline void read_register(struct net_device *dev, u32 reg, u32 * val)
{
@@ -1724,7 +1724,7 @@ static const struct libipw_geo ipw_geos[] = {
static int ipw2100_up(struct ipw2100_priv *priv, int deferred)
{
unsigned long flags;
- int rc = 0;
+ int err = 0;
u32 lock;
u32 ord_len = sizeof(lock);
@@ -1757,33 +1757,33 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred)
if (priv->status & STATUS_POWERED ||
(priv->status & STATUS_RESET_PENDING)) {
/* Power cycle the card ... */
- if (ipw2100_power_cycle_adapter(priv)) {
+ err = ipw2100_power_cycle_adapter(priv);
+ if (err) {
printk(KERN_WARNING DRV_NAME
": %s: Could not cycle adapter.\n",
priv->net_dev->name);
- rc = 1;
goto exit;
}
} else
priv->status |= STATUS_POWERED;
/* Load the firmware, start the clocks, etc. */
- if (ipw2100_start_adapter(priv)) {
+ err = ipw2100_start_adapter(priv);
+ if (err) {
printk(KERN_ERR DRV_NAME
": %s: Failed to start the firmware.\n",
priv->net_dev->name);
- rc = 1;
goto exit;
}
ipw2100_initialize_ordinals(priv);
/* Determine capabilities of this particular HW configuration */
- if (ipw2100_get_hw_features(priv)) {
+ err = ipw2100_get_hw_features(priv);
+ if (err) {
printk(KERN_ERR DRV_NAME
": %s: Failed to determine HW features.\n",
priv->net_dev->name);
- rc = 1;
goto exit;
}
@@ -1792,11 +1792,11 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred)
priv->ieee->freq_band = LIBIPW_24GHZ_BAND;
lock = LOCK_NONE;
- if (ipw2100_set_ordinal(priv, IPW_ORD_PERS_DB_LOCK, &lock, &ord_len)) {
+ err = ipw2100_set_ordinal(priv, IPW_ORD_PERS_DB_LOCK, &lock, &ord_len);
+ if (err) {
printk(KERN_ERR DRV_NAME
": %s: Failed to clear ordinal lock.\n",
priv->net_dev->name);
- rc = 1;
goto exit;
}
@@ -1820,21 +1820,21 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred)
/* Send all of the commands that must be sent prior to
* HOST_COMPLETE */
- if (ipw2100_adapter_setup(priv)) {
+ err = ipw2100_adapter_setup(priv);
+ if (err) {
printk(KERN_ERR DRV_NAME ": %s: Failed to start the card.\n",
priv->net_dev->name);
- rc = 1;
goto exit;
}
if (!deferred) {
/* Enable the adapter - sends HOST_COMPLETE */
- if (ipw2100_enable_adapter(priv)) {
+ err = ipw2100_enable_adapter(priv);
+ if (err) {
printk(KERN_ERR DRV_NAME ": "
"%s: failed in call to enable adapter.\n",
priv->net_dev->name);
ipw2100_hw_stop_adapter(priv);
- rc = 1;
goto exit;
}
@@ -1844,7 +1844,7 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred)
}
exit:
- return rc;
+ return err;
}
static void ipw2100_down(struct ipw2100_priv *priv)
@@ -4324,7 +4324,7 @@ static struct attribute *ipw2100_sysfs_entries[] = {
NULL,
};
-static struct attribute_group ipw2100_attribute_group = {
+static const struct attribute_group ipw2100_attribute_group = {
.attrs = ipw2100_sysfs_entries,
};
@@ -8273,7 +8273,7 @@ static struct iw_statistics *ipw2100_wx_wireless_stats(struct net_device *dev)
return (struct iw_statistics *)NULL;
}
-static struct iw_handler_def ipw2100_wx_handler_def = {
+static const struct iw_handler_def ipw2100_wx_handler_def = {
.standard = ipw2100_wx_handlers,
.num_standard = ARRAY_SIZE(ipw2100_wx_handlers),
.num_private = ARRAY_SIZE(ipw2100_private_handler),
diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
index 9368abd..8da8749 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
@@ -3209,7 +3209,7 @@ static int ipw_load_firmware(struct ipw_priv *priv, u8 * data, size_t len)
struct fw_chunk *chunk;
int total_nr = 0;
int i;
- struct pci_pool *pool;
+ struct dma_pool *pool;
void **virts;
dma_addr_t *phys;
@@ -3226,9 +3226,10 @@ static int ipw_load_firmware(struct ipw_priv *priv, u8 * data, size_t len)
kfree(virts);
return -ENOMEM;
}
- pool = pci_pool_create("ipw2200", priv->pci_dev, CB_MAX_LENGTH, 0, 0);
+ pool = dma_pool_create("ipw2200", &priv->pci_dev->dev, CB_MAX_LENGTH, 0,
+ 0);
if (!pool) {
- IPW_ERROR("pci_pool_create failed\n");
+ IPW_ERROR("dma_pool_create failed\n");
kfree(phys);
kfree(virts);
return -ENOMEM;
@@ -3253,7 +3254,7 @@ static int ipw_load_firmware(struct ipw_priv *priv, u8 * data, size_t len)
nr = (chunk_len + CB_MAX_LENGTH - 1) / CB_MAX_LENGTH;
for (i = 0; i < nr; i++) {
- virts[total_nr] = pci_pool_alloc(pool, GFP_KERNEL,
+ virts[total_nr] = dma_pool_alloc(pool, GFP_KERNEL,
&phys[total_nr]);
if (!virts[total_nr]) {
ret = -ENOMEM;
@@ -3297,9 +3298,9 @@ static int ipw_load_firmware(struct ipw_priv *priv, u8 * data, size_t len)
}
out:
for (i = 0; i < total_nr; i++)
- pci_pool_free(pool, virts[i], phys[i]);
+ dma_pool_free(pool, virts[i], phys[i]);
- pci_pool_destroy(pool);
+ dma_pool_destroy(pool);
kfree(phys);
kfree(virts);
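
The conversion is mechanical; as a reference sketch (names are placeholders), each pci_pool call maps one-to-one onto its dma_pool equivalent, with the device argument changing from the pci_dev to its embedded struct device:

/* Sketch of the pci_pool -> dma_pool mapping; pdev and size are
 * placeholders, not taken from this driver.
 */
static int example_pool_roundtrip(struct pci_dev *pdev, size_t size)
{
	struct dma_pool *pool;
	dma_addr_t handle;
	void *buf;

	pool = dma_pool_create("example", &pdev->dev, size, 0, 0);
	if (!pool)
		return -ENOMEM;

	buf = dma_pool_alloc(pool, GFP_KERNEL, &handle);
	if (buf)
		dma_pool_free(pool, buf, handle);

	dma_pool_destroy(pool);
	return buf ? 0 : -ENOMEM;
}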
@@ -10008,7 +10009,7 @@ static iw_handler ipw_priv_handler[] = {
#endif
};
-static struct iw_handler_def ipw_wx_handler_def = {
+static const struct iw_handler_def ipw_wx_handler_def = {
.standard = ipw_wx_handlers,
.num_standard = ARRAY_SIZE(ipw_wx_handlers),
.num_private = ARRAY_SIZE(ipw_priv_handler),
@@ -11500,7 +11501,7 @@ static struct attribute *ipw_sysfs_entries[] = {
NULL
};
-static struct attribute_group ipw_attribute_group = {
+static const struct attribute_group ipw_attribute_group = {
.name = NULL, /* put in device directory */
.attrs = ipw_sysfs_entries,
};
diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
index 38bf403..329f3a6 100644
--- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
@@ -3464,7 +3464,7 @@ static struct attribute *il3945_sysfs_entries[] = {
NULL
};
-static struct attribute_group il3945_attribute_group = {
+static const struct attribute_group il3945_attribute_group = {
.name = NULL, /* put in device directory */
.attrs = il3945_sysfs_entries,
};
diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
index 5b51fba..de9b652 100644
--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
@@ -4654,7 +4654,7 @@ static struct attribute *il_sysfs_entries[] = {
NULL
};
-static struct attribute_group il_attribute_group = {
+static const struct attribute_group il_attribute_group = {
.name = NULL, /* put in device directory */
.attrs = il_sysfs_entries,
};
diff --git a/drivers/net/wireless/intel/iwlwifi/Makefile b/drivers/net/wireless/intel/iwlwifi/Makefile
index 20bd261..35a32a3 100644
--- a/drivers/net/wireless/intel/iwlwifi/Makefile
+++ b/drivers/net/wireless/intel/iwlwifi/Makefile
@@ -11,6 +11,8 @@ iwlwifi-$(CONFIG_IWLDVM) += cfg/1000.o cfg/2000.o cfg/5000.o cfg/6000.o
iwlwifi-$(CONFIG_IWLMVM) += cfg/7000.o cfg/8000.o cfg/9000.o cfg/a000.o
iwlwifi-objs += iwl-trans.o
iwlwifi-objs += fw/notif-wait.o
+iwlwifi-$(CONFIG_IWLMVM) += fw/paging.o fw/smem.o fw/init.o fw/dbg.o
+iwlwifi-$(CONFIG_IWLMVM) += fw/common_rx.o fw/nvm.o
iwlwifi-objs += $(iwlwifi-m)
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/8000.c b/drivers/net/wireless/intel/iwlwifi/cfg/8000.c
index 5081720..2e6c526 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/8000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/8000.c
@@ -70,8 +70,8 @@
#include "iwl-agn-hw.h"
/* Highest firmware API version supported */
-#define IWL8000_UCODE_API_MAX 33
-#define IWL8265_UCODE_API_MAX 33
+#define IWL8000_UCODE_API_MAX 34
+#define IWL8265_UCODE_API_MAX 34
/* Lowest firmware API version supported */
#define IWL8000_UCODE_API_MIN 22
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
index 97208ce..2babe0a 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
@@ -55,7 +55,7 @@
#include "iwl-agn-hw.h"
/* Highest firmware API version supported */
-#define IWL9000_UCODE_API_MAX 33
+#define IWL9000_UCODE_API_MAX 34
/* Lowest firmware API version supported */
#define IWL9000_UCODE_API_MIN 30
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c
index 98f24cd..76ba1f8 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c
@@ -55,7 +55,7 @@
#include "iwl-agn-hw.h"
/* Highest firmware API version supported */
-#define IWL_A000_UCODE_API_MAX 33
+#define IWL_A000_UCODE_API_MAX 34
/* Lowest firmware API version supported */
#define IWL_A000_UCODE_API_MIN 24
@@ -75,11 +75,20 @@
#define IWL_A000_JF_FW_PRE "iwlwifi-Qu-a0-jf-b0-"
#define IWL_A000_HR_FW_PRE "iwlwifi-Qu-a0-hr-a0-"
#define IWL_A000_HR_CDB_FW_PRE "iwlwifi-QuIcp-z0-hrcdb-a0-"
+#define IWL_A000_HR_F0_FW_PRE "iwlwifi-QuQnj-f0-hr-a0-"
+#define IWL_A000_JF_B0_FW_PRE "iwlwifi-QuQnj-a0-jf-b0-"
+#define IWL_A000_HR_A0_FW_PRE "iwlwifi-QuQnj-a0-hr-a0-"
#define IWL_A000_HR_MODULE_FIRMWARE(api) \
IWL_A000_HR_FW_PRE "-" __stringify(api) ".ucode"
#define IWL_A000_JF_MODULE_FIRMWARE(api) \
IWL_A000_JF_FW_PRE "-" __stringify(api) ".ucode"
+#define IWL_A000_HR_F0_QNJ_MODULE_FIRMWARE(api) \
+ IWL_A000_HR_F0_FW_PRE "-" __stringify(api) ".ucode"
+#define IWL_A000_JF_B0_QNJ_MODULE_FIRMWARE(api) \
+ IWL_A000_JF_B0_FW_PRE "-" __stringify(api) ".ucode"
+#define IWL_A000_HR_A0_QNJ_MODULE_FIRMWARE(api) \
+ IWL_A000_HR_A0_FW_PRE "-" __stringify(api) ".ucode"
#define NVM_HW_SECTION_NUM_FAMILY_A000 10
@@ -168,5 +177,38 @@ const struct iwl_cfg iwla000_2ax_cfg_hr = {
.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
};
+const struct iwl_cfg iwla000_2ax_cfg_qnj_hr_f0 = {
+ .name = "Intel(R) Dual Band Wireless AX a000",
+ .fw_name_pre = IWL_A000_HR_F0_FW_PRE,
+ IWL_DEVICE_A000,
+ .ht_params = &iwl_a000_ht_params,
+ .nvm_ver = IWL_A000_NVM_VERSION,
+ .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
+ .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+};
+
+const struct iwl_cfg iwla000_2ax_cfg_qnj_jf_b0 = {
+ .name = "Intel(R) Dual Band Wireless AX a000",
+ .fw_name_pre = IWL_A000_JF_B0_FW_PRE,
+ IWL_DEVICE_A000,
+ .ht_params = &iwl_a000_ht_params,
+ .nvm_ver = IWL_A000_NVM_VERSION,
+ .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
+ .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+};
+
+const struct iwl_cfg iwla000_2ax_cfg_qnj_hr_a0 = {
+ .name = "Intel(R) Dual Band Wireless AX a000",
+ .fw_name_pre = IWL_A000_HR_A0_FW_PRE,
+ IWL_DEVICE_A000,
+ .ht_params = &iwl_a000_ht_params,
+ .nvm_ver = IWL_A000_NVM_VERSION,
+ .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
+ .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+};
+
MODULE_FIRMWARE(IWL_A000_HR_MODULE_FIRMWARE(IWL_A000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_A000_JF_MODULE_FIRMWARE(IWL_A000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_A000_HR_F0_QNJ_MODULE_FIRMWARE(IWL_A000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_A000_JF_B0_QNJ_MODULE_FIRMWARE(IWL_A000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_A000_HR_A0_QNJ_MODULE_FIRMWARE(IWL_A000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/commands.h b/drivers/net/wireless/intel/iwlwifi/dvm/commands.h
index 2ab2773..f89736d 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/commands.h
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/commands.h
@@ -311,11 +311,6 @@ enum {
/**
* rate_n_flags Tx antenna masks
- * 4965 has 2 transmitters
- * 5100 has 1 transmitter B
- * 5150 has 1 transmitter A
- * 5300 has 3 transmitters
- * 5350 has 3 transmitters
* bit14:16
*/
#define RATE_MCS_ANT_POS 14
@@ -1230,7 +1225,6 @@ struct iwl_rx_mpdu_res_start {
*/
/*
- * 4965 uCode updates these Tx attempt count values in host DRAM.
* Used for managing Tx retries when expecting block-acks.
* Driver should set these fields to 0.
*/
@@ -1437,22 +1431,6 @@ struct agg_tx_status {
__le16 sequence;
} __packed;
-/*
- * definitions for initial rate index field
- * bits [3:0] initial rate index
- * bits [6:4] rate table color, used for the initial rate
- * bit-7 invalid rate indication
- * i.e. rate was not chosen from rate table
- * or rate table color was changed during frame retries
- * refer tlc rate info
- */
-
-#define IWL50_TX_RES_INIT_RATE_INDEX_POS 0
-#define IWL50_TX_RES_INIT_RATE_INDEX_MSK 0x0f
-#define IWL50_TX_RES_RATE_TABLE_COLOR_POS 4
-#define IWL50_TX_RES_RATE_TABLE_COLOR_MSK 0x70
-#define IWL50_TX_RES_INV_RATE_INDEX_MSK 0x80
-
/* refer to ra_tid */
#define IWLAGN_TX_RES_TID_POS 0
#define IWLAGN_TX_RES_TID_MSK 0x0f
@@ -1556,7 +1534,7 @@ struct iwl_link_qual_general_params {
/* Best single antenna to use for single stream (legacy, SISO). */
u8 single_stream_ant_msk; /* LINK_QUAL_ANT_* */
- /* Best antennas to use for MIMO (unused for 4965, assumes both). */
+ /* Best antennas to use for MIMO */
u8 dual_stream_ant_msk; /* LINK_QUAL_ANT_* */
/*
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h b/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h
new file mode 100644
index 0000000..3684a3e
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/alive.h
@@ -0,0 +1,206 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_alive_h__
+#define __iwl_fw_api_alive_h__
+
+/* alive response is_valid values */
+#define ALIVE_RESP_UCODE_OK BIT(0)
+#define ALIVE_RESP_RFKILL BIT(1)
+
+/* alive response ver_type values */
+enum {
+ FW_TYPE_HW = 0,
+ FW_TYPE_PROT = 1,
+ FW_TYPE_AP = 2,
+ FW_TYPE_WOWLAN = 3,
+ FW_TYPE_TIMING = 4,
+ FW_TYPE_WIPAN = 5
+};
+
+/* alive response ver_subtype values */
+enum {
+ FW_SUBTYPE_FULL_FEATURE = 0,
+ FW_SUBTYPE_BOOTSRAP = 1, /* Not valid */
+ FW_SUBTYPE_REDUCED = 2,
+ FW_SUBTYPE_ALIVE_ONLY = 3,
+ FW_SUBTYPE_WOWLAN = 4,
+ FW_SUBTYPE_AP_SUBTYPE = 5,
+ FW_SUBTYPE_WIPAN = 6,
+ FW_SUBTYPE_INITIALIZE = 9
+};
+
+#define IWL_ALIVE_STATUS_ERR 0xDEAD
+#define IWL_ALIVE_STATUS_OK 0xCAFE
+
+#define IWL_ALIVE_FLG_RFKILL BIT(0)
+
+struct iwl_lmac_alive {
+ __le32 ucode_minor;
+ __le32 ucode_major;
+ u8 ver_subtype;
+ u8 ver_type;
+ u8 mac;
+ u8 opt;
+ __le32 timestamp;
+ __le32 error_event_table_ptr; /* SRAM address for error log */
+ __le32 log_event_table_ptr; /* SRAM address for LMAC event log */
+ __le32 cpu_register_ptr;
+ __le32 dbgm_config_ptr;
+ __le32 alive_counter_ptr;
+ __le32 scd_base_ptr; /* SRAM address for SCD */
+ __le32 st_fwrd_addr; /* pointer to Store and forward */
+ __le32 st_fwrd_size;
+} __packed; /* UCODE_ALIVE_NTFY_API_S_VER_3 */
+
+struct iwl_umac_alive {
+ __le32 umac_minor; /* UMAC version: minor */
+ __le32 umac_major; /* UMAC version: major */
+ __le32 error_info_addr; /* SRAM address for UMAC error log */
+ __le32 dbg_print_buff_addr;
+} __packed; /* UMAC_ALIVE_DATA_API_S_VER_2 */
+
+struct mvm_alive_resp_v3 {
+ __le16 status;
+ __le16 flags;
+ struct iwl_lmac_alive lmac_data;
+ struct iwl_umac_alive umac_data;
+} __packed; /* ALIVE_RES_API_S_VER_3 */
+
+struct mvm_alive_resp {
+ __le16 status;
+ __le16 flags;
+ struct iwl_lmac_alive lmac_data[2];
+ struct iwl_umac_alive umac_data;
+} __packed; /* ALIVE_RES_API_S_VER_4 */
+
+/**
+ * enum iwl_extended_cfg_flag - commands driver may send before
+ * finishing init flow
+ * @IWL_INIT_DEBUG_CFG: driver is going to send debug config command
+ * @IWL_INIT_NVM: driver is going to send NVM_ACCESS commands
+ * @IWL_INIT_PHY: driver is going to send PHY_DB commands
+ */
+enum iwl_extended_cfg_flags {
+ IWL_INIT_DEBUG_CFG,
+ IWL_INIT_NVM,
+ IWL_INIT_PHY,
+};
+
+/**
+ * struct iwl_init_extended_cfg_cmd - mark what commands ucode should wait for
+ * before finishing init flows
+ * @init_flags: values from &enum iwl_extended_cfg_flags
+ */
+struct iwl_init_extended_cfg_cmd {
+ __le32 init_flags;
+} __packed; /* INIT_EXTENDED_CFG_CMD_API_S_VER_1 */
+
+/**
+ * struct iwl_radio_version_notif - information on the radio version
+ * ( RADIO_VERSION_NOTIFICATION = 0x68 )
+ * @radio_flavor: radio flavor
+ * @radio_step: radio version step
+ * @radio_dash: radio version dash
+ */
+struct iwl_radio_version_notif {
+ __le32 radio_flavor;
+ __le32 radio_step;
+ __le32 radio_dash;
+} __packed; /* RADIO_VERSION_NOTIFICATION_S_VER_1 */
+
+enum iwl_card_state_flags {
+ CARD_ENABLED = 0x00,
+ HW_CARD_DISABLED = 0x01,
+ SW_CARD_DISABLED = 0x02,
+ CT_KILL_CARD_DISABLED = 0x04,
+ HALT_CARD_DISABLED = 0x08,
+ CARD_DISABLED_MSK = 0x0f,
+ CARD_IS_RX_ON = 0x10,
+};
+
+/**
+ * struct iwl_card_state_notif - information on the card state
+ * ( CARD_STATE_NOTIFICATION = 0xa1 )
+ * @flags: &enum iwl_card_state_flags
+ */
+struct iwl_card_state_notif {
+ __le32 flags;
+} __packed; /* CARD_STATE_NTFY_API_S_VER_1 */
+
+/**
+ * struct iwl_fseq_ver_mismatch_ntf - Notification about version
+ *
+ * This notification does not have a direct impact on the init flow.
+ * It means that another core (not WiFi) has initiated the FSEQ flow
+ * and updated the FSEQ version. The driver only prints an error when
+ * this occurs.
+ *
+ * @aux_read_fseq_ver: auxiliary read FSEQ version
+ * @wifi_fseq_ver: FSEQ version (embedded in WiFi)
+ */
+struct iwl_fseq_ver_mismatch_ntf {
+ __le32 aux_read_fseq_ver;
+ __le32 wifi_fseq_ver;
+} __packed; /* FSEQ_VER_MISMATCH_NTFY_API_S_VER_1 */
+
+#endif /* __iwl_fw_api_alive_h__ */
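
As a minimal sketch of consuming the v4 alive response (only the constants come from this header; the surrounding flow is an assumption):

/* Hypothetical validity check for the alive notification. */
static bool iwl_alive_ok(const struct mvm_alive_resp *resp)
{
	if (le16_to_cpu(resp->status) != IWL_ALIVE_STATUS_OK)
		return false;

	/* RF-kill does not invalidate the alive data itself */
	if (le16_to_cpu(resp->flags) & IWL_ALIVE_FLG_RFKILL)
		pr_info("booted in RF-kill\n");

	return true;
}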
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/binding.h b/drivers/net/wireless/intel/iwlwifi/fw/api/binding.h
new file mode 100644
index 0000000..d2717fa
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/binding.h
@@ -0,0 +1,144 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_binding_h__
+#define __iwl_fw_api_binding_h__
+
+#define MAX_MACS_IN_BINDING (3)
+#define MAX_BINDINGS (4)
+
+/**
+ * struct iwl_binding_cmd_v1 - configuring bindings
+ * ( BINDING_CONTEXT_CMD = 0x2b )
+ * @id_and_color: ID and color of the relevant Binding,
+ * &enum iwl_ctxt_id_and_color
+ * @action: action to perform, one of FW_CTXT_ACTION_*
+ * @macs: array of MAC id and colors which belong to the binding,
+ * &enum iwl_ctxt_id_and_color
+ * @phy: PHY id and color which belongs to the binding,
+ * &enum iwl_ctxt_id_and_color
+ */
+struct iwl_binding_cmd_v1 {
+ /* COMMON_INDEX_HDR_API_S_VER_1 */
+ __le32 id_and_color;
+ __le32 action;
+ /* BINDING_DATA_API_S_VER_1 */
+ __le32 macs[MAX_MACS_IN_BINDING];
+ __le32 phy;
+} __packed; /* BINDING_CMD_API_S_VER_1 */
+
+/**
+ * struct iwl_binding_cmd - configuring bindings
+ * ( BINDING_CONTEXT_CMD = 0x2b )
+ * @id_and_color: ID and color of the relevant Binding,
+ * &enum iwl_ctxt_id_and_color
+ * @action: action to perform, one of FW_CTXT_ACTION_*
+ * @macs: array of MAC id and colors which belong to the binding
+ * &enum iwl_ctxt_id_and_color
+ * @phy: PHY id and color which belongs to the binding
+ * &enum iwl_ctxt_id_and_color
+ * @lmac_id: the lmac id the binding belongs to
+ */
+struct iwl_binding_cmd {
+ /* COMMON_INDEX_HDR_API_S_VER_1 */
+ __le32 id_and_color;
+ __le32 action;
+ /* BINDING_DATA_API_S_VER_1 */
+ __le32 macs[MAX_MACS_IN_BINDING];
+ __le32 phy;
+ __le32 lmac_id;
+} __packed; /* BINDING_CMD_API_S_VER_2 */
+
+#define IWL_BINDING_CMD_SIZE_V1 sizeof(struct iwl_binding_cmd_v1)
+#define IWL_LMAC_24G_INDEX 0
+#define IWL_LMAC_5G_INDEX 1
+
+/* The maximal number of fragments in the FW's schedule session */
+#define IWL_MVM_MAX_QUOTA 128
+
+/**
+ * struct iwl_time_quota_data - configuration of time quota per binding
+ * @id_and_color: ID and color of the relevant Binding,
+ * &enum iwl_ctxt_id_and_color
+ * @quota: absolute time quota in TU. The scheduler will try to divide the
+ * remaining quota (after Time Events) according to this quota.
+ * @max_duration: max uninterrupted context duration in TU
+ */
+struct iwl_time_quota_data {
+ __le32 id_and_color;
+ __le32 quota;
+ __le32 max_duration;
+} __packed; /* TIME_QUOTA_DATA_API_S_VER_1 */
+
+/**
+ * struct iwl_time_quota_cmd - configuration of time quota between bindings
+ * ( TIME_QUOTA_CMD = 0x2c )
+ * @quotas: allocations per binding
+ * Note: on non-CDB the fourth one is the auxiliary mac and is
+ * essentially zero.
+ * On CDB the fourth one is a regular binding.
+ */
+struct iwl_time_quota_cmd {
+ struct iwl_time_quota_data quotas[MAX_BINDINGS];
+} __packed; /* TIME_QUOTA_ALLOCATION_CMD_API_S_VER_1 */
+
+#endif /* __iwl_fw_api_binding_h__ */
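
As a minimal sketch, assigning the whole quota to a single binding could look like this (treating a zeroed max_duration as "no limit" is an assumption):

/* Hypothetical quota setup for one active binding. */
static void iwl_fill_single_quota(struct iwl_time_quota_cmd *cmd,
				  u32 id_and_color)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->quotas[0].id_and_color = cpu_to_le32(id_and_color);
	cmd->quotas[0].quota = cpu_to_le32(IWL_MVM_MAX_QUOTA);
	cmd->quotas[0].max_duration = 0;	/* no limit, assumed */
}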
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api.h b/drivers/net/wireless/intel/iwlwifi/fw/api/cmdhdr.h
index 0e107f9..ea4a3f0 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/cmdhdr.h
@@ -59,8 +59,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*****************************************************************************/
-#ifndef __iwl_fw_api_h__
-#define __iwl_fw_api_h__
+#ifndef __iwl_fw_api_cmdhdr_h__
+#define __iwl_fw_api_cmdhdr_h__
/**
* DOC: Host command section
@@ -112,15 +112,24 @@ static inline u32 iwl_cmd_id(u8 opcode, u8 groupid, u8 version)
#define IWL_ALWAYS_LONG_GROUP 1
/**
- * struct iwl_cmd_header
+ * struct iwl_cmd_header - (short) command header format
*
* This header format appears in the beginning of each command sent from the
* driver, and each response/notification received from uCode.
*/
struct iwl_cmd_header {
- u8 cmd; /* Command ID: REPLY_RXON, etc. */
+ /**
+ * @cmd: Command ID: REPLY_RXON, etc.
+ */
+ u8 cmd;
+ /**
+ * @group_id: group ID, for commands with groups
+ */
u8 group_id;
- /*
+ /**
+ * @sequence:
+ * Sequence number for the command.
+ *
* The driver sets up the sequence number to values of its choosing.
* uCode does not use this value, but passes it back to the driver
* when sending the response to each driver-originated command, so
@@ -150,6 +159,13 @@ struct iwl_cmd_header {
* driver, and each response/notification received from uCode.
* this is the wide version that contains more information about the command
* like length, version and command type
+ *
+ * @cmd: command ID, like in &struct iwl_cmd_header
+ * @group_id: group ID, like in &struct iwl_cmd_header
+ * @sequence: sequence, like in &struct iwl_cmd_header
+ * @length: length of the command
+ * @reserved: reserved
+ * @version: command version
*/
struct iwl_cmd_header_wide {
u8 cmd;
@@ -161,48 +177,6 @@ struct iwl_cmd_header_wide {
} __packed;
/**
- * iwl_tx_queue_cfg_actions - TXQ config options
- * @TX_QUEUE_CFG_ENABLE_QUEUE: enable a queue
- * @TX_QUEUE_CFG_TFD_SHORT_FORMAT: use short TFD format
- */
-enum iwl_tx_queue_cfg_actions {
- TX_QUEUE_CFG_ENABLE_QUEUE = BIT(0),
- TX_QUEUE_CFG_TFD_SHORT_FORMAT = BIT(1),
-};
-
-/**
- * struct iwl_tx_queue_cfg_cmd - txq hw scheduler config command
- * @sta_id: station id
- * @tid: tid of the queue
- * @flags: see &enum iwl_tx_queue_cfg_actions
- * @cb_size: size of TFD cyclic buffer. Value is exponent - 3.
- * Minimum value 0 (8 TFDs), maximum value 5 (256 TFDs)
- * @byte_cnt_addr: address of byte count table
- * @tfdq_addr: address of TFD circular buffer
- */
-struct iwl_tx_queue_cfg_cmd {
- u8 sta_id;
- u8 tid;
- __le16 flags;
- __le32 cb_size;
- __le64 byte_cnt_addr;
- __le64 tfdq_addr;
-} __packed; /* TX_QUEUE_CFG_CMD_API_S_VER_2 */
-
-/**
- * struct iwl_tx_queue_cfg_rsp - response to txq hw scheduler config
- * @queue_number: queue number assigned to this RA -TID
- * @flags: set on failure
- * @write_pointer: initial value for write pointer
- */
-struct iwl_tx_queue_cfg_rsp {
- __le16 queue_number;
- __le16 flags;
- __le16 write_pointer;
- __le16 reserved;
-} __packed; /* TX_QUEUE_CFG_RSP_API_S_VER_2 */
-
-/**
* struct iwl_calib_res_notif_phy_db - Receive phy db chunk after calibrations
* @type: type of the result - mostly ignored
* @length: length of the data
@@ -226,4 +200,12 @@ struct iwl_phy_db_cmd {
u8 data[];
} __packed;
-#endif /* __iwl_fw_api_h__*/
+/**
+ * struct iwl_cmd_response - generic response struct for most commands
+ * @status: status of the command asked, changes for each one
+ */
+struct iwl_cmd_response {
+ __le32 status;
+};
+
+#endif /* __iwl_fw_api_cmdhdr_h__ */
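
A minimal sketch of how a caller might interpret this generic response (the error mapping is an assumption):

/* Hypothetical status check: zero means success for most commands. */
static int iwl_check_cmd_response(const struct iwl_cmd_response *resp)
{
	return le32_to_cpu(resp->status) ? -EIO : 0;
}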
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-coex.h b/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h
index 8cd06aa..d09555a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-coex.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/coex.h
@@ -18,11 +18,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
* The full GNU General Public License is included in this distribution
* in the file called COPYING.
*
@@ -64,8 +59,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#ifndef __fw_api_bt_coex_h__
-#define __fw_api_bt_coex_h__
+#ifndef __iwl_fw_api_coex_h__
+#define __iwl_fw_api_coex_h__
#include <linux/types.h>
#include <linux/bitops.h>
@@ -81,7 +76,6 @@ enum iwl_bt_coex_lut_type {
BT_COEX_INVALID_LUT = 0xff,
}; /* BT_COEX_DECISION_LUT_INDEX_API_E_VER_1 */
-#define BT_COEX_CORUN_LUT_SIZE (32)
#define BT_REDUCED_TX_POWER_BIT BIT(7)
enum iwl_bt_coex_mode {
@@ -112,18 +106,6 @@ struct iwl_bt_coex_cmd {
} __packed; /* BT_COEX_CMD_API_S_VER_6 */
/**
- * struct iwl_bt_coex_corun_lut_update - bt coex update the corun lut
- * @corun_lut20: co-running 20 MHz LUT configuration
- * @corun_lut40: co-running 40 MHz LUT configuration
- *
- * The structure is used for the BT_COEX_UPDATE_CORUN_LUT command.
- */
-struct iwl_bt_coex_corun_lut_update_cmd {
- __le32 corun_lut20[BT_COEX_CORUN_LUT_SIZE];
- __le32 corun_lut40[BT_COEX_CORUN_LUT_SIZE];
-} __packed; /* BT_COEX_UPDATE_CORUN_LUT_API_S_VER_1 */
-
-/**
* struct iwl_bt_coex_reduced_txp_update_cmd
* @reduced_txp: bit BT_REDUCED_TX_POWER_BIT to enable / disable, rest of the
* bits are the sta_id (value)
@@ -196,6 +178,7 @@ enum iwl_bt_mxbox_dw3 {
BT_MBOX(3, ACL_STATE, 3, 1),
BT_MBOX(3, MSTR_STATE, 4, 1),
BT_MBOX(3, OBX_STATE, 5, 1),
+ BT_MBOX(3, A2DP_SRC, 6, 1),
BT_MBOX(3, OPEN_CON_2, 8, 2),
BT_MBOX(3, TRAFFIC_LOAD, 10, 2),
BT_MBOX(3, CHL_SEQN_LSB, 12, 1),
@@ -205,10 +188,21 @@ enum iwl_bt_mxbox_dw3 {
BT_MBOX(3, UPDATE_REQUEST, 21, 1),
};
+enum iwl_bt_mxbox_dw4 {
+ BT_MBOX(4, ATS_BT_INTERVAL, 0, 7),
+ BT_MBOX(4, ATS_BT_ACTIVE_MAX_TH, 7, 7),
+};
+
#define BT_MBOX_MSG(_notif, _num, _field) \
((le32_to_cpu((_notif)->mbox_msg[(_num)]) & BT_MBOX##_num##_##_field)\
>> BT_MBOX##_num##_##_field##_POS)
+#define BT_MBOX_PRINT(_num, _field, _end) \
+ pos += scnprintf(buf + pos, bufsz - pos, \
+ "\t%s: %d%s", \
+ #_field, \
+ BT_MBOX_MSG(notif, _num, _field), \
+ true ? "\n" : ", ");
enum iwl_bt_activity_grading {
BT_OFF = 0,
BT_ON_NO_CONNECTION = 1,
@@ -225,11 +219,30 @@ enum iwl_bt_ci_compliance {
BT_CI_COMPLIANCE_BOTH = 3,
}; /* BT_COEX_CI_COMPLIENCE_E_VER_1 */
-#define IWL_COEX_IS_TTC_ON(_ttc_rrc_status, _phy_id) \
- (_ttc_rrc_status & BIT(_phy_id))
+/**
+ * struct iwl_bt_coex_profile_notif - notification about BT coex
+ * @mbox_msg: message from BT to WiFi
+ * @msg_idx: the index of the message
+ * @bt_ci_compliance: &enum iwl_bt_ci_compliance
+ * @primary_ch_lut: LUT used for primary channel &enum iwl_bt_coex_lut_type
+ * @secondary_ch_lut: LUT used for secondary channel &enum iwl_bt_coex_lut_type
+ * @bt_activity_grading: the activity of BT &enum iwl_bt_activity_grading
+ * @ttc_status: is TTC enabled - one bit per PHY
+ * @rrc_status: is RRC enabled - one bit per PHY
+ * @reserved: reserved
+ */
+struct iwl_bt_coex_profile_notif {
+ __le32 mbox_msg[8];
+ __le32 msg_idx;
+ __le32 bt_ci_compliance;
-#define IWL_COEX_IS_RRC_ON(_ttc_rrc_status, _phy_id) \
- ((_ttc_rrc_status >> 4) & BIT(_phy_id))
+ __le32 primary_ch_lut;
+ __le32 secondary_ch_lut;
+ __le32 bt_activity_grading;
+ u8 ttc_status;
+ u8 rrc_status;
+ __le16 reserved;
+} __packed; /* BT_COEX_PROFILE_NTFY_API_S_VER_5 */
/**
* struct iwl_bt_coex_profile_notif - notification about BT coex
@@ -239,10 +252,11 @@ enum iwl_bt_ci_compliance {
* @primary_ch_lut: LUT used for primary channel &enum iwl_bt_coex_lut_type
* @secondary_ch_lut: LUT used for secondary channel &enum iwl_bt_coex_lut_type
* @bt_activity_grading: the activity of BT &enum iwl_bt_activity_grading
- * @ttc_rrc_status: is TTC or RRC enabled - one bit per PHY
+ * @ttc_status: is TTC enabled - one bit per PHY
+ * @rrc_status: is RRC enabled - one bit per PHY
* @reserved: reserved
*/
-struct iwl_bt_coex_profile_notif {
+struct iwl_bt_coex_profile_notif_v4 {
__le32 mbox_msg[4];
__le32 msg_idx;
__le32 bt_ci_compliance;
@@ -250,8 +264,9 @@ struct iwl_bt_coex_profile_notif {
__le32 primary_ch_lut;
__le32 secondary_ch_lut;
__le32 bt_activity_grading;
- u8 ttc_rrc_status;
- u8 reserved[3];
+ u8 ttc_status;
+ u8 rrc_status;
+ __le16 reserved;
} __packed; /* BT_COEX_PROFILE_NTFY_API_S_VER_4 */
-#endif /* __fw_api_bt_coex_h__ */
+#endif /* __iwl_fw_api_coex_h__ */
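
A minimal sketch of reading the new A2DP source bit through the mailbox accessors defined above:

/* BT_MBOX_MSG() extracts the field from the little-endian mailbox. */
static bool iwl_bt_a2dp_source_active(const struct iwl_bt_coex_profile_notif *notif)
{
	return BT_MBOX_MSG(notif, 3, A2DP_SRC);
}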
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
new file mode 100644
index 0000000..0748683
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
@@ -0,0 +1,657 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_commands_h__
+#define __iwl_fw_api_commands_h__
+
+/**
+ * enum iwl_mvm_command_groups - command groups for the firmware
+ * @LEGACY_GROUP: legacy group, uses command IDs from &enum iwl_legacy_cmds
+ * @LONG_GROUP: legacy group with long header, also uses command IDs
+ * from &enum iwl_legacy_cmds
+ * @SYSTEM_GROUP: system group, uses command IDs from
+ * &enum iwl_system_subcmd_ids
+ * @MAC_CONF_GROUP: MAC configuration group, uses command IDs from
+ * &enum iwl_mac_conf_subcmd_ids
+ * @PHY_OPS_GROUP: PHY operations group, uses command IDs from
+ * &enum iwl_phy_ops_subcmd_ids
+ * @DATA_PATH_GROUP: data path group, uses command IDs from
+ * &enum iwl_data_path_subcmd_ids
+ * @NAN_GROUP: NAN group, uses command IDs from &enum iwl_nan_subcmd_ids
+ * @TOF_GROUP: TOF group, uses command IDs from &enum iwl_tof_subcmd_ids
+ * @PROT_OFFLOAD_GROUP: protocol offload group, uses command IDs from
+ * &enum iwl_prot_offload_subcmd_ids
+ * @REGULATORY_AND_NVM_GROUP: regulatory/NVM group, uses command IDs from
+ * &enum iwl_regulatory_and_nvm_subcmd_ids
+ * @DEBUG_GROUP: Debug group, uses command IDs from &enum iwl_debug_cmds
+ */
+enum iwl_mvm_command_groups {
+ LEGACY_GROUP = 0x0,
+ LONG_GROUP = 0x1,
+ SYSTEM_GROUP = 0x2,
+ MAC_CONF_GROUP = 0x3,
+ PHY_OPS_GROUP = 0x4,
+ DATA_PATH_GROUP = 0x5,
+ NAN_GROUP = 0x7,
+ TOF_GROUP = 0x8,
+ PROT_OFFLOAD_GROUP = 0xb,
+ REGULATORY_AND_NVM_GROUP = 0xc,
+ DEBUG_GROUP = 0xf,
+};
+
+/**
+ * enum iwl_legacy_cmds - legacy group command IDs
+ */
+enum iwl_legacy_cmds {
+ /**
+ * @MVM_ALIVE:
+ * Alive data from the firmware, as described in
+ * &struct mvm_alive_resp_v3 or &struct mvm_alive_resp.
+ */
+ MVM_ALIVE = 0x1,
+
+ /**
+ * @REPLY_ERROR: Cause an error in the firmware, for testing purposes.
+ */
+ REPLY_ERROR = 0x2,
+
+ /**
+ * @ECHO_CMD: Send data to the device to have it returned immediately.
+ */
+ ECHO_CMD = 0x3,
+
+ /**
+ * @INIT_COMPLETE_NOTIF: Notification that initialization is complete.
+ */
+ INIT_COMPLETE_NOTIF = 0x4,
+
+ /**
+ * @PHY_CONTEXT_CMD:
+ * Add/modify/remove a PHY context, using &struct iwl_phy_context_cmd.
+ */
+ PHY_CONTEXT_CMD = 0x8,
+
+ /**
+ * @DBG_CFG: Debug configuration command.
+ */
+ DBG_CFG = 0x9,
+
+ /**
+ * @SCAN_ITERATION_COMPLETE_UMAC:
+ * Firmware indicates a scan iteration completed, using
+ * &struct iwl_umac_scan_iter_complete_notif.
+ */
+ SCAN_ITERATION_COMPLETE_UMAC = 0xb5,
+
+ /**
+ * @SCAN_CFG_CMD:
+ * uses &struct iwl_scan_config_v1 or &struct iwl_scan_config
+ */
+ SCAN_CFG_CMD = 0xc,
+
+ /**
+ * @SCAN_REQ_UMAC: uses &struct iwl_scan_req_umac
+ */
+ SCAN_REQ_UMAC = 0xd,
+
+ /**
+ * @SCAN_ABORT_UMAC: uses &struct iwl_umac_scan_abort
+ */
+ SCAN_ABORT_UMAC = 0xe,
+
+ /**
+ * @SCAN_COMPLETE_UMAC: uses &struct iwl_umac_scan_complete
+ */
+ SCAN_COMPLETE_UMAC = 0xf,
+
+ /**
+ * @BA_WINDOW_STATUS_NOTIFICATION_ID:
+ * uses &struct iwl_ba_window_status_notif
+ */
+ BA_WINDOW_STATUS_NOTIFICATION_ID = 0x13,
+
+ /**
+ * @ADD_STA_KEY:
+ * &struct iwl_mvm_add_sta_key_cmd_v1 or
+ * &struct iwl_mvm_add_sta_key_cmd.
+ */
+ ADD_STA_KEY = 0x17,
+
+ /**
+ * @ADD_STA:
+ * &struct iwl_mvm_add_sta_cmd or &struct iwl_mvm_add_sta_cmd_v7.
+ */
+ ADD_STA = 0x18,
+
+ /**
+ * @REMOVE_STA: &struct iwl_mvm_rm_sta_cmd
+ */
+ REMOVE_STA = 0x19,
+
+ /**
+ * @FW_GET_ITEM_CMD: uses &struct iwl_fw_get_item_cmd
+ */
+ FW_GET_ITEM_CMD = 0x1a,
+
+ /**
+ * @TX_CMD: uses &struct iwl_tx_cmd or &struct iwl_tx_cmd_gen2,
+ * response in &struct iwl_mvm_tx_resp or
+ * &struct iwl_mvm_tx_resp_v3
+ */
+ TX_CMD = 0x1c,
+
+ /**
+ * @TXPATH_FLUSH: &struct iwl_tx_path_flush_cmd
+ */
+ TXPATH_FLUSH = 0x1e,
+
+ /**
+ * @MGMT_MCAST_KEY:
+ * &struct iwl_mvm_mgmt_mcast_key_cmd or
+ * &struct iwl_mvm_mgmt_mcast_key_cmd_v1
+ */
+ MGMT_MCAST_KEY = 0x1f,
+
+ /* scheduler config */
+ /**
+ * @SCD_QUEUE_CFG: &struct iwl_scd_txq_cfg_cmd for older hardware,
+ * &struct iwl_tx_queue_cfg_cmd with &struct iwl_tx_queue_cfg_rsp
+ * for newer (A000) hardware.
+ */
+ SCD_QUEUE_CFG = 0x1d,
+
+ /**
+ * @WEP_KEY: uses &struct iwl_mvm_wep_key_cmd
+ */
+ WEP_KEY = 0x20,
+
+ /**
+ * @SHARED_MEM_CFG:
+ * retrieve shared memory configuration - response in
+ * &struct iwl_shared_mem_cfg
+ */
+ SHARED_MEM_CFG = 0x25,
+
+ /**
+ * @TDLS_CHANNEL_SWITCH_CMD: uses &struct iwl_tdls_channel_switch_cmd
+ */
+ TDLS_CHANNEL_SWITCH_CMD = 0x27,
+
+ /**
+ * @TDLS_CHANNEL_SWITCH_NOTIFICATION:
+ * uses &struct iwl_tdls_channel_switch_notif
+ */
+ TDLS_CHANNEL_SWITCH_NOTIFICATION = 0xaa,
+
+ /**
+ * @TDLS_CONFIG_CMD:
+ * &struct iwl_tdls_config_cmd, response in &struct iwl_tdls_config_res
+ */
+ TDLS_CONFIG_CMD = 0xa7,
+
+ /**
+ * @MAC_CONTEXT_CMD: &struct iwl_mac_ctx_cmd
+ */
+ MAC_CONTEXT_CMD = 0x28,
+
+ /**
+ * @TIME_EVENT_CMD:
+ * &struct iwl_time_event_cmd, response in &struct iwl_time_event_resp
+ */
+ TIME_EVENT_CMD = 0x29, /* both CMD and response */
+
+ /**
+ * @TIME_EVENT_NOTIFICATION: &struct iwl_time_event_notif
+ */
+ TIME_EVENT_NOTIFICATION = 0x2a,
+
+ /**
+ * @BINDING_CONTEXT_CMD:
+ * &struct iwl_binding_cmd or &struct iwl_binding_cmd_v1
+ */
+ BINDING_CONTEXT_CMD = 0x2b,
+
+ /**
+ * @TIME_QUOTA_CMD: &struct iwl_time_quota_cmd
+ */
+ TIME_QUOTA_CMD = 0x2c,
+
+ /**
+ * @NON_QOS_TX_COUNTER_CMD:
+ * command is &struct iwl_nonqos_seq_query_cmd
+ */
+ NON_QOS_TX_COUNTER_CMD = 0x2d,
+
+ /**
+ * @LEDS_CMD: command is &struct iwl_led_cmd
+ */
+ LEDS_CMD = 0x48,
+
+ /**
+ * @LQ_CMD: using &struct iwl_lq_cmd
+ */
+ LQ_CMD = 0x4e,
+
+ /**
+ * @FW_PAGING_BLOCK_CMD:
+ * &struct iwl_fw_paging_cmd
+ */
+ FW_PAGING_BLOCK_CMD = 0x4f,
+
+ /**
+ * @SCAN_OFFLOAD_REQUEST_CMD: uses &struct iwl_scan_req_lmac
+ */
+ SCAN_OFFLOAD_REQUEST_CMD = 0x51,
+
+ /**
+ * @SCAN_OFFLOAD_ABORT_CMD: abort the scan - no further contents
+ */
+ SCAN_OFFLOAD_ABORT_CMD = 0x52,
+
+ /**
+ * @HOT_SPOT_CMD: uses &struct iwl_hs20_roc_req
+ */
+ HOT_SPOT_CMD = 0x53,
+
+ /**
+ * @SCAN_OFFLOAD_COMPLETE:
+ * notification, &struct iwl_periodic_scan_complete
+ */
+ SCAN_OFFLOAD_COMPLETE = 0x6D,
+
+ /**
+ * @SCAN_OFFLOAD_UPDATE_PROFILES_CMD:
+ * update scan offload (scheduled scan) profiles/blacklist/etc.
+ */
+ SCAN_OFFLOAD_UPDATE_PROFILES_CMD = 0x6E,
+
+ /**
+ * @MATCH_FOUND_NOTIFICATION: scan match found
+ */
+ MATCH_FOUND_NOTIFICATION = 0xd9,
+
+ /**
+ * @SCAN_ITERATION_COMPLETE:
+ * uses &struct iwl_lmac_scan_complete_notif
+ */
+ SCAN_ITERATION_COMPLETE = 0xe7,
+
+ /* Phy */
+ /**
+ * @PHY_CONFIGURATION_CMD: &struct iwl_phy_cfg_cmd
+ */
+ PHY_CONFIGURATION_CMD = 0x6a,
+
+ /**
+ * @CALIB_RES_NOTIF_PHY_DB: &struct iwl_calib_res_notif_phy_db
+ */
+ CALIB_RES_NOTIF_PHY_DB = 0x6b,
+
+ /**
+ * @PHY_DB_CMD: &struct iwl_phy_db_cmd
+ */
+ PHY_DB_CMD = 0x6c,
+
+ /**
+ * @TOF_CMD: &struct iwl_tof_config_cmd
+ */
+ TOF_CMD = 0x10,
+
+ /**
+ * @TOF_NOTIFICATION: &struct iwl_tof_gen_resp_cmd
+ */
+ TOF_NOTIFICATION = 0x11,
+
+ /**
+ * @POWER_TABLE_CMD: &struct iwl_device_power_cmd
+ */
+ POWER_TABLE_CMD = 0x77,
+
+ /**
+ * @PSM_UAPSD_AP_MISBEHAVING_NOTIFICATION:
+ * &struct iwl_uapsd_misbehaving_ap_notif
+ */
+ PSM_UAPSD_AP_MISBEHAVING_NOTIFICATION = 0x78,
+
+ /**
+ * @LTR_CONFIG: &struct iwl_ltr_config_cmd
+ */
+ LTR_CONFIG = 0xee,
+
+ /**
+ * @REPLY_THERMAL_MNG_BACKOFF:
+ * Thermal throttling command
+ */
+ REPLY_THERMAL_MNG_BACKOFF = 0x7e,
+
+ /**
+ * @DC2DC_CONFIG_CMD:
+ * Set/Get DC2DC frequency tune
+ * Command is &struct iwl_dc2dc_config_cmd,
+ * response is &struct iwl_dc2dc_config_resp
+ */
+ DC2DC_CONFIG_CMD = 0x83,
+
+ /**
+ * @NVM_ACCESS_CMD: using &struct iwl_nvm_access_cmd
+ */
+ NVM_ACCESS_CMD = 0x88,
+
+ /**
+ * @BEACON_NOTIFICATION: &struct iwl_extended_beacon_notif
+ */
+ BEACON_NOTIFICATION = 0x90,
+
+ /**
+ * @BEACON_TEMPLATE_CMD:
+ * Uses one of &struct iwl_mac_beacon_cmd_v6,
+ * &struct iwl_mac_beacon_cmd_v7 or &struct iwl_mac_beacon_cmd
+ * depending on the device version.
+ */
+ BEACON_TEMPLATE_CMD = 0x91,
+ /**
+ * @TX_ANT_CONFIGURATION_CMD: &struct iwl_tx_ant_cfg_cmd
+ */
+ TX_ANT_CONFIGURATION_CMD = 0x98,
+
+ /**
+ * @STATISTICS_CMD: &struct iwl_statistics_cmd
+ */
+ STATISTICS_CMD = 0x9c,
+
+ /**
+ * @STATISTICS_NOTIFICATION:
+ * one of &struct iwl_notif_statistics_v10,
+ * &struct iwl_notif_statistics_v11,
+ * &struct iwl_notif_statistics_cdb
+ */
+ STATISTICS_NOTIFICATION = 0x9d,
+
+ /**
+ * @EOSP_NOTIFICATION:
+ * Notify that a service period ended,
+ * &struct iwl_mvm_eosp_notification
+ */
+ EOSP_NOTIFICATION = 0x9e,
+
+ /**
+ * @REDUCE_TX_POWER_CMD:
+ * &struct iwl_dev_tx_power_cmd_v3 or &struct iwl_dev_tx_power_cmd
+ */
+ REDUCE_TX_POWER_CMD = 0x9f,
+
+ /**
+ * @CARD_STATE_NOTIFICATION:
+ * Card state (RF/CT kill) notification,
+ * uses &struct iwl_card_state_notif
+ */
+ CARD_STATE_NOTIFICATION = 0xa1,
+
+ /**
+ * @MISSED_BEACONS_NOTIFICATION: &struct iwl_missed_beacons_notif
+ */
+ MISSED_BEACONS_NOTIFICATION = 0xa2,
+
+ /**
+ * @MAC_PM_POWER_TABLE: using &struct iwl_mac_power_cmd
+ */
+ MAC_PM_POWER_TABLE = 0xa9,
+
+ /**
+ * @MFUART_LOAD_NOTIFICATION: &struct iwl_mfuart_load_notif
+ */
+ MFUART_LOAD_NOTIFICATION = 0xb1,
+
+ /**
+ * @RSS_CONFIG_CMD: &struct iwl_rss_config_cmd
+ */
+ RSS_CONFIG_CMD = 0xb3,
+
+ /**
+ * @REPLY_RX_PHY_CMD: &struct iwl_rx_phy_info
+ */
+ REPLY_RX_PHY_CMD = 0xc0,
+
+ /**
+ * @REPLY_RX_MPDU_CMD:
+ * &struct iwl_rx_mpdu_res_start or &struct iwl_rx_mpdu_desc
+ */
+ REPLY_RX_MPDU_CMD = 0xc1,
+
+ /**
+ * @FRAME_RELEASE:
+ * Frame release (reorder helper) notification, uses
+ * &struct iwl_frame_release
+ */
+ FRAME_RELEASE = 0xc3,
+
+ /**
+ * @BA_NOTIF:
+ * BlockAck notification, uses &struct iwl_mvm_compressed_ba_notif
+ * or &struct iwl_mvm_ba_notif depending on the HW
+ */
+ BA_NOTIF = 0xc5,
+
+ /* Location Aware Regulatory */
+ /**
+ * @MCC_UPDATE_CMD: using &struct iwl_mcc_update_cmd
+ */
+ MCC_UPDATE_CMD = 0xc8,
+
+ /**
+ * @MCC_CHUB_UPDATE_CMD: using &struct iwl_mcc_chub_notif
+ */
+ MCC_CHUB_UPDATE_CMD = 0xc9,
+
+ /**
+ * @MARKER_CMD: trace marker command, uses &struct iwl_mvm_marker
+ */
+ MARKER_CMD = 0xcb,
+
+ /**
+ * @BT_PROFILE_NOTIFICATION: &struct iwl_bt_coex_profile_notif
+ */
+ BT_PROFILE_NOTIFICATION = 0xce,
+
+ /**
+ * @BT_CONFIG: &struct iwl_bt_coex_cmd
+ */
+ BT_CONFIG = 0x9b,
+
+ /**
+ * @BT_COEX_UPDATE_REDUCED_TXP:
+ * &struct iwl_bt_coex_reduced_txp_update_cmd
+ */
+ BT_COEX_UPDATE_REDUCED_TXP = 0x5c,
+
+ /**
+ * @BT_COEX_CI: &struct iwl_bt_coex_ci_cmd
+ */
+ BT_COEX_CI = 0x5d,
+
+ /**
+ * @REPLY_SF_CFG_CMD: &struct iwl_sf_cfg_cmd
+ */
+ REPLY_SF_CFG_CMD = 0xd1,
+ /**
+ * @REPLY_BEACON_FILTERING_CMD: &struct iwl_beacon_filter_cmd
+ */
+ REPLY_BEACON_FILTERING_CMD = 0xd2,
+
+ /**
+ * @DTS_MEASUREMENT_NOTIFICATION:
+ * &struct iwl_dts_measurement_notif_v1 or
+ * &struct iwl_dts_measurement_notif_v2
+ */
+ DTS_MEASUREMENT_NOTIFICATION = 0xdd,
+
+ /**
+ * @LDBG_CONFIG_CMD: configure continuous trace recording
+ */
+ LDBG_CONFIG_CMD = 0xf6,
+
+ /**
+ * @DEBUG_LOG_MSG: Debugging log data from firmware
+ */
+ DEBUG_LOG_MSG = 0xf7,
+
+ /**
+ * @BCAST_FILTER_CMD: &struct iwl_bcast_filter_cmd
+ */
+ BCAST_FILTER_CMD = 0xcf,
+
+ /**
+ * @MCAST_FILTER_CMD: &struct iwl_mcast_filter_cmd
+ */
+ MCAST_FILTER_CMD = 0xd0,
+
+ /**
+ * @D3_CONFIG_CMD: &struct iwl_d3_manager_config
+ */
+ D3_CONFIG_CMD = 0xd3,
+
+ /**
+ * @PROT_OFFLOAD_CONFIG_CMD: Depending on firmware, uses one of
+ * &struct iwl_proto_offload_cmd_v1, &struct iwl_proto_offload_cmd_v2,
+ * &struct iwl_proto_offload_cmd_v3_small,
+ * &struct iwl_proto_offload_cmd_v3_large
+ */
+ PROT_OFFLOAD_CONFIG_CMD = 0xd4,
+
+ /**
+ * @OFFLOADS_QUERY_CMD:
+ * No data in command, response in &struct iwl_wowlan_status
+ */
+ OFFLOADS_QUERY_CMD = 0xd5,
+
+ /**
+ * @REMOTE_WAKE_CONFIG_CMD: &struct iwl_wowlan_remote_wake_config
+ */
+ REMOTE_WAKE_CONFIG_CMD = 0xd6,
+
+ /**
+ * @D0I3_END_CMD: End D0i3/D3 state, no command data
+ */
+ D0I3_END_CMD = 0xed,
+
+ /**
+ * @WOWLAN_PATTERNS: &struct iwl_wowlan_patterns_cmd
+ */
+ WOWLAN_PATTERNS = 0xe0,
+
+ /**
+ * @WOWLAN_CONFIGURATION: &struct iwl_wowlan_config_cmd
+ */
+ WOWLAN_CONFIGURATION = 0xe1,
+
+ /**
+ * @WOWLAN_TSC_RSC_PARAM: &struct iwl_wowlan_rsc_tsc_params_cmd
+ */
+ WOWLAN_TSC_RSC_PARAM = 0xe2,
+
+ /**
+ * @WOWLAN_TKIP_PARAM: &struct iwl_wowlan_tkip_params_cmd
+ */
+ WOWLAN_TKIP_PARAM = 0xe3,
+
+ /**
+ * @WOWLAN_KEK_KCK_MATERIAL: &struct iwl_wowlan_kek_kck_material_cmd
+ */
+ WOWLAN_KEK_KCK_MATERIAL = 0xe4,
+
+ /**
+ * @WOWLAN_GET_STATUSES: response in &struct iwl_wowlan_status
+ */
+ WOWLAN_GET_STATUSES = 0xe5,
+
+ /**
+ * @SCAN_OFFLOAD_PROFILES_QUERY_CMD:
+ * No command data, response is &struct iwl_scan_offload_profiles_query
+ */
+ SCAN_OFFLOAD_PROFILES_QUERY_CMD = 0x56,
+};
+
+/**
+ * enum iwl_system_subcmd_ids - system group command IDs
+ */
+enum iwl_system_subcmd_ids {
+ /**
+ * @SHARED_MEM_CFG_CMD:
+ * response in &struct iwl_shared_mem_cfg or
+ * &struct iwl_shared_mem_cfg_v2
+ */
+ SHARED_MEM_CFG_CMD = 0x0,
+
+ /**
+ * @INIT_EXTENDED_CFG_CMD: &struct iwl_init_extended_cfg_cmd
+ */
+ INIT_EXTENDED_CFG_CMD = 0x03,
+
+ /**
+ * @FSEQ_VER_MISMATCH_NTF: Notification about fseq version
+ * mismatch during init. The format is specified in
+ * &struct iwl_fseq_ver_mismatch_ntf.
+ */
+ FSEQ_VER_MISMATCH_NTF = 0xFF,
+};
+
+#endif /* __iwl_fw_api_commands_h__ */
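Side note on usage: the group IDs above combine with the per-group opcodes to form the "wide" command IDs actually sent to the firmware. A minimal sketch of that composition, assuming a WIDE_ID-style macro with the group in the high byte (the driver's transport layer defines the real one):

/* illustrative only: group in the high byte, opcode in the low byte */
#define WIDE_ID(grp, opcode) (((grp) << 8) | (opcode))

/* e.g. the shared-memory config command in the system group: */
u32 cmd_id = WIDE_ID(SYSTEM_GROUP, SHARED_MEM_CFG_CMD); /* 0x0200 */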
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/config.h b/drivers/net/wireless/intel/iwlwifi/fw/api/config.h
new file mode 100644
index 0000000..7f645b6
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/config.h
@@ -0,0 +1,184 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_config_h__
+#define __iwl_fw_api_config_h__
+
+/*
+ * struct iwl_dqa_enable_cmd
+ * @cmd_queue: the TXQ number of the command queue
+ */
+struct iwl_dqa_enable_cmd {
+ __le32 cmd_queue;
+} __packed; /* DQA_CONTROL_CMD_API_S_VER_1 */
+
+/*
+ * struct iwl_tx_ant_cfg_cmd
+ * @valid: valid antenna configuration
+ */
+struct iwl_tx_ant_cfg_cmd {
+ __le32 valid;
+} __packed;
+
+/**
+ * struct iwl_calib_ctrl - Calibration control struct.
+ * Sent as part of the phy configuration command.
+ * @flow_trigger: bitmap for which calibrations to perform according to
+ * flow triggers, using &enum iwl_calib_cfg
+ * @event_trigger: bitmap for which calibrations to perform according to
+ * event triggers, using &enum iwl_calib_cfg
+ */
+struct iwl_calib_ctrl {
+ __le32 flow_trigger;
+ __le32 event_trigger;
+} __packed;
+
+/* This enum defines the bitmap of various calibrations to enable in both
+ * init ucode and runtime ucode through CALIBRATION_CFG_CMD.
+ */
+enum iwl_calib_cfg {
+ IWL_CALIB_CFG_XTAL_IDX = BIT(0),
+ IWL_CALIB_CFG_TEMPERATURE_IDX = BIT(1),
+ IWL_CALIB_CFG_VOLTAGE_READ_IDX = BIT(2),
+ IWL_CALIB_CFG_PAPD_IDX = BIT(3),
+ IWL_CALIB_CFG_TX_PWR_IDX = BIT(4),
+ IWL_CALIB_CFG_DC_IDX = BIT(5),
+ IWL_CALIB_CFG_BB_FILTER_IDX = BIT(6),
+ IWL_CALIB_CFG_LO_LEAKAGE_IDX = BIT(7),
+ IWL_CALIB_CFG_TX_IQ_IDX = BIT(8),
+ IWL_CALIB_CFG_TX_IQ_SKEW_IDX = BIT(9),
+ IWL_CALIB_CFG_RX_IQ_IDX = BIT(10),
+ IWL_CALIB_CFG_RX_IQ_SKEW_IDX = BIT(11),
+ IWL_CALIB_CFG_SENSITIVITY_IDX = BIT(12),
+ IWL_CALIB_CFG_CHAIN_NOISE_IDX = BIT(13),
+ IWL_CALIB_CFG_DISCONNECTED_ANT_IDX = BIT(14),
+ IWL_CALIB_CFG_ANT_COUPLING_IDX = BIT(15),
+ IWL_CALIB_CFG_DAC_IDX = BIT(16),
+ IWL_CALIB_CFG_ABS_IDX = BIT(17),
+ IWL_CALIB_CFG_AGC_IDX = BIT(18),
+};
+
+/**
+ * struct iwl_phy_cfg_cmd - Phy configuration command
+ * @phy_cfg: PHY configuration value, uses &enum iwl_fw_phy_cfg
+ * @calib_control: calibration control data
+ */
+struct iwl_phy_cfg_cmd {
+ __le32 phy_cfg;
+ struct iwl_calib_ctrl calib_control;
+} __packed;
+
+#define PHY_CFG_RADIO_TYPE (BIT(0) | BIT(1))
+#define PHY_CFG_RADIO_STEP (BIT(2) | BIT(3))
+#define PHY_CFG_RADIO_DASH (BIT(4) | BIT(5))
+#define PHY_CFG_PRODUCT_NUMBER (BIT(6) | BIT(7))
+#define PHY_CFG_TX_CHAIN_A BIT(8)
+#define PHY_CFG_TX_CHAIN_B BIT(9)
+#define PHY_CFG_TX_CHAIN_C BIT(10)
+#define PHY_CFG_RX_CHAIN_A BIT(12)
+#define PHY_CFG_RX_CHAIN_B BIT(13)
+#define PHY_CFG_RX_CHAIN_C BIT(14)
+
+/*
+ * enum iwl_dc2dc_config_id - flag ids
+ *
+ * Ids of dc2dc configuration flags
+ */
+enum iwl_dc2dc_config_id {
+ DCDC_LOW_POWER_MODE_MSK_SET = 0x1, /* not used */
+ DCDC_FREQ_TUNE_SET = 0x2,
+}; /* MARKER_ID_API_E_VER_1 */
+
+/**
+ * struct iwl_dc2dc_config_cmd - configure dc2dc values
+ *
+ * (DC2DC_CONFIG_CMD = 0x83)
+ *
+ * Set/Get & configure dc2dc values.
+ * The command always returns the current dc2dc values.
+ *
+ * @flags: set/get dc2dc
+ * @enable_low_power_mode: not used.
+ * @dc2dc_freq_tune0: frequency divider - digital domain
+ * @dc2dc_freq_tune1: frequency divider - analog domain
+ */
+struct iwl_dc2dc_config_cmd {
+ __le32 flags;
+ __le32 enable_low_power_mode; /* not used */
+ __le32 dc2dc_freq_tune0;
+ __le32 dc2dc_freq_tune1;
+} __packed; /* DC2DC_CONFIG_CMD_API_S_VER_1 */
+
+/**
+ * struct iwl_dc2dc_config_resp - response for iwl_dc2dc_config_cmd
+ *
+ * Current dc2dc values returned by the FW.
+ *
+ * @dc2dc_freq_tune0: frequency divider - digital domain
+ * @dc2dc_freq_tune1: frequency divider - analog domain
+ */
+struct iwl_dc2dc_config_resp {
+ __le32 dc2dc_freq_tune0;
+ __le32 dc2dc_freq_tune1;
+} __packed; /* DC2DC_CONFIG_RESP_API_S_VER_1 */
+
+#endif /* __iwl_fw_api_config_h__ */
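To make the bit layout concrete, here is a hedged sketch of building a PHY configuration command that enables chain A for both TX and RX and requests crystal calibration on flow triggers; the values are examples, and how the command is queued to the firmware is out of scope here:

/* sketch only: uses the definitions from this header plus the usual
 * kernel cpu_to_le32() helper
 */
struct iwl_phy_cfg_cmd cmd = {
	.phy_cfg = cpu_to_le32(PHY_CFG_TX_CHAIN_A | PHY_CFG_RX_CHAIN_A),
	.calib_control = {
		.flow_trigger = cpu_to_le32(IWL_CALIB_CFG_XTAL_IDX),
		.event_trigger = cpu_to_le32(0),
	},
};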
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/context.h b/drivers/net/wireless/intel/iwlwifi/fw/api/context.h
new file mode 100644
index 0000000..2f0d7c4
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/context.h
@@ -0,0 +1,94 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_context_h__
+#define __iwl_fw_api_context_h__
+
+/**
+ * enum iwl_ctxt_id_and_color - ID and color fields in context dword
+ * @FW_CTXT_ID_POS: position of the ID
+ * @FW_CTXT_ID_MSK: mask of the ID
+ * @FW_CTXT_COLOR_POS: position of the color
+ * @FW_CTXT_COLOR_MSK: mask of the color
+ * @FW_CTXT_INVALID: value used to indicate unused/invalid
+ */
+enum iwl_ctxt_id_and_color {
+ FW_CTXT_ID_POS = 0,
+ FW_CTXT_ID_MSK = 0xff << FW_CTXT_ID_POS,
+ FW_CTXT_COLOR_POS = 8,
+ FW_CTXT_COLOR_MSK = 0xff << FW_CTXT_COLOR_POS,
+ FW_CTXT_INVALID = 0xffffffff,
+};
+
+#define FW_CMD_ID_AND_COLOR(_id, _color) (((_id) << FW_CTXT_ID_POS) |\
+ ((_color) << FW_CTXT_COLOR_POS))
+
+/* Possible actions on PHYs, MACs and Bindings */
+enum iwl_ctxt_action {
+ FW_CTXT_ACTION_STUB = 0,
+ FW_CTXT_ACTION_ADD,
+ FW_CTXT_ACTION_MODIFY,
+ FW_CTXT_ACTION_REMOVE,
+ FW_CTXT_ACTION_NUM
+}; /* COMMON_CONTEXT_ACTION_API_E_VER_1 */
+
+#endif /* __iwl_fw_api_context_h__ */
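A short sketch of how the id-and-color dword is composed and decomposed with the macro and masks above; the id/color values here are examples:

/* compose (id 2, color 1): */
__le32 id_and_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(2, 1));

/* decompose, using the masks from enum iwl_ctxt_id_and_color: */
u32 dword = le32_to_cpu(id_and_color);
u32 id = (dword & FW_CTXT_ID_MSK) >> FW_CTXT_ID_POS;
u32 color = (dword & FW_CTXT_COLOR_MSK) >> FW_CTXT_COLOR_POS;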
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-d3.h b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
index d4a4c28..57f4bc2 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-d3.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
@@ -18,11 +18,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
* The full GNU General Public License is included in this distribution
* in the file called COPYING.
*
@@ -64,8 +59,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#ifndef __fw_api_d3_h__
-#define __fw_api_d3_h__
+#ifndef __iwl_fw_api_d3_h__
+#define __iwl_fw_api_d3_h__
/**
* enum iwl_d3_wakeup_flags - D3 manager wakeup flags
@@ -468,4 +463,4 @@ struct iwl_wowlan_remote_wake_config {
/* TODO: NetDetect API */
-#endif /* __fw_api_d3_h__ */
+#endif /* __iwl_fw_api_d3_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h b/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h
new file mode 100644
index 0000000..aa76dcc
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h
@@ -0,0 +1,127 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_datapath_h__
+#define __iwl_fw_api_datapath_h__
+
+/**
+ * enum iwl_data_path_subcmd_ids - data path group commands
+ */
+enum iwl_data_path_subcmd_ids {
+ /**
+ * @DQA_ENABLE_CMD: &struct iwl_dqa_enable_cmd
+ */
+ DQA_ENABLE_CMD = 0x0,
+
+ /**
+ * @UPDATE_MU_GROUPS_CMD: &struct iwl_mu_group_mgmt_cmd
+ */
+ UPDATE_MU_GROUPS_CMD = 0x1,
+
+ /**
+ * @TRIGGER_RX_QUEUES_NOTIF_CMD: &struct iwl_rxq_sync_cmd
+ */
+ TRIGGER_RX_QUEUES_NOTIF_CMD = 0x2,
+
+ /**
+ * @STA_PM_NOTIF: &struct iwl_mvm_pm_state_notification
+ */
+ STA_PM_NOTIF = 0xFD,
+
+ /**
+ * @MU_GROUP_MGMT_NOTIF: &struct iwl_mu_group_mgmt_notif
+ */
+ MU_GROUP_MGMT_NOTIF = 0xFE,
+
+ /**
+ * @RX_QUEUES_NOTIFICATION: &struct iwl_rxq_sync_notification
+ */
+ RX_QUEUES_NOTIFICATION = 0xFF,
+};
+
+/**
+ * struct iwl_mu_group_mgmt_cmd - VHT MU-MIMO group configuration
+ *
+ * @reserved: reserved
+ * @membership_status: a bitmap of MU groups
+ * @user_position: the position of the station in a group. If the station is
+ * in the group, the two bits at (group * 2) hold the position - 1
+ */
+struct iwl_mu_group_mgmt_cmd {
+ __le32 reserved;
+ __le32 membership_status[2];
+ __le32 user_position[4];
+} __packed; /* MU_GROUP_ID_MNG_TABLE_API_S_VER_1 */
+
+/**
+ * struct iwl_mu_group_mgmt_notif - VHT MU-MIMO group id notification
+ *
+ * @membership_status: a bitmap of MU groups
+ * @user_position: the position of the station in a group. If the station is
+ * in the group, the two bits at (group * 2) hold the position - 1
+ */
+struct iwl_mu_group_mgmt_notif {
+ __le32 membership_status[2];
+ __le32 user_position[4];
+} __packed; /* MU_GROUP_MNG_NTFY_API_S_VER_1 */
+
+#endif /* __iwl_fw_api_datapath_h__ */
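The membership/position encoding described above is easier to see in code. A hedged sketch of unpacking the notification, assuming one membership bit per group and a two-bit position field per group (the helper names are hypothetical):

static bool mu_in_group(const struct iwl_mu_group_mgmt_notif *notif,
			unsigned int group)
{
	/* 2 x 32-bit words cover the 64 possible MU groups */
	return le32_to_cpu(notif->membership_status[group / 32]) &
	       BIT(group % 32);
}

static u8 mu_user_position(const struct iwl_mu_group_mgmt_notif *notif,
			   unsigned int group)
{
	/* each 32-bit word holds 16 two-bit position fields */
	u32 word = le32_to_cpu(notif->user_position[group / 16]);

	return (word >> ((group % 16) * 2)) & 0x3;
}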
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
new file mode 100644
index 0000000..9f88b61
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
@@ -0,0 +1,345 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#ifndef __iwl_fw_api_debug_h__
+#define __iwl_fw_api_debug_h__
+
+/**
+ * enum iwl_debug_cmds - debug commands
+ */
+enum iwl_debug_cmds {
+ /**
+ * @LMAC_RD_WR:
+ * LMAC memory read/write, using &struct iwl_dbg_mem_access_cmd and
+ * &struct iwl_dbg_mem_access_rsp
+ */
+ LMAC_RD_WR = 0x0,
+ /**
+ * @UMAC_RD_WR:
+ * UMAC memory read/write, using &struct iwl_dbg_mem_access_cmd and
+ * &struct iwl_dbg_mem_access_rsp
+ */
+ UMAC_RD_WR = 0x1,
+ /**
+ * @MFU_ASSERT_DUMP_NTF:
+ * &struct iwl_mfu_assert_dump_notif
+ */
+ MFU_ASSERT_DUMP_NTF = 0xFE,
+};
+
+/* Error response/notification */
+enum {
+ FW_ERR_UNKNOWN_CMD = 0x0,
+ FW_ERR_INVALID_CMD_PARAM = 0x1,
+ FW_ERR_SERVICE = 0x2,
+ FW_ERR_ARC_MEMORY = 0x3,
+ FW_ERR_ARC_CODE = 0x4,
+ FW_ERR_WATCH_DOG = 0x5,
+ FW_ERR_WEP_GRP_KEY_INDX = 0x10,
+ FW_ERR_WEP_KEY_SIZE = 0x11,
+ FW_ERR_OBSOLETE_FUNC = 0x12,
+ FW_ERR_UNEXPECTED = 0xFE,
+ FW_ERR_FATAL = 0xFF
+};
+
+/**
+ * struct iwl_error_resp - FW error indication
+ * ( REPLY_ERROR = 0x2 )
+ * @error_type: one of FW_ERR_*
+ * @cmd_id: the command ID for which the error occurred
+ * @reserved1: reserved
+ * @bad_cmd_seq_num: sequence number of the erroneous command
+ * @error_service: which service created the error, applicable only if
+ * error_type = 2, otherwise 0
+ * @timestamp: TSF in usecs.
+ */
+struct iwl_error_resp {
+ __le32 error_type;
+ u8 cmd_id;
+ u8 reserved1;
+ __le16 bad_cmd_seq_num;
+ __le32 error_service;
+ __le64 timestamp;
+} __packed;
+
+#define TX_FIFO_MAX_NUM_9000 8
+#define TX_FIFO_MAX_NUM 15
+#define RX_FIFO_MAX_NUM 2
+#define TX_FIFO_INTERNAL_MAX_NUM 6
+
+/**
+ * struct iwl_shared_mem_cfg_v2 - Shared memory configuration information
+ *
+ * @shared_mem_addr: shared memory addr (pre 8000 HW set to 0x0 as MARBH is not
+ * accessible)
+ * @shared_mem_size: shared memory size
+ * @sample_buff_addr: internal sample (mon/adc) buff addr (pre 8000 HW set to
+ * 0x0 as accessible only via DBGM RDAT)
+ * @sample_buff_size: internal sample buff size
+ * @txfifo_addr: start addr of TXF0 (excluding the context table 0.5KB), (pre
+ * 8000 HW set to 0x0 as not accessible)
+ * @txfifo_size: size of TXF0 ... TXF7
+ * @rxfifo_size: RXF1, RXF2 sizes. If there is no RXF2, it'll have a value of 0
+ * @page_buff_addr: used by UMAC and performance debug (page miss analysis);
+ * when paging is not supported this should be 0
+ * @page_buff_size: size of %page_buff_addr
+ * @rxfifo_addr: Start address of rxFifo
+ * @internal_txfifo_addr: start address of internalFifo
+ * @internal_txfifo_size: internal fifos' size
+ *
+ * NOTE: on firmware that doesn't have IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG
+ * set, the last 3 members don't exist.
+ */
+struct iwl_shared_mem_cfg_v2 {
+ __le32 shared_mem_addr;
+ __le32 shared_mem_size;
+ __le32 sample_buff_addr;
+ __le32 sample_buff_size;
+ __le32 txfifo_addr;
+ __le32 txfifo_size[TX_FIFO_MAX_NUM_9000];
+ __le32 rxfifo_size[RX_FIFO_MAX_NUM];
+ __le32 page_buff_addr;
+ __le32 page_buff_size;
+ __le32 rxfifo_addr;
+ __le32 internal_txfifo_addr;
+ __le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM];
+} __packed; /* SHARED_MEM_ALLOC_API_S_VER_2 */
+
+/**
+ * struct iwl_shared_mem_lmac_cfg - LMAC shared memory configuration
+ *
+ * @txfifo_addr: start addr of TXF0 (excluding the context table 0.5KB)
+ * @txfifo_size: size of TX FIFOs
+ * @rxfifo1_addr: RXF1 addr
+ * @rxfifo1_size: RXF1 size
+ */
+struct iwl_shared_mem_lmac_cfg {
+ __le32 txfifo_addr;
+ __le32 txfifo_size[TX_FIFO_MAX_NUM];
+ __le32 rxfifo1_addr;
+ __le32 rxfifo1_size;
+
+} __packed; /* SHARED_MEM_ALLOC_LMAC_API_S_VER_1 */
+
+/**
+ * struct iwl_shared_mem_cfg - Shared memory configuration information
+ *
+ * @shared_mem_addr: shared memory address
+ * @shared_mem_size: shared memory size
+ * @sample_buff_addr: internal sample (mon/adc) buff addr
+ * @sample_buff_size: internal sample buff size
+ * @rxfifo2_addr: start addr of RXF2
+ * @rxfifo2_size: size of RXF2
+ * @page_buff_addr: used by UMAC and performance debug (page miss analysis);
+ * when paging is not supported this should be 0
+ * @page_buff_size: size of %page_buff_addr
+ * @lmac_num: number of LMACs (1 or 2)
+ * @lmac_smem: per-LMAC SMEM data
+ */
+struct iwl_shared_mem_cfg {
+ __le32 shared_mem_addr;
+ __le32 shared_mem_size;
+ __le32 sample_buff_addr;
+ __le32 sample_buff_size;
+ __le32 rxfifo2_addr;
+ __le32 rxfifo2_size;
+ __le32 page_buff_addr;
+ __le32 page_buff_size;
+ __le32 lmac_num;
+ struct iwl_shared_mem_lmac_cfg lmac_smem[2];
+} __packed; /* SHARED_MEM_ALLOC_API_S_VER_3 */
+
+/**
+ * struct iwl_mfuart_load_notif - mfuart image version & status
+ * ( MFUART_LOAD_NOTIFICATION = 0xb1 )
+ * @installed_ver: installed image version
+ * @external_ver: external image version
+ * @status: MFUART loading status
+ * @duration: MFUART loading time
+ * @image_size: MFUART image size in bytes
+ */
+struct iwl_mfuart_load_notif {
+ __le32 installed_ver;
+ __le32 external_ver;
+ __le32 status;
+ __le32 duration;
+ /* image size valid only in v2 of the command */
+ __le32 image_size;
+} __packed; /* MFU_LOADER_NTFY_API_S_VER_2 */
+
+/**
+ * struct iwl_mfu_assert_dump_notif - mfuart dump logs
+ * ( MFU_ASSERT_DUMP_NTF = 0xfe )
+ * @assert_id: mfuart assert id that caused the notification
+ * @curr_reset_num: number of asserts since uptime
+ * @index_num: current chunk id
+ * @parts_num: total number of chunks
+ * @data_size: number of data bytes sent
+ * @data: data buffer
+ */
+struct iwl_mfu_assert_dump_notif {
+ __le32 assert_id;
+ __le32 curr_reset_num;
+ __le16 index_num;
+ __le16 parts_num;
+ __le32 data_size;
+ __le32 data[0];
+} __packed; /* MFU_DUMP_ASSERT_API_S_VER_1 */
+
+/**
+ * enum iwl_mvm_marker_id - marker ids
+ *
+ * The IDs for the different types of markers to insert into the usniffer logs
+ *
+ * @MARKER_ID_TX_FRAME_LATENCY: TX latency marker
+ */
+enum iwl_mvm_marker_id {
+ MARKER_ID_TX_FRAME_LATENCY = 1,
+}; /* MARKER_ID_API_E_VER_1 */
+
+/**
+ * struct iwl_mvm_marker - mark info into the usniffer logs
+ *
+ * (MARKER_CMD = 0xcb)
+ *
+ * Inserts the UTC timestamp into the usniffer logs together with additional
+ * metadata, so the usniffer output can be parsed.
+ * In the command response the ucode will return the GP2 time.
+ *
+ * @dw_len: The number of dwords following this byte, including this byte.
+ * @marker_id: A unique marker id (&enum iwl_mvm_marker_id).
+ * @reserved: reserved.
+ * @timestamp: in milliseconds since 1970-01-01 00:00:00 UTC
+ * @metadata: additional metadata that will be written to the usniffer log
+ */
+struct iwl_mvm_marker {
+ u8 dw_len;
+ u8 marker_id;
+ __le16 reserved;
+ __le64 timestamp;
+ __le32 metadata[0];
+} __packed; /* MARKER_API_S_VER_1 */
+
+/* Operation types for the debug mem access */
+enum {
+ DEBUG_MEM_OP_READ = 0,
+ DEBUG_MEM_OP_WRITE = 1,
+ DEBUG_MEM_OP_WRITE_BYTES = 2,
+};
+
+#define DEBUG_MEM_MAX_SIZE_DWORDS 32
+
+/**
+ * struct iwl_dbg_mem_access_cmd - Request the device to read/write memory
+ * @op: DEBUG_MEM_OP_*
+ * @addr: address to read/write from/to
+ * @len: in dwords, to read/write
+ * @data: for write operations, contains the source buffer
+ */
+struct iwl_dbg_mem_access_cmd {
+ __le32 op;
+ __le32 addr;
+ __le32 len;
+ __le32 data[];
+} __packed; /* DEBUG_(U|L)MAC_RD_WR_CMD_API_S_VER_1 */
+
+/* Status responses for the debug mem access */
+enum {
+ DEBUG_MEM_STATUS_SUCCESS = 0x0,
+ DEBUG_MEM_STATUS_FAILED = 0x1,
+ DEBUG_MEM_STATUS_LOCKED = 0x2,
+ DEBUG_MEM_STATUS_HIDDEN = 0x3,
+ DEBUG_MEM_STATUS_LENGTH = 0x4,
+};
+
+/**
+ * struct iwl_dbg_mem_access_rsp - Response to debug mem commands
+ * @status: DEBUG_MEM_STATUS_*
+ * @len: read dwords (0 for write operations)
+ * @data: contains the read DWs
+ */
+struct iwl_dbg_mem_access_rsp {
+ __le32 status;
+ __le32 len;
+ __le32 data[];
+} __packed; /* DEBUG_(U|L)MAC_RD_WR_RSP_API_S_VER_1 */
+
+#define CONT_REC_COMMAND_SIZE 80
+#define ENABLE_CONT_RECORDING 0x15
+#define DISABLE_CONT_RECORDING 0x16
+
+/*
+ * struct iwl_continuous_record_mode - recording mode
+ */
+struct iwl_continuous_record_mode {
+ __le16 enable_recording;
+} __packed;
+
+/*
+ * struct iwl_continuous_record_cmd - enable/disable continuous recording
+ */
+struct iwl_continuous_record_cmd {
+ struct iwl_continuous_record_mode record_mode;
+ u8 pad[CONT_REC_COMMAND_SIZE -
+ sizeof(struct iwl_continuous_record_mode)];
+} __packed;
+
+#endif /* __iwl_fw_api_debug_h__ */
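As an illustration of the read/write flow, a hedged sketch of filling in a UMAC memory read request; the address is an example, and the response would arrive as &struct iwl_dbg_mem_access_rsp:

/* sketch only: a 4-dword read request */
struct iwl_dbg_mem_access_cmd cmd = {
	.op = cpu_to_le32(DEBUG_MEM_OP_READ),
	.addr = cpu_to_le32(0x800000),	/* example address */
	.len = cpu_to_le32(4),		/* dwords, at most DEBUG_MEM_MAX_SIZE_DWORDS */
};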
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/filter.h b/drivers/net/wireless/intel/iwlwifi/fw/api/filter.h
new file mode 100644
index 0000000..befc3b1
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/filter.h
@@ -0,0 +1,183 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_filter_h__
+#define __iwl_fw_api_filter_h__
+
+#include "fw/api/mac.h"
+
+#define MAX_PORT_ID_NUM 2
+#define MAX_MCAST_FILTERING_ADDRESSES 256
+
+/**
+ * struct iwl_mcast_filter_cmd - configure multicast filter.
+ * @filter_own: Set 1 to filter out multicast packets sent by station itself
+ * @port_id: Multicast MAC addresses array specifier. This is how the
+ * host-device interface identifies the network interface; the FW uses
+ * it as an index into an array of addresses with MAX_PORT_ID_NUM
+ * members.
+ * @count: Number of MAC addresses in the array
+ * @pass_all: Set 1 to pass all multicast packets.
+ * @bssid: current association BSSID.
+ * @reserved: reserved
+ * @addr_list: Place holder for array of MAC addresses.
+ * IMPORTANT: add padding if necessary to ensure DWORD alignment.
+ */
+struct iwl_mcast_filter_cmd {
+ u8 filter_own;
+ u8 port_id;
+ u8 count;
+ u8 pass_all;
+ u8 bssid[6];
+ u8 reserved[2];
+ u8 addr_list[0];
+} __packed; /* MCAST_FILTERING_CMD_API_S_VER_1 */
+
+#define MAX_BCAST_FILTERS 8
+#define MAX_BCAST_FILTER_ATTRS 2
+
+/**
+ * enum iwl_mvm_bcast_filter_attr_offset - written by fw for each Rx packet
+ * @BCAST_FILTER_OFFSET_PAYLOAD_START: offset is from payload start.
+ * @BCAST_FILTER_OFFSET_IP_END: offset is from ip header end (i.e.
+ * start of ip payload).
+ */
+enum iwl_mvm_bcast_filter_attr_offset {
+ BCAST_FILTER_OFFSET_PAYLOAD_START = 0,
+ BCAST_FILTER_OFFSET_IP_END = 1,
+};
+
+/**
+ * struct iwl_fw_bcast_filter_attr - broadcast filter attribute
+ * @offset_type: &enum iwl_mvm_bcast_filter_attr_offset.
+ * @offset: starting offset of this pattern.
+ * @reserved1: reserved
+ * @val: value to match - big endian (MSB is the first
+ * byte to match from offset pos).
+ * @mask: mask to match (big endian).
+ */
+struct iwl_fw_bcast_filter_attr {
+ u8 offset_type;
+ u8 offset;
+ __le16 reserved1;
+ __be32 val;
+ __be32 mask;
+} __packed; /* BCAST_FILTER_ATT_S_VER_1 */
+
+/**
+ * enum iwl_mvm_bcast_filter_frame_type - filter frame type
+ * @BCAST_FILTER_FRAME_TYPE_ALL: consider all frames.
+ * @BCAST_FILTER_FRAME_TYPE_IPV4: consider only ipv4 frames
+ */
+enum iwl_mvm_bcast_filter_frame_type {
+ BCAST_FILTER_FRAME_TYPE_ALL = 0,
+ BCAST_FILTER_FRAME_TYPE_IPV4 = 1,
+};
+
+/**
+ * struct iwl_fw_bcast_filter - broadcast filter
+ * @discard: discard frame (1) or let it pass (0).
+ * @frame_type: &enum iwl_mvm_bcast_filter_frame_type.
+ * @reserved1: reserved
+ * @num_attrs: number of valid attributes in this filter.
+ * @attrs: attributes of this filter. a filter is considered matched
+ * only when all its attributes are matched (i.e. AND relationship)
+ */
+struct iwl_fw_bcast_filter {
+ u8 discard;
+ u8 frame_type;
+ u8 num_attrs;
+ u8 reserved1;
+ struct iwl_fw_bcast_filter_attr attrs[MAX_BCAST_FILTER_ATTRS];
+} __packed; /* BCAST_FILTER_S_VER_1 */
+
+/**
+ * struct iwl_fw_bcast_mac - per-mac broadcast filtering configuration.
+ * @default_discard: default action for this mac (discard (1) / pass (0)).
+ * @reserved1: reserved
+ * @attached_filters: bitmap of relevant filters for this mac.
+ */
+struct iwl_fw_bcast_mac {
+ u8 default_discard;
+ u8 reserved1;
+ __le16 attached_filters;
+} __packed; /* BCAST_MAC_CONTEXT_S_VER_1 */
+
+/**
+ * struct iwl_bcast_filter_cmd - broadcast filtering configuration
+ * @disable: enable (0) / disable (1)
+ * @max_bcast_filters: max number of filters (MAX_BCAST_FILTERS)
+ * @max_macs: max number of macs (NUM_MAC_INDEX_DRIVER)
+ * @reserved1: reserved
+ * @filters: broadcast filters
+ * @macs: broadcast filtering configuration per-mac
+ */
+struct iwl_bcast_filter_cmd {
+ u8 disable;
+ u8 max_bcast_filters;
+ u8 max_macs;
+ u8 reserved1;
+ struct iwl_fw_bcast_filter filters[MAX_BCAST_FILTERS];
+ struct iwl_fw_bcast_mac macs[NUM_MAC_INDEX_DRIVER];
+} __packed; /* BCAST_FILTERING_HCMD_API_S_VER_1 */
+
+#endif /* __iwl_fw_api_filter_h__ */
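To show how the attribute fields fit together, a hedged sketch of a single filter that discards IPv4 frames whose first dword after the IP header matches a fixed pattern (the pattern itself is an arbitrary example):

struct iwl_fw_bcast_filter filter = {
	.discard = 1,
	.frame_type = BCAST_FILTER_FRAME_TYPE_IPV4,
	.num_attrs = 1,
	.attrs[0] = {
		.offset_type = BCAST_FILTER_OFFSET_IP_END,
		.offset = 0,
		.val = cpu_to_be32(0x11223344),		/* example pattern, big endian */
		.mask = cpu_to_be32(0xffffffff),	/* match all 32 bits */
	},
};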
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/led.h b/drivers/net/wireless/intel/iwlwifi/fw/api/led.h
new file mode 100644
index 0000000..b30c9d2
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/led.h
@@ -0,0 +1,71 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_led_h__
+#define __iwl_fw_api_led_h__
+
+/**
+ * struct iwl_led_cmd - LED switching command
+ *
+ * @status: LED status (on/off)
+ */
+struct iwl_led_cmd {
+ __le32 status;
+} __packed; /* LEDS_CMD_API_S_VER_2 */
+
+#endif /* __iwl_fw_api_led_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h
new file mode 100644
index 0000000..39c89e8
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h
@@ -0,0 +1,152 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_mac_cfg_h__
+#define __iwl_fw_api_mac_cfg_h__
+
+/**
+ * enum iwl_mac_conf_subcmd_ids - mac configuration command IDs
+ */
+enum iwl_mac_conf_subcmd_ids {
+ /**
+ * @LINK_QUALITY_MEASUREMENT_CMD: &struct iwl_link_qual_msrmnt_cmd
+ */
+ LINK_QUALITY_MEASUREMENT_CMD = 0x1,
+
+ /**
+ * @LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF:
+ * &struct iwl_link_qual_msrmnt_notif
+ */
+ LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF = 0xFE,
+
+ /**
+ * @CHANNEL_SWITCH_NOA_NOTIF: &struct iwl_channel_switch_noa_notif
+ */
+ CHANNEL_SWITCH_NOA_NOTIF = 0xFF,
+};
+
+#define LQM_NUMBER_OF_STATIONS_IN_REPORT 16
+
+enum iwl_lqm_cmd_operatrions {
+ LQM_CMD_OPERATION_START_MEASUREMENT = 0x01,
+ LQM_CMD_OPERATION_STOP_MEASUREMENT = 0x02,
+};
+
+enum iwl_lqm_status {
+ LQM_STATUS_SUCCESS = 0,
+ LQM_STATUS_TIMEOUT = 1,
+ LQM_STATUS_ABORT = 2,
+};
+
+/**
+ * struct iwl_link_qual_msrmnt_cmd - Link Quality Measurement command
+ * @cmd_operation: command operation to be performed (start or stop)
+ * as defined above.
+ * @mac_id: MAC ID the measurement applies to.
+ * @measurement_time: time of the total measurement to be performed, in uSec.
+ * @timeout: maximum time allowed until a response is sent, in uSec.
+ */
+struct iwl_link_qual_msrmnt_cmd {
+ __le32 cmd_operation;
+ __le32 mac_id;
+ __le32 measurement_time;
+ __le32 timeout;
+} __packed /* LQM_CMD_API_S_VER_1 */;
+
+/**
+ * struct iwl_link_qual_msrmnt_notif - Link Quality Measurement notification
+ *
+ * @frequent_stations_air_time: an array containing the total air time
+ * (in uSec) used by the most frequently transmitting stations.
+ * @number_of_stations: the number of unique stations included in the array
+ * (a number between 0 and 16)
+ * @total_air_time_other_stations: the total air time (uSec) used by all the
+ * stations which are not included in the above report.
+ * @time_in_measurement_window: the total time in uSec in which a measurement
+ * took place.
+ * @tx_frame_dropped: the number of TX frames dropped due to retry limit during
+ * measurement
+ * @mac_id: MAC ID the measurement applies to.
+ * @status: return status; may be one of the LQM_STATUS_* values defined above.
+ * @reserved: reserved.
+ */
+struct iwl_link_qual_msrmnt_notif {
+ __le32 frequent_stations_air_time[LQM_NUMBER_OF_STATIONS_IN_REPORT];
+ __le32 number_of_stations;
+ __le32 total_air_time_other_stations;
+ __le32 time_in_measurement_window;
+ __le32 tx_frame_dropped;
+ __le32 mac_id;
+ __le32 status;
+ u8 reserved[12];
+} __packed; /* LQM_MEASUREMENT_COMPLETE_NTF_API_S_VER1 */
+
+/**
+ * struct iwl_channel_switch_noa_notif - Channel switch NOA notification
+ *
+ * @id_and_color: ID and color of the MAC
+ */
+struct iwl_channel_switch_noa_notif {
+ __le32 id_and_color;
+} __packed; /* CHANNEL_SWITCH_START_NTFY_API_S_VER_1 */
+
+#endif /* __iwl_fw_api_mac_cfg_h__ */
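A hedged sketch of starting a measurement with the command above; the durations and MAC index are examples, and per the kerneldoc the times are expressed in microseconds:

struct iwl_link_qual_msrmnt_cmd cmd = {
	.cmd_operation = cpu_to_le32(LQM_CMD_OPERATION_START_MEASUREMENT),
	.mac_id = cpu_to_le32(0),			/* example MAC index */
	.measurement_time = cpu_to_le32(100000),	/* 100 ms */
	.timeout = cpu_to_le32(200000),			/* 200 ms */
};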
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-mac.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h
index 0c3350a..f2e31e0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-mac.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h
@@ -6,6 +6,7 @@
* GPL LICENSE SUMMARY
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -16,11 +17,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
* The full GNU General Public License is included in this distribution
* in the file called COPYING.
*
@@ -31,6 +27,7 @@
* BSD LICENSE
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2017 Intel Deutschland GmbH
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -60,8 +57,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#ifndef __fw_api_mac_h__
-#define __fw_api_mac_h__
+#ifndef __iwl_fw_api_mac_h__
+#define __iwl_fw_api_mac_h__
/*
* The first MAC indices (starting from 0) are available to the driver,
@@ -76,8 +73,6 @@
#define IWL_MVM_STATION_COUNT 16
#define IWL_MVM_INVALID_STA 0xFF
-#define IWL_MVM_TDLS_STA_COUNT 4
-
enum iwl_ac {
AC_BK,
AC_BE,
@@ -393,4 +388,22 @@ struct iwl_nonqos_seq_query_cmd {
__le16 reserved;
} __packed; /* NON_QOS_TX_COUNTER_GET_SET_API_S_VER_1 */
-#endif /* __fw_api_mac_h__ */
+/**
+ * struct iwl_missed_beacons_notif - information on missed beacons
+ * ( MISSED_BEACONS_NOTIFICATION = 0xa2 )
+ * @mac_id: interface ID
+ * @consec_missed_beacons_since_last_rx: number of consecutive missed
+ * beacons since last RX.
+ * @consec_missed_beacons: number of consecutive missed beacons
+ * @num_expected_beacons: number of expected beacons
+ * @num_recvd_beacons: number of received beacons
+ */
+struct iwl_missed_beacons_notif {
+ __le32 mac_id;
+ __le32 consec_missed_beacons_since_last_rx;
+ __le32 consec_missed_beacons;
+ __le32 num_expected_beacons;
+ __le32 num_recvd_beacons;
+} __packed; /* MISSED_BEACON_NTFY_API_S_VER_3 */
+
+#endif /* __iwl_fw_api_mac_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
new file mode 100644
index 0000000..00bc7a2
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
@@ -0,0 +1,386 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_nvm_reg_h__
+#define __iwl_fw_api_nvm_reg_h__
+
+/**
+ * enum iwl_regulatory_and_nvm_subcmd_ids - regulatory/NVM commands
+ */
+enum iwl_regulatory_and_nvm_subcmd_ids {
+ /**
+ * @NVM_ACCESS_COMPLETE: &struct iwl_nvm_access_complete_cmd
+ */
+ NVM_ACCESS_COMPLETE = 0x0,
+
+ /**
+ * @NVM_GET_INFO:
+ * Command is &struct iwl_nvm_get_info,
+ * response is &struct iwl_nvm_get_info_rsp
+ */
+ NVM_GET_INFO = 0x2,
+};
+
+/**
+ * enum iwl_nvm_access_op - NVM access opcode
+ * @IWL_NVM_READ: read NVM
+ * @IWL_NVM_WRITE: write NVM
+ */
+enum iwl_nvm_access_op {
+ IWL_NVM_READ = 0,
+ IWL_NVM_WRITE = 1,
+};
+
+/**
+ * enum iwl_nvm_access_target - target of the NVM_ACCESS_CMD
+ * @NVM_ACCESS_TARGET_CACHE: access the cache
+ * @NVM_ACCESS_TARGET_OTP: access the OTP
+ * @NVM_ACCESS_TARGET_EEPROM: access the EEPROM
+ */
+enum iwl_nvm_access_target {
+ NVM_ACCESS_TARGET_CACHE = 0,
+ NVM_ACCESS_TARGET_OTP = 1,
+ NVM_ACCESS_TARGET_EEPROM = 2,
+};
+
+/**
+ * enum iwl_nvm_section_type - section types for NVM_ACCESS_CMD
+ * @NVM_SECTION_TYPE_SW: software section
+ * @NVM_SECTION_TYPE_REGULATORY: regulatory section
+ * @NVM_SECTION_TYPE_CALIBRATION: calibration section
+ * @NVM_SECTION_TYPE_PRODUCTION: production section
+ * @NVM_SECTION_TYPE_MAC_OVERRIDE: MAC override section
+ * @NVM_SECTION_TYPE_PHY_SKU: PHY SKU section
+ * @NVM_MAX_NUM_SECTIONS: number of sections
+ */
+enum iwl_nvm_section_type {
+ NVM_SECTION_TYPE_SW = 1,
+ NVM_SECTION_TYPE_REGULATORY = 3,
+ NVM_SECTION_TYPE_CALIBRATION = 4,
+ NVM_SECTION_TYPE_PRODUCTION = 5,
+ NVM_SECTION_TYPE_MAC_OVERRIDE = 11,
+ NVM_SECTION_TYPE_PHY_SKU = 12,
+ NVM_MAX_NUM_SECTIONS = 13,
+};
+
+/**
+ * struct iwl_nvm_access_cmd - Request the device to send an NVM section
+ * @op_code: &enum iwl_nvm_access_op
+ * @target: &enum iwl_nvm_access_target
+ * @type: &enum iwl_nvm_section_type
+ * @offset: offset in bytes into the section
+ * @length: in bytes, to read/write
+ * @data: if write operation, the data to write; empty on read
+ */
+struct iwl_nvm_access_cmd {
+ u8 op_code;
+ u8 target;
+ __le16 type;
+ __le16 offset;
+ __le16 length;
+ u8 data[];
+} __packed; /* NVM_ACCESS_CMD_API_S_VER_2 */
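+
+/*
+ * Usage sketch (illustrative only, not part of the firmware API): reading
+ * 64 bytes from the start of the SW section in the OTP. A write would
+ * instead append the payload in @data.
+ */
+static inline void
+iwl_nvm_example_fill_read(struct iwl_nvm_access_cmd *cmd)
+{
+	cmd->op_code = IWL_NVM_READ;
+	cmd->target = NVM_ACCESS_TARGET_OTP;
+	cmd->type = cpu_to_le16(NVM_SECTION_TYPE_SW);
+	cmd->offset = cpu_to_le16(0);
+	cmd->length = cpu_to_le16(64);
+}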
+
+/**
+ * struct iwl_nvm_access_resp - response to NVM_ACCESS_CMD
+ * @offset: offset in bytes into the section
+ * @length: in bytes, either how much was written or read
+ * @type: &enum iwl_nvm_section_type
+ * @status: 0 for success, non-zero on failure
+ * @data: if read operation, the data returned; empty on write
+ */
+struct iwl_nvm_access_resp {
+ __le16 offset;
+ __le16 length;
+ __le16 type;
+ __le16 status;
+ u8 data[];
+} __packed; /* NVM_ACCESS_CMD_RESP_API_S_VER_2 */
+
+/**
+ * struct iwl_nvm_get_info - request to get NVM data
+ * @reserved: reserved
+ */
+struct iwl_nvm_get_info {
+ __le32 reserved;
+} __packed; /* GRP_REGULATORY_NVM_GET_INFO_CMD_S_VER_1 */
+
+/**
+ * enum iwl_nvm_info_general_flags - flags in NVM_GET_INFO resp
+ * @NVM_GENERAL_FLAGS_EMPTY_OTP: 1 if OTP is empty
+ */
+enum iwl_nvm_info_general_flags {
+ NVM_GENERAL_FLAGS_EMPTY_OTP = BIT(0),
+};
+
+/**
+ * struct iwl_nvm_get_info_general - general NVM data
+ * @flags: &enum iwl_nvm_info_general_flags; bit 0: 1 - empty OTP, 0 - non-empty
+ * @nvm_version: nvm version
+ * @board_type: board type
+ * @reserved: reserved
+ */
+struct iwl_nvm_get_info_general {
+ __le32 flags;
+ __le16 nvm_version;
+ u8 board_type;
+ u8 reserved;
+} __packed; /* GRP_REGULATORY_NVM_GET_INFO_GENERAL_S_VER_1 */
+
+/**
+ * struct iwl_nvm_get_info_sku - MAC SKU information
+ * @enable_24g: band 2.4G enabled
+ * @enable_5g: band 5G enabled
+ * @enable_11n: 11n enabled
+ * @enable_11ac: 11ac enabled
+ * @mimo_disable: MIMO disabled
+ * @ext_crypto: Extended crypto enabled
+ */
+struct iwl_nvm_get_info_sku {
+ __le32 enable_24g;
+ __le32 enable_5g;
+ __le32 enable_11n;
+ __le32 enable_11ac;
+ __le32 mimo_disable;
+ __le32 ext_crypto;
+} __packed; /* GRP_REGULATORY_NVM_GET_INFO_MAC_SKU_SECTION_S_VER_1 */
+
+/**
+ * struct iwl_nvm_get_info_phy - phy information
+ * @tx_chains: BIT 0 chain A, BIT 1 chain B
+ * @rx_chains: BIT 0 chain A, BIT 1 chain B
+ */
+struct iwl_nvm_get_info_phy {
+ __le32 tx_chains;
+ __le32 rx_chains;
+} __packed; /* GRP_REGULATORY_NVM_GET_INFO_PHY_SKU_SECTION_S_VER_1 */
+
+#define IWL_NUM_CHANNELS (51)
+
+/**
+ * struct iwl_nvm_get_info_regulatory - regulatory information
+ * @lar_enabled: is LAR enabled
+ * @channel_profile: regulatory data of this channel
+ * @reserved: reserved
+ */
+struct iwl_nvm_get_info_regulatory {
+ __le32 lar_enabled;
+ __le16 channel_profile[IWL_NUM_CHANNELS];
+ __le16 reserved;
+} __packed; /* GRP_REGULATORY_NVM_GET_INFO_REGULATORY_S_VER_1 */
+
+/**
+ * struct iwl_nvm_get_info_rsp - response to get NVM data
+ * @general: general NVM data
+ * @mac_sku: data relating to MAC sku
+ * @phy_sku: data relating to PHY sku
+ * @regulatory: regulatory data
+ */
+struct iwl_nvm_get_info_rsp {
+ struct iwl_nvm_get_info_general general;
+ struct iwl_nvm_get_info_sku mac_sku;
+ struct iwl_nvm_get_info_phy phy_sku;
+ struct iwl_nvm_get_info_regulatory regulatory;
+} __packed; /* GRP_REGULATORY_NVM_GET_INFO_CMD_RSP_S_VER_1 */
+
+/**
+ * struct iwl_nvm_access_complete_cmd - NVM_ACCESS commands are completed
+ * @reserved: reserved
+ */
+struct iwl_nvm_access_complete_cmd {
+ __le32 reserved;
+} __packed; /* NVM_ACCESS_COMPLETE_CMD_API_S_VER_1 */
+
+/**
+ * struct iwl_mcc_update_cmd_v1 - Request the device to update geographic
+ * regulatory profile according to the given MCC (Mobile Country Code).
+ * The MCC is a two-letter code, ASCII uppercase [A-Z], or '00' for the
+ * world domain. The 'ZZ' MCC is used to switch to the NVM default profile;
+ * in this case, the MCC in the cmd response will be the relevant MCC in
+ * the NVM.
+ * @mcc: given mobile country code
+ * @source_id: the source from where we got the MCC, see &enum iwl_mcc_source
+ * @reserved: reserved for alignment
+ */
+struct iwl_mcc_update_cmd_v1 {
+ __le16 mcc;
+ u8 source_id;
+ u8 reserved;
+} __packed; /* LAR_UPDATE_MCC_CMD_API_S_VER_1 */
+
+/**
+ * struct iwl_mcc_update_cmd - Request the device to update geographic
+ * regulatory profile according to the given MCC (Mobile Country Code).
+ * The MCC is a two-letter code, ASCII uppercase [A-Z], or '00' for the
+ * world domain. The 'ZZ' MCC is used to switch to the NVM default profile;
+ * in this case, the MCC in the cmd response will be the relevant MCC in
+ * the NVM.
+ * @mcc: given mobile country code
+ * @source_id: the source from where we got the MCC, see &enum iwl_mcc_source
+ * @reserved: reserved for alignment
+ * @key: integrity key for MCC API OEM testing
+ * @reserved2: reserved
+ */
+struct iwl_mcc_update_cmd {
+ __le16 mcc;
+ u8 source_id;
+ u8 reserved;
+ __le32 key;
+ u8 reserved2[20];
+} __packed; /* LAR_UPDATE_MCC_CMD_API_S_VER_2 */
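+
+/*
+ * Usage sketch (illustrative only, not part of the firmware API): requesting
+ * the "US" regulatory profile. Packing the two ASCII letters with the first
+ * one in the high byte is an assumption of this sketch.
+ */
+static inline void
+iwl_mcc_example_fill_update(struct iwl_mcc_update_cmd *cmd)
+{
+	cmd->mcc = cpu_to_le16(('U' << 8) | 'S');
+	cmd->source_id = MCC_SOURCE_WIFI;
+}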
+
+/**
+ * struct iwl_mcc_update_resp_v1 - response to MCC_UPDATE_CMD.
+ * Contains the new channel control profile map, if changed, and the new MCC
+ * (mobile country code).
+ * The new MCC may be different from what was requested in MCC_UPDATE_CMD.
+ * @status: see &enum iwl_mcc_update_status
+ * @mcc: the new applied MCC
+ * @cap: capabilities for all channels which match the MCC
+ * @source_id: the MCC source, see &enum iwl_mcc_source
+ * @n_channels: number of channels in @channels (may be 14, 39, 50 or 51
+ *	channels, depending on platform)
+ * @channels: channel control data map, DWORD for each channel. Only the first
+ *	16 bits are used.
+ */
+struct iwl_mcc_update_resp_v1 {
+ __le32 status;
+ __le16 mcc;
+ u8 cap;
+ u8 source_id;
+ __le32 n_channels;
+	__le32 channels[];
+} __packed; /* LAR_UPDATE_MCC_CMD_RESP_S_VER_1 */
+
+/**
+ * struct iwl_mcc_update_resp - response to MCC_UPDATE_CMD.
+ * Contains the new channel control profile map, if changed, and the new MCC
+ * (mobile country code).
+ * The new MCC may be different from what was requested in MCC_UPDATE_CMD.
+ * @status: see &enum iwl_mcc_update_status
+ * @mcc: the new applied MCC
+ * @cap: capabilities for all channels which match the MCC
+ * @source_id: the MCC source, see &enum iwl_mcc_source
+ * @time: time elapsed since the MCC test start (in units of 30 seconds)
+ * @reserved: reserved.
+ * @n_channels: number of channels in @channels (may be 14, 39, 50 or 51
+ *	channels, depending on platform)
+ * @channels: channel control data map, DWORD for each channel. Only the first
+ *	16 bits are used.
+ */
+struct iwl_mcc_update_resp {
+ __le32 status;
+ __le16 mcc;
+ u8 cap;
+ u8 source_id;
+ __le16 time;
+ __le16 reserved;
+ __le32 n_channels;
+	__le32 channels[];
+} __packed; /* LAR_UPDATE_MCC_CMD_RESP_S_VER_2 */
+
+/**
+ * struct iwl_mcc_chub_notif - chub notifies of mcc change
+ * (MCC_CHUB_UPDATE_CMD = 0xc9)
+ * The Chub (Communication Hub, CommsHUB) is a HW component that connects to
+ * the cellular and connectivity cores, gets updates of the MCC, and
+ * notifies the uCode directly of any MCC change.
+ * The uCode requests the driver to request the device to update the
+ * geographic regulatory profile according to the given MCC (Mobile Country
+ * Code). The MCC is a two-letter code, ASCII uppercase [A-Z], or '00' for
+ * the world domain. The 'ZZ' MCC is used to switch to the NVM default
+ * profile; in this case, the MCC in the cmd response will be the relevant
+ * MCC in the NVM.
+ * @mcc: given mobile country code
+ * @source_id: identity of the change originator, see &enum iwl_mcc_source
+ * @reserved1: reserved for alignment
+ */
+struct iwl_mcc_chub_notif {
+ __le16 mcc;
+ u8 source_id;
+ u8 reserved1;
+} __packed; /* LAR_MCC_NOTIFY_S */
+
+enum iwl_mcc_update_status {
+ MCC_RESP_NEW_CHAN_PROFILE,
+ MCC_RESP_SAME_CHAN_PROFILE,
+ MCC_RESP_INVALID,
+ MCC_RESP_NVM_DISABLED,
+ MCC_RESP_ILLEGAL,
+ MCC_RESP_LOW_PRIORITY,
+ MCC_RESP_TEST_MODE_ACTIVE,
+ MCC_RESP_TEST_MODE_NOT_ACTIVE,
+ MCC_RESP_TEST_MODE_DENIAL_OF_SERVICE,
+};
+
+enum iwl_mcc_source {
+ MCC_SOURCE_OLD_FW = 0,
+ MCC_SOURCE_ME = 1,
+ MCC_SOURCE_BIOS = 2,
+ MCC_SOURCE_3G_LTE_HOST = 3,
+ MCC_SOURCE_3G_LTE_DEVICE = 4,
+ MCC_SOURCE_WIFI = 5,
+ MCC_SOURCE_RESERVED = 6,
+ MCC_SOURCE_DEFAULT = 7,
+ MCC_SOURCE_UNINITIALIZED = 8,
+ MCC_SOURCE_MCC_API = 9,
+ MCC_SOURCE_GET_CURRENT = 0x10,
+ MCC_SOURCE_GETTING_MCC_TEST_MODE = 0x11,
+};
+
+#endif /* __iwl_fw_api_nvm_reg_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/offload.h b/drivers/net/wireless/intel/iwlwifi/fw/api/offload.h
new file mode 100644
index 0000000..53cab99
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/offload.h
@@ -0,0 +1,101 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_offload_h__
+#define __iwl_fw_api_offload_h__
+
+/**
+ * enum iwl_prot_offload_subcmd_ids - protocol offload commands
+ */
+enum iwl_prot_offload_subcmd_ids {
+ /**
+ * @STORED_BEACON_NTF: &struct iwl_stored_beacon_notif
+ */
+ STORED_BEACON_NTF = 0xFF,
+};
+
+#define MAX_STORED_BEACON_SIZE 600
+
+/**
+ * struct iwl_stored_beacon_notif - Stored beacon notification
+ *
+ * @system_time: system time on air rise
+ * @tsf: TSF on air rise
+ * @beacon_timestamp: beacon on air rise
+ * @band: band, matches &RX_RES_PHY_FLAGS_BAND_24 definition
+ * @channel: channel this beacon was received on
+ * @rates: rate in ucode internal format
+ * @byte_count: frame's byte count
+ * @data: beacon data, length in @byte_count
+ */
+struct iwl_stored_beacon_notif {
+ __le32 system_time;
+ __le64 tsf;
+ __le32 beacon_timestamp;
+ __le16 band;
+ __le16 channel;
+ __le32 rates;
+ __le32 byte_count;
+ u8 data[MAX_STORED_BEACON_SIZE];
+} __packed; /* WOWLAN_STROED_BEACON_INFO_S_VER_2 */
+
+#endif /* __iwl_fw_api_offload_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/paging.h b/drivers/net/wireless/intel/iwlwifi/fw/api/paging.h
new file mode 100644
index 0000000..e76f9cd
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/paging.h
@@ -0,0 +1,108 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#ifndef __iwl_fw_api_paging_h__
+#define __iwl_fw_api_paging_h__
+
+#define NUM_OF_FW_PAGING_BLOCKS 33 /* 32 for data and 1 block for CSS */
+
+/**
+ * struct iwl_fw_paging_cmd - paging layout
+ *
+ * Send the driver's paging layout to the FW.
+ *
+ * @flags: various flags for the command
+ * @block_size: the block size, as a power-of-2 exponent
+ * @block_num: number of blocks specified in the command
+ * @device_phy_addr: addresses of the paging blocks, as seen from the
+ *	device side
+ */
+struct iwl_fw_paging_cmd {
+ __le32 flags;
+ __le32 block_size;
+ __le32 block_num;
+ __le32 device_phy_addr[NUM_OF_FW_PAGING_BLOCKS];
+} __packed; /* FW_PAGING_BLOCK_CMD_API_S_VER_1 */
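+
+/*
+ * Usage sketch (illustrative only, not part of the firmware API): a layout
+ * of @blk_cnt blocks of 2^@blk_exp bytes each. The device-side addresses
+ * are assumed to be precomputed by the caller; flag bits are left out.
+ */
+static inline void
+iwl_paging_example_fill(struct iwl_fw_paging_cmd *cmd, u32 blk_exp,
+			u32 blk_cnt, const u32 *dev_addrs)
+{
+	u32 i;
+
+	cmd->flags = cpu_to_le32(0);
+	cmd->block_size = cpu_to_le32(blk_exp);
+	cmd->block_num = cpu_to_le32(blk_cnt);
+	for (i = 0; i < blk_cnt && i < NUM_OF_FW_PAGING_BLOCKS; i++)
+		cmd->device_phy_addr[i] = cpu_to_le32(dev_addrs[i]);
+}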
+
+/**
+ * enum iwl_fw_item_id - FW item IDs
+ *
+ * @IWL_FW_ITEM_ID_PAGING: Address of the pages that the FW will
+ *	upload / download
+ */
+enum iwl_fw_item_id {
+ IWL_FW_ITEM_ID_PAGING = 3,
+};
+
+/**
+ * struct iwl_fw_get_item_cmd - get an item from the fw
+ * @item_id: ID of item to obtain, see &enum iwl_fw_item_id
+ */
+struct iwl_fw_get_item_cmd {
+ __le32 item_id;
+} __packed; /* FW_GET_ITEM_CMD_API_S_VER_1 */
+
+/**
+ * struct iwl_fw_get_item_resp - response to get an item from the FW
+ * @item_id: ID of the returned item, see &enum iwl_fw_item_id
+ * @item_byte_cnt: size of the item in bytes
+ * @item_val: value of the item
+ */
+struct iwl_fw_get_item_resp {
+	__le32 item_id;
+	__le32 item_byte_cnt;
+	__le32 item_val;
+} __packed; /* FW_GET_ITEM_RSP_S_VER_1 */
+
+#endif /* __iwl_fw_api_paging_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h b/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h
new file mode 100644
index 0000000..45f61c6
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h
@@ -0,0 +1,164 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_phy_ctxt_h__
+#define __iwl_fw_api_phy_ctxt_h__
+
+/* Supported bands */
+#define PHY_BAND_5 (0)
+#define PHY_BAND_24 (1)
+
+/* Supported channel width, varies if there is VHT support */
+#define PHY_VHT_CHANNEL_MODE20 (0x0)
+#define PHY_VHT_CHANNEL_MODE40 (0x1)
+#define PHY_VHT_CHANNEL_MODE80 (0x2)
+#define PHY_VHT_CHANNEL_MODE160 (0x3)
+
+/*
+ * Control channel position:
+ * For legacy, a set bit means upper channel, otherwise lower.
+ * For VHT - bit-2 marks if the control is lower/upper relative to center-freq,
+ * bits-1:0 mark the distance from the center freq. For 20 MHz, the offset is 0.
+ * center_freq
+ * |
+ * 40Mhz |_______|_______|
+ * 80Mhz |_______|_______|_______|_______|
+ * 160Mhz |_______|_______|_______|_______|_______|_______|_______|_______|
+ * code 011 010 001 000 | 100 101 110 111
+ */
+#define PHY_VHT_CTRL_POS_1_BELOW (0x0)
+#define PHY_VHT_CTRL_POS_2_BELOW (0x1)
+#define PHY_VHT_CTRL_POS_3_BELOW (0x2)
+#define PHY_VHT_CTRL_POS_4_BELOW (0x3)
+#define PHY_VHT_CTRL_POS_1_ABOVE (0x4)
+#define PHY_VHT_CTRL_POS_2_ABOVE (0x5)
+#define PHY_VHT_CTRL_POS_3_ABOVE (0x6)
+#define PHY_VHT_CTRL_POS_4_ABOVE (0x7)
+
+/**
+ * struct iwl_fw_channel_info - channel information
+ * @band: PHY_BAND_*
+ * @channel: channel number
+ * @width: PHY_[VHT|LEGACY]_CHANNEL_*
+ * @ctrl_pos: PHY_[VHT|LEGACY]_CTRL_*
+ */
+struct iwl_fw_channel_info {
+ u8 band;
+ u8 channel;
+ u8 width;
+ u8 ctrl_pos;
+} __packed;
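+
+/*
+ * Usage sketch (illustrative only): 40MHz on 5GHz channel 36, with the
+ * control channel one position below the center frequency.
+ */
+static inline void
+iwl_example_fill_channel_info(struct iwl_fw_channel_info *ci)
+{
+	ci->band = PHY_BAND_5;
+	ci->channel = 36;
+	ci->width = PHY_VHT_CHANNEL_MODE40;
+	ci->ctrl_pos = PHY_VHT_CTRL_POS_1_BELOW;
+}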
+
+#define PHY_RX_CHAIN_DRIVER_FORCE_POS (0)
+#define PHY_RX_CHAIN_DRIVER_FORCE_MSK \
+ (0x1 << PHY_RX_CHAIN_DRIVER_FORCE_POS)
+#define PHY_RX_CHAIN_VALID_POS (1)
+#define PHY_RX_CHAIN_VALID_MSK \
+ (0x7 << PHY_RX_CHAIN_VALID_POS)
+#define PHY_RX_CHAIN_FORCE_SEL_POS (4)
+#define PHY_RX_CHAIN_FORCE_SEL_MSK \
+ (0x7 << PHY_RX_CHAIN_FORCE_SEL_POS)
+#define PHY_RX_CHAIN_FORCE_MIMO_SEL_POS (7)
+#define PHY_RX_CHAIN_FORCE_MIMO_SEL_MSK \
+ (0x7 << PHY_RX_CHAIN_FORCE_MIMO_SEL_POS)
+#define PHY_RX_CHAIN_CNT_POS (10)
+#define PHY_RX_CHAIN_CNT_MSK \
+ (0x3 << PHY_RX_CHAIN_CNT_POS)
+#define PHY_RX_CHAIN_MIMO_CNT_POS (12)
+#define PHY_RX_CHAIN_MIMO_CNT_MSK \
+ (0x3 << PHY_RX_CHAIN_MIMO_CNT_POS)
+#define PHY_RX_CHAIN_MIMO_FORCE_POS (14)
+#define PHY_RX_CHAIN_MIMO_FORCE_MSK \
+ (0x1 << PHY_RX_CHAIN_MIMO_FORCE_POS)
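+
+/*
+ * Usage sketch (illustrative only): composing an RX chain mask with chains
+ * A and B valid, one active non-MIMO chain and one active MIMO chain.
+ */
+static inline u32 iwl_example_rx_chain_info(void)
+{
+	return (0x3 << PHY_RX_CHAIN_VALID_POS) |
+	       (1 << PHY_RX_CHAIN_CNT_POS) |
+	       (1 << PHY_RX_CHAIN_MIMO_CNT_POS);
+}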
+
+/* TODO: fix the value, make it depend on firmware at runtime? */
+#define NUM_PHY_CTX 3
+
+/* TODO: complete missing documentation */
+/**
+ * struct iwl_phy_context_cmd - config of the PHY context
+ * ( PHY_CONTEXT_CMD = 0x8 )
+ * @id_and_color: ID and color of the relevant Binding
+ * @action: action to perform, one of FW_CTXT_ACTION_*
+ * @apply_time: 0 means immediate apply and context switch.
+ *	Any other value means: apply new params after X usecs
+ * @tx_param_color: ???
+ * @ci: channel info
+ * @txchain_info: ???
+ * @rxchain_info: ???
+ * @acquisition_data: ???
+ * @dsp_cfg_flags: set to 0
+ */
+struct iwl_phy_context_cmd {
+ /* COMMON_INDEX_HDR_API_S_VER_1 */
+ __le32 id_and_color;
+ __le32 action;
+ /* PHY_CONTEXT_DATA_API_S_VER_1 */
+ __le32 apply_time;
+ __le32 tx_param_color;
+ struct iwl_fw_channel_info ci;
+ __le32 txchain_info;
+ __le32 rxchain_info;
+ __le32 acquisition_data;
+ __le32 dsp_cfg_flags;
+} __packed; /* PHY_CONTEXT_CMD_API_VER_1 */
+
+#endif /* __iwl_fw_api_phy_ctxt_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/phy.h b/drivers/net/wireless/intel/iwlwifi/fw/api/phy.h
new file mode 100644
index 0000000..9cc59e0
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/phy.h
@@ -0,0 +1,258 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_phy_h__
+#define __iwl_fw_api_phy_h__
+
+/**
+ * enum iwl_phy_ops_subcmd_ids - PHY group commands
+ */
+enum iwl_phy_ops_subcmd_ids {
+ /**
+ * @CMD_DTS_MEASUREMENT_TRIGGER_WIDE:
+ * Uses either &struct iwl_dts_measurement_cmd or
+ * &struct iwl_ext_dts_measurement_cmd
+ */
+ CMD_DTS_MEASUREMENT_TRIGGER_WIDE = 0x0,
+
+ /**
+ * @CTDP_CONFIG_CMD: &struct iwl_mvm_ctdp_cmd
+ */
+ CTDP_CONFIG_CMD = 0x03,
+
+ /**
+ * @TEMP_REPORTING_THRESHOLDS_CMD: &struct temp_report_ths_cmd
+ */
+ TEMP_REPORTING_THRESHOLDS_CMD = 0x04,
+
+ /**
+ * @GEO_TX_POWER_LIMIT: &struct iwl_geo_tx_power_profiles_cmd
+ */
+ GEO_TX_POWER_LIMIT = 0x05,
+
+ /**
+ * @CT_KILL_NOTIFICATION: &struct ct_kill_notif
+ */
+ CT_KILL_NOTIFICATION = 0xFE,
+
+ /**
+ * @DTS_MEASUREMENT_NOTIF_WIDE:
+ * &struct iwl_dts_measurement_notif_v1 or
+ * &struct iwl_dts_measurement_notif_v2
+ */
+ DTS_MEASUREMENT_NOTIF_WIDE = 0xFF,
+};
+
+/* DTS measurements */
+
+enum iwl_dts_measurement_flags {
+ DTS_TRIGGER_CMD_FLAGS_TEMP = BIT(0),
+ DTS_TRIGGER_CMD_FLAGS_VOLT = BIT(1),
+};
+
+/**
+ * struct iwl_dts_measurement_cmd - request DTS temp and/or voltage measurements
+ *
+ * @flags: indicates which measurements we want as specified in
+ * &enum iwl_dts_measurement_flags
+ */
+struct iwl_dts_measurement_cmd {
+ __le32 flags;
+} __packed; /* TEMPERATURE_MEASUREMENT_TRIGGER_CMD_S */
+
+/**
+ * enum iwl_dts_control_measurement_mode - DTS measurement type
+ * @DTS_AUTOMATIC: Automatic mode (full SW control). Provide temperature read
+ *	back (latest value, not waiting for a new value). Use automatic
+ *	SW DTS configuration.
+ * @DTS_REQUEST_READ: Request DTS read. Configure DTS with manual settings,
+ *	trigger DTS reading and provide the read-back temperature
+ *	when available.
+ * @DTS_OVER_WRITE: over-write the DTS temperatures in the SW until next read
+ * @DTS_DIRECT_WITHOUT_MEASURE: DTS returns its latest temperature result,
+ *	without measurement trigger.
+ */
+enum iwl_dts_control_measurement_mode {
+ DTS_AUTOMATIC = 0,
+ DTS_REQUEST_READ = 1,
+ DTS_OVER_WRITE = 2,
+ DTS_DIRECT_WITHOUT_MEASURE = 3,
+};
+
+/**
+ * enum iwl_dts_used - DTS to use or used for measurement in the DTS request
+ * @DTS_USE_TOP: Top
+ * @DTS_USE_CHAIN_A: chain A
+ * @DTS_USE_CHAIN_B: chain B
+ * @DTS_USE_CHAIN_C: chain C
+ * @XTAL_TEMPERATURE: read temperature from xtal
+ */
+enum iwl_dts_used {
+ DTS_USE_TOP = 0,
+ DTS_USE_CHAIN_A = 1,
+ DTS_USE_CHAIN_B = 2,
+ DTS_USE_CHAIN_C = 3,
+ XTAL_TEMPERATURE = 4,
+};
+
+/**
+ * enum iwl_dts_bit_mode - bit mode to use in DTS request read mode
+ * @DTS_BIT6_MODE: 6-bit mode
+ * @DTS_BIT8_MODE: 8-bit mode
+ */
+enum iwl_dts_bit_mode {
+ DTS_BIT6_MODE = 0,
+ DTS_BIT8_MODE = 1,
+};
+
+/**
+ * struct iwl_ext_dts_measurement_cmd - request extended DTS temp measurements
+ * @control_mode: see &enum iwl_dts_control_measurement_mode
+ * @temperature: used when the over-write DTS mode is selected
+ * @sensor: set temperature sensor to use. See &enum iwl_dts_used
+ * @avg_factor: averaging factor for the DTS in request-read mode
+ * @bit_mode: value defines the DTS bit mode to use. See &enum iwl_dts_bit_mode
+ * @step_duration: step duration for the DTS
+ */
+struct iwl_ext_dts_measurement_cmd {
+ __le32 control_mode;
+ __le32 temperature;
+ __le32 sensor;
+ __le32 avg_factor;
+ __le32 bit_mode;
+ __le32 step_duration;
+} __packed; /* XVT_FW_DTS_CONTROL_MEASUREMENT_REQUEST_API_S */
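+
+/*
+ * Usage sketch (illustrative only, not part of the firmware API): a manual
+ * 8-bit read from chain A. The averaging factor and step duration are
+ * arbitrary example values.
+ */
+static inline void
+iwl_example_fill_ext_dts(struct iwl_ext_dts_measurement_cmd *cmd)
+{
+	cmd->control_mode = cpu_to_le32(DTS_REQUEST_READ);
+	cmd->temperature = cpu_to_le32(0);	/* unused outside over-write mode */
+	cmd->sensor = cpu_to_le32(DTS_USE_CHAIN_A);
+	cmd->avg_factor = cpu_to_le32(4);
+	cmd->bit_mode = cpu_to_le32(DTS_BIT8_MODE);
+	cmd->step_duration = cpu_to_le32(2);
+}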
+
+/**
+ * struct iwl_dts_measurement_notif_v1 - measurements notification
+ *
+ * @temp: the measured temperature
+ * @voltage: the measured voltage
+ */
+struct iwl_dts_measurement_notif_v1 {
+ __le32 temp;
+ __le32 voltage;
+} __packed; /* TEMPERATURE_MEASUREMENT_TRIGGER_NTFY_S_VER_1*/
+
+/**
+ * struct iwl_dts_measurement_notif_v2 - measurements notification
+ *
+ * @temp: the measured temperature
+ * @voltage: the measured voltage
+ * @threshold_idx: the trip index that was crossed
+ */
+struct iwl_dts_measurement_notif_v2 {
+ __le32 temp;
+ __le32 voltage;
+ __le32 threshold_idx;
+} __packed; /* TEMPERATURE_MEASUREMENT_TRIGGER_NTFY_S_VER_2 */
+
+/**
+ * struct ct_kill_notif - CT-kill entry notification
+ *
+ * @temperature: the current temperature in Celsius
+ * @reserved: reserved
+ */
+struct ct_kill_notif {
+ __le16 temperature;
+ __le16 reserved;
+} __packed; /* GRP_PHY_CT_KILL_NTF */
+
+/**
+ * enum iwl_mvm_ctdp_cmd_operation - cTDP command operations
+ * @CTDP_CMD_OPERATION_START: update the current budget
+ * @CTDP_CMD_OPERATION_STOP: stop cTDP
+ * @CTDP_CMD_OPERATION_REPORT: get the average budget
+ */
+enum iwl_mvm_ctdp_cmd_operation {
+	CTDP_CMD_OPERATION_START = 0x1,
+	CTDP_CMD_OPERATION_STOP = 0x2,
+	CTDP_CMD_OPERATION_REPORT = 0x4,
+}; /* CTDP_CMD_OPERATION_TYPE_E */
+
+/**
+ * struct iwl_mvm_ctdp_cmd - track and manage the FW power consumption budget
+ *
+ * @operation: see &enum iwl_mvm_ctdp_cmd_operation
+ * @budget: the budget in milliwatt
+ * @window_size: defined in API but not used
+ */
+struct iwl_mvm_ctdp_cmd {
+ __le32 operation;
+ __le32 budget;
+ __le32 window_size;
+} __packed;
+
+#define IWL_MAX_DTS_TRIPS 8
+
+/**
+ * struct temp_report_ths_cmd - set temperature thresholds
+ *
+ * @num_temps: number of temperature thresholds passed
+ * @thresholds: array with the thresholds to be configured
+ */
+struct temp_report_ths_cmd {
+ __le32 num_temps;
+ __le16 thresholds[IWL_MAX_DTS_TRIPS];
+} __packed; /* GRP_PHY_TEMP_REPORTING_THRESHOLDS_CMD */
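+
+/*
+ * Usage sketch (illustrative only): two reporting thresholds, at 75 and 85
+ * degrees. The unit is assumed to be Celsius, matching &struct ct_kill_notif.
+ */
+static inline void
+iwl_example_fill_temp_report_ths(struct temp_report_ths_cmd *cmd)
+{
+	cmd->num_temps = cpu_to_le32(2);
+	cmd->thresholds[0] = cpu_to_le16(75);
+	cmd->thresholds[1] = cpu_to_le16(85);
+}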
+
+#endif /* __iwl_fw_api_phy_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h
index 7da57ef..a06afb5 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h
@@ -18,11 +18,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
* The full GNU General Public License is included in this distribution
* in the file called COPYING.
*
@@ -65,8 +60,8 @@
*
*****************************************************************************/
-#ifndef __fw_api_power_h__
-#define __fw_api_power_h__
+#ifndef __iwl_fw_api_power_h__
+#define __iwl_fw_api_power_h__
/* Power Management Commands, Responses, Notifications */
@@ -224,7 +219,7 @@ struct iwl_device_power_cmd {
/**
* struct iwl_mac_power_cmd - New power command containing uAPSD support
* MAC_PM_POWER_TABLE = 0xA9 (command, has simple generic response)
- * @id_and_color: MAC contex identifier, &enum iwl_mvm_id_and_color
+ * @id_and_color: MAC context identifier, &enum iwl_ctxt_id_and_color
* @flags: Power table command flags from POWER_FLAGS_*
* @keep_alive_seconds: Keep alive period in seconds. Default - 25 sec.
* Minimum allowed:- 3 * DTIM. Keep alive period must be
@@ -528,4 +523,4 @@ struct iwl_beacon_filter_cmd {
#define IWL_BF_CMD_CONFIG_DEFAULTS IWL_BF_CMD_CONFIG(_DEFAULT)
#define IWL_BF_CMD_CONFIG_D0I3 IWL_BF_CMD_CONFIG(_D0I3)
-#endif
+#endif /* __iwl_fw_api_power_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rs.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h
index bdf1228..a13fd8a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rs.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h
@@ -17,11 +17,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
* The full GNU General Public License is included in this distribution
* in the file called COPYING.
*
@@ -62,10 +57,10 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#ifndef __fw_api_rs_h__
-#define __fw_api_rs_h__
+#ifndef __iwl_fw_api_rs_h__
+#define __iwl_fw_api_rs_h__
-#include "fw-api-mac.h"
+#include "mac.h"
/*
* These serve as indexes into
@@ -410,4 +405,4 @@ struct iwl_lq_cmd {
__le32 ss_params;
}; /* LINK_QUALITY_CMD_API_S_VER_1 */
-#endif /* __fw_api_rs_h__ */
+#endif /* __iwl_fw_api_rs_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h
index 59038ad..e7565f3 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-rx.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h
@@ -18,11 +18,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
* The full GNU General Public License is included in this distribution
* in the file called COPYING.
*
@@ -65,8 +60,8 @@
*
*****************************************************************************/
-#ifndef __fw_api_rx_h__
-#define __fw_api_rx_h__
+#ifndef __iwl_fw_api_rx_h__
+#define __iwl_fw_api_rx_h__
/* API for pre-9000 hardware */
@@ -571,4 +566,24 @@ struct iwl_mvm_pm_state_notification {
__le16 reserved;
} __packed; /* PEER_PM_NTFY_API_S_VER_1 */
-#endif /* __fw_api_rx_h__ */
+#define BA_WINDOW_STREAMS_MAX 16
+#define BA_WINDOW_STATUS_TID_MSK 0x000F
+#define BA_WINDOW_STATUS_STA_ID_POS 4
+#define BA_WINDOW_STATUS_STA_ID_MSK 0x01F0
+#define BA_WINDOW_STATUS_VALID_MSK BIT(9)
+
+/**
+ * struct iwl_ba_window_status_notif - reordering window's status notification
+ * @bitmap: bitmap of received frames [start_seq_num + 0]..[start_seq_num + 63]
+ * @ra_tid: bits 3:0 - TID, bits 8:4 - STA_ID, bit 9 - valid
+ * @start_seq_num: the start sequence number of the bitmap
+ * @mpdu_rx_count: the number of received MPDUs since entering D0i3
+ */
+struct iwl_ba_window_status_notif {
+ __le64 bitmap[BA_WINDOW_STREAMS_MAX];
+ __le16 ra_tid[BA_WINDOW_STREAMS_MAX];
+ __le32 start_seq_num[BA_WINDOW_STREAMS_MAX];
+ __le16 mpdu_rx_count[BA_WINDOW_STREAMS_MAX];
+} __packed; /* BA_WINDOW_STATUS_NTFY_API_S_VER_1 */
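+
+/*
+ * Usage sketch (illustrative only): unpacking one @ra_tid entry of the
+ * notification into its TID, station ID and valid bit.
+ */
+static inline void
+iwl_example_parse_ba_ra_tid(__le16 ra_tid, u8 *tid, u8 *sta_id, bool *valid)
+{
+	u16 v = le16_to_cpu(ra_tid);
+
+	*tid = v & BA_WINDOW_STATUS_TID_MSK;
+	*sta_id = (v & BA_WINDOW_STATUS_STA_ID_MSK) >>
+		  BA_WINDOW_STATUS_STA_ID_POS;
+	*valid = !!(v & BA_WINDOW_STATUS_VALID_MSK);
+}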
+
+#endif /* __iwl_fw_api_rx_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
index 1cd7cc0..5a40092 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
@@ -18,11 +18,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
* The full GNU General Public License is included in this distribution
* in the file called COPYING.
*
@@ -65,8 +60,8 @@
*
*****************************************************************************/
-#ifndef __fw_api_scan_h__
-#define __fw_api_scan_h__
+#ifndef __iwl_fw_api_scan_h__
+#define __iwl_fw_api_scan_h__
/* Scan Commands, Responses, Notifications */
@@ -789,4 +784,4 @@ struct iwl_umac_scan_iter_complete_notif {
struct iwl_scan_results_notif results[];
} __packed; /* SCAN_ITER_COMPLETE_NTF_UMAC_API_S_VER_2 */
-#endif
+#endif /* __iwl_fw_api_scan_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/sf.h b/drivers/net/wireless/intel/iwlwifi/fw/api/sf.h
new file mode 100644
index 0000000..e517b55
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/sf.h
@@ -0,0 +1,138 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_sf_h__
+#define __iwl_fw_api_sf_h__
+
+/* Smart Fifo state */
+enum iwl_sf_state {
+	SF_LONG_DELAY_ON = 0, /* should never be set by the driver */
+ SF_FULL_ON,
+ SF_UNINIT,
+ SF_INIT_OFF,
+ SF_HW_NUM_STATES
+};
+
+/* Smart Fifo possible scenario */
+enum iwl_sf_scenario {
+ SF_SCENARIO_SINGLE_UNICAST,
+ SF_SCENARIO_AGG_UNICAST,
+ SF_SCENARIO_MULTICAST,
+ SF_SCENARIO_BA_RESP,
+ SF_SCENARIO_TX_RESP,
+ SF_NUM_SCENARIO
+};
+
+#define SF_TRANSIENT_STATES_NUMBER 2 /* SF_LONG_DELAY_ON and SF_FULL_ON */
+#define SF_NUM_TIMEOUT_TYPES 2 /* Aging timer and Idle timer */
+
+/* smart FIFO default values */
+#define SF_W_MARK_SISO 6144
+#define SF_W_MARK_MIMO2 8192
+#define SF_W_MARK_MIMO3 6144
+#define SF_W_MARK_LEGACY 4096
+#define SF_W_MARK_SCAN 4096
+
+/* SF Scenarios timers for default configuration (aligned to 32 uSec) */
+#define SF_SINGLE_UNICAST_IDLE_TIMER_DEF 160 /* 150 uSec */
+#define SF_SINGLE_UNICAST_AGING_TIMER_DEF 400 /* 0.4 mSec */
+#define SF_AGG_UNICAST_IDLE_TIMER_DEF 160 /* 150 uSec */
+#define SF_AGG_UNICAST_AGING_TIMER_DEF 400 /* 0.4 mSec */
+#define SF_MCAST_IDLE_TIMER_DEF 160		/* 150 uSec */
+#define SF_MCAST_AGING_TIMER_DEF 400 /* 0.4 mSec */
+#define SF_BA_IDLE_TIMER_DEF 160 /* 150 uSec */
+#define SF_BA_AGING_TIMER_DEF 400 /* 0.4 mSec */
+#define SF_TX_RE_IDLE_TIMER_DEF 160 /* 150 uSec */
+#define SF_TX_RE_AGING_TIMER_DEF 400 /* 0.4 mSec */
+
+/* SF Scenarios timers for BSS MAC configuration (aligned to 32 uSec) */
+#define SF_SINGLE_UNICAST_IDLE_TIMER 320 /* 300 uSec */
+#define SF_SINGLE_UNICAST_AGING_TIMER 2016 /* 2 mSec */
+#define SF_AGG_UNICAST_IDLE_TIMER 320 /* 300 uSec */
+#define SF_AGG_UNICAST_AGING_TIMER 2016 /* 2 mSec */
+#define SF_MCAST_IDLE_TIMER 2016 /* 2 mSec */
+#define SF_MCAST_AGING_TIMER 10016 /* 10 mSec */
+#define SF_BA_IDLE_TIMER 320 /* 300 uSec */
+#define SF_BA_AGING_TIMER 2016 /* 2 mSec */
+#define SF_TX_RE_IDLE_TIMER 320 /* 300 uSec */
+#define SF_TX_RE_AGING_TIMER 2016 /* 2 mSec */
+
+#define SF_LONG_DELAY_AGING_TIMER 1000000 /* 1 Sec */
+
+#define SF_CFG_DUMMY_NOTIF_OFF BIT(16)
+
+/**
+ * struct iwl_sf_cfg_cmd - Smart Fifo configuration command.
+ * @state: smart fifo state, types listed in &enum iwl_sf_state.
+ * @watermark: Minimum allowed available free space in RXF for transient state.
+ * @long_delay_timeouts: aging and idle timer values for each scenario
+ * in long delay state.
+ * @full_on_timeouts: timer values for each scenario in full on state.
+ */
+struct iwl_sf_cfg_cmd {
+ __le32 state;
+ __le32 watermark[SF_TRANSIENT_STATES_NUMBER];
+ __le32 long_delay_timeouts[SF_NUM_SCENARIO][SF_NUM_TIMEOUT_TYPES];
+ __le32 full_on_timeouts[SF_NUM_SCENARIO][SF_NUM_TIMEOUT_TYPES];
+} __packed; /* SF_CFG_API_S_VER_2 */
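+
+/*
+ * Usage sketch (illustrative only, not part of the firmware API): full-on
+ * timeouts for the single-unicast scenario using the BSS MAC values above.
+ * The [aging, idle] ordering of the timeout pair is an assumption of this
+ * sketch.
+ */
+static inline void
+iwl_example_fill_sf_cfg(struct iwl_sf_cfg_cmd *cmd)
+{
+	cmd->state = cpu_to_le32(SF_FULL_ON);
+	cmd->full_on_timeouts[SF_SCENARIO_SINGLE_UNICAST][0] =
+		cpu_to_le32(SF_SINGLE_UNICAST_AGING_TIMER);
+	cmd->full_on_timeouts[SF_SCENARIO_SINGLE_UNICAST][1] =
+		cpu_to_le32(SF_SINGLE_UNICAST_IDLE_TIMER);
+}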
+
+#endif /* __iwl_fw_api_sf_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
index 81f0a34..af369eb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
@@ -18,11 +18,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
* The full GNU General Public License is included in this distribution
* in the file called COPYING.
*
@@ -64,8 +59,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#ifndef __fw_api_sta_h__
-#define __fw_api_sta_h__
+#ifndef __iwl_fw_api_sta_h__
+#define __iwl_fw_api_sta_h__
/**
* enum iwl_sta_flags - flags for the ADD_STA host command
@@ -291,7 +286,7 @@ struct iwl_mvm_keyinfo {
* @tid_disable_tx: is tid BIT(tid) enabled for Tx. Clear BIT(x) to enable
* AMPDU for tid x. Set %STA_MODIFY_TID_DISABLE_TX to change this field.
* @mac_id_n_color: the Mac context this station belongs to,
- * see &enum iwl_mvm_id_and_color
+ * see &enum iwl_ctxt_id_and_color
* @addr: station's MAC address
* @reserved2: reserved
* @sta_id: index of station in uCode's station table
@@ -372,7 +367,7 @@ enum iwl_sta_type {
* @tid_disable_tx: is tid BIT(tid) enabled for Tx. Clear BIT(x) to enable
* AMPDU for tid x. Set %STA_MODIFY_TID_DISABLE_TX to change this field.
* @mac_id_n_color: the Mac context this station belongs to,
- * see &enum iwl_mvm_id_and_color
+ * see &enum iwl_ctxt_id_and_color
* @addr: station's MAC address
* @reserved2: reserved
* @sta_id: index of station in uCode's station table
@@ -575,4 +570,4 @@ struct iwl_mvm_eosp_notification {
__le32 sta_id;
} __packed; /* UAPSD_EOSP_NTFY_API_S_VER_1 */
-#endif /* __fw_api_sta_h__ */
+#endif /* __iwl_fw_api_sta_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-stats.h b/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h
index c7531da..53cb622 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-stats.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/stats.h
@@ -18,11 +18,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
* The full GNU General Public License is included in this distribution
* in the file called COPYING.
*
@@ -64,9 +59,9 @@
*
*****************************************************************************/
-#ifndef __fw_api_stats_h__
-#define __fw_api_stats_h__
-#include "fw-api-mac.h"
+#ifndef __iwl_fw_api_stats_h__
+#define __iwl_fw_api_stats_h__
+#include "mac.h"
struct mvm_statistics_dbg {
__le32 burst_check;
@@ -476,4 +471,4 @@ struct iwl_statistics_cmd {
__le32 flags;
} __packed; /* STATISTICS_CMD_API_S_VER_1 */
-#endif /* __fw_api_stats_h__ */
+#endif /* __iwl_fw_api_stats_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/tdls.h b/drivers/net/wireless/intel/iwlwifi/fw/api/tdls.h
new file mode 100644
index 0000000..7c6c246
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/tdls.h
@@ -0,0 +1,208 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_tdls_h__
+#define __iwl_fw_api_tdls_h__
+
+#include "fw/api/tx.h"
+#include "fw/api/phy-ctxt.h"
+
+#define IWL_MVM_TDLS_STA_COUNT 4
+
+/* Type of TDLS request */
+enum iwl_tdls_channel_switch_type {
+ TDLS_SEND_CHAN_SW_REQ = 0,
+ TDLS_SEND_CHAN_SW_RESP_AND_MOVE_CH,
+ TDLS_MOVE_CH,
+}; /* TDLS_STA_CHANNEL_SWITCH_CMD_TYPE_API_E_VER_1 */
+
+/**
+ * struct iwl_tdls_channel_switch_timing - Switch timing in TDLS channel-switch
+ * @frame_timestamp: GP2 timestamp of channel-switch request/response packet
+ * received from peer
+ * @max_offchan_duration: the maximum amount of time, in microseconds, out of
+ *	a DTIM that is given to the TDLS off-channel communication. For
+ *	instance, if the DTIM is 200 TU and the TDLS peer is to be given 25%
+ *	of the time, the value given will be 50 TU, or 50 * 1024 when
+ *	translated into microseconds.
+ * @switch_time: switch time the peer sent in its channel switch timing IE
+ * @switch_timeout: switch timeout the peer sent in its channel switch timing IE
+ */
+struct iwl_tdls_channel_switch_timing {
+ __le32 frame_timestamp; /* GP2 time of peer packet Rx */
+ __le32 max_offchan_duration; /* given in micro-seconds */
+ __le32 switch_time; /* given in micro-seconds */
+ __le32 switch_timeout; /* given in micro-seconds */
+} __packed; /* TDLS_STA_CHANNEL_SWITCH_TIMING_DATA_API_S_VER_1 */
+
+#define IWL_TDLS_CH_SW_FRAME_MAX_SIZE 200
+
+/**
+ * struct iwl_tdls_channel_switch_frame - TDLS channel switch frame template
+ *
+ * A template representing a TDLS channel-switch request or response frame
+ *
+ * @switch_time_offset: offset to the channel switch timing IE in the template
+ * @tx_cmd: Tx parameters for the frame
+ * @data: frame data
+ */
+struct iwl_tdls_channel_switch_frame {
+ __le32 switch_time_offset;
+ struct iwl_tx_cmd tx_cmd;
+ u8 data[IWL_TDLS_CH_SW_FRAME_MAX_SIZE];
+} __packed; /* TDLS_STA_CHANNEL_SWITCH_FRAME_API_S_VER_1 */
+
+/**
+ * struct iwl_tdls_channel_switch_cmd - TDLS channel switch command
+ *
+ * The command is sent to initiate a channel switch and also in response to
+ * incoming TDLS channel-switch request/response packets from remote peers.
+ *
+ * @switch_type: see &enum iwl_tdls_channel_switch_type
+ * @peer_sta_id: station id of TDLS peer
+ * @ci: channel we switch to
+ * @timing: timing related data for command
+ * @frame: channel-switch request/response template, depending on switch_type
+ */
+struct iwl_tdls_channel_switch_cmd {
+ u8 switch_type;
+ __le32 peer_sta_id;
+ struct iwl_fw_channel_info ci;
+ struct iwl_tdls_channel_switch_timing timing;
+ struct iwl_tdls_channel_switch_frame frame;
+} __packed; /* TDLS_STA_CHANNEL_SWITCH_CMD_API_S_VER_1 */
+
+/**
+ * struct iwl_tdls_channel_switch_notif - TDLS channel switch start notification
+ *
+ * @status: non-zero on success
+ * @offchannel_duration: duration given in microseconds
+ * @sta_id: peer currently performing the channel-switch with
+ */
+struct iwl_tdls_channel_switch_notif {
+ __le32 status;
+ __le32 offchannel_duration;
+ __le32 sta_id;
+} __packed; /* TDLS_STA_CHANNEL_SWITCH_NTFY_API_S_VER_1 */
+
+/**
+ * struct iwl_tdls_sta_info - TDLS station info
+ *
+ * @sta_id: station id of the TDLS peer
+ * @tx_to_peer_tid: TID reserved vs. the peer for FW based Tx
+ * @tx_to_peer_ssn: initial SSN the FW should use for Tx on its TID vs the peer
+ * @is_initiator: 1 if the peer is the TDLS link initiator, 0 otherwise
+ */
+struct iwl_tdls_sta_info {
+ u8 sta_id;
+ u8 tx_to_peer_tid;
+ __le16 tx_to_peer_ssn;
+ __le32 is_initiator;
+} __packed; /* TDLS_STA_INFO_VER_1 */
+
+/**
+ * struct iwl_tdls_config_cmd - TDLS basic config command
+ *
+ * @id_and_color: MAC id and color being configured
+ * @tdls_peer_count: number of currently connected TDLS peers
+ * @tx_to_ap_tid: TID reserved vs. the AP for FW based Tx
+ * @tx_to_ap_ssn: initial SSN the FW should use for Tx on its TID vs. the AP
+ * @sta_info: per-station info. Only the first tdls_peer_count entries are set
+ * @pti_req_data_offset: offset of network-level data for the PTI template
+ * @pti_req_tx_cmd: Tx parameters for PTI request template
+ * @pti_req_template: PTI request template data
+ */
+struct iwl_tdls_config_cmd {
+ __le32 id_and_color; /* mac id and color */
+ u8 tdls_peer_count;
+ u8 tx_to_ap_tid;
+ __le16 tx_to_ap_ssn;
+ struct iwl_tdls_sta_info sta_info[IWL_MVM_TDLS_STA_COUNT];
+
+ __le32 pti_req_data_offset;
+ struct iwl_tx_cmd pti_req_tx_cmd;
+ u8 pti_req_template[0];
+} __packed; /* TDLS_CONFIG_CMD_API_S_VER_1 */
+
+/**
+ * struct iwl_tdls_config_sta_info_res - TDLS per-station config information
+ *
+ * @sta_id: station id of the TDLS peer
+ * @tx_to_peer_last_seq: last sequence number used by FW during FW-based Tx to
+ * the peer
+ */
+struct iwl_tdls_config_sta_info_res {
+ __le16 sta_id;
+ __le16 tx_to_peer_last_seq;
+} __packed; /* TDLS_STA_INFO_RSP_VER_1 */
+
+/**
+ * struct iwl_tdls_config_res - TDLS config information from FW
+ *
+ * @tx_to_ap_last_seq: last sequence number used by FW during FW-based Tx to AP
+ * @sta_info: per-station TDLS config information
+ */
+struct iwl_tdls_config_res {
+ __le32 tx_to_ap_last_seq;
+ struct iwl_tdls_config_sta_info_res sta_info[IWL_MVM_TDLS_STA_COUNT];
+} __packed; /* TDLS_CONFIG_RSP_API_S_VER_1 */
+
+#endif /* __iwl_fw_api_tdls_h__ */
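
A note on the config command above: iwl_tdls_config_cmd carries a fixed array of IWL_MVM_TDLS_STA_COUNT per-station slots, of which only the first tdls_peer_count entries are meaningful. Below is a minimal standalone sketch of that convention; it mirrors the wire layout with plain fixed-width types so it compiles outside the kernel (the in-kernel code uses the __le* types with cpu_to_le*() conversions, and all field values here are invented for illustration):

/* Standalone sketch, not part of the patch: plain uintN_t stand-ins for
 * the __le* wire types; values are invented for illustration. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define IWL_MVM_TDLS_STA_COUNT 4

struct tdls_sta_info {			/* cf. struct iwl_tdls_sta_info */
	uint8_t  sta_id;
	uint8_t  tx_to_peer_tid;
	uint16_t tx_to_peer_ssn;
	uint32_t is_initiator;
};

struct tdls_config_cmd {		/* cf. struct iwl_tdls_config_cmd */
	uint32_t id_and_color;
	uint8_t  tdls_peer_count;
	uint8_t  tx_to_ap_tid;
	uint16_t tx_to_ap_ssn;
	struct tdls_sta_info sta_info[IWL_MVM_TDLS_STA_COUNT];
};

int main(void)
{
	struct tdls_config_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));	/* unused sta_info slots stay zeroed */
	cmd.tdls_peer_count = 1;	/* only sta_info[0] is meaningful */
	cmd.sta_info[0].sta_id = 2;	/* invented station id */
	cmd.sta_info[0].is_initiator = 1;

	printf("configured %u TDLS peer(s), first sta_id %u\n",
	       cmd.tdls_peer_count, cmd.sta_info[0].sta_id);
	return 0;
}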
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h b/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h
new file mode 100644
index 0000000..3721a3e
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h
@@ -0,0 +1,386 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#ifndef __iwl_fw_api_time_event_h__
+#define __iwl_fw_api_time_event_h__
+
+#include "fw/api/phy-ctxt.h"
+
+/* Time Event types, according to MAC type */
+enum iwl_time_event_type {
+ /* BSS Station Events */
+ TE_BSS_STA_AGGRESSIVE_ASSOC,
+ TE_BSS_STA_ASSOC,
+ TE_BSS_EAP_DHCP_PROT,
+ TE_BSS_QUIET_PERIOD,
+
+ /* P2P Device Events */
+ TE_P2P_DEVICE_DISCOVERABLE,
+ TE_P2P_DEVICE_LISTEN,
+ TE_P2P_DEVICE_ACTION_SCAN,
+ TE_P2P_DEVICE_FULL_SCAN,
+
+ /* P2P Client Events */
+ TE_P2P_CLIENT_AGGRESSIVE_ASSOC,
+ TE_P2P_CLIENT_ASSOC,
+ TE_P2P_CLIENT_QUIET_PERIOD,
+
+ /* P2P GO Events */
+ TE_P2P_GO_ASSOC_PROT,
+ TE_P2P_GO_REPETITIVET_NOA,
+ TE_P2P_GO_CT_WINDOW,
+
+ /* WiDi Sync Events */
+ TE_WIDI_TX_SYNC,
+
+ /* Channel Switch NoA */
+ TE_CHANNEL_SWITCH_PERIOD,
+
+ TE_MAX
+}; /* MAC_EVENT_TYPE_API_E_VER_1 */
+
+/* Time event - defines for command API v1 */
+
+/*
+ * @TE_V1_FRAG_NONE: fragmentation of the time event is NOT allowed.
+ * @TE_V1_FRAG_SINGLE: fragmentation of the time event is allowed, but only
+ * the first fragment is scheduled.
+ * @TE_V1_FRAG_DUAL: fragmentation of the time event is allowed, but only
+ * the first 2 fragments are scheduled.
+ * @TE_V1_FRAG_ENDLESS: fragmentation of the time event is allowed, and any
+ * number of fragments are valid.
+ *
+ * Other than the constants defined above, specifying a fragmentation value 'x'
+ * means that the event can be fragmented but only the first 'x' will be
+ * scheduled.
+ */
+enum {
+ TE_V1_FRAG_NONE = 0,
+ TE_V1_FRAG_SINGLE = 1,
+ TE_V1_FRAG_DUAL = 2,
+ TE_V1_FRAG_ENDLESS = 0xffffffff
+};
+
+/* If a Time Event can be fragmented, this is the max number of fragments */
+#define TE_V1_FRAG_MAX_MSK 0x0fffffff
+/* Repeat the time event endlessly (until removed) */
+#define TE_V1_REPEAT_ENDLESS 0xffffffff
+/* If a Time Event has bounded repetitions, this is the maximal value */
+#define TE_V1_REPEAT_MAX_MSK_V1 0x0fffffff
+
+/* Time Event dependencies: none, on another TE, or in a specific time */
+enum {
+ TE_V1_INDEPENDENT = 0,
+ TE_V1_DEP_OTHER = BIT(0),
+ TE_V1_DEP_TSF = BIT(1),
+ TE_V1_EVENT_SOCIOPATHIC = BIT(2),
+}; /* MAC_EVENT_DEPENDENCY_POLICY_API_E_VER_2 */
+
+/*
+ * @TE_V1_NOTIF_NONE: no notifications
+ * @TE_V1_NOTIF_HOST_EVENT_START: request/receive notification on event start
+ * @TE_V1_NOTIF_HOST_EVENT_END: request/receive notification on event end
+ * @TE_V1_NOTIF_INTERNAL_EVENT_START: internal FW use
+ * @TE_V1_NOTIF_INTERNAL_EVENT_END: internal FW use.
+ * @TE_V1_NOTIF_HOST_FRAG_START: request/receive notification on frag start
+ * @TE_V1_NOTIF_HOST_FRAG_END: request/receive notification on frag end
+ * @TE_V1_NOTIF_INTERNAL_FRAG_START: internal FW use.
+ * @TE_V1_NOTIF_INTERNAL_FRAG_END: internal FW use.
+ *
+ * Supported time event notification configurations.
+ * A notification (both event and fragment) includes a status indicating whether
+ * the FW was able to schedule the event or not. For fragment start/end
+ * notification the status is always success. There is no start/end fragment
+ * notification for monolithic events.
+ */
+enum {
+ TE_V1_NOTIF_NONE = 0,
+ TE_V1_NOTIF_HOST_EVENT_START = BIT(0),
+ TE_V1_NOTIF_HOST_EVENT_END = BIT(1),
+ TE_V1_NOTIF_INTERNAL_EVENT_START = BIT(2),
+ TE_V1_NOTIF_INTERNAL_EVENT_END = BIT(3),
+ TE_V1_NOTIF_HOST_FRAG_START = BIT(4),
+ TE_V1_NOTIF_HOST_FRAG_END = BIT(5),
+ TE_V1_NOTIF_INTERNAL_FRAG_START = BIT(6),
+ TE_V1_NOTIF_INTERNAL_FRAG_END = BIT(7),
+}; /* MAC_EVENT_ACTION_API_E_VER_2 */
+
+/* Time event - defines for command API */
+
+/*
+ * @TE_V2_FRAG_NONE: fragmentation of the time event is NOT allowed.
+ * @TE_V2_FRAG_SINGLE: fragmentation of the time event is allowed, but only
+ * the first fragment is scheduled.
+ * @TE_V2_FRAG_DUAL: fragmentation of the time event is allowed, but only
+ * the first 2 fragments are scheduled.
+ * @TE_V2_FRAG_ENDLESS: fragmentation of the time event is allowed, and any
+ * number of fragments are valid.
+ *
+ * Other than the constants defined above, specifying a fragmentation value 'x'
+ * means that the event can be fragmented but only the first 'x' will be
+ * scheduled.
+ */
+enum {
+ TE_V2_FRAG_NONE = 0,
+ TE_V2_FRAG_SINGLE = 1,
+ TE_V2_FRAG_DUAL = 2,
+ TE_V2_FRAG_MAX = 0xfe,
+ TE_V2_FRAG_ENDLESS = 0xff
+};
+
+/* Repeat the time event endlessly (until removed) */
+#define TE_V2_REPEAT_ENDLESS 0xff
+/* If a Time Event has bounded repetitions, this is the maximal value */
+#define TE_V2_REPEAT_MAX 0xfe
+
+#define TE_V2_PLACEMENT_POS 12
+#define TE_V2_ABSENCE_POS 15
+
+/**
+ * enum iwl_time_event_policy - Time event policy values
+ * A notification (both event and fragment) includes a status indicating whether
+ * the FW was able to schedule the event or not. For fragment start/end
+ * notification the status is always success. There is no start/end fragment
+ * notification for monolithic events.
+ *
+ * @TE_V2_DEFAULT_POLICY: independent, social, present, unnoticeable
+ * @TE_V2_NOTIF_HOST_EVENT_START: request/receive notification on event start
+ * @TE_V2_NOTIF_HOST_EVENT_END: request/receive notification on event end
+ * @TE_V2_NOTIF_INTERNAL_EVENT_START: internal FW use
+ * @TE_V2_NOTIF_INTERNAL_EVENT_END: internal FW use.
+ * @TE_V2_NOTIF_HOST_FRAG_START: request/receive notification on frag start
+ * @TE_V2_NOTIF_HOST_FRAG_END: request/receive notification on frag end
+ * @TE_V2_NOTIF_INTERNAL_FRAG_START: internal FW use.
+ * @TE_V2_NOTIF_INTERNAL_FRAG_END: internal FW use.
+ * @T2_V2_START_IMMEDIATELY: start time event immediately
+ * @TE_V2_DEP_OTHER: depends on another time event
+ * @TE_V2_DEP_TSF: depends on a specific time
+ * @TE_V2_EVENT_SOCIOPATHIC: can't co-exist with other events of the same MAC
+ * @TE_V2_ABSENCE: whether we are present or absent during the Time Event.
+ */
+enum iwl_time_event_policy {
+ TE_V2_DEFAULT_POLICY = 0x0,
+
+ /* notifications (event start/stop, fragment start/stop) */
+ TE_V2_NOTIF_HOST_EVENT_START = BIT(0),
+ TE_V2_NOTIF_HOST_EVENT_END = BIT(1),
+ TE_V2_NOTIF_INTERNAL_EVENT_START = BIT(2),
+ TE_V2_NOTIF_INTERNAL_EVENT_END = BIT(3),
+
+ TE_V2_NOTIF_HOST_FRAG_START = BIT(4),
+ TE_V2_NOTIF_HOST_FRAG_END = BIT(5),
+ TE_V2_NOTIF_INTERNAL_FRAG_START = BIT(6),
+ TE_V2_NOTIF_INTERNAL_FRAG_END = BIT(7),
+ T2_V2_START_IMMEDIATELY = BIT(11),
+
+ /* placement characteristics */
+ TE_V2_DEP_OTHER = BIT(TE_V2_PLACEMENT_POS),
+ TE_V2_DEP_TSF = BIT(TE_V2_PLACEMENT_POS + 1),
+ TE_V2_EVENT_SOCIOPATHIC = BIT(TE_V2_PLACEMENT_POS + 2),
+
+ /* are we present or absent during the Time Event. */
+ TE_V2_ABSENCE = BIT(TE_V2_ABSENCE_POS),
+};
+
+/**
+ * struct iwl_time_event_cmd - configuring Time Events
+ * with struct MAC_TIME_EVENT_DATA_API_S_VER_2 (see also version 1; which
+ * version is used is determined by IWL_UCODE_TLV_FLAGS)
+ * ( TIME_EVENT_CMD = 0x29 )
+ * @id_and_color: ID and color of the relevant MAC,
+ * &enum iwl_ctxt_id_and_color
+ * @action: action to perform, one of &enum iwl_ctxt_action
+ * @id: this field has two meanings, depending on the action:
+ * If the action is ADD, then it means the type of event to add.
+ * For all other actions it is the unique event ID assigned when the
+ * event was added by the FW.
+ * @apply_time: When to start the Time Event (in GP2)
+ * @max_delay: maximum delay to event's start (apply time), in TU
+ * @depends_on: the unique ID of the event we depend on (if any)
+ * @interval: interval between repetitions, in TU
+ * @duration: duration of event in TU
+ * @repeat: how many repetitions to do, can be TE_REPEAT_ENDLESS
+ * @max_frags: maximal number of fragments the Time Event can be divided into
+ * @policy: defines whether uCode shall notify the host or other uCode modules
+ *	on event and/or fragment start and/or end, using the TE_NOTIF_* flags;
+ *	placement is chosen with one of TE_INDEPENDENT, TE_DEP_OTHER,
+ *	TE_DEP_TSF or TE_EVENT_SOCIOPATHIC, and presence with TE_ABSENCE;
+ *	see &enum iwl_time_event_policy
+ */
+struct iwl_time_event_cmd {
+ /* COMMON_INDEX_HDR_API_S_VER_1 */
+ __le32 id_and_color;
+ __le32 action;
+ __le32 id;
+ /* MAC_TIME_EVENT_DATA_API_S_VER_2 */
+ __le32 apply_time;
+ __le32 max_delay;
+ __le32 depends_on;
+ __le32 interval;
+ __le32 duration;
+ u8 repeat;
+ u8 max_frags;
+ __le16 policy;
+} __packed; /* MAC_TIME_EVENT_CMD_API_S_VER_2 */
+
+/**
+ * struct iwl_time_event_resp - response structure to iwl_time_event_cmd
+ * @status: bit 0 indicates success, all others specify errors
+ * @id: the Time Event type
+ * @unique_id: the unique ID assigned (in ADD) or given (others) to the TE
+ * @id_and_color: ID and color of the relevant MAC,
+ * &enum iwl_ctxt_id_and_color
+ */
+struct iwl_time_event_resp {
+ __le32 status;
+ __le32 id;
+ __le32 unique_id;
+ __le32 id_and_color;
+} __packed; /* MAC_TIME_EVENT_RSP_API_S_VER_1 */
+
+/**
+ * struct iwl_time_event_notif - notifications of time event start/stop
+ * ( TIME_EVENT_NOTIFICATION = 0x2a )
+ * @timestamp: action timestamp in GP2
+ * @session_id: session's unique id
+ * @unique_id: unique id of the Time Event itself
+ * @id_and_color: ID and color of the relevant MAC
+ * @action: &enum iwl_time_event_policy
+ * @status: true if scheduled, false otherwise (not executed)
+ */
+struct iwl_time_event_notif {
+ __le32 timestamp;
+ __le32 session_id;
+ __le32 unique_id;
+ __le32 id_and_color;
+ __le32 action;
+ __le32 status;
+} __packed; /* MAC_TIME_EVENT_NTFY_API_S_VER_1 */
+
+/*
+ * Aux ROC command
+ *
+ * Command requests the firmware to create a time event for a certain duration
+ * and remain on the given channel. This is done by using the Aux framework in
+ * the FW.
+ * The command was first used for Hot Spot issues, but can be used regardless
+ * of Hot Spot.
+ *
+ * ( HOT_SPOT_CMD 0x53 )
+ *
+ * @id_and_color: ID and color of the MAC
+ * @action: action to perform, one of FW_CTXT_ACTION_*
+ * @event_unique_id: If the action FW_CTXT_ACTION_REMOVE then the
+ * event_unique_id should be the id of the time event assigned by ucode.
+ * Otherwise ignore the event_unique_id.
+ * @sta_id_and_color: station id and color, resumed during "Remain On Channel"
+ * activity.
+ * @channel_info: channel info
+ * @node_addr: Our MAC Address
+ * @reserved: reserved for alignment
+ * @apply_time: GP2 value to start (should always be the current GP2 value)
+ * @apply_time_max_delay: Maximum apply time delay value in TU. Defines max
+ * time by which start of the event is allowed to be postponed.
+ * @duration: event duration in TU. To calculate the event duration:
+ * timeEventDuration = min(duration, remainingQuota)
+ */
+struct iwl_hs20_roc_req {
+ /* COMMON_INDEX_HDR_API_S_VER_1 hdr */
+ __le32 id_and_color;
+ __le32 action;
+ __le32 event_unique_id;
+ __le32 sta_id_and_color;
+ struct iwl_fw_channel_info channel_info;
+ u8 node_addr[ETH_ALEN];
+ __le16 reserved;
+ __le32 apply_time;
+ __le32 apply_time_max_delay;
+ __le32 duration;
+} __packed; /* HOT_SPOT_CMD_API_S_VER_1 */
+
+/*
+ * AUX ROC result values
+ */
+enum iwl_mvm_hot_spot {
+ HOT_SPOT_RSP_STATUS_OK,
+ HOT_SPOT_RSP_STATUS_TOO_MANY_EVENTS,
+ HOT_SPOT_MAX_NUM_OF_SESSIONS,
+};
+
+/*
+ * Aux ROC command response
+ *
+ * In response to iwl_hs20_roc_req, the FW sends this command to notify the
+ * driver of the unique id of the time event.
+ *
+ * ( HOT_SPOT_CMD 0x53 )
+ *
+ * @event_unique_id: Unique ID of time event assigned by ucode
+ * @status: return status; 0 is success, all other values indicate specific errors
+ */
+struct iwl_hs20_roc_res {
+ __le32 event_unique_id;
+ __le32 status;
+} __packed; /* HOT_SPOT_RSP_API_S_VER_1 */
+
+#endif /* __iwl_fw_api_time_event_h__ */
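
The policy field documented above splits into notification flags in the low bits, placement bits starting at TE_V2_PLACEMENT_POS (12), and the absence bit at TE_V2_ABSENCE_POS (15). A standalone sketch of composing a policy value under that layout, with the bit positions copied from the header (in-kernel code would OR the named enum iwl_time_event_policy constants instead, and the combination chosen here is only an example):

/* Standalone sketch, not part of the patch: bit positions taken from the
 * header above; the policy combination is invented for illustration. */
#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1u << (n))
#define TE_V2_PLACEMENT_POS	12
#define TE_V2_ABSENCE_POS	15

int main(void)
{
	/* notify host on event start/end, start immediately, depend on TSF */
	uint16_t policy = BIT(0) | BIT(1)	/* HOST_EVENT_START / _END */
			| BIT(11)		/* T2_V2_START_IMMEDIATELY */
			| BIT(TE_V2_PLACEMENT_POS + 1);	/* TE_V2_DEP_TSF */

	printf("policy = 0x%04x, absence bit %s\n", policy,
	       (policy & BIT(TE_V2_ABSENCE_POS)) ? "set" : "clear");
	return 0;
}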
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tof.h b/drivers/net/wireless/intel/iwlwifi/fw/api/tof.h
index 8658a98..7328a16 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tof.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/tof.h
@@ -16,11 +16,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
* The full GNU General Public License is included in this distribution
* in the file called COPYING.
*
@@ -60,8 +55,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*****************************************************************************/
-#ifndef __fw_api_tof_h__
-#define __fw_api_tof_h__
+#ifndef __iwl_fw_api_tof_h__
+#define __iwl_fw_api_tof_h__
/* ToF sub-group command IDs */
enum iwl_mvm_tof_sub_grp_ids {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h
index 97d7eed..14ad9fb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h
@@ -17,11 +17,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
* The full GNU General Public License is included in this distribution
* in the file called COPYING.
*
@@ -62,8 +57,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#ifndef __fw_api_tx_h__
-#define __fw_api_tx_h__
+#ifndef __iwl_fw_api_tx_h__
+#define __iwl_fw_api_tx_h__
/**
* enum iwl_tx_flags - bitmasks for tx_flags in TX command
@@ -414,7 +409,8 @@ enum iwl_tx_status {
* @AGG_TX_STATE_BT_PRIO:
* @AGG_TX_STATE_FEW_BYTES:
* @AGG_TX_STATE_ABORT:
- * @AGG_TX_STATE_LAST_SENT_TTL:
+ * @AGG_TX_STATE_TX_ON_AIR_DROP: TX_ON_AIR signal drop without underrun or
+ * BT detection
* @AGG_TX_STATE_LAST_SENT_TRY_CNT:
* @AGG_TX_STATE_LAST_SENT_BT_KILL:
* @AGG_TX_STATE_SCD_QUERY:
@@ -426,7 +422,7 @@ enum iwl_tx_status {
* occur if tx failed for this frame when it was a member of a previous
* aggregation block). If rate scaling is used, retry count indicates the
* rate table entry used for all frames in the new agg.
- *@ AGG_TX_STATE_SEQ_NUM_MSK: Command ID and sequence number of Tx command for
+ * @AGG_TX_STATE_SEQ_NUM_MSK: Command ID and sequence number of Tx command for
* this frame
*
* TODO: complete documentation
@@ -438,7 +434,7 @@ enum iwl_tx_agg_status {
AGG_TX_STATE_BT_PRIO = 0x002,
AGG_TX_STATE_FEW_BYTES = 0x004,
AGG_TX_STATE_ABORT = 0x008,
- AGG_TX_STATE_LAST_SENT_TTL = 0x010,
+ AGG_TX_STATE_TX_ON_AIR_DROP = 0x010,
AGG_TX_STATE_LAST_SENT_TRY_CNT = 0x020,
AGG_TX_STATE_LAST_SENT_BT_KILL = 0x040,
AGG_TX_STATE_SCD_QUERY = 0x080,
@@ -450,10 +446,6 @@ enum iwl_tx_agg_status {
AGG_TX_STATE_TRY_CNT_MSK = 0xf << AGG_TX_STATE_TRY_CNT_POS,
};
-#define AGG_TX_STATE_LAST_SENT_MSK (AGG_TX_STATE_LAST_SENT_TTL| \
- AGG_TX_STATE_LAST_SENT_TRY_CNT| \
- AGG_TX_STATE_LAST_SENT_BT_KILL)
-
/*
* The mask below describes a status where we are absolutely sure that the MPDU
 * wasn't sent. For BA/Underrun we cannot be that sure. All we know is that we've
@@ -771,7 +763,8 @@ struct iwl_mac_beacon_cmd_v6 {
} __packed; /* BEACON_TEMPLATE_CMD_API_S_VER_6 */
/**
- * struct iwl_mac_beacon_cmd_data - data of beacon template with offloaded CSA
+ * struct iwl_mac_beacon_cmd_v7 - beacon template command with offloaded CSA
+ * @tx: the tx commands associated with the beacon frame
 * @template_id: currently equal to the mac context id of the corresponding
* mac.
* @tim_idx: the offset of the tim IE in the beacon
@@ -780,38 +773,47 @@ struct iwl_mac_beacon_cmd_v6 {
* @csa_offset: offset to the CSA IE if present
* @frame: the template of the beacon frame
*/
-struct iwl_mac_beacon_cmd_data {
+struct iwl_mac_beacon_cmd_v7 {
+ struct iwl_tx_cmd tx;
__le32 template_id;
__le32 tim_idx;
__le32 tim_size;
__le32 ecsa_offset;
__le32 csa_offset;
struct ieee80211_hdr frame[0];
-};
-
-/**
- * struct iwl_mac_beacon_cmd_v7 - beacon template command with offloaded CSA
- * @tx: the tx commands associated with the beacon frame
- * @data: see &iwl_mac_beacon_cmd_data
- */
-struct iwl_mac_beacon_cmd_v7 {
- struct iwl_tx_cmd tx;
- struct iwl_mac_beacon_cmd_data data;
} __packed; /* BEACON_TEMPLATE_CMD_API_S_VER_7 */
+enum iwl_mac_beacon_flags {
+ IWL_MAC_BEACON_CCK = BIT(8),
+ IWL_MAC_BEACON_ANT_A = BIT(9),
+ IWL_MAC_BEACON_ANT_B = BIT(10),
+ IWL_MAC_BEACON_ANT_C = BIT(11),
+};
+
/**
* struct iwl_mac_beacon_cmd - beacon template command with offloaded CSA
- * @byte_cnt: byte count of the beacon frame
- * @flags: for future use
+ * @byte_cnt: byte count of the beacon frame.
+ * @flags: least significant byte for rate code. The most significant byte
+ * is &enum iwl_mac_beacon_flags.
* @reserved: reserved
- * @data: see &iwl_mac_beacon_cmd_data
+ * @template_id: currently equal to the mac context id of the corresponding mac.
+ * @tim_idx: the offset of the tim IE in the beacon
+ * @tim_size: the length of the tim IE
+ * @ecsa_offset: offset to the ECSA IE if present
+ * @csa_offset: offset to the CSA IE if present
+ * @frame: the template of the beacon frame
*/
struct iwl_mac_beacon_cmd {
__le16 byte_cnt;
__le16 flags;
__le64 reserved;
- struct iwl_mac_beacon_cmd_data data;
-} __packed; /* BEACON_TEMPLATE_CMD_API_S_VER_8 */
+ __le32 template_id;
+ __le32 tim_idx;
+ __le32 tim_size;
+ __le32 ecsa_offset;
+ __le32 csa_offset;
+ struct ieee80211_hdr frame[0];
+} __packed; /* BEACON_TEMPLATE_CMD_API_S_VER_9 */
struct iwl_beacon_notif {
struct iwl_mvm_tx_resp beacon_notify_hdr;
@@ -914,4 +916,4 @@ struct iwl_scd_txq_cfg_rsp {
u8 scd_queue;
} __packed; /* SCD_QUEUE_CFG_RSP_API_S_VER_1 */
-#endif /* __fw_api_tx_h__ */
+#endif /* __iwl_fw_api_tx_h__ */
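
The reworked iwl_mac_beacon_cmd above puts the rate code in the least significant byte of @flags, while the enum iwl_mac_beacon_flags bits are defined from BIT(8) up, so they land in the most significant byte without shifting. A standalone sketch of that split (the rate code value is invented for illustration):

/* Standalone sketch, not part of the patch: flag bit values copied from
 * enum iwl_mac_beacon_flags above; the rate code is invented. */
#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1u << (n))
#define IWL_MAC_BEACON_CCK	BIT(8)
#define IWL_MAC_BEACON_ANT_A	BIT(9)

int main(void)
{
	uint8_t  rate_code = 0x10;	/* invented rate code */
	uint16_t flags = rate_code | IWL_MAC_BEACON_CCK | IWL_MAC_BEACON_ANT_A;

	printf("flags = 0x%04x (rate 0x%02x, beacon flags 0x%02x)\n",
	       flags, flags & 0xff, flags >> 8);
	return 0;
}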
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
new file mode 100644
index 0000000..87b4434
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/txq.h
@@ -0,0 +1,163 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#ifndef __iwl_fw_api_txq_h__
+#define __iwl_fw_api_txq_h__
+
+/*
+ * DQA queue numbers
+ *
+ * @IWL_MVM_DQA_CMD_QUEUE: a queue reserved for sending HCMDs to the FW
+ * @IWL_MVM_DQA_AUX_QUEUE: a queue reserved for aux frames
+ * @IWL_MVM_DQA_P2P_DEVICE_QUEUE: a queue reserved for P2P device frames
+ * @IWL_MVM_DQA_GCAST_QUEUE: a queue reserved for P2P GO/SoftAP GCAST frames
+ * @IWL_MVM_DQA_BSS_CLIENT_QUEUE: a queue reserved for BSS activity, to ensure
+ *	that we are never left unable to connect to an AP.
+ * @IWL_MVM_DQA_MIN_MGMT_QUEUE: first TXQ in pool for MGMT and non-QOS frames.
+ * Each MGMT queue is mapped to a single STA.
+ * MGMT frames are frames that return true on ieee80211_is_mgmt()
+ * @IWL_MVM_DQA_MAX_MGMT_QUEUE: last TXQ in pool for MGMT frames
+ * @IWL_MVM_DQA_AP_PROBE_RESP_QUEUE: a queue reserved for P2P GO/SoftAP probe
+ * responses
+ * @IWL_MVM_DQA_MIN_DATA_QUEUE: first TXQ in pool for DATA frames.
+ * DATA frames are intended for !ieee80211_is_mgmt() frames, but if
+ * the MGMT TXQ pool is exhausted, mgmt frames can be sent on DATA queues
+ * as well
+ * @IWL_MVM_DQA_MAX_DATA_QUEUE: last TXQ in pool for DATA frames
+ */
+enum iwl_mvm_dqa_txq {
+ IWL_MVM_DQA_CMD_QUEUE = 0,
+ IWL_MVM_DQA_AUX_QUEUE = 1,
+ IWL_MVM_DQA_P2P_DEVICE_QUEUE = 2,
+ IWL_MVM_DQA_GCAST_QUEUE = 3,
+ IWL_MVM_DQA_BSS_CLIENT_QUEUE = 4,
+ IWL_MVM_DQA_MIN_MGMT_QUEUE = 5,
+ IWL_MVM_DQA_MAX_MGMT_QUEUE = 8,
+ IWL_MVM_DQA_AP_PROBE_RESP_QUEUE = 9,
+ IWL_MVM_DQA_MIN_DATA_QUEUE = 10,
+ IWL_MVM_DQA_MAX_DATA_QUEUE = 31,
+};
+
+enum iwl_mvm_tx_fifo {
+ IWL_MVM_TX_FIFO_BK = 0,
+ IWL_MVM_TX_FIFO_BE,
+ IWL_MVM_TX_FIFO_VI,
+ IWL_MVM_TX_FIFO_VO,
+ IWL_MVM_TX_FIFO_MCAST = 5,
+ IWL_MVM_TX_FIFO_CMD = 7,
+};
+
+enum iwl_gen2_tx_fifo {
+ IWL_GEN2_TX_FIFO_CMD = 0,
+ IWL_GEN2_EDCA_TX_FIFO_BK,
+ IWL_GEN2_EDCA_TX_FIFO_BE,
+ IWL_GEN2_EDCA_TX_FIFO_VI,
+ IWL_GEN2_EDCA_TX_FIFO_VO,
+ IWL_GEN2_TRIG_TX_FIFO_BK,
+ IWL_GEN2_TRIG_TX_FIFO_BE,
+ IWL_GEN2_TRIG_TX_FIFO_VI,
+ IWL_GEN2_TRIG_TX_FIFO_VO,
+};
+
+/**
+ * enum iwl_tx_queue_cfg_actions - TXQ config options
+ * @TX_QUEUE_CFG_ENABLE_QUEUE: enable a queue
+ * @TX_QUEUE_CFG_TFD_SHORT_FORMAT: use short TFD format
+ */
+enum iwl_tx_queue_cfg_actions {
+ TX_QUEUE_CFG_ENABLE_QUEUE = BIT(0),
+ TX_QUEUE_CFG_TFD_SHORT_FORMAT = BIT(1),
+};
+
+/**
+ * struct iwl_tx_queue_cfg_cmd - txq hw scheduler config command
+ * @sta_id: station id
+ * @tid: tid of the queue
+ * @flags: see &enum iwl_tx_queue_cfg_actions
+ * @cb_size: size of TFD cyclic buffer. Value is exponent - 3.
+ * Minimum value 0 (8 TFDs), maximum value 5 (256 TFDs)
+ * @byte_cnt_addr: address of byte count table
+ * @tfdq_addr: address of TFD circular buffer
+ */
+struct iwl_tx_queue_cfg_cmd {
+ u8 sta_id;
+ u8 tid;
+ __le16 flags;
+ __le32 cb_size;
+ __le64 byte_cnt_addr;
+ __le64 tfdq_addr;
+} __packed; /* TX_QUEUE_CFG_CMD_API_S_VER_2 */
+
+/**
+ * struct iwl_tx_queue_cfg_rsp - response to txq hw scheduler config
+ * @queue_number: queue number assigned to this RA/TID
+ * @flags: set on failure
+ * @write_pointer: initial value for write pointer
+ * @reserved: reserved
+ */
+struct iwl_tx_queue_cfg_rsp {
+ __le16 queue_number;
+ __le16 flags;
+ __le16 write_pointer;
+ __le16 reserved;
+} __packed; /* TX_QUEUE_CFG_RSP_API_S_VER_2 */
+
+#endif /* __iwl_fw_api_txq_h__ */
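
The @cb_size encoding documented above ("exponent - 3") means a value of 0 describes an 8-TFD ring and 5 a 256-TFD ring. A standalone sketch of converting a TFD count to that encoding, under the assumption that only power-of-two ring sizes between 8 and 256 are valid:

/* Standalone sketch, not part of the patch: encodes a TFD ring size as
 * "exponent - 3" per the iwl_tx_queue_cfg_cmd documentation above. */
#include <stdio.h>

static int tfds_to_cb_size(unsigned int tfds)
{
	int exp = 0;

	while ((1u << exp) < tfds)
		exp++;
	/* valid rings are powers of two from 8 (cb_size 0) to 256 (5) */
	if (exp < 3 || exp > 8 || (1u << exp) != tfds)
		return -1;
	return exp - 3;
}

int main(void)
{
	printf("8 TFDs   -> cb_size %d\n", tfds_to_cb_size(8));	  /* 0 */
	printf("256 TFDs -> cb_size %d\n", tfds_to_cb_size(256)); /* 5 */
	printf("100 TFDs -> cb_size %d\n", tfds_to_cb_size(100)); /* -1 */
	return 0;
}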
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/common_rx.c b/drivers/net/wireless/intel/iwlwifi/fw/common_rx.c
new file mode 100644
index 0000000..6f75985
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/common_rx.c
@@ -0,0 +1,88 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#include "iwl-drv.h"
+#include "runtime.h"
+#include "fw/api/commands.h"
+#include "fw/api/alive.h"
+
+static void iwl_fwrt_fseq_ver_mismatch(struct iwl_fw_runtime *fwrt,
+ struct iwl_rx_cmd_buffer *rxb)
+{
+ struct iwl_rx_packet *pkt = rxb_addr(rxb);
+ struct iwl_fseq_ver_mismatch_ntf *fseq = (void *)pkt->data;
+
+ IWL_ERR(fwrt, "FSEQ version mismatch (aux: %d, wifi: %d)\n",
+ __le32_to_cpu(fseq->aux_read_fseq_ver),
+ __le32_to_cpu(fseq->wifi_fseq_ver));
+}
+
+void iwl_fwrt_handle_notification(struct iwl_fw_runtime *fwrt,
+ struct iwl_rx_cmd_buffer *rxb)
+{
+ struct iwl_rx_packet *pkt = rxb_addr(rxb);
+ u32 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd);
+
+ switch (cmd) {
+ case WIDE_ID(SYSTEM_GROUP, FSEQ_VER_MISMATCH_NTF):
+ iwl_fwrt_fseq_ver_mismatch(fwrt, rxb);
+ break;
+ default:
+ break;
+ }
+}
+IWL_EXPORT_SYMBOL(iwl_fwrt_handle_notification);
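
iwl_fwrt_handle_notification() above dispatches on a wide command id that combines the group id and the opcode. A standalone sketch of that dispatch pattern, assuming the usual iwlwifi WIDE_ID encoding of ((group) << 8) | opcode; the group and opcode values here are invented, the real ones come from the driver's command headers:

/* Standalone sketch, not part of the patch: WIDE_ID encoding assumed to
 * be ((group) << 8) | opcode; group/opcode values are invented. */
#include <stdint.h>
#include <stdio.h>

#define WIDE_ID(grp, opcode) (((grp) << 8) | (opcode))

enum { EXAMPLE_GROUP = 0x2 };	/* invented group id */
enum { EXAMPLE_NTF   = 0xff };	/* invented opcode */

static void handle_notification(uint32_t cmd)
{
	switch (cmd) {
	case WIDE_ID(EXAMPLE_GROUP, EXAMPLE_NTF):
		printf("handled notification 0x%04x\n", cmd);
		break;
	default:
		/* unknown commands are silently ignored, as in the driver */
		printf("unhandled cmd 0x%04x\n", cmd);
		break;
	}
}

int main(void)
{
	handle_notification(WIDE_ID(EXAMPLE_GROUP, EXAMPLE_NTF));
	return 0;
}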
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 1602b36..6afc7a7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -63,22 +63,37 @@
*
*****************************************************************************/
#include <linux/devcoredump.h>
-
-#include "fw-dbg.h"
+#include "iwl-drv.h"
+#include "runtime.h"
+#include "dbg.h"
#include "iwl-io.h"
-#include "mvm.h"
#include "iwl-prph.h"
#include "iwl-csr.h"
+/**
+ * struct iwl_fw_dump_ptrs - set of pointers needed for the fw-error-dump
+ *
+ * @fwrt_ptr: pointer to the buffer coming from fwrt
+ * @trans_ptr: pointer to struct %iwl_trans_dump_data which contains the
+ * transport's data.
+ * @fwrt_len: length of the valid data in fwrt_ptr
+ */
+struct iwl_fw_dump_ptrs {
+ struct iwl_trans_dump_data *trans_ptr;
+ void *fwrt_ptr;
+ u32 fwrt_len;
+};
+
#define RADIO_REG_MAX_READ 0x2ad
-static void iwl_mvm_read_radio_reg(struct iwl_mvm *mvm,
- struct iwl_fw_error_dump_data **dump_data)
+static void iwl_read_radio_regs(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_error_dump_data **dump_data)
{
u8 *pos = (void *)(*dump_data)->data;
unsigned long flags;
int i;
- if (!iwl_trans_grab_nic_access(mvm->trans, &flags))
+ if (!iwl_trans_grab_nic_access(fwrt->trans, &flags))
return;
(*dump_data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_RADIO_REG);
@@ -88,20 +103,20 @@ static void iwl_mvm_read_radio_reg(struct iwl_mvm *mvm,
u32 rd_cmd = RADIO_RSP_RD_CMD;
rd_cmd |= i << RADIO_RSP_ADDR_POS;
- iwl_write_prph_no_grab(mvm->trans, RSP_RADIO_CMD, rd_cmd);
- *pos = (u8)iwl_read_prph_no_grab(mvm->trans, RSP_RADIO_RDDAT);
+ iwl_write_prph_no_grab(fwrt->trans, RSP_RADIO_CMD, rd_cmd);
+ *pos = (u8)iwl_read_prph_no_grab(fwrt->trans, RSP_RADIO_RDDAT);
pos++;
}
*dump_data = iwl_fw_error_next_data(*dump_data);
- iwl_trans_release_nic_access(mvm->trans, &flags);
+ iwl_trans_release_nic_access(fwrt->trans, &flags);
}
-static void iwl_mvm_dump_rxf(struct iwl_mvm *mvm,
- struct iwl_fw_error_dump_data **dump_data,
- int size, u32 offset, int fifo_num)
+static void iwl_fwrt_dump_rxf(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_error_dump_data **dump_data,
+ int size, u32 offset, int fifo_num)
{
struct iwl_fw_error_dump_fifo *fifo_hdr;
u32 *fifo_data;
@@ -122,41 +137,41 @@ static void iwl_mvm_dump_rxf(struct iwl_mvm *mvm,
fifo_hdr->fifo_num = cpu_to_le32(fifo_num);
fifo_hdr->available_bytes =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
RXF_RD_D_SPACE + offset));
fifo_hdr->wr_ptr =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
RXF_RD_WR_PTR + offset));
fifo_hdr->rd_ptr =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
RXF_RD_RD_PTR + offset));
fifo_hdr->fence_ptr =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
RXF_RD_FENCE_PTR + offset));
fifo_hdr->fence_mode =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
RXF_SET_FENCE_MODE + offset));
/* Lock fence */
- iwl_trans_write_prph(mvm->trans, RXF_SET_FENCE_MODE + offset, 0x1);
+ iwl_trans_write_prph(fwrt->trans, RXF_SET_FENCE_MODE + offset, 0x1);
/* Set fence pointer to the same place as the WR pointer */
- iwl_trans_write_prph(mvm->trans, RXF_LD_WR2FENCE + offset, 0x1);
+ iwl_trans_write_prph(fwrt->trans, RXF_LD_WR2FENCE + offset, 0x1);
/* Set fence offset */
- iwl_trans_write_prph(mvm->trans,
+ iwl_trans_write_prph(fwrt->trans,
RXF_LD_FENCE_OFFSET_ADDR + offset, 0x0);
/* Read FIFO */
fifo_len /= sizeof(u32); /* Size in DWORDS */
for (i = 0; i < fifo_len; i++)
- fifo_data[i] = iwl_trans_read_prph(mvm->trans,
+ fifo_data[i] = iwl_trans_read_prph(fwrt->trans,
RXF_FIFO_RD_FENCE_INC +
offset);
*dump_data = iwl_fw_error_next_data(*dump_data);
}
-static void iwl_mvm_dump_txf(struct iwl_mvm *mvm,
- struct iwl_fw_error_dump_data **dump_data,
- int size, u32 offset, int fifo_num)
+static void iwl_fwrt_dump_txf(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_error_dump_data **dump_data,
+ int size, u32 offset, int fifo_num)
{
struct iwl_fw_error_dump_fifo *fifo_hdr;
u32 *fifo_data;
@@ -177,91 +192,91 @@ static void iwl_mvm_dump_txf(struct iwl_mvm *mvm,
fifo_hdr->fifo_num = cpu_to_le32(fifo_num);
fifo_hdr->available_bytes =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
TXF_FIFO_ITEM_CNT + offset));
fifo_hdr->wr_ptr =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
TXF_WR_PTR + offset));
fifo_hdr->rd_ptr =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
TXF_RD_PTR + offset));
fifo_hdr->fence_ptr =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
TXF_FENCE_PTR + offset));
fifo_hdr->fence_mode =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
TXF_LOCK_FENCE + offset));
/* Set the TXF_READ_MODIFY_ADDR to TXF_WR_PTR */
- iwl_trans_write_prph(mvm->trans, TXF_READ_MODIFY_ADDR + offset,
+ iwl_trans_write_prph(fwrt->trans, TXF_READ_MODIFY_ADDR + offset,
TXF_WR_PTR + offset);
/* Dummy-read to advance the read pointer to the head */
- iwl_trans_read_prph(mvm->trans, TXF_READ_MODIFY_DATA + offset);
+ iwl_trans_read_prph(fwrt->trans, TXF_READ_MODIFY_DATA + offset);
/* Read FIFO */
fifo_len /= sizeof(u32); /* Size in DWORDS */
for (i = 0; i < fifo_len; i++)
- fifo_data[i] = iwl_trans_read_prph(mvm->trans,
+ fifo_data[i] = iwl_trans_read_prph(fwrt->trans,
TXF_READ_MODIFY_DATA +
offset);
*dump_data = iwl_fw_error_next_data(*dump_data);
}
-static void iwl_mvm_dump_fifos(struct iwl_mvm *mvm,
- struct iwl_fw_error_dump_data **dump_data)
+static void iwl_fw_dump_fifos(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_error_dump_data **dump_data)
{
struct iwl_fw_error_dump_fifo *fifo_hdr;
- struct iwl_mvm_shared_mem_cfg *cfg = &mvm->smem_cfg;
+ struct iwl_fwrt_shared_mem_cfg *cfg = &fwrt->smem_cfg;
u32 *fifo_data;
u32 fifo_len;
unsigned long flags;
int i, j;
- if (!iwl_trans_grab_nic_access(mvm->trans, &flags))
+ if (!iwl_trans_grab_nic_access(fwrt->trans, &flags))
return;
/* Pull RXF1 */
- iwl_mvm_dump_rxf(mvm, dump_data, cfg->lmac[0].rxfifo1_size, 0, 0);
+ iwl_fwrt_dump_rxf(fwrt, dump_data, cfg->lmac[0].rxfifo1_size, 0, 0);
/* Pull RXF2 */
- iwl_mvm_dump_rxf(mvm, dump_data, cfg->rxfifo2_size,
- RXF_DIFF_FROM_PREV, 1);
+ iwl_fwrt_dump_rxf(fwrt, dump_data, cfg->rxfifo2_size,
+ RXF_DIFF_FROM_PREV, 1);
/* Pull LMAC2 RXF1 */
- if (mvm->smem_cfg.num_lmacs > 1)
- iwl_mvm_dump_rxf(mvm, dump_data, cfg->lmac[1].rxfifo1_size,
- LMAC2_PRPH_OFFSET, 2);
+ if (fwrt->smem_cfg.num_lmacs > 1)
+ iwl_fwrt_dump_rxf(fwrt, dump_data, cfg->lmac[1].rxfifo1_size,
+ LMAC2_PRPH_OFFSET, 2);
/* Pull TXF data from LMAC1 */
- for (i = 0; i < mvm->smem_cfg.num_txfifo_entries; i++) {
+ for (i = 0; i < fwrt->smem_cfg.num_txfifo_entries; i++) {
/* Mark the number of TXF we're pulling now */
- iwl_trans_write_prph(mvm->trans, TXF_LARC_NUM, i);
- iwl_mvm_dump_txf(mvm, dump_data, cfg->lmac[0].txfifo_size[i],
- 0, i);
+ iwl_trans_write_prph(fwrt->trans, TXF_LARC_NUM, i);
+ iwl_fwrt_dump_txf(fwrt, dump_data, cfg->lmac[0].txfifo_size[i],
+ 0, i);
}
/* Pull TXF data from LMAC2 */
- if (mvm->smem_cfg.num_lmacs > 1) {
- for (i = 0; i < mvm->smem_cfg.num_txfifo_entries; i++) {
+ if (fwrt->smem_cfg.num_lmacs > 1) {
+ for (i = 0; i < fwrt->smem_cfg.num_txfifo_entries; i++) {
/* Mark the number of TXF we're pulling now */
- iwl_trans_write_prph(mvm->trans,
+ iwl_trans_write_prph(fwrt->trans,
TXF_LARC_NUM + LMAC2_PRPH_OFFSET,
i);
- iwl_mvm_dump_txf(mvm, dump_data,
- cfg->lmac[1].txfifo_size[i],
- LMAC2_PRPH_OFFSET,
- i + cfg->num_txfifo_entries);
+ iwl_fwrt_dump_txf(fwrt, dump_data,
+ cfg->lmac[1].txfifo_size[i],
+ LMAC2_PRPH_OFFSET,
+ i + cfg->num_txfifo_entries);
}
}
- if (fw_has_capa(&mvm->fw->ucode_capa,
+ if (fw_has_capa(&fwrt->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) {
/* Pull UMAC internal TXF data from all TXFs */
for (i = 0;
- i < ARRAY_SIZE(mvm->smem_cfg.internal_txfifo_size);
+ i < ARRAY_SIZE(fwrt->smem_cfg.internal_txfifo_size);
i++) {
fifo_hdr = (void *)(*dump_data)->data;
fifo_data = (void *)fifo_hdr->data;
- fifo_len = mvm->smem_cfg.internal_txfifo_size[i];
+ fifo_len = fwrt->smem_cfg.internal_txfifo_size[i];
/* No need to try to read the data if the length is 0 */
if (fifo_len == 0)
@@ -276,52 +291,45 @@ static void iwl_mvm_dump_fifos(struct iwl_mvm *mvm,
fifo_hdr->fifo_num = cpu_to_le32(i);
/* Mark the number of TXF we're pulling now */
- iwl_trans_write_prph(mvm->trans, TXF_CPU2_NUM, i +
- mvm->smem_cfg.num_txfifo_entries);
+ iwl_trans_write_prph(fwrt->trans, TXF_CPU2_NUM, i +
+ fwrt->smem_cfg.num_txfifo_entries);
fifo_hdr->available_bytes =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
TXF_CPU2_FIFO_ITEM_CNT));
fifo_hdr->wr_ptr =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
TXF_CPU2_WR_PTR));
fifo_hdr->rd_ptr =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
TXF_CPU2_RD_PTR));
fifo_hdr->fence_ptr =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
TXF_CPU2_FENCE_PTR));
fifo_hdr->fence_mode =
- cpu_to_le32(iwl_trans_read_prph(mvm->trans,
+ cpu_to_le32(iwl_trans_read_prph(fwrt->trans,
TXF_CPU2_LOCK_FENCE));
/* Set TXF_CPU2_READ_MODIFY_ADDR to TXF_CPU2_WR_PTR */
- iwl_trans_write_prph(mvm->trans,
+ iwl_trans_write_prph(fwrt->trans,
TXF_CPU2_READ_MODIFY_ADDR,
TXF_CPU2_WR_PTR);
/* Dummy-read to advance the read pointer to head */
- iwl_trans_read_prph(mvm->trans,
+ iwl_trans_read_prph(fwrt->trans,
TXF_CPU2_READ_MODIFY_DATA);
/* Read FIFO */
fifo_len /= sizeof(u32); /* Size in DWORDS */
for (j = 0; j < fifo_len; j++)
fifo_data[j] =
- iwl_trans_read_prph(mvm->trans,
+ iwl_trans_read_prph(fwrt->trans,
TXF_CPU2_READ_MODIFY_DATA);
*dump_data = iwl_fw_error_next_data(*dump_data);
}
}
- iwl_trans_release_nic_access(mvm->trans, &flags);
-}
-
-void iwl_mvm_free_fw_dump_desc(struct iwl_mvm *mvm)
-{
- if (mvm->fw_dump_desc != &iwl_mvm_dump_desc_assert)
- kfree(mvm->fw_dump_desc);
- mvm->fw_dump_desc = NULL;
+ iwl_trans_release_nic_access(fwrt->trans, &flags);
}
#define IWL8260_ICCM_OFFSET 0x44000 /* Only for B-step */
@@ -531,37 +539,34 @@ static struct scatterlist *alloc_sgtable(int size)
return table;
}
-void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
+void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt)
{
struct iwl_fw_error_dump_file *dump_file;
struct iwl_fw_error_dump_data *dump_data;
struct iwl_fw_error_dump_info *dump_info;
struct iwl_fw_error_dump_mem *dump_mem;
+ struct iwl_fw_error_dump_smem_cfg *dump_smem_cfg;
struct iwl_fw_error_dump_trigger_desc *dump_trig;
- struct iwl_mvm_dump_ptrs *fw_error_dump;
+ struct iwl_fw_dump_ptrs *fw_error_dump;
struct scatterlist *sg_dump_data;
u32 sram_len, sram_ofs;
- const struct iwl_fw_dbg_mem_seg_tlv *fw_dbg_mem = mvm->fw->dbg_mem_tlv;
+ const struct iwl_fw_dbg_mem_seg_tlv *fw_dbg_mem = fwrt->fw->dbg_mem_tlv;
+ struct iwl_fwrt_shared_mem_cfg *mem_cfg = &fwrt->smem_cfg;
u32 file_len, fifo_data_len = 0, prph_len = 0, radio_len = 0;
- u32 smem_len = mvm->fw->n_dbg_mem_tlv ? 0 : mvm->cfg->smem_len;
- u32 sram2_len = mvm->fw->n_dbg_mem_tlv ? 0 : mvm->cfg->dccm2_len;
+ u32 smem_len = fwrt->fw->n_dbg_mem_tlv ? 0 : fwrt->trans->cfg->smem_len;
+ u32 sram2_len = fwrt->fw->n_dbg_mem_tlv ?
+ 0 : fwrt->trans->cfg->dccm2_len;
bool monitor_dump_only = false;
int i;
- if (!IWL_MVM_COLLECT_FW_ERR_DUMP &&
- !mvm->trans->dbg_dest_tlv)
- return;
-
- lockdep_assert_held(&mvm->mutex);
-
/* there's no point in fw dump if the bus is dead */
- if (test_bit(STATUS_TRANS_DEAD, &mvm->trans->status)) {
- IWL_ERR(mvm, "Skip fw error dump since bus is dead\n");
+ if (test_bit(STATUS_TRANS_DEAD, &fwrt->trans->status)) {
+ IWL_ERR(fwrt, "Skip fw error dump since bus is dead\n");
goto out;
}
- if (mvm->fw_dump_trig &&
- mvm->fw_dump_trig->mode & IWL_FW_DBG_TRIGGER_MONITOR_ONLY)
+ if (fwrt->dump.trig &&
+ fwrt->dump.trig->mode & IWL_FW_DBG_TRIGGER_MONITOR_ONLY)
monitor_dump_only = true;
fw_error_dump = kzalloc(sizeof(*fw_error_dump), GFP_KERNEL);
@@ -569,21 +574,19 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
goto out;
/* SRAM - include stack CCM if driver knows the values for it */
- if (!mvm->cfg->dccm_offset || !mvm->cfg->dccm_len) {
+ if (!fwrt->trans->cfg->dccm_offset || !fwrt->trans->cfg->dccm_len) {
const struct fw_img *img;
- img = &mvm->fw->img[mvm->cur_ucode];
+ img = &fwrt->fw->img[fwrt->cur_fw_img];
sram_ofs = img->sec[IWL_UCODE_SECTION_DATA].offset;
sram_len = img->sec[IWL_UCODE_SECTION_DATA].len;
} else {
- sram_ofs = mvm->cfg->dccm_offset;
- sram_len = mvm->cfg->dccm_len;
+ sram_ofs = fwrt->trans->cfg->dccm_offset;
+ sram_len = fwrt->trans->cfg->dccm_len;
}
/* reading RXF/TXF sizes */
- if (test_bit(STATUS_FW_ERROR, &mvm->trans->status)) {
- struct iwl_mvm_shared_mem_cfg *mem_cfg = &mvm->smem_cfg;
-
+ if (test_bit(STATUS_FW_ERROR, &fwrt->trans->status)) {
fifo_data_len = 0;
/* Count RXF2 size */
@@ -621,7 +624,7 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
}
}
- if (fw_has_capa(&mvm->fw->ucode_capa,
+ if (fw_has_capa(&fwrt->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) {
for (i = 0;
i < ARRAY_SIZE(mem_cfg->internal_txfifo_size);
@@ -638,7 +641,7 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
}
/* Make room for PRPH registers */
- if (!mvm->trans->cfg->gen2) {
+ if (!fwrt->trans->cfg->gen2) {
for (i = 0; i < ARRAY_SIZE(iwl_prph_dump_addr_comm);
i++) {
/* The range includes both boundaries */
@@ -652,7 +655,8 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
}
}
- if (!mvm->trans->cfg->gen2 && mvm->cfg->mq_rx_supported) {
+ if (!fwrt->trans->cfg->gen2 &&
+ fwrt->trans->cfg->mq_rx_supported) {
for (i = 0; i <
ARRAY_SIZE(iwl_prph_dump_addr_9000); i++) {
/* The range includes both boundaries */
@@ -666,12 +670,13 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
}
}
- if (mvm->cfg->device_family == IWL_DEVICE_FAMILY_7000)
+ if (fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_7000)
radio_len = sizeof(*dump_data) + RADIO_REG_MAX_READ;
}
file_len = sizeof(*dump_file) +
- sizeof(*dump_data) * 2 +
+ sizeof(*dump_data) * 3 +
+ sizeof(*dump_smem_cfg) +
fifo_data_len +
prph_len +
radio_len +
@@ -686,31 +691,31 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
file_len += sizeof(*dump_data) + sizeof(*dump_mem) + sram2_len;
/* Make room for MEM segments */
- for (i = 0; i < mvm->fw->n_dbg_mem_tlv; i++) {
+ for (i = 0; i < fwrt->fw->n_dbg_mem_tlv; i++) {
file_len += sizeof(*dump_data) + sizeof(*dump_mem) +
le32_to_cpu(fw_dbg_mem[i].len);
}
/* Make room for fw's virtual image pages, if it exists */
- if (!mvm->trans->cfg->gen2 &&
- mvm->fw->img[mvm->cur_ucode].paging_mem_size &&
- mvm->fw_paging_db[0].fw_paging_block)
- file_len += mvm->num_of_paging_blk *
+ if (!fwrt->trans->cfg->gen2 &&
+ fwrt->fw->img[fwrt->cur_fw_img].paging_mem_size &&
+ fwrt->fw_paging_db[0].fw_paging_block)
+ file_len += fwrt->num_of_paging_blk *
(sizeof(*dump_data) +
sizeof(struct iwl_fw_error_dump_paging) +
PAGING_BLOCK_SIZE);
/* If we only want a monitor dump, reset the file length */
if (monitor_dump_only) {
- file_len = sizeof(*dump_file) + sizeof(*dump_data) +
- sizeof(*dump_info);
+ file_len = sizeof(*dump_file) + sizeof(*dump_data) * 2 +
+ sizeof(*dump_info) + sizeof(*dump_smem_cfg);
}
- if (mvm->fw_dump_desc)
+ if (fwrt->dump.desc)
file_len += sizeof(*dump_data) + sizeof(*dump_trig) +
- mvm->fw_dump_desc->len;
+ fwrt->dump.desc->len;
- if (!mvm->fw->n_dbg_mem_tlv)
+ if (!fwrt->fw->n_dbg_mem_tlv)
file_len += sram_len + sizeof(*dump_mem);
dump_file = vzalloc(file_len);
@@ -719,7 +724,7 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
goto out;
}
- fw_error_dump->op_mode_ptr = dump_file;
+ fw_error_dump->fwrt_ptr = dump_file;
dump_file->barker = cpu_to_le32(IWL_FW_ERROR_DUMP_BARKER);
dump_data = (void *)dump_file->data;
@@ -728,32 +733,59 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
dump_data->len = cpu_to_le32(sizeof(*dump_info));
dump_info = (void *)dump_data->data;
dump_info->device_family =
- mvm->cfg->device_family == IWL_DEVICE_FAMILY_7000 ?
+ fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_7000 ?
cpu_to_le32(IWL_FW_ERROR_DUMP_FAMILY_7) :
cpu_to_le32(IWL_FW_ERROR_DUMP_FAMILY_8);
- dump_info->hw_step = cpu_to_le32(CSR_HW_REV_STEP(mvm->trans->hw_rev));
- memcpy(dump_info->fw_human_readable, mvm->fw->human_readable,
+ dump_info->hw_step = cpu_to_le32(CSR_HW_REV_STEP(fwrt->trans->hw_rev));
+ memcpy(dump_info->fw_human_readable, fwrt->fw->human_readable,
sizeof(dump_info->fw_human_readable));
- strncpy(dump_info->dev_human_readable, mvm->cfg->name,
+ strncpy(dump_info->dev_human_readable, fwrt->trans->cfg->name,
sizeof(dump_info->dev_human_readable));
- strncpy(dump_info->bus_human_readable, mvm->dev->bus->name,
+ strncpy(dump_info->bus_human_readable, fwrt->dev->bus->name,
sizeof(dump_info->bus_human_readable));
dump_data = iwl_fw_error_next_data(dump_data);
+
+ /* Dump shared memory configuration */
+ dump_data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_MEM_CFG);
+ dump_data->len = cpu_to_le32(sizeof(*dump_smem_cfg));
+ dump_smem_cfg = (void *)dump_data->data;
+ dump_smem_cfg->num_lmacs = cpu_to_le32(mem_cfg->num_lmacs);
+ dump_smem_cfg->num_txfifo_entries =
+ cpu_to_le32(mem_cfg->num_txfifo_entries);
+ for (i = 0; i < MAX_NUM_LMAC; i++) {
+ int j;
+
+ for (j = 0; j < TX_FIFO_MAX_NUM; j++)
+ dump_smem_cfg->lmac[i].txfifo_size[j] =
+ cpu_to_le32(mem_cfg->lmac[i].txfifo_size[j]);
+ dump_smem_cfg->lmac[i].rxfifo1_size =
+ cpu_to_le32(mem_cfg->lmac[i].rxfifo1_size);
+ }
+ dump_smem_cfg->rxfifo2_size = cpu_to_le32(mem_cfg->rxfifo2_size);
+ dump_smem_cfg->internal_txfifo_addr =
+ cpu_to_le32(mem_cfg->internal_txfifo_addr);
+ for (i = 0; i < TX_FIFO_INTERNAL_MAX_NUM; i++) {
+ dump_smem_cfg->internal_txfifo_size[i] =
+ cpu_to_le32(mem_cfg->internal_txfifo_size[i]);
+ }
+
+ dump_data = iwl_fw_error_next_data(dump_data);
+
/* We only dump the FIFOs if the FW is in error state */
- if (test_bit(STATUS_FW_ERROR, &mvm->trans->status)) {
- iwl_mvm_dump_fifos(mvm, &dump_data);
+ if (test_bit(STATUS_FW_ERROR, &fwrt->trans->status)) {
+ iwl_fw_dump_fifos(fwrt, &dump_data);
if (radio_len)
- iwl_mvm_read_radio_reg(mvm, &dump_data);
+ iwl_read_radio_regs(fwrt, &dump_data);
}
- if (mvm->fw_dump_desc) {
+ if (fwrt->dump.desc) {
dump_data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_ERROR_INFO);
dump_data->len = cpu_to_le32(sizeof(*dump_trig) +
- mvm->fw_dump_desc->len);
+ fwrt->dump.desc->len);
dump_trig = (void *)dump_data->data;
- memcpy(dump_trig, &mvm->fw_dump_desc->trig_desc,
- sizeof(*dump_trig) + mvm->fw_dump_desc->len);
+ memcpy(dump_trig, &fwrt->dump.desc->trig_desc,
+ sizeof(*dump_trig) + fwrt->dump.desc->len);
dump_data = iwl_fw_error_next_data(dump_data);
}
@@ -762,18 +794,18 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
if (monitor_dump_only)
goto dump_trans_data;
- if (!mvm->fw->n_dbg_mem_tlv) {
+ if (!fwrt->fw->n_dbg_mem_tlv) {
dump_data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_MEM);
dump_data->len = cpu_to_le32(sram_len + sizeof(*dump_mem));
dump_mem = (void *)dump_data->data;
dump_mem->type = cpu_to_le32(IWL_FW_ERROR_DUMP_MEM_SRAM);
dump_mem->offset = cpu_to_le32(sram_ofs);
- iwl_trans_read_mem_bytes(mvm->trans, sram_ofs, dump_mem->data,
+ iwl_trans_read_mem_bytes(fwrt->trans, sram_ofs, dump_mem->data,
sram_len);
dump_data = iwl_fw_error_next_data(dump_data);
}
- for (i = 0; i < mvm->fw->n_dbg_mem_tlv; i++) {
+ for (i = 0; i < fwrt->fw->n_dbg_mem_tlv; i++) {
u32 len = le32_to_cpu(fw_dbg_mem[i].len);
u32 ofs = le32_to_cpu(fw_dbg_mem[i].ofs);
bool success;
@@ -786,13 +818,13 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
switch (dump_mem->type & cpu_to_le32(FW_DBG_MEM_TYPE_MASK)) {
case cpu_to_le32(FW_DBG_MEM_TYPE_REGULAR):
- iwl_trans_read_mem_bytes(mvm->trans, ofs,
+ iwl_trans_read_mem_bytes(fwrt->trans, ofs,
dump_mem->data,
len);
success = true;
break;
case cpu_to_le32(FW_DBG_MEM_TYPE_PRPH):
- success = iwl_read_prph_block(mvm->trans, ofs, len,
+ success = iwl_read_prph_block(fwrt->trans, ofs, len,
(void *)dump_mem->data);
break;
default:
@@ -813,8 +845,9 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
dump_data->len = cpu_to_le32(smem_len + sizeof(*dump_mem));
dump_mem = (void *)dump_data->data;
dump_mem->type = cpu_to_le32(IWL_FW_ERROR_DUMP_MEM_SMEM);
- dump_mem->offset = cpu_to_le32(mvm->cfg->smem_offset);
- iwl_trans_read_mem_bytes(mvm->trans, mvm->cfg->smem_offset,
+ dump_mem->offset = cpu_to_le32(fwrt->trans->cfg->smem_offset);
+ iwl_trans_read_mem_bytes(fwrt->trans,
+ fwrt->trans->cfg->smem_offset,
dump_mem->data, smem_len);
dump_data = iwl_fw_error_next_data(dump_data);
}
@@ -824,28 +857,29 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
dump_data->len = cpu_to_le32(sram2_len + sizeof(*dump_mem));
dump_mem = (void *)dump_data->data;
dump_mem->type = cpu_to_le32(IWL_FW_ERROR_DUMP_MEM_SRAM);
- dump_mem->offset = cpu_to_le32(mvm->cfg->dccm2_offset);
- iwl_trans_read_mem_bytes(mvm->trans, mvm->cfg->dccm2_offset,
+ dump_mem->offset = cpu_to_le32(fwrt->trans->cfg->dccm2_offset);
+ iwl_trans_read_mem_bytes(fwrt->trans,
+ fwrt->trans->cfg->dccm2_offset,
dump_mem->data, sram2_len);
dump_data = iwl_fw_error_next_data(dump_data);
}
/* Dump fw's virtual image */
- if (!mvm->trans->cfg->gen2 &&
- mvm->fw->img[mvm->cur_ucode].paging_mem_size &&
- mvm->fw_paging_db[0].fw_paging_block) {
- for (i = 1; i < mvm->num_of_paging_blk + 1; i++) {
+ if (!fwrt->trans->cfg->gen2 &&
+ fwrt->fw->img[fwrt->cur_fw_img].paging_mem_size &&
+ fwrt->fw_paging_db[0].fw_paging_block) {
+ for (i = 1; i < fwrt->num_of_paging_blk + 1; i++) {
struct iwl_fw_error_dump_paging *paging;
struct page *pages =
- mvm->fw_paging_db[i].fw_paging_block;
- dma_addr_t addr = mvm->fw_paging_db[i].fw_paging_phys;
+ fwrt->fw_paging_db[i].fw_paging_block;
+ dma_addr_t addr = fwrt->fw_paging_db[i].fw_paging_phys;
dump_data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_PAGING);
dump_data->len = cpu_to_le32(sizeof(*paging) +
PAGING_BLOCK_SIZE);
paging = (void *)dump_data->data;
paging->index = cpu_to_le32(i);
- dma_sync_single_for_cpu(mvm->trans->dev, addr,
+ dma_sync_single_for_cpu(fwrt->trans->dev, addr,
PAGING_BLOCK_SIZE,
DMA_BIDIRECTIONAL);
memcpy(paging->data, page_address(pages),
@@ -855,20 +889,20 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm)
}
if (prph_len) {
- iwl_dump_prph(mvm->trans, &dump_data,
+ iwl_dump_prph(fwrt->trans, &dump_data,
iwl_prph_dump_addr_comm,
ARRAY_SIZE(iwl_prph_dump_addr_comm));
- if (mvm->cfg->mq_rx_supported)
- iwl_dump_prph(mvm->trans, &dump_data,
+ if (fwrt->trans->cfg->mq_rx_supported)
+ iwl_dump_prph(fwrt->trans, &dump_data,
iwl_prph_dump_addr_9000,
ARRAY_SIZE(iwl_prph_dump_addr_9000));
}
dump_trans_data:
- fw_error_dump->trans_ptr = iwl_trans_dump_data(mvm->trans,
- mvm->fw_dump_trig);
- fw_error_dump->op_mode_len = file_len;
+ fw_error_dump->trans_ptr = iwl_trans_dump_data(fwrt->trans,
+ fwrt->dump.trig);
+ fw_error_dump->fwrt_len = file_len;
if (fw_error_dump->trans_ptr)
file_len += fw_error_dump->trans_ptr->len;
dump_file->file_len = cpu_to_le32(file_len);
@@ -877,68 +911,72 @@ dump_trans_data:
if (sg_dump_data) {
sg_pcopy_from_buffer(sg_dump_data,
sg_nents(sg_dump_data),
- fw_error_dump->op_mode_ptr,
- fw_error_dump->op_mode_len, 0);
+ fw_error_dump->fwrt_ptr,
+ fw_error_dump->fwrt_len, 0);
if (fw_error_dump->trans_ptr)
sg_pcopy_from_buffer(sg_dump_data,
sg_nents(sg_dump_data),
fw_error_dump->trans_ptr->data,
fw_error_dump->trans_ptr->len,
- fw_error_dump->op_mode_len);
- dev_coredumpsg(mvm->trans->dev, sg_dump_data, file_len,
+ fw_error_dump->fwrt_len);
+ dev_coredumpsg(fwrt->trans->dev, sg_dump_data, file_len,
GFP_KERNEL);
}
- vfree(fw_error_dump->op_mode_ptr);
+ vfree(fw_error_dump->fwrt_ptr);
vfree(fw_error_dump->trans_ptr);
kfree(fw_error_dump);
out:
- iwl_mvm_free_fw_dump_desc(mvm);
- mvm->fw_dump_trig = NULL;
- clear_bit(IWL_MVM_STATUS_DUMPING_FW_LOG, &mvm->status);
+ iwl_fw_free_dump_desc(fwrt);
+ fwrt->dump.trig = NULL;
+ clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status);
}
+IWL_EXPORT_SYMBOL(iwl_fw_error_dump);
-const struct iwl_mvm_dump_desc iwl_mvm_dump_desc_assert = {
+const struct iwl_fw_dump_desc iwl_dump_desc_assert = {
.trig_desc = {
.type = cpu_to_le32(FW_DBG_TRIGGER_FW_ASSERT),
},
};
+IWL_EXPORT_SYMBOL(iwl_dump_desc_assert);
-int iwl_mvm_fw_dbg_collect_desc(struct iwl_mvm *mvm,
- const struct iwl_mvm_dump_desc *desc,
- const struct iwl_fw_dbg_trigger_tlv *trigger)
+int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt,
+ const struct iwl_fw_dump_desc *desc,
+ const struct iwl_fw_dbg_trigger_tlv *trigger)
{
unsigned int delay = 0;
if (trigger)
delay = msecs_to_jiffies(le32_to_cpu(trigger->stop_delay));
- if (WARN(mvm->trans->state == IWL_TRANS_NO_FW,
+ if (WARN(fwrt->trans->state == IWL_TRANS_NO_FW,
"Can't collect dbg data when FW isn't alive\n"))
return -EIO;
- if (test_and_set_bit(IWL_MVM_STATUS_DUMPING_FW_LOG, &mvm->status))
+ if (test_and_set_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status))
return -EBUSY;
- if (WARN_ON(mvm->fw_dump_desc))
- iwl_mvm_free_fw_dump_desc(mvm);
+ if (WARN_ON(fwrt->dump.desc))
+ iwl_fw_free_dump_desc(fwrt);
- IWL_WARN(mvm, "Collecting data: trigger %d fired.\n",
+ IWL_WARN(fwrt, "Collecting data: trigger %d fired.\n",
le32_to_cpu(desc->trig_desc.type));
- mvm->fw_dump_desc = desc;
- mvm->fw_dump_trig = trigger;
+ fwrt->dump.desc = desc;
+ fwrt->dump.trig = trigger;
- schedule_delayed_work(&mvm->fw_dump_wk, delay);
+ schedule_delayed_work(&fwrt->dump.wk, delay);
return 0;
}
+IWL_EXPORT_SYMBOL(iwl_fw_dbg_collect_desc);
-int iwl_mvm_fw_dbg_collect(struct iwl_mvm *mvm, enum iwl_fw_dbg_trigger trig,
- const char *str, size_t len,
- const struct iwl_fw_dbg_trigger_tlv *trigger)
+int iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt,
+ enum iwl_fw_dbg_trigger trig,
+ const char *str, size_t len,
+ const struct iwl_fw_dbg_trigger_tlv *trigger)
{
- struct iwl_mvm_dump_desc *desc;
+ struct iwl_fw_dump_desc *desc;
desc = kzalloc(sizeof(*desc) + len, GFP_ATOMIC);
if (!desc)
@@ -948,12 +986,13 @@ int iwl_mvm_fw_dbg_collect(struct iwl_mvm *mvm, enum iwl_fw_dbg_trigger trig,
desc->trig_desc.type = cpu_to_le32(trig);
memcpy(desc->trig_desc.data, str, len);
- return iwl_mvm_fw_dbg_collect_desc(mvm, desc, trigger);
+ return iwl_fw_dbg_collect_desc(fwrt, desc, trigger);
}
+IWL_EXPORT_SYMBOL(iwl_fw_dbg_collect);
-int iwl_mvm_fw_dbg_collect_trig(struct iwl_mvm *mvm,
- struct iwl_fw_dbg_trigger_tlv *trigger,
- const char *fmt, ...)
+int iwl_fw_dbg_collect_trig(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_dbg_trigger_tlv *trigger,
+ const char *fmt, ...)
{
u16 occurrences = le16_to_cpu(trigger->occurrences);
int ret, len = 0;
@@ -978,8 +1017,8 @@ int iwl_mvm_fw_dbg_collect_trig(struct iwl_mvm *mvm,
len = strlen(buf) + 1;
}
- ret = iwl_mvm_fw_dbg_collect(mvm, le32_to_cpu(trigger->id), buf, len,
- trigger);
+ ret = iwl_fw_dbg_collect(fwrt, le32_to_cpu(trigger->id), buf, len,
+ trigger);
if (ret)
return ret;
@@ -987,37 +1026,42 @@ int iwl_mvm_fw_dbg_collect_trig(struct iwl_mvm *mvm,
trigger->occurrences = cpu_to_le16(occurrences - 1);
return 0;
}
+IWL_EXPORT_SYMBOL(iwl_fw_dbg_collect_trig);
-int iwl_mvm_start_fw_dbg_conf(struct iwl_mvm *mvm, u8 conf_id)
+int iwl_fw_start_dbg_conf(struct iwl_fw_runtime *fwrt, u8 conf_id)
{
u8 *ptr;
int ret;
int i;
- if (WARN_ONCE(conf_id >= ARRAY_SIZE(mvm->fw->dbg_conf_tlv),
+ if (WARN_ONCE(conf_id >= ARRAY_SIZE(fwrt->fw->dbg_conf_tlv),
"Invalid configuration %d\n", conf_id))
return -EINVAL;
/* EARLY START - firmware's configuration is hard coded */
- if ((!mvm->fw->dbg_conf_tlv[conf_id] ||
- !mvm->fw->dbg_conf_tlv[conf_id]->num_of_hcmds) &&
+ if ((!fwrt->fw->dbg_conf_tlv[conf_id] ||
+ !fwrt->fw->dbg_conf_tlv[conf_id]->num_of_hcmds) &&
conf_id == FW_DBG_START_FROM_ALIVE)
return 0;
- if (!mvm->fw->dbg_conf_tlv[conf_id])
+ if (!fwrt->fw->dbg_conf_tlv[conf_id])
return -EINVAL;
- if (mvm->fw_dbg_conf != FW_DBG_INVALID)
- IWL_WARN(mvm, "FW already configured (%d) - re-configuring\n",
- mvm->fw_dbg_conf);
+ if (fwrt->dump.conf != FW_DBG_INVALID)
+ IWL_WARN(fwrt, "FW already configured (%d) - re-configuring\n",
+ fwrt->dump.conf);
/* Send all HCMDs for configuring the FW debug */
- ptr = (void *)&mvm->fw->dbg_conf_tlv[conf_id]->hcmd;
- for (i = 0; i < mvm->fw->dbg_conf_tlv[conf_id]->num_of_hcmds; i++) {
+ ptr = (void *)&fwrt->fw->dbg_conf_tlv[conf_id]->hcmd;
+ for (i = 0; i < fwrt->fw->dbg_conf_tlv[conf_id]->num_of_hcmds; i++) {
struct iwl_fw_dbg_conf_hcmd *cmd = (void *)ptr;
+ struct iwl_host_cmd hcmd = {
+ .id = cmd->id,
+ .len = { le16_to_cpu(cmd->len), },
+ .data = { cmd->data, },
+ };
- ret = iwl_mvm_send_cmd_pdu(mvm, cmd->id, 0,
- le16_to_cpu(cmd->len), cmd->data);
+ ret = iwl_trans_send_cmd(fwrt->trans, &hcmd);
if (ret)
return ret;
@@ -1025,7 +1069,59 @@ int iwl_mvm_start_fw_dbg_conf(struct iwl_mvm *mvm, u8 conf_id)
ptr += le16_to_cpu(cmd->len);
}
- mvm->fw_dbg_conf = conf_id;
+ fwrt->dump.conf = conf_id;
return 0;
}
+IWL_EXPORT_SYMBOL(iwl_fw_start_dbg_conf);
+
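(A usage sketch, not taken from this patch: an op mode would typically apply the
hard-coded early-start configuration once the firmware reports ALIVE.)

	ret = iwl_fw_start_dbg_conf(fwrt, FW_DBG_START_FROM_ALIVE);
	if (ret)
		IWL_ERR(fwrt, "Failed to start dbg conf: %d\n", ret);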
+void iwl_fw_error_dump_wk(struct work_struct *work)
+{
+ struct iwl_fw_runtime *fwrt =
+ container_of(work, struct iwl_fw_runtime, dump.wk.work);
+
+ if (fwrt->ops && fwrt->ops->dump_start &&
+ fwrt->ops->dump_start(fwrt->ops_ctx))
+ return;
+
+ if (fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) {
+ /* stop recording */
+ iwl_set_bits_prph(fwrt->trans, MON_BUFF_SAMPLE_CTL, 0x100);
+
+ iwl_fw_error_dump(fwrt);
+
+ /* start recording again if the firmware is not crashed */
+ if (!test_bit(STATUS_FW_ERROR, &fwrt->trans->status) &&
+ fwrt->fw->dbg_dest_tlv) {
+ iwl_clear_bits_prph(fwrt->trans,
+ MON_BUFF_SAMPLE_CTL, 0x100);
+ iwl_clear_bits_prph(fwrt->trans,
+ MON_BUFF_SAMPLE_CTL, 0x1);
+ iwl_set_bits_prph(fwrt->trans,
+ MON_BUFF_SAMPLE_CTL, 0x1);
+ }
+ } else {
+ u32 in_sample = iwl_read_prph(fwrt->trans, DBGC_IN_SAMPLE);
+ u32 out_ctrl = iwl_read_prph(fwrt->trans, DBGC_OUT_CTRL);
+
+ /* stop recording */
+ iwl_write_prph(fwrt->trans, DBGC_IN_SAMPLE, 0);
+ udelay(100);
+ iwl_write_prph(fwrt->trans, DBGC_OUT_CTRL, 0);
+ /* wait until the DBGC has stopped before collecting the data */
+ udelay(500);
+
+ iwl_fw_error_dump(fwrt);
+
+ /* start recording again if the firmware is not crashed */
+ if (!test_bit(STATUS_FW_ERROR, &fwrt->trans->status) &&
+ fwrt->fw->dbg_dest_tlv) {
+ iwl_write_prph(fwrt->trans, DBGC_IN_SAMPLE, in_sample);
+ iwl_write_prph(fwrt->trans, DBGC_OUT_CTRL, out_ctrl);
+ }
+ }
+
+ if (fwrt->ops && fwrt->ops->dump_end)
+ fwrt->ops->dump_end(fwrt->ops_ctx);
+}
+
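(A minimal sketch of how an op mode can back the new dump_start/dump_end hooks;
the "iwl_xxx" container and its mutex are hypothetical.)

static int iwl_xxx_dump_start(void *ctx)
{
	struct iwl_xxx *xxx = ctx;

	/* a non-zero return makes iwl_fw_error_dump_wk() bail out early */
	return mutex_lock_interruptible(&xxx->mutex);
}

static void iwl_xxx_dump_end(void *ctx)
{
	struct iwl_xxx *xxx = ctx;

	mutex_unlock(&xxx->mutex);
}

static const struct iwl_fw_runtime_ops iwl_xxx_fwrt_ops = {
	.dump_start = iwl_xxx_dump_start,
	.dump_end = iwl_xxx_dump_end,
};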
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
index 4a5287a..0f810ea 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
@@ -7,7 +7,7 @@
*
* Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
- * Copyright(c) 2015 Intel Deutschland GmbH
+ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -32,7 +32,7 @@
*
* Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
- * Copyright(c) 2015 Intel Deutschland GmbH
+ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -63,24 +63,46 @@
*
*****************************************************************************/
-#ifndef __mvm_fw_dbg_h__
-#define __mvm_fw_dbg_h__
-#include "fw/file.h"
-#include "fw/error-dump.h"
-#include "mvm.h"
-
-void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm);
-void iwl_mvm_free_fw_dump_desc(struct iwl_mvm *mvm);
-int iwl_mvm_fw_dbg_collect_desc(struct iwl_mvm *mvm,
- const struct iwl_mvm_dump_desc *desc,
- const struct iwl_fw_dbg_trigger_tlv *trigger);
-int iwl_mvm_fw_dbg_collect(struct iwl_mvm *mvm, enum iwl_fw_dbg_trigger trig,
- const char *str, size_t len,
- const struct iwl_fw_dbg_trigger_tlv *trigger);
-int iwl_mvm_fw_dbg_collect_trig(struct iwl_mvm *mvm,
- struct iwl_fw_dbg_trigger_tlv *trigger,
- const char *fmt, ...) __printf(3, 4);
-int iwl_mvm_start_fw_dbg_conf(struct iwl_mvm *mvm, u8 id);
+#ifndef __iwl_fw_dbg_h__
+#define __iwl_fw_dbg_h__
+#include <linux/workqueue.h>
+#include <net/cfg80211.h>
+#include "runtime.h"
+#include "file.h"
+#include "error-dump.h"
+
+/**
+ * struct iwl_fw_dump_desc - describes the dump
+ * @len: length of trig_desc->data
+ * @trig_desc: the description of the dump
+ */
+struct iwl_fw_dump_desc {
+ size_t len;
+ /* must be last */
+ struct iwl_fw_error_dump_trigger_desc trig_desc;
+};
+
+extern const struct iwl_fw_dump_desc iwl_dump_desc_assert;
+
+static inline void iwl_fw_free_dump_desc(struct iwl_fw_runtime *fwrt)
+{
+ if (fwrt->dump.desc != &iwl_dump_desc_assert)
+ kfree(fwrt->dump.desc);
+ fwrt->dump.desc = NULL;
+}
+
+void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt);
+int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt,
+ const struct iwl_fw_dump_desc *desc,
+ const struct iwl_fw_dbg_trigger_tlv *trigger);
+int iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt,
+ enum iwl_fw_dbg_trigger trig,
+ const char *str, size_t len,
+ const struct iwl_fw_dbg_trigger_tlv *trigger);
+int iwl_fw_dbg_collect_trig(struct iwl_fw_runtime *fwrt,
+ struct iwl_fw_dbg_trigger_tlv *trigger,
+ const char *fmt, ...) __printf(3, 4);
+int iwl_fw_start_dbg_conf(struct iwl_fw_runtime *fwrt, u8 id);
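(A hedged usage sketch of the relocated collect API; the wrapper name is
illustrative and FW_DBG_TRIGGER_USER is the pre-existing user trigger id.)

static void iwl_xxx_user_dump(struct iwl_fw_runtime *fwrt)
{
	static const char reason[] = "user requested dump";

	/* a NULL trigger schedules the dump without a stop delay */
	iwl_fw_dbg_collect(fwrt, FW_DBG_TRIGGER_USER, reason,
			   sizeof(reason), NULL);
}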
#define iwl_fw_dbg_trigger_enabled(fw, id) ({ \
void *__dbg_trigger = (fw)->dbg_trigger_tlv[(id)]; \
@@ -101,25 +123,25 @@ _iwl_fw_dbg_get_trigger(const struct iwl_fw *fw, enum iwl_fw_dbg_trigger id)
static inline bool
iwl_fw_dbg_trigger_vif_match(struct iwl_fw_dbg_trigger_tlv *trig,
- struct ieee80211_vif *vif)
+ struct wireless_dev *wdev)
{
u32 trig_vif = le32_to_cpu(trig->vif_type);
return trig_vif == IWL_FW_DBG_CONF_VIF_ANY ||
- ieee80211_vif_type_p2p(vif) == trig_vif;
+ wdev->iftype == trig_vif;
}
static inline bool
-iwl_fw_dbg_trigger_stop_conf_match(struct iwl_mvm *mvm,
+iwl_fw_dbg_trigger_stop_conf_match(struct iwl_fw_runtime *fwrt,
struct iwl_fw_dbg_trigger_tlv *trig)
{
return ((trig->mode & IWL_FW_DBG_TRIGGER_STOP) &&
- (mvm->fw_dbg_conf == FW_DBG_INVALID ||
- (BIT(mvm->fw_dbg_conf) & le32_to_cpu(trig->stop_conf_ids))));
+ (fwrt->dump.conf == FW_DBG_INVALID ||
+ (BIT(fwrt->dump.conf) & le32_to_cpu(trig->stop_conf_ids))));
}
static inline bool
-iwl_fw_dbg_no_trig_window(struct iwl_mvm *mvm,
+iwl_fw_dbg_no_trig_window(struct iwl_fw_runtime *fwrt,
struct iwl_fw_dbg_trigger_tlv *trig)
{
unsigned long wind_jiff =
@@ -127,49 +149,66 @@ iwl_fw_dbg_no_trig_window(struct iwl_mvm *mvm,
u32 id = le32_to_cpu(trig->id);
/* If this is the first event checked, jump to update start ts */
- if (mvm->fw_dbg_non_collect_ts_start[id] &&
- (time_after(mvm->fw_dbg_non_collect_ts_start[id] + wind_jiff,
+ if (fwrt->dump.non_collect_ts_start[id] &&
+ (time_after(fwrt->dump.non_collect_ts_start[id] + wind_jiff,
jiffies)))
return true;
- mvm->fw_dbg_non_collect_ts_start[id] = jiffies;
+ fwrt->dump.non_collect_ts_start[id] = jiffies;
return false;
}
static inline bool
-iwl_fw_dbg_trigger_check_stop(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif,
+iwl_fw_dbg_trigger_check_stop(struct iwl_fw_runtime *fwrt,
+ struct wireless_dev *wdev,
struct iwl_fw_dbg_trigger_tlv *trig)
{
- if (vif && !iwl_fw_dbg_trigger_vif_match(trig, vif))
+ if (wdev && !iwl_fw_dbg_trigger_vif_match(trig, wdev))
return false;
- if (iwl_fw_dbg_no_trig_window(mvm, trig)) {
- IWL_WARN(mvm, "Trigger %d occurred while no-collect window.\n",
+ if (iwl_fw_dbg_no_trig_window(fwrt, trig)) {
+ IWL_WARN(fwrt, "Trigger %d occurred while in no-collect window.\n",
trig->id);
return false;
}
- return iwl_fw_dbg_trigger_stop_conf_match(mvm, trig);
+ return iwl_fw_dbg_trigger_stop_conf_match(fwrt, trig);
}
static inline void
-_iwl_fw_dbg_trigger_simple_stop(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif,
+_iwl_fw_dbg_trigger_simple_stop(struct iwl_fw_runtime *fwrt,
+ struct wireless_dev *wdev,
struct iwl_fw_dbg_trigger_tlv *trigger)
{
if (!trigger)
return;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, vif, trigger))
+ if (!iwl_fw_dbg_trigger_check_stop(fwrt, wdev, trigger))
return;
- iwl_mvm_fw_dbg_collect_trig(mvm, trigger, NULL);
+ iwl_fw_dbg_collect_trig(fwrt, trigger, NULL);
}
-#define iwl_fw_dbg_trigger_simple_stop(mvm, vif, trig) \
- _iwl_fw_dbg_trigger_simple_stop((mvm), (vif), \
- iwl_fw_dbg_get_trigger((mvm)->fw,\
+#define iwl_fw_dbg_trigger_simple_stop(fwrt, wdev, trig) \
+ _iwl_fw_dbg_trigger_simple_stop((fwrt), (wdev), \
+ iwl_fw_dbg_get_trigger((fwrt)->fw,\
(trig)))
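(Usage sketch: with the parameter generalized to a wireless_dev, mac80211-based
callers are expected to convert their vif first, e.g.:)

	iwl_fw_dbg_trigger_simple_stop(fwrt, ieee80211_vif_to_wdev(vif),
				       FW_DBG_TRIGGER_CHANNEL_SWITCH);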
-#endif /* __mvm_fw_dbg_h__ */
+static inline void iwl_fw_dump_conf_clear(struct iwl_fw_runtime *fwrt)
+{
+ fwrt->dump.conf = FW_DBG_INVALID;
+}
+
+void iwl_fw_error_dump_wk(struct work_struct *work);
+
+static inline void iwl_fw_flush_dump(struct iwl_fw_runtime *fwrt)
+{
+ flush_delayed_work(&fwrt->dump.wk);
+}
+
+static inline void iwl_fw_cancel_dump(struct iwl_fw_runtime *fwrt)
+{
+ cancel_delayed_work_sync(&fwrt->dump.wk);
+}
+
+#endif /* __iwl_fw_dbg_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
index cfebde6..ed7beca 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
@@ -7,6 +7,7 @@
*
* Copyright(c) 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2014 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -33,6 +34,7 @@
*
* Copyright(c) 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2014 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -92,6 +94,9 @@
* @IWL_FW_ERROR_DUMP_EXTERNAL: used only by external code utilities, and
* for that reason is not in use in any other place in the Linux Wi-Fi
* stack.
+ * @IWL_FW_ERROR_DUMP_MEM_CFG: the addresses and sizes of fifos in the smem,
+ * which we get from the fw after ALIVE. The content is structured as
+ * &struct iwl_fw_error_dump_smem_cfg.
*/
enum iwl_fw_error_dump_type {
/* 0 is deprecated */
@@ -110,6 +115,7 @@ enum iwl_fw_error_dump_type {
IWL_FW_ERROR_DUMP_RADIO_REG = 13,
IWL_FW_ERROR_DUMP_INTERNAL_TXF = 14,
IWL_FW_ERROR_DUMP_EXTERNAL = 15, /* Do not move */
+ IWL_FW_ERROR_DUMP_MEM_CFG = 16,
IWL_FW_ERROR_DUMP_MAX,
};
@@ -208,6 +214,30 @@ struct iwl_fw_error_dump_fw_mon {
u8 data[];
} __packed;
+#define MAX_NUM_LMAC 2
+#define TX_FIFO_INTERNAL_MAX_NUM 6
+#define TX_FIFO_MAX_NUM 15
+/**
+ * struct iwl_fw_error_dump_smem_cfg - Dump SMEM configuration
+ * The layout must stay in sync with &struct iwl_fwrt_shared_mem_cfg.
+ * @num_lmacs: number of lmacs
+ * @num_txfifo_entries: number of tx fifos
+ * @lmac: per-lmac tx fifo and rxfifo1 sizes
+ * @rxfifo2_size: size of rxfifo2
+ * @internal_txfifo_addr: address of the internal tx fifo
+ * @internal_txfifo_size: sizes of the internal tx fifos
+ */
+struct iwl_fw_error_dump_smem_cfg {
+ __le32 num_lmacs;
+ __le32 num_txfifo_entries;
+ struct {
+ __le32 txfifo_size[TX_FIFO_MAX_NUM];
+ __le32 rxfifo1_size;
+ } lmac[MAX_NUM_LMAC];
+ __le32 rxfifo2_size;
+ __le32 internal_txfifo_addr;
+ __le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM];
+} __packed;
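(A consumer-side sketch, using a hypothetical helper, of reading the new chunk
back out of a dump file:)

static void iwl_xxx_print_smem_cfg(struct iwl_fw_error_dump_data *dump_data)
{
	struct iwl_fw_error_dump_smem_cfg *cfg = (void *)dump_data->data;
	u32 i, num_lmacs;

	if (dump_data->type != cpu_to_le32(IWL_FW_ERROR_DUMP_MEM_CFG))
		return;

	num_lmacs = le32_to_cpu(cfg->num_lmacs);
	for (i = 0; i < num_lmacs && i < MAX_NUM_LMAC; i++)
		pr_info("lmac %u: rxfifo1 size %u\n", i,
			le32_to_cpu(cfg->lmac[i].rxfifo1_size));
}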
/**
* struct iwl_fw_error_dump_prph - periphery registers data
* @prph_start: address of the first register in this chunk
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index c73a643..887f6d8 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -246,6 +246,8 @@ typedef unsigned int __bitwise iwl_ucode_tlv_api_t;
* @IWL_UCODE_TLV_API_STA_TYPE: This ucode supports station type assignment.
* @IWL_UCODE_TLV_API_NAN2_VER2: This ucode supports NAN API version 2
* @IWL_UCODE_TLV_API_NEW_RX_STATS: should new RX STATISTICS API be used
+ * @IWL_UCODE_TLV_API_COEX_ATS_EXTERNAL: the coex notification is enlarged to
+ * include information about ACL time sharing.
*
* @NUM_IWL_UCODE_TLV_API: number of bits used
*/
@@ -260,7 +262,9 @@ enum iwl_ucode_tlv_api {
IWL_UCODE_TLV_API_STA_TYPE = (__force iwl_ucode_tlv_api_t)30,
IWL_UCODE_TLV_API_NAN2_VER2 = (__force iwl_ucode_tlv_api_t)31,
/* API Set 1 */
+ IWL_UCODE_TLV_API_NEW_BEACON_TEMPLATE = (__force iwl_ucode_tlv_api_t)34,
IWL_UCODE_TLV_API_NEW_RX_STATS = (__force iwl_ucode_tlv_api_t)35,
+ IWL_UCODE_TLV_API_COEX_ATS_EXTERNAL = (__force iwl_ucode_tlv_api_t)37,
NUM_IWL_UCODE_TLV_API
#ifdef __CHECKER__
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/init.c b/drivers/net/wireless/intel/iwlwifi/fw/init.c
new file mode 100644
index 0000000..bfe5316
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/init.c
@@ -0,0 +1,75 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#include "iwl-drv.h"
+#include "runtime.h"
+#include "dbg.h"
+
+void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans,
+ const struct iwl_fw *fw,
+ const struct iwl_fw_runtime_ops *ops, void *ops_ctx)
+{
+ memset(fwrt, 0, sizeof(*fwrt));
+ fwrt->trans = trans;
+ fwrt->fw = fw;
+ fwrt->dev = trans->dev;
+ fwrt->dump.conf = FW_DBG_INVALID;
+ fwrt->ops = ops;
+ fwrt->ops_ctx = ops_ctx;
+ INIT_DELAYED_WORK(&fwrt->dump.wk, iwl_fw_error_dump_wk);
+}
+IWL_EXPORT_SYMBOL(iwl_fw_runtime_init);
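(A sketch of the intended embedding; the "iwl_xxx" container and the ops table
are illustrative.)

struct iwl_xxx {
	struct iwl_fw_runtime fwrt;
	/* op mode private data follows */
};

static void iwl_xxx_start(struct iwl_xxx *xxx, struct iwl_trans *trans,
			  const struct iwl_fw *fw)
{
	iwl_fw_runtime_init(&xxx->fwrt, trans, fw, &iwl_xxx_fwrt_ops, xxx);
	iwl_fw_set_current_image(&xxx->fwrt, IWL_UCODE_REGULAR);
}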
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/notif-wait.c b/drivers/net/wireless/intel/iwlwifi/fw/notif-wait.c
index 29bb92e..1096c94 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/notif-wait.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/notif-wait.c
@@ -6,7 +6,7 @@
* GPL LICENSE SUMMARY
*
* Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2015 Intel Deutschland GmbH
+ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -32,6 +32,7 @@
* BSD LICENSE
*
* Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -161,6 +162,15 @@ iwl_init_notification_wait(struct iwl_notif_wait_data *notif_wait,
}
IWL_EXPORT_SYMBOL(iwl_init_notification_wait);
+void iwl_remove_notification(struct iwl_notif_wait_data *notif_wait,
+ struct iwl_notification_wait *wait_entry)
+{
+ spin_lock_bh(&notif_wait->notif_wait_lock);
+ list_del(&wait_entry->list);
+ spin_unlock_bh(&notif_wait->notif_wait_lock);
+}
+IWL_EXPORT_SYMBOL(iwl_remove_notification);
+
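(The reorder lets iwl_wait_notification() below share this unlink path. For
reference, the usual send-and-wait pattern, assuming notif_wait, trans and
hcmd are in scope, is:)

	struct iwl_notification_wait wait;
	static const u16 cmds[] = {
		WIDE_ID(REGULATORY_AND_NVM_GROUP, NVM_GET_INFO),
	};
	int ret;

	iwl_init_notification_wait(&notif_wait, &wait, cmds,
				   ARRAY_SIZE(cmds), NULL, NULL);
	ret = iwl_trans_send_cmd(trans, &hcmd);
	if (ret) {
		/* the command never reached the FW: unlink the waiter */
		iwl_remove_notification(&notif_wait, &wait);
		return ret;
	}
	return iwl_wait_notification(&notif_wait, &wait, 2 * HZ);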
int iwl_wait_notification(struct iwl_notif_wait_data *notif_wait,
struct iwl_notification_wait *wait_entry,
unsigned long timeout)
@@ -171,9 +181,7 @@ int iwl_wait_notification(struct iwl_notif_wait_data *notif_wait,
wait_entry->triggered || wait_entry->aborted,
timeout);
- spin_lock_bh(&notif_wait->notif_wait_lock);
- list_del(&wait_entry->list);
- spin_unlock_bh(&notif_wait->notif_wait_lock);
+ iwl_remove_notification(notif_wait, wait_entry);
if (wait_entry->aborted)
return -EIO;
@@ -184,12 +192,3 @@ int iwl_wait_notification(struct iwl_notif_wait_data *notif_wait,
return 0;
}
IWL_EXPORT_SYMBOL(iwl_wait_notification);
-
-void iwl_remove_notification(struct iwl_notif_wait_data *notif_wait,
- struct iwl_notification_wait *wait_entry)
-{
- spin_lock_bh(&notif_wait->notif_wait_lock);
- list_del(&wait_entry->list);
- spin_unlock_bh(&notif_wait->notif_wait_lock);
-}
-IWL_EXPORT_SYMBOL(iwl_remove_notification);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/nvm.c b/drivers/net/wireless/intel/iwlwifi/fw/nvm.c
new file mode 100644
index 0000000..bd2e1fb
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/nvm.c
@@ -0,0 +1,162 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
+ * USA
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#include "iwl-drv.h"
+#include "runtime.h"
+#include "fw/api/nvm-reg.h"
+#include "fw/api/commands.h"
+#include "iwl-nvm-parse.h"
+
+struct iwl_nvm_data *iwl_fw_get_nvm(struct iwl_fw_runtime *fwrt)
+{
+ struct iwl_nvm_get_info cmd = {};
+ struct iwl_nvm_get_info_rsp *rsp;
+ struct iwl_trans *trans = fwrt->trans;
+ struct iwl_nvm_data *nvm;
+ struct iwl_host_cmd hcmd = {
+ .flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
+ .data = { &cmd, },
+ .len = { sizeof(cmd) },
+ .id = WIDE_ID(REGULATORY_AND_NVM_GROUP, NVM_GET_INFO)
+ };
+ int ret;
+ bool lar_fw_supported = !iwlwifi_mod_params.lar_disable &&
+ fw_has_capa(&fwrt->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_LAR_SUPPORT);
+
+ ret = iwl_trans_send_cmd(trans, &hcmd);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (WARN(iwl_rx_packet_payload_len(hcmd.resp_pkt) != sizeof(*rsp),
+ "Invalid payload len in NVM response from FW %d",
+ iwl_rx_packet_payload_len(hcmd.resp_pkt))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ rsp = (void *)hcmd.resp_pkt->data;
+ if (le32_to_cpu(rsp->general.flags) & NVM_GENERAL_FLAGS_EMPTY_OTP)
+ IWL_INFO(fwrt, "OTP is empty\n");
+
+ nvm = kzalloc(sizeof(*nvm) +
+ sizeof(struct ieee80211_channel) * IWL_NUM_CHANNELS,
+ GFP_KERNEL);
+ if (!nvm) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ iwl_set_hw_address_from_csr(trans, nvm);
+ /* TODO: if platform NVM has MAC address - override it here */
+
+ if (!is_valid_ether_addr(nvm->hw_addr)) {
+ IWL_ERR(fwrt, "no valid mac address was found\n");
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ IWL_INFO(trans, "base HW address: %pM\n", nvm->hw_addr);
+
+ /* Initialize general data */
+ nvm->nvm_version = le16_to_cpu(rsp->general.nvm_version);
+
+ /* Initialize MAC sku data */
+ nvm->sku_cap_11ac_enable =
+ le32_to_cpu(rsp->mac_sku.enable_11ac);
+ nvm->sku_cap_11n_enable =
+ le32_to_cpu(rsp->mac_sku.enable_11n);
+ nvm->sku_cap_band_24GHz_enable =
+ le32_to_cpu(rsp->mac_sku.enable_24g);
+ nvm->sku_cap_band_52GHz_enable =
+ le32_to_cpu(rsp->mac_sku.enable_5g);
+ nvm->sku_cap_mimo_disabled =
+ le32_to_cpu(rsp->mac_sku.mimo_disable);
+
+ /* Initialize PHY sku data */
+ nvm->valid_tx_ant = (u8)le32_to_cpu(rsp->phy_sku.tx_chains);
+ nvm->valid_rx_ant = (u8)le32_to_cpu(rsp->phy_sku.rx_chains);
+
+ /* Initialize regulatory data */
+ nvm->lar_enabled =
+ le32_to_cpu(rsp->regulatory.lar_enabled) && lar_fw_supported;
+
+ iwl_init_sbands(trans->dev, trans->cfg, nvm,
+ rsp->regulatory.channel_profile,
+ nvm->valid_tx_ant & fwrt->fw->valid_tx_ant,
+ nvm->valid_rx_ant & fwrt->fw->valid_rx_ant,
+ nvm->lar_enabled, false);
+
+ iwl_free_resp(&hcmd);
+ return nvm;
+
+err_free:
+ kfree(nvm);
+out:
+ iwl_free_resp(&hcmd);
+ return ERR_PTR(ret);
+}
+IWL_EXPORT_SYMBOL(iwl_fw_get_nvm);
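(Call-site sketch; the caller names are hypothetical.)

	struct iwl_nvm_data *nvm = iwl_fw_get_nvm(&xxx->fwrt);

	if (IS_ERR(nvm))
		return PTR_ERR(nvm);
	xxx->nvm_data = nvm;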
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/paging.c b/drivers/net/wireless/intel/iwlwifi/fw/paging.c
new file mode 100644
index 0000000..1610722
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/paging.c
@@ -0,0 +1,414 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#include "iwl-drv.h"
+#include "runtime.h"
+#include "fw/api/commands.h"
+
+void iwl_free_fw_paging(struct iwl_fw_runtime *fwrt)
+{
+ int i;
+
+ if (!fwrt->fw_paging_db[0].fw_paging_block)
+ return;
+
+ for (i = 0; i < NUM_OF_FW_PAGING_BLOCKS; i++) {
+ struct iwl_fw_paging *paging = &fwrt->fw_paging_db[i];
+
+ if (!paging->fw_paging_block) {
+ IWL_DEBUG_FW(fwrt,
+ "Paging: block %d already freed, continue to next page\n",
+ i);
+
+ continue;
+ }
+ dma_unmap_page(fwrt->trans->dev, paging->fw_paging_phys,
+ paging->fw_paging_size, DMA_BIDIRECTIONAL);
+
+ __free_pages(paging->fw_paging_block,
+ get_order(paging->fw_paging_size));
+ paging->fw_paging_block = NULL;
+ }
+ kfree(fwrt->trans->paging_download_buf);
+ fwrt->trans->paging_download_buf = NULL;
+ fwrt->trans->paging_db = NULL;
+
+ memset(fwrt->fw_paging_db, 0, sizeof(fwrt->fw_paging_db));
+}
+IWL_EXPORT_SYMBOL(iwl_free_fw_paging);
+
+static int iwl_alloc_fw_paging_mem(struct iwl_fw_runtime *fwrt,
+ const struct fw_img *image)
+{
+ struct page *block;
+ dma_addr_t phys = 0;
+ int blk_idx, order, num_of_pages, size, dma_enabled;
+
+ if (fwrt->fw_paging_db[0].fw_paging_block)
+ return 0;
+
+ dma_enabled = is_device_dma_capable(fwrt->trans->dev);
+
+ /* ensure PAGING_BLOCK_SIZE is 2 to the power of BLOCK_2_EXP_SIZE */
+ BUILD_BUG_ON(BIT(BLOCK_2_EXP_SIZE) != PAGING_BLOCK_SIZE);
+
+ num_of_pages = image->paging_mem_size / FW_PAGING_SIZE;
+ fwrt->num_of_paging_blk =
+ DIV_ROUND_UP(num_of_pages, NUM_OF_PAGE_PER_GROUP);
+ fwrt->num_of_pages_in_last_blk =
+ num_of_pages -
+ NUM_OF_PAGE_PER_GROUP * (fwrt->num_of_paging_blk - 1);
+
+ IWL_DEBUG_FW(fwrt,
+ "Paging: allocating mem for %d paging blocks, each block holds 8 pages, last block holds %d pages\n",
+ fwrt->num_of_paging_blk,
+ fwrt->num_of_pages_in_last_blk);
+
+ /*
+ * Allocate CSS and paging blocks in dram.
+ */
+ for (blk_idx = 0; blk_idx < fwrt->num_of_paging_blk + 1; blk_idx++) {
+ /* For CSS allocate 4KB, for others PAGING_BLOCK_SIZE (32K) */
+ size = blk_idx ? PAGING_BLOCK_SIZE : FW_PAGING_SIZE;
+ order = get_order(size);
+ block = alloc_pages(GFP_KERNEL, order);
+ if (!block) {
+ /* free all the previous pages since we failed */
+ iwl_free_fw_paging(fwrt);
+ return -ENOMEM;
+ }
+
+ fwrt->fw_paging_db[blk_idx].fw_paging_block = block;
+ fwrt->fw_paging_db[blk_idx].fw_paging_size = size;
+
+ if (dma_enabled) {
+ phys = dma_map_page(fwrt->trans->dev, block, 0,
+ PAGE_SIZE << order,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(fwrt->trans->dev, phys)) {
+ /*
+ * free the previous pages and the current one
+ * since we failed to map_page.
+ */
+ iwl_free_fw_paging(fwrt);
+ return -ENOMEM;
+ }
+ fwrt->fw_paging_db[blk_idx].fw_paging_phys = phys;
+ } else {
+ fwrt->fw_paging_db[blk_idx].fw_paging_phys =
+ PAGING_ADDR_SIG |
+ blk_idx << BLOCK_2_EXP_SIZE;
+ }
+
+ if (!blk_idx)
+ IWL_DEBUG_FW(fwrt,
+ "Paging: allocated 4K(CSS) bytes (order %d) for firmware paging.\n",
+ order);
+ else
+ IWL_DEBUG_FW(fwrt,
+ "Paging: allocated 32K bytes (order %d) for firmware paging.\n",
+ order);
+ }
+
+ return 0;
+}
+
+static int iwl_fill_paging_mem(struct iwl_fw_runtime *fwrt,
+ const struct fw_img *image)
+{
+ int sec_idx, idx;
+ u32 offset = 0;
+
+ /*
+ * Find the paging image start point:
+ * if CPU2 exists and is in paging format, the image looks like:
+ * CPU1 sections (2 or more)
+ * CPU1_CPU2_SEPARATOR_SECTION delimiter - separates CPU1 from CPU2
+ * CPU2 sections (not paged)
+ * PAGING_SEPARATOR_SECTION delimiter - separates the non-paged
+ * CPU2 sections from the CPU2 paging sections
+ * CPU2 paging CSS
+ * CPU2 paging image (including instructions and data)
+ */
+ for (sec_idx = 0; sec_idx < image->num_sec; sec_idx++) {
+ if (image->sec[sec_idx].offset == PAGING_SEPARATOR_SECTION) {
+ sec_idx++;
+ break;
+ }
+ }
+
+ /*
+ * If paging is enabled there should be at least 2 more sections left
+ * (one for CSS and one for Paging data)
+ */
+ if (sec_idx >= image->num_sec - 1) {
+ IWL_ERR(fwrt, "Paging: Missing CSS and/or paging sections\n");
+ iwl_free_fw_paging(fwrt);
+ return -EINVAL;
+ }
+
+ /* copy the CSS block to the dram */
+ IWL_DEBUG_FW(fwrt, "Paging: load paging CSS to FW, sec = %d\n",
+ sec_idx);
+
+ memcpy(page_address(fwrt->fw_paging_db[0].fw_paging_block),
+ image->sec[sec_idx].data,
+ fwrt->fw_paging_db[0].fw_paging_size);
+ dma_sync_single_for_device(fwrt->trans->dev,
+ fwrt->fw_paging_db[0].fw_paging_phys,
+ fwrt->fw_paging_db[0].fw_paging_size,
+ DMA_BIDIRECTIONAL);
+
+ IWL_DEBUG_FW(fwrt,
+ "Paging: copied %d CSS bytes to first block\n",
+ fwrt->fw_paging_db[0].fw_paging_size);
+
+ sec_idx++;
+
+ /*
+ * copy the paging blocks to the dram.
+ * The loop index starts from 1 since the CSS block (index 0) was
+ * already copied to dram.
+ * The loop stops at num_of_paging_blk since the last block may not
+ * be full.
+ */
+ for (idx = 1; idx < fwrt->num_of_paging_blk; idx++) {
+ struct iwl_fw_paging *block = &fwrt->fw_paging_db[idx];
+
+ memcpy(page_address(block->fw_paging_block),
+ image->sec[sec_idx].data + offset,
+ block->fw_paging_size);
+ dma_sync_single_for_device(fwrt->trans->dev,
+ block->fw_paging_phys,
+ block->fw_paging_size,
+ DMA_BIDIRECTIONAL);
+
+ IWL_DEBUG_FW(fwrt,
+ "Paging: copied %d paging bytes to block %d\n",
+ fwrt->fw_paging_db[idx].fw_paging_size,
+ idx);
+
+ offset += fwrt->fw_paging_db[idx].fw_paging_size;
+ }
+
+ /* copy the last paging block */
+ if (fwrt->num_of_pages_in_last_blk > 0) {
+ struct iwl_fw_paging *block = &fwrt->fw_paging_db[idx];
+
+ memcpy(page_address(block->fw_paging_block),
+ image->sec[sec_idx].data + offset,
+ FW_PAGING_SIZE * fwrt->num_of_pages_in_last_blk);
+ dma_sync_single_for_device(fwrt->trans->dev,
+ block->fw_paging_phys,
+ block->fw_paging_size,
+ DMA_BIDIRECTIONAL);
+
+ IWL_DEBUG_FW(fwrt,
+ "Paging: copied %d pages in the last block %d\n",
+ fwrt->num_of_pages_in_last_blk, idx);
+ }
+
+ return 0;
+}
+
+static int iwl_save_fw_paging(struct iwl_fw_runtime *fwrt,
+ const struct fw_img *fw)
+{
+ int ret;
+
+ ret = iwl_alloc_fw_paging_mem(fwrt, fw);
+ if (ret)
+ return ret;
+
+ return iwl_fill_paging_mem(fwrt, fw);
+}
+
+/* send paging cmd to FW in case CPU2 has paging image */
+static int iwl_send_paging_cmd(struct iwl_fw_runtime *fwrt,
+ const struct fw_img *fw)
+{
+ struct iwl_fw_paging_cmd paging_cmd = {
+ .flags = cpu_to_le32(PAGING_CMD_IS_SECURED |
+ PAGING_CMD_IS_ENABLED |
+ (fwrt->num_of_pages_in_last_blk <<
+ PAGING_CMD_NUM_OF_PAGES_IN_LAST_GRP_POS)),
+ .block_size = cpu_to_le32(BLOCK_2_EXP_SIZE),
+ .block_num = cpu_to_le32(fwrt->num_of_paging_blk),
+ };
+ struct iwl_host_cmd hcmd = {
+ .id = iwl_cmd_id(FW_PAGING_BLOCK_CMD, IWL_ALWAYS_LONG_GROUP, 0),
+ .len = { sizeof(paging_cmd), },
+ .data = { &paging_cmd, },
+ };
+ int blk_idx;
+
+ /* loop over all paging blocks + CSS block */
+ for (blk_idx = 0; blk_idx < fwrt->num_of_paging_blk + 1; blk_idx++) {
+ dma_addr_t addr = fwrt->fw_paging_db[blk_idx].fw_paging_phys;
+ __le32 phy_addr;
+
+ addr = addr >> PAGE_2_EXP_SIZE;
+ phy_addr = cpu_to_le32(addr);
+ paging_cmd.device_phy_addr[blk_idx] = phy_addr;
+ }
+
+ return iwl_trans_send_cmd(fwrt->trans, &hcmd);
+}
+
+/*
+ * Send the paging item cmd to the FW in case CPU2 has a paging image
+ */
+static int iwl_trans_get_paging_item(struct iwl_fw_runtime *fwrt)
+{
+ int ret;
+ struct iwl_fw_get_item_cmd fw_get_item_cmd = {
+ .item_id = cpu_to_le32(IWL_FW_ITEM_ID_PAGING),
+ };
+ struct iwl_fw_get_item_resp *item_resp;
+ struct iwl_host_cmd cmd = {
+ .id = iwl_cmd_id(FW_GET_ITEM_CMD, IWL_ALWAYS_LONG_GROUP, 0),
+ .flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
+ .data = { &fw_get_item_cmd, },
+ .len = { sizeof(fw_get_item_cmd), },
+ };
+
+ ret = iwl_trans_send_cmd(fwrt->trans, &cmd);
+ if (ret) {
+ IWL_ERR(fwrt,
+ "Paging: Failed to send FW_GET_ITEM_CMD cmd (err = %d)\n",
+ ret);
+ return ret;
+ }
+
+ item_resp = (void *)((struct iwl_rx_packet *)cmd.resp_pkt)->data;
+ if (item_resp->item_id != cpu_to_le32(IWL_FW_ITEM_ID_PAGING)) {
+ IWL_ERR(fwrt,
+ "Paging: got wrong item in FW_GET_ITEM_CMD resp (item_id = %u)\n",
+ le32_to_cpu(item_resp->item_id));
+ ret = -EIO;
+ goto exit;
+ }
+
+ /* Add an extra page for headers */
+ fwrt->trans->paging_download_buf = kzalloc(PAGING_BLOCK_SIZE +
+ FW_PAGING_SIZE,
+ GFP_KERNEL);
+ if (!fwrt->trans->paging_download_buf) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+ fwrt->trans->paging_req_addr = le32_to_cpu(item_resp->item_val);
+ fwrt->trans->paging_db = fwrt->fw_paging_db;
+ IWL_DEBUG_FW(fwrt,
+ "Paging: got paging request address (paging_req_addr 0x%08x)\n",
+ fwrt->trans->paging_req_addr);
+
+exit:
+ iwl_free_resp(&cmd);
+
+ return ret;
+}
+
+int iwl_init_paging(struct iwl_fw_runtime *fwrt, enum iwl_ucode_type type)
+{
+ const struct fw_img *fw = &fwrt->fw->img[type];
+ int ret;
+
+ if (fwrt->trans->cfg->gen2)
+ return 0;
+
+ /*
+ * Configure and operate fw paging mechanism.
+ * The driver configures the paging flow only once.
+ * The CPU2 paging image is included in the IWL_UCODE_INIT image.
+ */
+ if (!fw->paging_mem_size)
+ return 0;
+
+ /*
+ * When dma is not enabled, the driver needs to copy / write
+ * the downloaded / uploaded page to / from the smem.
+ * This gets the location where the pages are stored.
+ */
+ if (!is_device_dma_capable(fwrt->trans->dev)) {
+ ret = iwl_trans_get_paging_item(fwrt);
+ if (ret) {
+ IWL_ERR(fwrt, "failed to get FW paging item\n");
+ return ret;
+ }
+ }
+
+ ret = iwl_save_fw_paging(fwrt, fw);
+ if (ret) {
+ IWL_ERR(fwrt, "failed to save the FW paging image\n");
+ return ret;
+ }
+
+ ret = iwl_send_paging_cmd(fwrt, fw);
+ if (ret) {
+ IWL_ERR(fwrt, "failed to send the paging cmd\n");
+ iwl_free_fw_paging(fwrt);
+ return ret;
+ }
+
+ return 0;
+}
+IWL_EXPORT_SYMBOL(iwl_init_paging);
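(Call-site sketch, assuming the op mode just loaded the regular image:)

	iwl_fw_set_current_image(&xxx->fwrt, IWL_UCODE_REGULAR);
	ret = iwl_init_paging(&xxx->fwrt, IWL_UCODE_REGULAR);
	if (ret)
		goto error;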
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
new file mode 100644
index 0000000..50cfb6d
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
@@ -0,0 +1,158 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#ifndef __iwl_fw_runtime_h__
+#define __iwl_fw_runtime_h__
+
+#include "iwl-config.h"
+#include "iwl-trans.h"
+#include "img.h"
+#include "fw/api/debug.h"
+#include "fw/api/paging.h"
+#include "iwl-eeprom-parse.h"
+
+struct iwl_fw_runtime_ops {
+ int (*dump_start)(void *ctx);
+ void (*dump_end)(void *ctx);
+};
+
+#define MAX_NUM_LMAC 2
+struct iwl_fwrt_shared_mem_cfg {
+ int num_lmacs;
+ int num_txfifo_entries;
+ struct {
+ u32 txfifo_size[TX_FIFO_MAX_NUM];
+ u32 rxfifo1_size;
+ } lmac[MAX_NUM_LMAC];
+ u32 rxfifo2_size;
+ u32 internal_txfifo_addr;
+ u32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM];
+};
+
+enum iwl_fw_runtime_status {
+ IWL_FWRT_STATUS_DUMPING = 0,
+};
+
+/**
+ * struct iwl_fw_runtime - runtime data for firmware
+ * @trans: transport layer
+ * @fw: firmware image
+ * @dev: device pointer
+ * @ops: user ops
+ * @ops_ctx: user ops context
+ * @status: status flags
+ * @fw_paging_db: paging database
+ * @num_of_paging_blk: number of paging blocks
+ * @num_of_pages_in_last_blk: number of pages in the last block
+ * @smem_cfg: saved firmware SMEM configuration
+ * @cur_fw_img: current firmware image, must be maintained by
+ * the driver by calling &iwl_fw_set_current_image()
+ * @dump: debug dump data
+ */
+struct iwl_fw_runtime {
+ struct iwl_trans *trans;
+ const struct iwl_fw *fw;
+ struct device *dev;
+
+ const struct iwl_fw_runtime_ops *ops;
+ void *ops_ctx;
+
+ unsigned long status;
+
+ /* Paging */
+ struct iwl_fw_paging fw_paging_db[NUM_OF_FW_PAGING_BLOCKS];
+ u16 num_of_paging_blk;
+ u16 num_of_pages_in_last_blk;
+
+ enum iwl_ucode_type cur_fw_img;
+
+ /* memory configuration */
+ struct iwl_fwrt_shared_mem_cfg smem_cfg;
+
+ /* debug */
+ struct {
+ const struct iwl_fw_dump_desc *desc;
+ const struct iwl_fw_dbg_trigger_tlv *trig;
+ struct delayed_work wk;
+
+ u8 conf;
+
+ /* ts of the beginning of a non-collect fw dbg data period */
+ unsigned long non_collect_ts_start[FW_DBG_TRIGGER_MAX - 1];
+ } dump;
+};
+
+void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans,
+ const struct iwl_fw *fw,
+ const struct iwl_fw_runtime_ops *ops, void *ops_ctx);
+
+static inline void iwl_fw_set_current_image(struct iwl_fw_runtime *fwrt,
+ enum iwl_ucode_type cur_fw_img)
+{
+ fwrt->cur_fw_img = cur_fw_img;
+}
+
+int iwl_init_paging(struct iwl_fw_runtime *fwrt, enum iwl_ucode_type type);
+void iwl_free_fw_paging(struct iwl_fw_runtime *fwrt);
+
+void iwl_get_shared_mem_conf(struct iwl_fw_runtime *fwrt);
+
+void iwl_fwrt_handle_notification(struct iwl_fw_runtime *fwrt,
+ struct iwl_rx_cmd_buffer *rxb);
+struct iwl_nvm_data *iwl_fw_get_nvm(struct iwl_fw_runtime *fwrt);
+
+#endif /* __iwl_fw_runtime_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/smem.c b/drivers/net/wireless/intel/iwlwifi/fw/smem.c
new file mode 100644
index 0000000..7667573
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/smem.c
@@ -0,0 +1,155 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#include "iwl-drv.h"
+#include "runtime.h"
+#include "fw/api/commands.h"
+
+static void iwl_parse_shared_mem_a000(struct iwl_fw_runtime *fwrt,
+ struct iwl_rx_packet *pkt)
+{
+ struct iwl_shared_mem_cfg *mem_cfg = (void *)pkt->data;
+ int i, lmac;
+ int lmac_num = le32_to_cpu(mem_cfg->lmac_num);
+
+ if (WARN_ON(lmac_num > ARRAY_SIZE(mem_cfg->lmac_smem)))
+ return;
+
+ fwrt->smem_cfg.num_lmacs = lmac_num;
+ fwrt->smem_cfg.num_txfifo_entries =
+ ARRAY_SIZE(mem_cfg->lmac_smem[0].txfifo_size);
+ fwrt->smem_cfg.rxfifo2_size = le32_to_cpu(mem_cfg->rxfifo2_size);
+
+ for (lmac = 0; lmac < lmac_num; lmac++) {
+ struct iwl_shared_mem_lmac_cfg *lmac_cfg =
+ &mem_cfg->lmac_smem[lmac];
+
+ for (i = 0; i < ARRAY_SIZE(lmac_cfg->txfifo_size); i++)
+ fwrt->smem_cfg.lmac[lmac].txfifo_size[i] =
+ le32_to_cpu(lmac_cfg->txfifo_size[i]);
+ fwrt->smem_cfg.lmac[lmac].rxfifo1_size =
+ le32_to_cpu(lmac_cfg->rxfifo1_size);
+ }
+}
+
+static void iwl_parse_shared_mem(struct iwl_fw_runtime *fwrt,
+ struct iwl_rx_packet *pkt)
+{
+ struct iwl_shared_mem_cfg_v2 *mem_cfg = (void *)pkt->data;
+ int i;
+
+ fwrt->smem_cfg.num_lmacs = 1;
+
+ fwrt->smem_cfg.num_txfifo_entries = ARRAY_SIZE(mem_cfg->txfifo_size);
+ for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++)
+ fwrt->smem_cfg.lmac[0].txfifo_size[i] =
+ le32_to_cpu(mem_cfg->txfifo_size[i]);
+
+ fwrt->smem_cfg.lmac[0].rxfifo1_size =
+ le32_to_cpu(mem_cfg->rxfifo_size[0]);
+ fwrt->smem_cfg.rxfifo2_size = le32_to_cpu(mem_cfg->rxfifo_size[1]);
+
+ /* new API has more data, from rxfifo_addr field and on */
+ if (fw_has_capa(&fwrt->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) {
+ BUILD_BUG_ON(sizeof(fwrt->smem_cfg.internal_txfifo_size) !=
+ sizeof(mem_cfg->internal_txfifo_size));
+
+ fwrt->smem_cfg.internal_txfifo_addr =
+ le32_to_cpu(mem_cfg->internal_txfifo_addr);
+
+ for (i = 0;
+ i < ARRAY_SIZE(fwrt->smem_cfg.internal_txfifo_size);
+ i++)
+ fwrt->smem_cfg.internal_txfifo_size[i] =
+ le32_to_cpu(mem_cfg->internal_txfifo_size[i]);
+ }
+}
+
+void iwl_get_shared_mem_conf(struct iwl_fw_runtime *fwrt)
+{
+ struct iwl_host_cmd cmd = {
+ .flags = CMD_WANT_SKB,
+ .data = { NULL, },
+ .len = { 0, },
+ };
+ struct iwl_rx_packet *pkt;
+
+ if (fw_has_capa(&fwrt->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG))
+ cmd.id = iwl_cmd_id(SHARED_MEM_CFG_CMD, SYSTEM_GROUP, 0);
+ else
+ cmd.id = SHARED_MEM_CFG;
+
+ if (WARN_ON(iwl_trans_send_cmd(fwrt->trans, &cmd)))
+ return;
+
+ pkt = cmd.resp_pkt;
+ if (fwrt->trans->cfg->device_family == IWL_DEVICE_FAMILY_A000)
+ iwl_parse_shared_mem_a000(fwrt, pkt);
+ else
+ iwl_parse_shared_mem(fwrt, pkt);
+
+ IWL_DEBUG_INFO(fwrt, "SHARED MEM CFG: got memory offsets/sizes\n");
+
+ iwl_free_resp(&cmd);
+}
+IWL_EXPORT_SYMBOL(iwl_get_shared_mem_conf);
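Once iwl_get_shared_mem_conf() has run, both parsers leave the FIFO geometry in fwrt->smem_cfg, so consumers never need to know which firmware layout was received. A minimal sketch of walking that structure (hypothetical helper, not part of this patch):

	/* Hypothetical: total TX FIFO space across all LMACs, e.g. for
	 * sizing a dump buffer; assumes the smem_cfg fields filled in
	 * by the parsers above. */
	static u32 iwl_fwrt_total_txfifo_size(struct iwl_fw_runtime *fwrt)
	{
		u32 total = 0;
		int lmac, i;

		for (lmac = 0; lmac < fwrt->smem_cfg.num_lmacs; lmac++)
			for (i = 0; i < fwrt->smem_cfg.num_txfifo_entries; i++)
				total += fwrt->smem_cfg.lmac[lmac].txfifo_size[i];
		return total;
	}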
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index d19c748..3e057b5 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -463,6 +463,9 @@ extern const struct iwl_cfg iwla000_2ac_cfg_hr;
extern const struct iwl_cfg iwla000_2ac_cfg_hr_cdb;
extern const struct iwl_cfg iwla000_2ac_cfg_jf;
extern const struct iwl_cfg iwla000_2ax_cfg_hr;
+extern const struct iwl_cfg iwla000_2ax_cfg_qnj_hr_f0;
+extern const struct iwl_cfg iwla000_2ax_cfg_qnj_jf_b0;
+extern const struct iwl_cfg iwla000_2ax_cfg_qnj_hr_a0;
#endif /* CONFIG_IWLMVM */
#endif /* __IWL_CONFIG_H__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
index c6c1876..b03e0f9 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
@@ -169,7 +169,7 @@
/*
* CSR Hardware Revision Workaround Register. Indicates hardware rev;
- * "step" determines CCK backoff for txpower calculation. Used for 4965 only.
+ * "step" determines CCK backoff for txpower calculation.
* See also CSR_HW_REV register.
* Bit fields:
* 3-2: 0 = A, 1 = B, 2 = C, 3 = D step
@@ -354,11 +354,16 @@ enum {
#define CSR_HW_REV_TYPE_135 (0x0000120)
#define CSR_HW_REV_TYPE_7265D (0x0000210)
#define CSR_HW_REV_TYPE_NONE (0x00001F0)
+#define CSR_HW_REV_TYPE_QNJ (0x0000360)
+#define CSR_HW_REV_TYPE_HR_CDB (0x0000340)
/* RF_ID value */
-#define CSR_HW_RF_ID_TYPE_JF (0x00105000)
+#define CSR_HW_RF_ID_TYPE_JF (0x00105100)
#define CSR_HW_RF_ID_TYPE_HR (0x0010A000)
-#define CSR_HW_RF_ID_TYPE_HRCDB (0x00109000)
+#define CSR_HW_RF_ID_TYPE_HRCDB (0x00109F00)
+
+/* HW_RF CHIP ID */
+#define CSR_HW_RF_ID_TYPE_CHIP_ID(_val) (((_val) >> 12) & 0xFFF)
/* EEPROM REG */
#define CSR_EEPROM_REG_READ_VALID_MSK (0x00000001)
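The new CSR_HW_RF_ID_TYPE_CHIP_ID() macro isolates bits 12-23 of the RF ID, so callers can match on the chip type while ignoring the stepping now encoded in the low bits of the JF/HRCDB values above. A usage sketch (illustrative only, not part of this patch):

	/* Illustrative: detect a JF RF module regardless of stepping. */
	if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
	    CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_JF)) {
		/* JF RF detected */
	}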
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index 4e0f86f..99676d6 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -479,8 +479,8 @@ static int iwl_set_default_calib(struct iwl_drv *drv, const u8 *data)
return 0;
}
-static int iwl_set_ucode_api_flags(struct iwl_drv *drv, const u8 *data,
- struct iwl_ucode_capabilities *capa)
+static void iwl_set_ucode_api_flags(struct iwl_drv *drv, const u8 *data,
+ struct iwl_ucode_capabilities *capa)
{
const struct iwl_ucode_api *ucode_api = (void *)data;
u32 api_index = le32_to_cpu(ucode_api->api_index);
@@ -488,23 +488,20 @@ static int iwl_set_ucode_api_flags(struct iwl_drv *drv, const u8 *data,
int i;
if (api_index >= DIV_ROUND_UP(NUM_IWL_UCODE_TLV_API, 32)) {
- IWL_ERR(drv,
- "api flags index %d larger than supported by driver\n",
- api_index);
- /* don't return an error so we can load FW that has more bits */
- return 0;
+ IWL_WARN(drv,
+ "api flags index %d larger than supported by driver\n",
+ api_index);
+ return;
}
for (i = 0; i < 32; i++) {
if (api_flags & BIT(i))
__set_bit(i + 32 * api_index, capa->_api);
}
-
- return 0;
}
-static int iwl_set_ucode_capabilities(struct iwl_drv *drv, const u8 *data,
- struct iwl_ucode_capabilities *capa)
+static void iwl_set_ucode_capabilities(struct iwl_drv *drv, const u8 *data,
+ struct iwl_ucode_capabilities *capa)
{
const struct iwl_ucode_capa *ucode_capa = (void *)data;
u32 api_index = le32_to_cpu(ucode_capa->api_index);
@@ -512,19 +509,16 @@ static int iwl_set_ucode_capabilities(struct iwl_drv *drv, const u8 *data,
int i;
if (api_index >= DIV_ROUND_UP(NUM_IWL_UCODE_TLV_CAPA, 32)) {
- IWL_ERR(drv,
- "capa flags index %d larger than supported by driver\n",
- api_index);
- /* don't return an error so we can load FW that has more bits */
- return 0;
+ IWL_WARN(drv,
+ "capa flags index %d larger than supported by driver\n",
+ api_index);
+ return;
}
for (i = 0; i < 32; i++) {
if (api_flags & BIT(i))
__set_bit(i + 32 * api_index, capa->_capa);
}
-
- return 0;
}
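In both setters above, each TLV carries one 32-bit flags word plus an index, and bit i of word api_index maps to flag number i + 32 * api_index; an out-of-range index is now merely warned about so that newer firmware carrying more bits still loads. A worked example with made-up values:

	/* Made-up values: api_index = 2 with bit 5 set in api_flags
	 * enables capability number 5 + 32 * 2 = 69. */
	__set_bit(5 + 32 * 2, capa->_capa);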
static int iwl_parse_v1_v2_firmware(struct iwl_drv *drv,
@@ -766,14 +760,12 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv,
case IWL_UCODE_TLV_API_CHANGES_SET:
if (tlv_len != sizeof(struct iwl_ucode_api))
goto invalid_tlv_len;
- if (iwl_set_ucode_api_flags(drv, tlv_data, capa))
- goto tlv_error;
+ iwl_set_ucode_api_flags(drv, tlv_data, capa);
break;
case IWL_UCODE_TLV_ENABLED_CAPABILITIES:
if (tlv_len != sizeof(struct iwl_ucode_capa))
goto invalid_tlv_len;
- if (iwl_set_ucode_capabilities(drv, tlv_data, capa))
- goto tlv_error;
+ iwl_set_ucode_capabilities(drv, tlv_data, capa);
break;
case IWL_UCODE_TLV_INIT_EVTLOG_PTR:
if (tlv_len != sizeof(u32))
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.c b/drivers/net/wireless/intel/iwlwifi/iwl-io.c
index c527b8c..efb1998 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-io.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.c
@@ -241,20 +241,12 @@ IWL_EXPORT_SYMBOL(iwl_clear_bits_prph);
void iwl_force_nmi(struct iwl_trans *trans)
{
- if (trans->cfg->device_family < IWL_DEVICE_FAMILY_8000) {
+ if (trans->cfg->device_family < IWL_DEVICE_FAMILY_9000)
iwl_write_prph(trans, DEVICE_SET_NMI_REG,
DEVICE_SET_NMI_VAL_DRV);
- iwl_write_prph(trans, DEVICE_SET_NMI_REG,
- DEVICE_SET_NMI_VAL_HW);
- } else if (trans->cfg->device_family == IWL_DEVICE_FAMILY_A000) {
+ else
iwl_write_prph(trans, UREG_NIC_SET_NMI_DRIVER,
- DEVICE_SET_NMI_8000_VAL);
- } else {
- iwl_write_prph(trans, DEVICE_SET_NMI_8000_REG,
- DEVICE_SET_NMI_8000_VAL);
- iwl_write_prph(trans, DEVICE_SET_NMI_REG,
- DEVICE_SET_NMI_VAL_DRV);
- }
+ UREG_NIC_SET_NMI_DRIVER_NMI_FROM_DRIVER_MSK);
}
IWL_EXPORT_SYMBOL(iwl_force_nmi);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index 3ee6767..3014bee 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -79,6 +79,7 @@
/* NVM offsets (in words) definitions */
enum wkp_nvm_offsets {
/* NVM HW-Section offset (in words) definitions */
+ SUBSYSTEM_ID = 0x0A,
HW_ADDR = 0x15,
/* NVM SW-Section offset (in words) definitions */
@@ -183,22 +184,26 @@ static struct ieee80211_rate iwl_cfg80211_rates[] = {
* @NVM_CHANNEL_INDOOR_ONLY: only indoor use is allowed
* @NVM_CHANNEL_GO_CONCURRENT: GO operation is allowed when connected to BSS
* on same channel on 2.4 or same UNII band on 5.2
- * @NVM_CHANNEL_WIDE: 20 MHz channel okay (?)
- * @NVM_CHANNEL_40MHZ: 40 MHz channel okay (?)
- * @NVM_CHANNEL_80MHZ: 80 MHz channel okay (?)
- * @NVM_CHANNEL_160MHZ: 160 MHz channel okay (?)
+ * @NVM_CHANNEL_UNIFORM: uniform spreading required
+ * @NVM_CHANNEL_20MHZ: 20 MHz channel okay
+ * @NVM_CHANNEL_40MHZ: 40 MHz channel okay
+ * @NVM_CHANNEL_80MHZ: 80 MHz channel okay
+ * @NVM_CHANNEL_160MHZ: 160 MHz channel okay
+ * @NVM_CHANNEL_DC_HIGH: DC HIGH required/allowed (?)
*/
enum iwl_nvm_channel_flags {
- NVM_CHANNEL_VALID = BIT(0),
- NVM_CHANNEL_IBSS = BIT(1),
- NVM_CHANNEL_ACTIVE = BIT(3),
- NVM_CHANNEL_RADAR = BIT(4),
- NVM_CHANNEL_INDOOR_ONLY = BIT(5),
- NVM_CHANNEL_GO_CONCURRENT = BIT(6),
- NVM_CHANNEL_WIDE = BIT(8),
- NVM_CHANNEL_40MHZ = BIT(9),
- NVM_CHANNEL_80MHZ = BIT(10),
- NVM_CHANNEL_160MHZ = BIT(11),
+ NVM_CHANNEL_VALID = BIT(0),
+ NVM_CHANNEL_IBSS = BIT(1),
+ NVM_CHANNEL_ACTIVE = BIT(3),
+ NVM_CHANNEL_RADAR = BIT(4),
+ NVM_CHANNEL_INDOOR_ONLY = BIT(5),
+ NVM_CHANNEL_GO_CONCURRENT = BIT(6),
+ NVM_CHANNEL_UNIFORM = BIT(7),
+ NVM_CHANNEL_20MHZ = BIT(8),
+ NVM_CHANNEL_40MHZ = BIT(9),
+ NVM_CHANNEL_80MHZ = BIT(10),
+ NVM_CHANNEL_160MHZ = BIT(11),
+ NVM_CHANNEL_DC_HIGH = BIT(12),
};
#define CHECK_AND_PRINT_I(x) \
@@ -254,13 +259,12 @@ static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz,
static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg,
struct iwl_nvm_data *data,
const __le16 * const nvm_ch_flags,
- bool lar_supported)
+ bool lar_supported, bool no_wide_in_5ghz)
{
int ch_idx;
int n_channels = 0;
struct ieee80211_channel *channel;
u16 ch_flags;
- bool is_5ghz;
int num_of_ch, num_2ghz_channels;
const u8 *nvm_chan;
@@ -275,12 +279,20 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg,
}
for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) {
+ bool is_5ghz = (ch_idx >= num_2ghz_channels);
+
ch_flags = __le16_to_cpup(nvm_ch_flags + ch_idx);
- if (ch_idx >= num_2ghz_channels &&
- !data->sku_cap_band_52GHz_enable)
+ if (is_5ghz && !data->sku_cap_band_52GHz_enable)
continue;
+ /* workaround to disable wide channels in 5GHz */
+ if (no_wide_in_5ghz && is_5ghz) {
+ ch_flags &= ~(NVM_CHANNEL_40MHZ |
+ NVM_CHANNEL_80MHZ |
+ NVM_CHANNEL_160MHZ);
+ }
+
if (ch_flags & NVM_CHANNEL_160MHZ)
data->vht160_supported = true;
@@ -303,8 +315,8 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg,
n_channels++;
channel->hw_value = nvm_chan[ch_idx];
- channel->band = (ch_idx < num_2ghz_channels) ?
- NL80211_BAND_2GHZ : NL80211_BAND_5GHZ;
+ channel->band = is_5ghz ?
+ NL80211_BAND_5GHZ : NL80211_BAND_2GHZ;
channel->center_freq =
ieee80211_channel_to_frequency(
channel->hw_value, channel->band);
@@ -316,7 +328,6 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg,
* is not used in mvm, and is used for backwards compatibility
*/
channel->max_power = IWL_DEFAULT_MAX_TX_POWER;
- is_5ghz = channel->band == NL80211_BAND_5GHZ;
/* don't put limitations in case we're using LAR */
if (!lar_supported)
@@ -327,7 +338,7 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg,
channel->flags = 0;
IWL_DEBUG_EEPROM(dev,
- "Ch. %d [%sGHz] flags 0x%x %s%s%s%s%s%s%s%s%s%s(%ddBm): Ad-Hoc %ssupported\n",
+ "Ch. %d [%sGHz] flags 0x%x %s%s%s%s%s%s%s%s%s%s%s%s(%ddBm): Ad-Hoc %ssupported\n",
channel->hw_value,
is_5ghz ? "5.2" : "2.4",
ch_flags,
@@ -337,10 +348,12 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg,
CHECK_AND_PRINT_I(RADAR),
CHECK_AND_PRINT_I(INDOOR_ONLY),
CHECK_AND_PRINT_I(GO_CONCURRENT),
- CHECK_AND_PRINT_I(WIDE),
+ CHECK_AND_PRINT_I(UNIFORM),
+ CHECK_AND_PRINT_I(20MHZ),
CHECK_AND_PRINT_I(40MHZ),
CHECK_AND_PRINT_I(80MHZ),
CHECK_AND_PRINT_I(160MHZ),
+ CHECK_AND_PRINT_I(DC_HIGH),
channel->max_power,
((ch_flags & NVM_CHANNEL_IBSS) &&
!(ch_flags & NVM_CHANNEL_RADAR))
@@ -432,14 +445,15 @@ static void iwl_init_vht_hw_capab(const struct iwl_cfg *cfg,
void iwl_init_sbands(struct device *dev, const struct iwl_cfg *cfg,
struct iwl_nvm_data *data, const __le16 *nvm_ch_flags,
- u8 tx_chains, u8 rx_chains, bool lar_supported)
+ u8 tx_chains, u8 rx_chains, bool lar_supported,
+ bool no_wide_in_5ghz)
{
int n_channels;
int n_used = 0;
struct ieee80211_supported_band *sband;
n_channels = iwl_init_channel_map(dev, cfg, data, nvm_ch_flags,
- lar_supported);
+ lar_supported, no_wide_in_5ghz);
sband = &data->bands[NL80211_BAND_2GHZ];
sband->band = NL80211_BAND_2GHZ;
sband->bitrates = &iwl_cfg80211_rates[RATES_24_OFFS];
@@ -568,7 +582,7 @@ static void iwl_set_hw_address_family_8000(struct iwl_trans *trans,
const struct iwl_cfg *cfg,
struct iwl_nvm_data *data,
const __le16 *mac_override,
- const __le16 *nvm_hw)
+ const __be16 *nvm_hw)
{
const u8 *hw_addr;
@@ -615,7 +629,7 @@ static void iwl_set_hw_address_family_8000(struct iwl_trans *trans,
static int iwl_set_hw_address(struct iwl_trans *trans,
const struct iwl_cfg *cfg,
- struct iwl_nvm_data *data, const __le16 *nvm_hw,
+ struct iwl_nvm_data *data, const __be16 *nvm_hw,
const __le16 *mac_override)
{
if (cfg->mac_addr_from_csr) {
@@ -645,9 +659,41 @@ static int iwl_set_hw_address(struct iwl_trans *trans,
return 0;
}
+static bool
+iwl_nvm_no_wide_in_5ghz(struct device *dev, const struct iwl_cfg *cfg,
+ const __be16 *nvm_hw)
+{
+ /*
+ * Work around a bug in Indonesia SKUs where the regulatory data
+ * in some 7000-family OTPs erroneously allows wide channels in
+ * 5GHz. To check for Indonesia, we take the SKU value from
+ * bits 1-4 of the subsystem ID and check whether it is either
+ * 5 or 9. In those cases we must force-disable wide channels
+ * in 5GHz; otherwise the FW will throw a sysassert when we try
+ * to use them.
+ */
+ if (cfg->device_family == IWL_DEVICE_FAMILY_7000) {
+ /*
+ * Unlike the other sections in the NVM, the HW
+ * section is stored in big-endian.
+ */
+ u16 subsystem_id = be16_to_cpup(nvm_hw + SUBSYSTEM_ID);
+ u8 sku = (subsystem_id & 0x1e) >> 1;
+
+ if (sku == 5 || sku == 9) {
+ IWL_DEBUG_EEPROM(dev,
+ "disabling wide channels in 5GHz (0x%0x %d)\n",
+ subsystem_id, sku);
+ return true;
+ }
+ }
+
+ return false;
+}
+
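For concreteness, a worked example of the SKU extraction with a made-up subsystem ID:

	/* Made-up value: subsystem_id = 0x0012 (already byte-swapped
	 * by be16_to_cpup() above).
	 *   sku = (0x0012 & 0x1e) >> 1 = 0x12 >> 1 = 9
	 * -> Indonesia SKU, so the 40/80/160 MHz bits are masked off
	 * in 5GHz. */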
struct iwl_nvm_data *
iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg,
- const __le16 *nvm_hw, const __le16 *nvm_sw,
+ const __be16 *nvm_hw, const __le16 *nvm_sw,
const __le16 *nvm_calib, const __le16 *regulatory,
const __le16 *mac_override, const __le16 *phy_sku,
u8 tx_chains, u8 rx_chains, bool lar_fw_supported)
@@ -655,6 +701,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg,
struct device *dev = trans->dev;
struct iwl_nvm_data *data;
bool lar_enabled;
+ bool no_wide_in_5ghz = iwl_nvm_no_wide_in_5ghz(dev, cfg, nvm_hw);
u32 sku, radio_cfg;
u16 lar_config;
const __le16 *ch_section;
@@ -725,7 +772,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg,
}
iwl_init_sbands(dev, cfg, data, ch_section, tx_chains, rx_chains,
- lar_fw_supported && lar_enabled);
+ lar_fw_supported && lar_enabled, no_wide_in_5ghz);
data->calib_version = 255;
return data;
@@ -868,19 +915,27 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
prev_reg_rule_flags = reg_rule_flags;
IWL_DEBUG_DEV(dev, IWL_DL_LAR,
- "Ch. %d [%sGHz] %s%s%s%s%s%s%s%s%s(0x%02x) reg_flags 0x%x: %s\n",
+ "Ch. %d [%sGHz] %s%s%s%s%s%s%s%s%s%s%s%s(0x%02x)\n",
center_freq,
band == NL80211_BAND_5GHZ ? "5.2" : "2.4",
CHECK_AND_PRINT_I(VALID),
+ CHECK_AND_PRINT_I(IBSS),
CHECK_AND_PRINT_I(ACTIVE),
CHECK_AND_PRINT_I(RADAR),
- CHECK_AND_PRINT_I(WIDE),
+ CHECK_AND_PRINT_I(INDOOR_ONLY),
+ CHECK_AND_PRINT_I(GO_CONCURRENT),
+ CHECK_AND_PRINT_I(UNIFORM),
+ CHECK_AND_PRINT_I(20MHZ),
CHECK_AND_PRINT_I(40MHZ),
CHECK_AND_PRINT_I(80MHZ),
CHECK_AND_PRINT_I(160MHZ),
- CHECK_AND_PRINT_I(INDOOR_ONLY),
- CHECK_AND_PRINT_I(GO_CONCURRENT),
- ch_flags, reg_rule_flags,
+ CHECK_AND_PRINT_I(DC_HIGH),
+ ch_flags);
+ IWL_DEBUG_DEV(dev, IWL_DL_LAR,
+ "Ch. %d [%sGHz] reg_flags 0x%x: %s\n",
+ center_freq,
+ band == NL80211_BAND_5GHZ ? "5.2" : "2.4",
+ reg_rule_flags,
((ch_flags & NVM_CHANNEL_ACTIVE) &&
!(ch_flags & NVM_CHANNEL_RADAR))
? "Ad-Hoc" : "");
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
index 3fd6506..2d1a24d 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
@@ -77,7 +77,7 @@
*/
struct iwl_nvm_data *
iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg,
- const __le16 *nvm_hw, const __le16 *nvm_sw,
+ const __be16 *nvm_hw, const __le16 *nvm_sw,
const __le16 *nvm_calib, const __le16 *regulatory,
const __le16 *mac_override, const __le16 *phy_sku,
u8 tx_chains, u8 rx_chains, bool lar_fw_supported);
@@ -93,7 +93,8 @@ void iwl_set_hw_address_from_csr(struct iwl_trans *trans,
*/
void iwl_init_sbands(struct device *dev, const struct iwl_cfg *cfg,
struct iwl_nvm_data *data, const __le16 *nvm_ch_flags,
- u8 tx_chains, u8 rx_chains, bool lar_supported);
+ u8 tx_chains, u8 rx_chains, bool lar_supported,
+ bool no_wide_in_5ghz);
/**
* iwl_parse_mcc_info - parse MCC (mobile country code) info coming from FW
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
index 6772c59..421a869 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
@@ -109,13 +109,12 @@
/* Device system time */
#define DEVICE_SYSTEM_TIME_REG 0xA0206C
-/* Device NMI register */
+/* Device NMI register and value for the 8000 family and older hardware */
#define DEVICE_SET_NMI_REG 0x00a01c30
-#define DEVICE_SET_NMI_VAL_HW BIT(0)
#define DEVICE_SET_NMI_VAL_DRV BIT(7)
-#define DEVICE_SET_NMI_8000_REG 0x00a01c24
-#define DEVICE_SET_NMI_8000_VAL 0x1000000
+/* Device NMI register and value for the 9000 family and newer hardware */
#define UREG_NIC_SET_NMI_DRIVER 0x00a05c10
+#define UREG_NIC_SET_NMI_DRIVER_NMI_FROM_DRIVER_MSK 0xff000000
/* Shared registers (0x0..0x3ff, via target indirect or periphery) */
#define SHR_BASE 0x00a10000
@@ -404,6 +403,12 @@ enum aux_misc_master1_en {
#define SB_CPU_2_STATUS 0xA01E34
#define UMAG_SB_CPU_1_STATUS 0xA038C0
#define UMAG_SB_CPU_2_STATUS 0xA038C4
+#define UMAG_GEN_HW_STATUS 0xA038C8
+
+/* Bits used when checking the UMAG_GEN_HW_STATUS register */
+enum {
+ UMAG_GEN_HW_IS_FPGA = BIT(1),
+};
/* FW chicken bits */
#define LMPM_CHICK 0xA01FF8
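The UMAG_GEN_HW_IS_FPGA bit added above lets code tell FPGA-based pre-production platforms apart from real silicon. A usage sketch (illustrative only, not part of this patch):

	/* Illustrative: skip silicon-only tuning on FPGA platforms. */
	if (iwl_read_prph(trans, UMAG_GEN_HW_STATUS) & UMAG_GEN_HW_IS_FPGA) {
		/* FPGA emulation platform */
	}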
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index eb6842a..e90abbf 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -76,7 +76,8 @@
#include "iwl-config.h"
#include "fw/img.h"
#include "iwl-op-mode.h"
-#include "fw/api.h"
+#include "fw/api/cmdhdr.h"
+#include "fw/api/txq.h"
/**
* DOC: Transport layer - what is it ?
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile
index 83ac807..00e6737 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile
@@ -6,7 +6,7 @@ iwlmvm-y += power.o coex.o
iwlmvm-y += tt.o offloading.o tdls.o
iwlmvm-$(CONFIG_IWLWIFI_DEBUGFS) += debugfs.o debugfs-vif.o
iwlmvm-$(CONFIG_IWLWIFI_LEDS) += led.o
-iwlmvm-y += tof.o fw-dbg.o
+iwlmvm-y += tof.o
iwlmvm-$(CONFIG_PM) += d3.o
ccflags-y += -I$(src)/../
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/coex.c b/drivers/net/wireless/intel/iwlwifi/mvm/coex.c
index 34dd5c4..79c80f1 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/coex.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/coex.c
@@ -7,6 +7,7 @@
*
* Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -33,6 +34,7 @@
*
* Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2017 Intel Deutschland GmbH
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -67,7 +69,7 @@
#include <linux/etherdevice.h>
#include <net/mac80211.h>
-#include "fw-api-coex.h"
+#include "fw/api/coex.h"
#include "iwl-modparams.h"
#include "mvm.h"
#include "iwl-debug.h"
@@ -148,215 +150,6 @@ static const __le64 iwl_ci_mask[][3] = {
},
};
-struct corunning_block_luts {
- u8 range;
- __le32 lut20[BT_COEX_CORUN_LUT_SIZE];
-};
-
-/*
- * Ranges for the antenna coupling calibration / co-running block LUT:
- * LUT0: [ 0, 12[
- * LUT1: [12, 20[
- * LUT2: [20, 21[
- * LUT3: [21, 23[
- * LUT4: [23, 27[
- * LUT5: [27, 30[
- * LUT6: [30, 32[
- * LUT7: [32, 33[
- * LUT8: [33, - [
- */
-static const struct corunning_block_luts antenna_coupling_ranges[] = {
- {
- .range = 0,
- .lut20 = {
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- },
- },
- {
- .range = 12,
- .lut20 = {
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- },
- },
- {
- .range = 20,
- .lut20 = {
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- },
- },
- {
- .range = 21,
- .lut20 = {
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- },
- },
- {
- .range = 23,
- .lut20 = {
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- },
- },
- {
- .range = 27,
- .lut20 = {
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- },
- },
- {
- .range = 30,
- .lut20 = {
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- },
- },
- {
- .range = 32,
- .lut20 = {
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- },
- },
- {
- .range = 33,
- .lut20 = {
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- cpu_to_le32(0x00000000), cpu_to_le32(0x00000000),
- },
- },
-};
-
static enum iwl_bt_coex_lut_type
iwl_get_coex_type(struct iwl_mvm *mvm, const struct ieee80211_vif *vif)
{
@@ -437,9 +230,6 @@ int iwl_mvm_send_bt_init_conf(struct iwl_mvm *mvm)
bt_cmd.enabled_modules |=
cpu_to_le32(BT_COEX_SYNC2SCO_ENABLED);
- if (iwl_mvm_bt_is_plcr_supported(mvm))
- bt_cmd.enabled_modules |= cpu_to_le32(BT_COEX_CORUN_ENABLED);
-
if (iwl_mvm_is_mplut_supported(mvm))
bt_cmd.enabled_modules |= cpu_to_le32(BT_COEX_MPLUT_ENABLED);
@@ -560,8 +350,7 @@ static void iwl_mvm_bt_notif_iterator(void *_data, u8 *mac,
smps_mode = IEEE80211_SMPS_AUTOMATIC;
if (mvmvif->phy_ctxt &&
- IWL_COEX_IS_RRC_ON(mvm->last_bt_notif.ttc_rrc_status,
- mvmvif->phy_ctxt->id))
+ (mvm->last_bt_notif.rrc_status & BIT(mvmvif->phy_ctxt->id)))
smps_mode = IEEE80211_SMPS_AUTOMATIC;
IWL_DEBUG_COEX(data->mvm,
@@ -725,17 +514,36 @@ void iwl_mvm_rx_bt_coex_notif(struct iwl_mvm *mvm,
struct iwl_rx_packet *pkt = rxb_addr(rxb);
struct iwl_bt_coex_profile_notif *notif = (void *)pkt->data;
+ if (!iwl_mvm_has_new_ats_coex_api(mvm)) {
+ struct iwl_bt_coex_profile_notif_v4 *v4 = (void *)pkt->data;
+
+ mvm->last_bt_notif.mbox_msg[0] = v4->mbox_msg[0];
+ mvm->last_bt_notif.mbox_msg[1] = v4->mbox_msg[1];
+ mvm->last_bt_notif.mbox_msg[2] = v4->mbox_msg[2];
+ mvm->last_bt_notif.mbox_msg[3] = v4->mbox_msg[3];
+ mvm->last_bt_notif.msg_idx = v4->msg_idx;
+ mvm->last_bt_notif.bt_ci_compliance = v4->bt_ci_compliance;
+ mvm->last_bt_notif.primary_ch_lut = v4->primary_ch_lut;
+ mvm->last_bt_notif.secondary_ch_lut = v4->secondary_ch_lut;
+ mvm->last_bt_notif.bt_activity_grading =
+ v4->bt_activity_grading;
+ mvm->last_bt_notif.ttc_status = v4->ttc_status;
+ mvm->last_bt_notif.rrc_status = v4->rrc_status;
+ } else {
+ /* save this notification for future use: rssi fluctuations */
+ memcpy(&mvm->last_bt_notif, notif, sizeof(mvm->last_bt_notif));
+ }
+
IWL_DEBUG_COEX(mvm, "BT Coex Notification received\n");
- IWL_DEBUG_COEX(mvm, "\tBT ci compliance %d\n", notif->bt_ci_compliance);
+ IWL_DEBUG_COEX(mvm, "\tBT ci compliance %d\n",
+ mvm->last_bt_notif.bt_ci_compliance);
IWL_DEBUG_COEX(mvm, "\tBT primary_ch_lut %d\n",
- le32_to_cpu(notif->primary_ch_lut));
+ le32_to_cpu(mvm->last_bt_notif.primary_ch_lut));
IWL_DEBUG_COEX(mvm, "\tBT secondary_ch_lut %d\n",
- le32_to_cpu(notif->secondary_ch_lut));
+ le32_to_cpu(mvm->last_bt_notif.secondary_ch_lut));
IWL_DEBUG_COEX(mvm, "\tBT activity grading %d\n",
- le32_to_cpu(notif->bt_activity_grading));
+ le32_to_cpu(mvm->last_bt_notif.bt_activity_grading));
- /* remember this notification for future use: rssi fluctuations */
- memcpy(&mvm->last_bt_notif, notif, sizeof(mvm->last_bt_notif));
iwl_mvm_bt_coex_notif_handle(mvm);
}
@@ -792,7 +600,7 @@ u16 iwl_mvm_coex_agg_time_limit(struct iwl_mvm *mvm,
struct iwl_mvm_phy_ctxt *phy_ctxt = mvmvif->phy_ctxt;
enum iwl_bt_coex_lut_type lut_type;
- if (IWL_COEX_IS_TTC_ON(mvm->last_bt_notif.ttc_rrc_status, phy_ctxt->id))
+ if (mvm->last_bt_notif.ttc_status & BIT(phy_ctxt->id))
return LINK_QUAL_AGG_TIME_LIMIT_DEF;
if (le32_to_cpu(mvm->last_bt_notif.bt_activity_grading) <
@@ -816,7 +624,7 @@ bool iwl_mvm_bt_coex_is_mimo_allowed(struct iwl_mvm *mvm,
struct iwl_mvm_phy_ctxt *phy_ctxt = mvmvif->phy_ctxt;
enum iwl_bt_coex_lut_type lut_type;
- if (IWL_COEX_IS_TTC_ON(mvm->last_bt_notif.ttc_rrc_status, phy_ctxt->id))
+ if (mvm->last_bt_notif.ttc_status & BIT(phy_ctxt->id))
return true;
if (le32_to_cpu(mvm->last_bt_notif.bt_activity_grading) <
@@ -909,59 +717,3 @@ void iwl_mvm_bt_coex_vif_change(struct iwl_mvm *mvm)
{
iwl_mvm_bt_coex_notif_handle(mvm);
}
-
-void iwl_mvm_rx_ant_coupling_notif(struct iwl_mvm *mvm,
- struct iwl_rx_cmd_buffer *rxb)
-{
- struct iwl_rx_packet *pkt = rxb_addr(rxb);
- struct iwl_mvm_antenna_coupling_notif *notif = (void *)pkt->data;
- u32 ant_isolation = le32_to_cpu(notif->isolation);
- struct iwl_bt_coex_corun_lut_update_cmd cmd = {};
- u8 __maybe_unused lower_bound, upper_bound;
- u8 lut;
-
- if (!iwl_mvm_bt_is_plcr_supported(mvm))
- return;
-
- lockdep_assert_held(&mvm->mutex);
-
- /* Ignore updates if we are in force mode */
- if (unlikely(mvm->bt_force_ant_mode != BT_FORCE_ANT_DIS))
- return;
-
- if (ant_isolation == mvm->last_ant_isol)
- return;
-
- for (lut = 0; lut < ARRAY_SIZE(antenna_coupling_ranges) - 1; lut++)
- if (ant_isolation < antenna_coupling_ranges[lut + 1].range)
- break;
-
- lower_bound = antenna_coupling_ranges[lut].range;
-
- if (lut < ARRAY_SIZE(antenna_coupling_ranges) - 1)
- upper_bound = antenna_coupling_ranges[lut + 1].range;
- else
- upper_bound = antenna_coupling_ranges[lut].range;
-
- IWL_DEBUG_COEX(mvm, "Antenna isolation=%d in range [%d,%d[, lut=%d\n",
- ant_isolation, lower_bound, upper_bound, lut);
-
- mvm->last_ant_isol = ant_isolation;
-
- if (mvm->last_corun_lut == lut)
- return;
-
- mvm->last_corun_lut = lut;
-
- /* For the moment, use the same LUT for 20GHz and 40GHz */
- memcpy(&cmd.corun_lut20, antenna_coupling_ranges[lut].lut20,
- sizeof(cmd.corun_lut20));
-
- memcpy(&cmd.corun_lut40, antenna_coupling_ranges[lut].lut20,
- sizeof(cmd.corun_lut40));
-
- if (iwl_mvm_send_cmd_pdu(mvm, BT_COEX_UPDATE_CORUN_LUT, 0,
- sizeof(cmd), &cmd))
- IWL_ERR(mvm,
- "failed to send BT_COEX_UPDATE_CORUN_LUT command\n");
-}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
index 6fda862..976640f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
@@ -95,7 +95,6 @@
#define IWL_MVM_BT_COEX_EN_RED_TXP_THRESH 62
#define IWL_MVM_BT_COEX_DIS_RED_TXP_THRESH 65
#define IWL_MVM_BT_COEX_SYNC2SCO 1
-#define IWL_MVM_BT_COEX_CORUNNING 0
#define IWL_MVM_BT_COEX_MPLUT 1
#define IWL_MVM_BT_COEX_RRC 1
#define IWL_MVM_BT_COEX_TTC 1
@@ -111,7 +110,6 @@
#define IWL_MVM_SW_TX_CSUM_OFFLOAD 0
#define IWL_MVM_HW_CSUM_DISABLE 0
#define IWL_MVM_PARSE_NVM 0
-#define IWL_MVM_COLLECT_FW_ERR_DUMP 1
#define IWL_MVM_RS_NUM_TRY_BEFORE_ANT_TOGGLE 1
#define IWL_MVM_RS_HT_VHT_RETRIES_PER_RATE 2
#define IWL_MVM_RS_HT_VHT_RETRIES_PER_RATE_TW 1
@@ -138,8 +136,10 @@
#define IWL_MVM_RS_SR_NO_DECREASE 85 /* percent */
#define IWL_MVM_RS_AGG_TIME_LIMIT 4000 /* 4 msecs. valid 100-8000 */
#define IWL_MVM_RS_AGG_DISABLE_START 3
+#define IWL_MVM_RS_AGG_START_THRESHOLD 10 /* num frames per second */
#define IWL_MVM_RS_TPC_SR_FORCE_INCREASE 75 /* percent */
#define IWL_MVM_RS_TPC_SR_NO_INCREASE 85 /* percent */
#define IWL_MVM_RS_TPC_TX_POWER_STEP 3
+#define IWL_MVM_ENABLE_EBS 1
#endif /* __MVM_CONSTANTS_H */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
index a7ac281..71a01df 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
@@ -65,7 +65,7 @@
*
*****************************************************************************/
#include "mvm.h"
-#include "fw-api-tof.h"
+#include "fw/api/tof.h"
#include "debugfs.h"
static void iwl_dbgfs_update_pm(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index c1c9c48..e97904c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -69,7 +69,6 @@
#include <linux/netdevice.h>
#include "mvm.h"
-#include "fw-dbg.h"
#include "sta.h"
#include "iwl-io.h"
#include "debugfs.h"
@@ -83,8 +82,11 @@ static ssize_t iwl_dbgfs_ctdp_budget_read(struct file *file,
char buf[16];
int pos, budget;
+ if (!iwl_mvm_is_ctdp_supported(mvm))
+ return -EOPNOTSUPP;
+
if (!iwl_mvm_firmware_running(mvm) ||
- mvm->cur_ucode != IWL_UCODE_REGULAR)
+ mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR)
return -EIO;
mutex_lock(&mvm->mutex);
@@ -104,8 +106,11 @@ static ssize_t iwl_dbgfs_stop_ctdp_write(struct iwl_mvm *mvm, char *buf,
{
int ret;
+ if (!iwl_mvm_is_ctdp_supported(mvm))
+ return -EOPNOTSUPP;
+
if (!iwl_mvm_firmware_running(mvm) ||
- mvm->cur_ucode != IWL_UCODE_REGULAR)
+ mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR)
return -EIO;
mutex_lock(&mvm->mutex);
@@ -115,6 +120,18 @@ static ssize_t iwl_dbgfs_stop_ctdp_write(struct iwl_mvm *mvm, char *buf,
return ret ?: count;
}
+static ssize_t iwl_dbgfs_force_ctkill_write(struct iwl_mvm *mvm, char *buf,
+ size_t count, loff_t *ppos)
+{
+ if (!iwl_mvm_firmware_running(mvm) ||
+ mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR)
+ return -EIO;
+
+ iwl_mvm_enter_ctkill(mvm);
+
+ return count;
+}
+
static ssize_t iwl_dbgfs_tx_flush_write(struct iwl_mvm *mvm, char *buf,
size_t count, loff_t *ppos)
{
@@ -122,7 +139,7 @@ static ssize_t iwl_dbgfs_tx_flush_write(struct iwl_mvm *mvm, char *buf,
u32 flush_arg;
if (!iwl_mvm_firmware_running(mvm) ||
- mvm->cur_ucode != IWL_UCODE_REGULAR)
+ mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR)
return -EIO;
if (kstrtou32(buf, 0, &flush_arg))
@@ -155,7 +172,7 @@ static ssize_t iwl_dbgfs_sta_drain_write(struct iwl_mvm *mvm, char *buf,
int sta_id, drain, ret;
if (!iwl_mvm_firmware_running(mvm) ||
- mvm->cur_ucode != IWL_UCODE_REGULAR)
+ mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR)
return -EIO;
if (sscanf(buf, "%d %d", &sta_id, &drain) != 2)
@@ -192,7 +209,7 @@ static ssize_t iwl_dbgfs_sram_read(struct file *file, char __user *user_buf,
return -EINVAL;
/* default is to dump the entire data segment */
- img = &mvm->fw->img[mvm->cur_ucode];
+ img = &mvm->fw->img[mvm->fwrt.cur_fw_img];
ofs = img->sec[IWL_UCODE_SECTION_DATA].offset;
len = img->sec[IWL_UCODE_SECTION_DATA].len;
@@ -224,7 +241,7 @@ static ssize_t iwl_dbgfs_sram_write(struct iwl_mvm *mvm, char *buf,
if (!iwl_mvm_firmware_running(mvm))
return -EINVAL;
- img = &mvm->fw->img[mvm->cur_ucode];
+ img = &mvm->fw->img[mvm->fwrt.cur_fw_img];
img_offset = img->sec[IWL_UCODE_SECTION_DATA].offset;
img_len = img->sec[IWL_UCODE_SECTION_DATA].len;
@@ -452,20 +469,9 @@ static ssize_t iwl_dbgfs_disable_power_off_write(struct iwl_mvm *mvm, char *buf,
return ret ?: count;
}
-#define BT_MBOX_MSG(_notif, _num, _field) \
- ((le32_to_cpu((_notif)->mbox_msg[(_num)]) & BT_MBOX##_num##_##_field)\
- >> BT_MBOX##_num##_##_field##_POS)
-
-
-#define BT_MBOX_PRINT(_num, _field, _end) \
- pos += scnprintf(buf + pos, bufsz - pos, \
- "\t%s: %d%s", \
- #_field, \
- BT_MBOX_MSG(notif, _num, _field), \
- true ? "\n" : ", ");
-
static
-int iwl_mvm_coex_dump_mbox(struct iwl_bt_coex_profile_notif *notif, char *buf,
+int iwl_mvm_coex_dump_mbox(struct iwl_mvm *mvm,
+ struct iwl_bt_coex_profile_notif *notif, char *buf,
int pos, int bufsz)
{
pos += scnprintf(buf+pos, bufsz-pos, "MBOX dw0:\n");
@@ -509,6 +515,7 @@ int iwl_mvm_coex_dump_mbox(struct iwl_bt_coex_profile_notif *notif, char *buf,
BT_MBOX_PRINT(3, SCO_STATE, false);
BT_MBOX_PRINT(3, SNIFF_STATE, false);
BT_MBOX_PRINT(3, A2DP_STATE, false);
+ BT_MBOX_PRINT(3, A2DP_SRC, false);
BT_MBOX_PRINT(3, ACL_STATE, false);
BT_MBOX_PRINT(3, MSTR_STATE, false);
BT_MBOX_PRINT(3, OBX_STATE, false);
@@ -518,7 +525,12 @@ int iwl_mvm_coex_dump_mbox(struct iwl_bt_coex_profile_notif *notif, char *buf,
BT_MBOX_PRINT(3, INBAND_P, false);
BT_MBOX_PRINT(3, MSG_TYPE_2, false);
BT_MBOX_PRINT(3, SSN_2, false);
- BT_MBOX_PRINT(3, UPDATE_REQUEST, true);
+ BT_MBOX_PRINT(3, UPDATE_REQUEST, !iwl_mvm_has_new_ats_coex_api(mvm));
+
+ if (iwl_mvm_has_new_ats_coex_api(mvm)) {
+ BT_MBOX_PRINT(4, ATS_BT_INTERVAL, false);
+ BT_MBOX_PRINT(4, ATS_BT_ACTIVE_MAX_TH, true);
+ }
return pos;
}
@@ -537,7 +549,7 @@ static ssize_t iwl_dbgfs_bt_notif_read(struct file *file, char __user *user_buf,
mutex_lock(&mvm->mutex);
- pos += iwl_mvm_coex_dump_mbox(notif, buf, pos, bufsz);
+ pos += iwl_mvm_coex_dump_mbox(mvm, notif, buf, pos, bufsz);
pos += scnprintf(buf + pos, bufsz - pos, "bt_ci_compliance = %d\n",
notif->bt_ci_compliance);
@@ -548,20 +560,15 @@ static ssize_t iwl_dbgfs_bt_notif_read(struct file *file, char __user *user_buf,
pos += scnprintf(buf + pos,
bufsz - pos, "bt_activity_grading = %d\n",
le32_to_cpu(notif->bt_activity_grading));
- pos += scnprintf(buf + pos, bufsz - pos,
- "antenna isolation = %d CORUN LUT index = %d\n",
- mvm->last_ant_isol, mvm->last_corun_lut);
pos += scnprintf(buf + pos, bufsz - pos, "bt_rrc = %d\n",
- (notif->ttc_rrc_status >> 4) & 0xF);
+ notif->rrc_status & 0xF);
pos += scnprintf(buf + pos, bufsz - pos, "bt_ttc = %d\n",
- notif->ttc_rrc_status & 0xF);
+ notif->ttc_status & 0xF);
pos += scnprintf(buf + pos, bufsz - pos, "sync_sco = %d\n",
IWL_MVM_BT_COEX_SYNC2SCO);
pos += scnprintf(buf + pos, bufsz - pos, "mplut = %d\n",
IWL_MVM_BT_COEX_MPLUT);
- pos += scnprintf(buf + pos, bufsz - pos, "corunning = %d\n",
- IWL_MVM_BT_COEX_CORUNNING);
mutex_unlock(&mvm->mutex);
@@ -1123,7 +1130,7 @@ static ssize_t iwl_dbgfs_fw_dbg_conf_read(struct file *file,
int pos = 0;
mutex_lock(&mvm->mutex);
- conf = mvm->fw_dbg_conf;
+ conf = mvm->fwrt.dump.conf;
mutex_unlock(&mvm->mutex);
pos += scnprintf(buf + pos, bufsz - pos, "%d\n", conf);
@@ -1190,7 +1197,7 @@ static ssize_t iwl_dbgfs_fw_dbg_conf_write(struct iwl_mvm *mvm,
return -EINVAL;
mutex_lock(&mvm->mutex);
- ret = iwl_mvm_start_fw_dbg_conf(mvm, conf_id);
+ ret = iwl_fw_start_dbg_conf(&mvm->fwrt, conf_id);
mutex_unlock(&mvm->mutex);
return ret ?: count;
@@ -1211,8 +1218,8 @@ static ssize_t iwl_dbgfs_fw_dbg_collect_write(struct iwl_mvm *mvm,
if (count == 0)
return 0;
- iwl_mvm_fw_dbg_collect(mvm, FW_DBG_TRIGGER_USER, buf,
- (count - 1), NULL);
+ iwl_fw_dbg_collect(&mvm->fwrt, FW_DBG_TRIGGER_USER, buf,
+ (count - 1), NULL);
iwl_mvm_unref(mvm, IWL_MVM_REF_PRPH_WRITE);
@@ -1642,6 +1649,7 @@ MVM_DEBUGFS_READ_WRITE_FILE_OPS(prph_reg, 64);
/* Device wide debugfs entries */
MVM_DEBUGFS_READ_FILE_OPS(ctdp_budget);
MVM_DEBUGFS_WRITE_FILE_OPS(stop_ctdp, 8);
+MVM_DEBUGFS_WRITE_FILE_OPS(force_ctkill, 8);
MVM_DEBUGFS_WRITE_FILE_OPS(tx_flush, 16);
MVM_DEBUGFS_WRITE_FILE_OPS(sta_drain, 8);
MVM_DEBUGFS_WRITE_FILE_OPS(send_echo_cmd, 8);
@@ -1829,6 +1837,7 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir)
MVM_DEBUGFS_ADD_FILE(nic_temp, dbgfs_dir, S_IRUSR);
MVM_DEBUGFS_ADD_FILE(ctdp_budget, dbgfs_dir, S_IRUSR);
MVM_DEBUGFS_ADD_FILE(stop_ctdp, dbgfs_dir, S_IWUSR);
+ MVM_DEBUGFS_ADD_FILE(force_ctkill, dbgfs_dir, S_IWUSR);
MVM_DEBUGFS_ADD_FILE(stations, dbgfs_dir, S_IRUSR);
MVM_DEBUGFS_ADD_FILE(bt_notif, dbgfs_dir, S_IRUSR);
MVM_DEBUGFS_ADD_FILE(bt_cmd, dbgfs_dir, S_IRUSR);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h
index aad265d..e8e74dd 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h
@@ -68,2823 +68,33 @@
#ifndef __fw_api_h__
#define __fw_api_h__
-#include "fw-api-rs.h"
-#include "fw-api-rx.h"
-#include "fw-api-tx.h"
-#include "fw-api-sta.h"
-#include "fw-api-mac.h"
-#include "fw-api-power.h"
-#include "fw-api-d3.h"
-#include "fw-api-coex.h"
-#include "fw-api-scan.h"
-#include "fw-api-stats.h"
-#include "fw-api-tof.h"
-
-/* Tx queue numbers for non-DQA mode */
-enum {
- IWL_MVM_OFFCHANNEL_QUEUE = 8,
- IWL_MVM_CMD_QUEUE = 9,
-};
-
-/*
- * DQA queue numbers
- *
- * @IWL_MVM_DQA_CMD_QUEUE: a queue reserved for sending HCMDs to the FW
- * @IWL_MVM_DQA_AUX_QUEUE: a queue reserved for aux frames
- * @IWL_MVM_DQA_P2P_DEVICE_QUEUE: a queue reserved for P2P device frames
- * @IWL_MVM_DQA_GCAST_QUEUE: a queue reserved for P2P GO/SoftAP GCAST frames
- * @IWL_MVM_DQA_BSS_CLIENT_QUEUE: a queue reserved for BSS activity, to ensure
- * that we are never left without the possibility to connect to an AP.
- * @IWL_MVM_DQA_MIN_MGMT_QUEUE: first TXQ in pool for MGMT and non-QOS frames.
- * Each MGMT queue is mapped to a single STA
- * MGMT frames are frames that return true on ieee80211_is_mgmt()
- * @IWL_MVM_DQA_MAX_MGMT_QUEUE: last TXQ in pool for MGMT frames
- * @IWL_MVM_DQA_AP_PROBE_RESP_QUEUE: a queue reserved for P2P GO/SoftAP probe
- * responses
- * @IWL_MVM_DQA_MIN_DATA_QUEUE: first TXQ in pool for DATA frames.
- * DATA frames are intended for !ieee80211_is_mgmt() frames, but if
- * the MGMT TXQ pool is exhausted, mgmt frames can be sent on DATA queues
- * as well
- * @IWL_MVM_DQA_MAX_DATA_QUEUE: last TXQ in pool for DATA frames
- */
-enum iwl_mvm_dqa_txq {
- IWL_MVM_DQA_CMD_QUEUE = 0,
- IWL_MVM_DQA_AUX_QUEUE = 1,
- IWL_MVM_DQA_P2P_DEVICE_QUEUE = 2,
- IWL_MVM_DQA_GCAST_QUEUE = 3,
- IWL_MVM_DQA_BSS_CLIENT_QUEUE = 4,
- IWL_MVM_DQA_MIN_MGMT_QUEUE = 5,
- IWL_MVM_DQA_MAX_MGMT_QUEUE = 8,
- IWL_MVM_DQA_AP_PROBE_RESP_QUEUE = 9,
- IWL_MVM_DQA_MIN_DATA_QUEUE = 10,
- IWL_MVM_DQA_MAX_DATA_QUEUE = 31,
-};
-
-enum iwl_mvm_tx_fifo {
- IWL_MVM_TX_FIFO_BK = 0,
- IWL_MVM_TX_FIFO_BE,
- IWL_MVM_TX_FIFO_VI,
- IWL_MVM_TX_FIFO_VO,
- IWL_MVM_TX_FIFO_MCAST = 5,
- IWL_MVM_TX_FIFO_CMD = 7,
-};
-
-
-/**
- * enum iwl_legacy_cmds - legacy group command IDs
- */
-enum iwl_legacy_cmds {
- /**
- * @MVM_ALIVE:
- * Alive data from the firmware, as described in
- * &struct mvm_alive_resp_v3 or &struct mvm_alive_resp.
- */
- MVM_ALIVE = 0x1,
-
- /**
- * @REPLY_ERROR: Cause an error in the firmware, for testing purposes.
- */
- REPLY_ERROR = 0x2,
-
- /**
- * @ECHO_CMD: Send data to the device to have it returned immediately.
- */
- ECHO_CMD = 0x3,
-
- /**
- * @INIT_COMPLETE_NOTIF: Notification that initialization is complete.
- */
- INIT_COMPLETE_NOTIF = 0x4,
-
- /**
- * @PHY_CONTEXT_CMD:
- * Add/modify/remove a PHY context, using &struct iwl_phy_context_cmd.
- */
- PHY_CONTEXT_CMD = 0x8,
-
- /**
- * @DBG_CFG: Debug configuration command.
- */
- DBG_CFG = 0x9,
-
- /**
- * @ANTENNA_COUPLING_NOTIFICATION:
- * Antenna coupling data, &struct iwl_mvm_antenna_coupling_notif
- */
- ANTENNA_COUPLING_NOTIFICATION = 0xa,
-
- /**
- * @SCAN_ITERATION_COMPLETE_UMAC:
- * Firmware indicates a scan iteration completed, using
- * &struct iwl_umac_scan_iter_complete_notif.
- */
- SCAN_ITERATION_COMPLETE_UMAC = 0xb5,
-
- /**
- * @SCAN_CFG_CMD:
- * uses &struct iwl_scan_config_v1 or &struct iwl_scan_config
- */
- SCAN_CFG_CMD = 0xc,
-
- /**
- * @SCAN_REQ_UMAC: uses &struct iwl_scan_req_umac
- */
- SCAN_REQ_UMAC = 0xd,
-
- /**
- * @SCAN_ABORT_UMAC: uses &struct iwl_umac_scan_abort
- */
- SCAN_ABORT_UMAC = 0xe,
-
- /**
- * @SCAN_COMPLETE_UMAC: uses &struct iwl_umac_scan_complete
- */
- SCAN_COMPLETE_UMAC = 0xf,
-
- /**
- * @BA_WINDOW_STATUS_NOTIFICATION_ID:
- * uses &struct iwl_ba_window_status_notif
- */
- BA_WINDOW_STATUS_NOTIFICATION_ID = 0x13,
-
- /**
- * @ADD_STA_KEY:
- * &struct iwl_mvm_add_sta_key_cmd_v1 or
- * &struct iwl_mvm_add_sta_key_cmd.
- */
- ADD_STA_KEY = 0x17,
-
- /**
- * @ADD_STA:
- * &struct iwl_mvm_add_sta_cmd or &struct iwl_mvm_add_sta_cmd_v7.
- */
- ADD_STA = 0x18,
-
- /**
- * @REMOVE_STA: &struct iwl_mvm_rm_sta_cmd
- */
- REMOVE_STA = 0x19,
-
- /**
- * @FW_GET_ITEM_CMD: uses &struct iwl_fw_get_item_cmd
- */
- FW_GET_ITEM_CMD = 0x1a,
-
- /**
- * @TX_CMD: uses &struct iwl_tx_cmd or &struct iwl_tx_cmd_gen2,
- * response in &struct iwl_mvm_tx_resp or
- * &struct iwl_mvm_tx_resp_v3
- */
- TX_CMD = 0x1c,
-
- /**
- * @TXPATH_FLUSH: &struct iwl_tx_path_flush_cmd
- */
- TXPATH_FLUSH = 0x1e,
-
- /**
- * @MGMT_MCAST_KEY:
- * &struct iwl_mvm_mgmt_mcast_key_cmd or
- * &struct iwl_mvm_mgmt_mcast_key_cmd_v1
- */
- MGMT_MCAST_KEY = 0x1f,
-
- /* scheduler config */
- /**
- * @SCD_QUEUE_CFG: &struct iwl_scd_txq_cfg_cmd for older hardware,
- * &struct iwl_tx_queue_cfg_cmd with &struct iwl_tx_queue_cfg_rsp
- * for newer (A000) hardware.
- */
- SCD_QUEUE_CFG = 0x1d,
-
- /**
- * @WEP_KEY: uses &struct iwl_mvm_wep_key_cmd
- */
- WEP_KEY = 0x20,
-
- /**
- * @SHARED_MEM_CFG:
- * retrieve shared memory configuration - response in
- * &struct iwl_shared_mem_cfg
- */
- SHARED_MEM_CFG = 0x25,
-
- /**
- * @TDLS_CHANNEL_SWITCH_CMD: uses &struct iwl_tdls_channel_switch_cmd
- */
- TDLS_CHANNEL_SWITCH_CMD = 0x27,
-
- /**
- * @TDLS_CHANNEL_SWITCH_NOTIFICATION:
- * uses &struct iwl_tdls_channel_switch_notif
- */
- TDLS_CHANNEL_SWITCH_NOTIFICATION = 0xaa,
-
- /**
- * @TDLS_CONFIG_CMD:
- * &struct iwl_tdls_config_cmd, response in &struct iwl_tdls_config_res
- */
- TDLS_CONFIG_CMD = 0xa7,
-
- /**
- * @MAC_CONTEXT_CMD: &struct iwl_mac_ctx_cmd
- */
- MAC_CONTEXT_CMD = 0x28,
-
- /**
- * @TIME_EVENT_CMD:
- * &struct iwl_time_event_cmd, response in &struct iwl_time_event_resp
- */
- TIME_EVENT_CMD = 0x29, /* both CMD and response */
-
- /**
- * @TIME_EVENT_NOTIFICATION: &struct iwl_time_event_notif
- */
- TIME_EVENT_NOTIFICATION = 0x2a,
-
- /**
- * @BINDING_CONTEXT_CMD:
- * &struct iwl_binding_cmd or &struct iwl_binding_cmd_v1
- */
- BINDING_CONTEXT_CMD = 0x2b,
-
- /**
- * @TIME_QUOTA_CMD: &struct iwl_time_quota_cmd
- */
- TIME_QUOTA_CMD = 0x2c,
-
- /**
- * @NON_QOS_TX_COUNTER_CMD:
- * command is &struct iwl_nonqos_seq_query_cmd
- */
- NON_QOS_TX_COUNTER_CMD = 0x2d,
-
- /**
- * @LQ_CMD: using &struct iwl_lq_cmd
- */
- LQ_CMD = 0x4e,
-
- /**
- * @FW_PAGING_BLOCK_CMD:
- * &struct iwl_fw_paging_cmd
- */
- FW_PAGING_BLOCK_CMD = 0x4f,
-
- /**
- * @SCAN_OFFLOAD_REQUEST_CMD: uses &struct iwl_scan_req_lmac
- */
- SCAN_OFFLOAD_REQUEST_CMD = 0x51,
-
- /**
- * @SCAN_OFFLOAD_ABORT_CMD: abort the scan - no further contents
- */
- SCAN_OFFLOAD_ABORT_CMD = 0x52,
-
- /**
- * @HOT_SPOT_CMD: uses &struct iwl_hs20_roc_req
- */
- HOT_SPOT_CMD = 0x53,
-
- /**
- * @SCAN_OFFLOAD_COMPLETE:
- * notification, &struct iwl_periodic_scan_complete
- */
- SCAN_OFFLOAD_COMPLETE = 0x6D,
-
- /**
- * @SCAN_OFFLOAD_UPDATE_PROFILES_CMD:
- * update scan offload (scheduled scan) profiles/blacklist/etc.
- */
- SCAN_OFFLOAD_UPDATE_PROFILES_CMD = 0x6E,
-
- /**
- * @MATCH_FOUND_NOTIFICATION: scan match found
- */
- MATCH_FOUND_NOTIFICATION = 0xd9,
-
- /**
- * @SCAN_ITERATION_COMPLETE:
- * uses &struct iwl_lmac_scan_complete_notif
- */
- SCAN_ITERATION_COMPLETE = 0xe7,
-
- /* Phy */
- /**
- * @PHY_CONFIGURATION_CMD: &struct iwl_phy_cfg_cmd
- */
- PHY_CONFIGURATION_CMD = 0x6a,
-
- /**
- * @CALIB_RES_NOTIF_PHY_DB: &struct iwl_calib_res_notif_phy_db
- */
- CALIB_RES_NOTIF_PHY_DB = 0x6b,
-
- /**
- * @PHY_DB_CMD: &struct iwl_phy_db_cmd
- */
- PHY_DB_CMD = 0x6c,
-
- /**
- * @TOF_CMD: &struct iwl_tof_config_cmd
- */
- TOF_CMD = 0x10,
-
- /**
- * @TOF_NOTIFICATION: &struct iwl_tof_gen_resp_cmd
- */
- TOF_NOTIFICATION = 0x11,
-
- /**
- * @POWER_TABLE_CMD: &struct iwl_device_power_cmd
- */
- POWER_TABLE_CMD = 0x77,
-
- /**
- * @PSM_UAPSD_AP_MISBEHAVING_NOTIFICATION:
- * &struct iwl_uapsd_misbehaving_ap_notif
- */
- PSM_UAPSD_AP_MISBEHAVING_NOTIFICATION = 0x78,
-
- /**
- * @LTR_CONFIG: &struct iwl_ltr_config_cmd
- */
- LTR_CONFIG = 0xee,
-
- /**
- * @REPLY_THERMAL_MNG_BACKOFF:
- * Thermal throttling command
- */
- REPLY_THERMAL_MNG_BACKOFF = 0x7e,
-
- /**
- * @DC2DC_CONFIG_CMD:
- * Set/Get DC2DC frequency tune
- * Command is &struct iwl_dc2dc_config_cmd,
- * response is &struct iwl_dc2dc_config_resp
- */
- DC2DC_CONFIG_CMD = 0x83,
-
- /**
- * @NVM_ACCESS_CMD: using &struct iwl_nvm_access_cmd
- */
- NVM_ACCESS_CMD = 0x88,
-
- /**
- * @BEACON_NOTIFICATION: &struct iwl_extended_beacon_notif
- */
- BEACON_NOTIFICATION = 0x90,
-
- /**
- * @BEACON_TEMPLATE_CMD:
- * Uses one of &struct iwl_mac_beacon_cmd_v6,
- * &struct iwl_mac_beacon_cmd_v7 or &struct iwl_mac_beacon_cmd
- * depending on the device version.
- */
- BEACON_TEMPLATE_CMD = 0x91,
- /**
- * @TX_ANT_CONFIGURATION_CMD: &struct iwl_tx_ant_cfg_cmd
- */
- TX_ANT_CONFIGURATION_CMD = 0x98,
-
- /**
- * @STATISTICS_CMD:
- * one of &struct iwl_statistics_cmd,
- * &struct iwl_notif_statistics_v11,
- * &struct iwl_notif_statistics_v10,
- * &struct iwl_notif_statistics_cdb
- */
- STATISTICS_CMD = 0x9c,
-
- /**
- * @STATISTICS_NOTIFICATION:
- * one of &struct iwl_notif_statistics_v10,
- * &struct iwl_notif_statistics_v11,
- * &struct iwl_notif_statistics_cdb
- */
- STATISTICS_NOTIFICATION = 0x9d,
-
- /**
- * @EOSP_NOTIFICATION:
- * Notify that a service period ended,
- * &struct iwl_mvm_eosp_notification
- */
- EOSP_NOTIFICATION = 0x9e,
-
- /**
- * @REDUCE_TX_POWER_CMD:
- * &struct iwl_dev_tx_power_cmd_v3 or &struct iwl_dev_tx_power_cmd
- */
- REDUCE_TX_POWER_CMD = 0x9f,
-
- /**
- * @CARD_STATE_NOTIFICATION:
- * Card state (RF/CT kill) notification,
- * uses &struct iwl_card_state_notif
- */
- CARD_STATE_NOTIFICATION = 0xa1,
-
- /**
- * @MISSED_BEACONS_NOTIFICATION: &struct iwl_missed_beacons_notif
- */
- MISSED_BEACONS_NOTIFICATION = 0xa2,
-
- /**
- * @MAC_PM_POWER_TABLE: using &struct iwl_mac_power_cmd
- */
- MAC_PM_POWER_TABLE = 0xa9,
-
- /**
- * @MFUART_LOAD_NOTIFICATION: &struct iwl_mfuart_load_notif
- */
- MFUART_LOAD_NOTIFICATION = 0xb1,
-
- /**
- * @RSS_CONFIG_CMD: &struct iwl_rss_config_cmd
- */
- RSS_CONFIG_CMD = 0xb3,
-
- /**
- * @REPLY_RX_PHY_CMD: &struct iwl_rx_phy_info
- */
- REPLY_RX_PHY_CMD = 0xc0,
-
- /**
- * @REPLY_RX_MPDU_CMD:
- * &struct iwl_rx_mpdu_res_start or &struct iwl_rx_mpdu_desc
- */
- REPLY_RX_MPDU_CMD = 0xc1,
-
- /**
- * @FRAME_RELEASE:
- * Frame release (reorder helper) notification, uses
- * &struct iwl_frame_release
- */
- FRAME_RELEASE = 0xc3,
-
- /**
- * @BA_NOTIF:
- * BlockAck notification, uses &struct iwl_mvm_compressed_ba_notif
- * or &struct iwl_mvm_ba_notif depending on the HW
- */
- BA_NOTIF = 0xc5,
-
- /* Location Aware Regulatory */
- /**
- * @MCC_UPDATE_CMD: using &struct iwl_mcc_update_cmd
- */
- MCC_UPDATE_CMD = 0xc8,
-
- /**
- * @MCC_CHUB_UPDATE_CMD: using &struct iwl_mcc_chub_notif
- */
- MCC_CHUB_UPDATE_CMD = 0xc9,
-
- /**
- * @MARKER_CMD: trace marker command, uses &struct iwl_mvm_marker
- */
- MARKER_CMD = 0xcb,
-
- /**
- * @BT_PROFILE_NOTIFICATION: &struct iwl_bt_coex_profile_notif
- */
- BT_PROFILE_NOTIFICATION = 0xce,
-
- /**
- * @BT_CONFIG: &struct iwl_bt_coex_cmd
- */
- BT_CONFIG = 0x9b,
-
- /**
- * @BT_COEX_UPDATE_CORUN_LUT:
- * &struct iwl_bt_coex_corun_lut_update_cmd
- */
- BT_COEX_UPDATE_CORUN_LUT = 0x5b,
-
- /**
- * @BT_COEX_UPDATE_REDUCED_TXP:
- * &struct iwl_bt_coex_reduced_txp_update_cmd
- */
- BT_COEX_UPDATE_REDUCED_TXP = 0x5c,
-
- /**
- * @BT_COEX_CI: &struct iwl_bt_coex_ci_cmd
- */
- BT_COEX_CI = 0x5d,
-
- /**
- * @REPLY_SF_CFG_CMD: &struct iwl_sf_cfg_cmd
- */
- REPLY_SF_CFG_CMD = 0xd1,
- /**
- * @REPLY_BEACON_FILTERING_CMD: &struct iwl_beacon_filter_cmd
- */
- REPLY_BEACON_FILTERING_CMD = 0xd2,
-
- /**
- * @DTS_MEASUREMENT_NOTIFICATION:
- * &struct iwl_dts_measurement_notif_v1 or
- * &struct iwl_dts_measurement_notif_v2
- */
- DTS_MEASUREMENT_NOTIFICATION = 0xdd,
-
- /**
- * @LDBG_CONFIG_CMD: configure continuous trace recording
- */
- LDBG_CONFIG_CMD = 0xf6,
-
- /**
- * @DEBUG_LOG_MSG: Debugging log data from firmware
- */
- DEBUG_LOG_MSG = 0xf7,
-
- /**
- * @BCAST_FILTER_CMD: &struct iwl_bcast_filter_cmd
- */
- BCAST_FILTER_CMD = 0xcf,
-
- /**
- * @MCAST_FILTER_CMD: &struct iwl_mcast_filter_cmd
- */
- MCAST_FILTER_CMD = 0xd0,
-
- /**
- * @D3_CONFIG_CMD: &struct iwl_d3_manager_config
- */
- D3_CONFIG_CMD = 0xd3,
-
- /**
- * @PROT_OFFLOAD_CONFIG_CMD: Depending on firmware, uses one of
- * &struct iwl_proto_offload_cmd_v1, &struct iwl_proto_offload_cmd_v2,
- * &struct iwl_proto_offload_cmd_v3_small,
- * &struct iwl_proto_offload_cmd_v3_large
- */
- PROT_OFFLOAD_CONFIG_CMD = 0xd4,
-
- /**
- * @OFFLOADS_QUERY_CMD:
- * No data in command, response in &struct iwl_wowlan_status
- */
- OFFLOADS_QUERY_CMD = 0xd5,
-
- /**
- * @REMOTE_WAKE_CONFIG_CMD: &struct iwl_wowlan_remote_wake_config
- */
- REMOTE_WAKE_CONFIG_CMD = 0xd6,
-
- /**
- * @D0I3_END_CMD: End D0i3/D3 state, no command data
- */
- D0I3_END_CMD = 0xed,
-
- /**
- * @WOWLAN_PATTERNS: &struct iwl_wowlan_patterns_cmd
- */
- WOWLAN_PATTERNS = 0xe0,
-
- /**
- * @WOWLAN_CONFIGURATION: &struct iwl_wowlan_config_cmd
- */
- WOWLAN_CONFIGURATION = 0xe1,
-
- /**
- * @WOWLAN_TSC_RSC_PARAM: &struct iwl_wowlan_rsc_tsc_params_cmd
- */
- WOWLAN_TSC_RSC_PARAM = 0xe2,
-
- /**
- * @WOWLAN_TKIP_PARAM: &struct iwl_wowlan_tkip_params_cmd
- */
- WOWLAN_TKIP_PARAM = 0xe3,
-
- /**
- * @WOWLAN_KEK_KCK_MATERIAL: &struct iwl_wowlan_kek_kck_material_cmd
- */
- WOWLAN_KEK_KCK_MATERIAL = 0xe4,
-
- /**
- * @WOWLAN_GET_STATUSES: response in &struct iwl_wowlan_status
- */
- WOWLAN_GET_STATUSES = 0xe5,
-
- /**
- * @SCAN_OFFLOAD_PROFILES_QUERY_CMD:
- * No command data, response is &struct iwl_scan_offload_profiles_query
- */
- SCAN_OFFLOAD_PROFILES_QUERY_CMD = 0x56,
-};
-
-/* Please keep this enum *SORTED* by hex value.
- * Needed for binary search, otherwise a warning will be triggered.
- */
-enum iwl_mac_conf_subcmd_ids {
- LINK_QUALITY_MEASUREMENT_CMD = 0x1,
- LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF = 0xFE,
- CHANNEL_SWITCH_NOA_NOTIF = 0xFF,
-};
-
-/**
- * enum iwl_phy_ops_subcmd_ids - PHY group commands
- */
-enum iwl_phy_ops_subcmd_ids {
- /**
- * @CMD_DTS_MEASUREMENT_TRIGGER_WIDE:
- * Uses either &struct iwl_dts_measurement_cmd or
- * &struct iwl_ext_dts_measurement_cmd
- */
- CMD_DTS_MEASUREMENT_TRIGGER_WIDE = 0x0,
-
- /**
- * @CTDP_CONFIG_CMD: &struct iwl_mvm_ctdp_cmd
- */
- CTDP_CONFIG_CMD = 0x03,
-
- /**
- * @TEMP_REPORTING_THRESHOLDS_CMD: &struct temp_report_ths_cmd
- */
- TEMP_REPORTING_THRESHOLDS_CMD = 0x04,
-
- /**
- * @GEO_TX_POWER_LIMIT: &struct iwl_geo_tx_power_profiles_cmd
- */
- GEO_TX_POWER_LIMIT = 0x05,
-
- /**
- * @CT_KILL_NOTIFICATION: &struct ct_kill_notif
- */
- CT_KILL_NOTIFICATION = 0xFE,
-
- /**
- * @DTS_MEASUREMENT_NOTIF_WIDE:
- * &struct iwl_dts_measurement_notif_v1 or
- * &struct iwl_dts_measurement_notif_v2
- */
- DTS_MEASUREMENT_NOTIF_WIDE = 0xFF,
-};
-
-/**
- * enum iwl_system_subcmd_ids - system group command IDs
- */
-enum iwl_system_subcmd_ids {
- /**
- * @SHARED_MEM_CFG_CMD:
- * response in &struct iwl_shared_mem_cfg or
- * &struct iwl_shared_mem_cfg_v2
- */
- SHARED_MEM_CFG_CMD = 0x0,
-
- /**
- * @INIT_EXTENDED_CFG_CMD: &struct iwl_init_extended_cfg_cmd
- */
- INIT_EXTENDED_CFG_CMD = 0x03,
-};
-
-/**
- * enum iwl_data_path_subcmd_ids - data path group commands
- */
-enum iwl_data_path_subcmd_ids {
- /**
- * @DQA_ENABLE_CMD: &struct iwl_dqa_enable_cmd
- */
- DQA_ENABLE_CMD = 0x0,
-
- /**
- * @UPDATE_MU_GROUPS_CMD: &struct iwl_mu_group_mgmt_cmd
- */
- UPDATE_MU_GROUPS_CMD = 0x1,
-
- /**
- * @TRIGGER_RX_QUEUES_NOTIF_CMD: &struct iwl_rxq_sync_cmd
- */
- TRIGGER_RX_QUEUES_NOTIF_CMD = 0x2,
-
- /**
- * @STA_PM_NOTIF: &struct iwl_mvm_pm_state_notification
- */
- STA_PM_NOTIF = 0xFD,
-
- /**
- * @MU_GROUP_MGMT_NOTIF: &struct iwl_mu_group_mgmt_notif
- */
- MU_GROUP_MGMT_NOTIF = 0xFE,
-
- /**
- * @RX_QUEUES_NOTIFICATION: &struct iwl_rxq_sync_notification
- */
- RX_QUEUES_NOTIFICATION = 0xFF,
-};
-
-/**
- * enum iwl_prot_offload_subcmd_ids - protocol offload commands
- */
-enum iwl_prot_offload_subcmd_ids {
- /**
- * @STORED_BEACON_NTF: &struct iwl_stored_beacon_notif
- */
- STORED_BEACON_NTF = 0xFF,
-};
-
-/**
- * enum iwl_regulatory_and_nvm_subcmd_ids - regulatory/NVM commands
- */
-enum iwl_regulatory_and_nvm_subcmd_ids {
- /**
- * @NVM_ACCESS_COMPLETE: &struct iwl_nvm_access_complete_cmd
- */
- NVM_ACCESS_COMPLETE = 0x0,
-
- /**
- * @NVM_GET_INFO:
- * Command is &struct iwl_nvm_get_info,
- * response is &struct iwl_nvm_get_info_rsp
- */
- NVM_GET_INFO = 0x2,
-};
-
-/**
- * enum iwl_debug_cmds - debug commands
- */
-enum iwl_debug_cmds {
- /**
- * @LMAC_RD_WR:
- * LMAC memory read/write, using &struct iwl_dbg_mem_access_cmd and
- * &struct iwl_dbg_mem_access_rsp
- */
- LMAC_RD_WR = 0x0,
- /**
- * @UMAC_RD_WR:
- * UMAC memory read/write, using &struct iwl_dbg_mem_access_cmd and
- * &struct iwl_dbg_mem_access_rsp
- */
- UMAC_RD_WR = 0x1,
- /**
- * @MFU_ASSERT_DUMP_NTF:
- * &struct iwl_mfu_assert_dump_notif
- */
- MFU_ASSERT_DUMP_NTF = 0xFE,
-};
-
-/**
- * enum iwl_mvm_command_groups - command groups for the firmware
- * @LEGACY_GROUP: legacy group, uses command IDs from &enum iwl_legacy_cmds
- * @LONG_GROUP: legacy group with long header, also uses command IDs
- * from &enum iwl_legacy_cmds
- * @SYSTEM_GROUP: system group, uses command IDs from
- * &enum iwl_system_subcmd_ids
- * @MAC_CONF_GROUP: MAC configuration group, uses command IDs from
- * &enum iwl_mac_conf_subcmd_ids
- * @PHY_OPS_GROUP: PHY operations group, uses command IDs from
- * &enum iwl_phy_ops_subcmd_ids
- * @DATA_PATH_GROUP: data path group, uses command IDs from
- * &enum iwl_data_path_subcmd_ids
- * @NAN_GROUP: NAN group, uses command IDs from &enum iwl_nan_subcmd_ids
- * @TOF_GROUP: TOF group, uses command IDs from &enum iwl_tof_subcmd_ids
- * @PROT_OFFLOAD_GROUP: protocol offload group, uses command IDs from
- * &enum iwl_prot_offload_subcmd_ids
- * @REGULATORY_AND_NVM_GROUP: regulatory/NVM group, uses command IDs from
- * &enum iwl_regulatory_and_nvm_subcmd_ids
- * @DEBUG_GROUP: Debug group, uses command IDs from &enum iwl_debug_cmds
- */
-enum iwl_mvm_command_groups {
- LEGACY_GROUP = 0x0,
- LONG_GROUP = 0x1,
- SYSTEM_GROUP = 0x2,
- MAC_CONF_GROUP = 0x3,
- PHY_OPS_GROUP = 0x4,
- DATA_PATH_GROUP = 0x5,
- PROT_OFFLOAD_GROUP = 0xb,
- REGULATORY_AND_NVM_GROUP = 0xc,
- DEBUG_GROUP = 0xf,
-};
-
-/**
- * struct iwl_cmd_response - generic response struct for most commands
- * @status: status of the requested command; its meaning is command-dependent
- */
-struct iwl_cmd_response {
- __le32 status;
-};
-
-/*
- * struct iwl_dqa_enable_cmd
- * @cmd_queue: the TXQ number of the command queue
- */
-struct iwl_dqa_enable_cmd {
- __le32 cmd_queue;
-} __packed; /* DQA_CONTROL_CMD_API_S_VER_1 */
-
-/*
- * struct iwl_tx_ant_cfg_cmd
- * @valid: valid antenna configuration
- */
-struct iwl_tx_ant_cfg_cmd {
- __le32 valid;
-} __packed;
-
-/**
- * struct iwl_calib_ctrl - Calibration control struct.
- * Sent as part of the phy configuration command.
- * @flow_trigger: bitmap for which calibrations to perform according to
- * flow triggers, using &enum iwl_calib_cfg
- * @event_trigger: bitmap for which calibrations to perform according to
- * event triggers, using &enum iwl_calib_cfg
- */
-struct iwl_calib_ctrl {
- __le32 flow_trigger;
- __le32 event_trigger;
-} __packed;
-
-/* This enum defines the bitmap of various calibrations to enable in both
- * init ucode and runtime ucode through CALIBRATION_CFG_CMD.
- */
-enum iwl_calib_cfg {
- IWL_CALIB_CFG_XTAL_IDX = BIT(0),
- IWL_CALIB_CFG_TEMPERATURE_IDX = BIT(1),
- IWL_CALIB_CFG_VOLTAGE_READ_IDX = BIT(2),
- IWL_CALIB_CFG_PAPD_IDX = BIT(3),
- IWL_CALIB_CFG_TX_PWR_IDX = BIT(4),
- IWL_CALIB_CFG_DC_IDX = BIT(5),
- IWL_CALIB_CFG_BB_FILTER_IDX = BIT(6),
- IWL_CALIB_CFG_LO_LEAKAGE_IDX = BIT(7),
- IWL_CALIB_CFG_TX_IQ_IDX = BIT(8),
- IWL_CALIB_CFG_TX_IQ_SKEW_IDX = BIT(9),
- IWL_CALIB_CFG_RX_IQ_IDX = BIT(10),
- IWL_CALIB_CFG_RX_IQ_SKEW_IDX = BIT(11),
- IWL_CALIB_CFG_SENSITIVITY_IDX = BIT(12),
- IWL_CALIB_CFG_CHAIN_NOISE_IDX = BIT(13),
- IWL_CALIB_CFG_DISCONNECTED_ANT_IDX = BIT(14),
- IWL_CALIB_CFG_ANT_COUPLING_IDX = BIT(15),
- IWL_CALIB_CFG_DAC_IDX = BIT(16),
- IWL_CALIB_CFG_ABS_IDX = BIT(17),
- IWL_CALIB_CFG_AGC_IDX = BIT(18),
-};
-
-/**
- * struct iwl_phy_cfg_cmd - Phy configuration command
- * @phy_cfg: PHY configuration value, uses &enum iwl_fw_phy_cfg
- * @calib_control: calibration control data
- */
-struct iwl_phy_cfg_cmd {
- __le32 phy_cfg;
- struct iwl_calib_ctrl calib_control;
-} __packed;
-
-#define PHY_CFG_RADIO_TYPE (BIT(0) | BIT(1))
-#define PHY_CFG_RADIO_STEP (BIT(2) | BIT(3))
-#define PHY_CFG_RADIO_DASH (BIT(4) | BIT(5))
-#define PHY_CFG_PRODUCT_NUMBER (BIT(6) | BIT(7))
-#define PHY_CFG_TX_CHAIN_A BIT(8)
-#define PHY_CFG_TX_CHAIN_B BIT(9)
-#define PHY_CFG_TX_CHAIN_C BIT(10)
-#define PHY_CFG_RX_CHAIN_A BIT(12)
-#define PHY_CFG_RX_CHAIN_B BIT(13)
-#define PHY_CFG_RX_CHAIN_C BIT(14)
-
-
-/**
- * enum iwl_nvm_access_op - NVM access opcode
- * @IWL_NVM_READ: read NVM
- * @IWL_NVM_WRITE: write NVM
- */
-enum iwl_nvm_access_op {
- IWL_NVM_READ = 0,
- IWL_NVM_WRITE = 1,
-};
-
-/**
- * enum iwl_nvm_access_target - target of the NVM_ACCESS_CMD
- * @NVM_ACCESS_TARGET_CACHE: access the cache
- * @NVM_ACCESS_TARGET_OTP: access the OTP
- * @NVM_ACCESS_TARGET_EEPROM: access the EEPROM
- */
-enum iwl_nvm_access_target {
- NVM_ACCESS_TARGET_CACHE = 0,
- NVM_ACCESS_TARGET_OTP = 1,
- NVM_ACCESS_TARGET_EEPROM = 2,
-};
-
-/**
- * enum iwl_nvm_section_type - section types for NVM_ACCESS_CMD
- * @NVM_SECTION_TYPE_SW: software section
- * @NVM_SECTION_TYPE_REGULATORY: regulatory section
- * @NVM_SECTION_TYPE_CALIBRATION: calibration section
- * @NVM_SECTION_TYPE_PRODUCTION: production section
- * @NVM_SECTION_TYPE_MAC_OVERRIDE: MAC override section
- * @NVM_SECTION_TYPE_PHY_SKU: PHY SKU section
- * @NVM_MAX_NUM_SECTIONS: number of sections
- */
-enum iwl_nvm_section_type {
- NVM_SECTION_TYPE_SW = 1,
- NVM_SECTION_TYPE_REGULATORY = 3,
- NVM_SECTION_TYPE_CALIBRATION = 4,
- NVM_SECTION_TYPE_PRODUCTION = 5,
- NVM_SECTION_TYPE_MAC_OVERRIDE = 11,
- NVM_SECTION_TYPE_PHY_SKU = 12,
- NVM_MAX_NUM_SECTIONS = 13,
-};
-
-/**
- * struct iwl_nvm_access_cmd - Request the device to send an NVM section
- * @op_code: &enum iwl_nvm_access_op
- * @target: &enum iwl_nvm_access_target
- * @type: &enum iwl_nvm_section_type
- * @offset: offset in bytes into the section
- * @length: in bytes, to read/write
- * @data: if write operation, the data to write. On read, it is empty.
- */
-struct iwl_nvm_access_cmd {
- u8 op_code;
- u8 target;
- __le16 type;
- __le16 offset;
- __le16 length;
- u8 data[];
-} __packed; /* NVM_ACCESS_CMD_API_S_VER_2 */
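/*
 * Editorial sketch, not part of this patch: how an NVM read request might
 * be filled in with the definitions above. The helper name and the
 * offset/length values are hypothetical; allocating the command and
 * sending it to the firmware are driver-specific and omitted.
 */
static void example_fill_nvm_read(struct iwl_nvm_access_cmd *cmd)
{
	cmd->op_code = IWL_NVM_READ;
	cmd->target = NVM_ACCESS_TARGET_CACHE;
	cmd->type = cpu_to_le16(NVM_SECTION_TYPE_SW);
	cmd->offset = cpu_to_le16(0);	/* start of the section */
	cmd->length = cpu_to_le16(64);	/* read 64 bytes */
	/* cmd->data stays empty on reads; the response carries the data */
}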
-
-#define NUM_OF_FW_PAGING_BLOCKS 33 /* 32 for data and 1 block for CSS */
-
-/**
- * struct iwl_fw_paging_cmd - paging layout
- *
- * (FW_PAGING_BLOCK_CMD = 0x4f)
- *
- * Send to the FW the paging layout kept in the driver.
- *
- * @flags: various flags for the command
- * @block_size: the block size in powers of 2
- * @block_num: number of blocks specified in the command.
- * @device_phy_addr: virtual addresses from device side
- */
-struct iwl_fw_paging_cmd {
- __le32 flags;
- __le32 block_size;
- __le32 block_num;
- __le32 device_phy_addr[NUM_OF_FW_PAGING_BLOCKS];
-} __packed; /* FW_PAGING_BLOCK_CMD_API_S_VER_1 */
-
-/*
- * FW item IDs
- *
- * @IWL_FW_ITEM_ID_PAGING: Address of the pages that the FW will
- * upload/download
- */
-enum iwl_fw_item_id {
- IWL_FW_ITEM_ID_PAGING = 3,
-};
-
-/*
- * struct iwl_fw_get_item_cmd - get an item from the fw
- */
-struct iwl_fw_get_item_cmd {
- __le32 item_id;
-} __packed; /* FW_GET_ITEM_CMD_API_S_VER_1 */
-
-#define CONT_REC_COMMAND_SIZE 80
-#define ENABLE_CONT_RECORDING 0x15
-#define DISABLE_CONT_RECORDING 0x16
-
-/*
- * struct iwl_continuous_record_mode - recording mode
- */
-struct iwl_continuous_record_mode {
- __le16 enable_recording;
-} __packed;
-
-/*
- * struct iwl_continuous_record_cmd - enable/disable continuous recording
- */
-struct iwl_continuous_record_cmd {
- struct iwl_continuous_record_mode record_mode;
- u8 pad[CONT_REC_COMMAND_SIZE -
- sizeof(struct iwl_continuous_record_mode)];
-} __packed;
-
-struct iwl_fw_get_item_resp {
- __le32 item_id;
- __le32 item_byte_cnt;
- __le32 item_val;
-} __packed; /* FW_GET_ITEM_RSP_S_VER_1 */
-
-/**
- * struct iwl_nvm_access_resp - response to NVM_ACCESS_CMD
- * @offset: offset in bytes into the section
- * @length: in bytes, either how much was written or read
- * @type: NVM_SECTION_TYPE_*
- * @status: 0 for success, fail otherwise
- * @data: if read operation, the data returned. Empty on write.
- */
-struct iwl_nvm_access_resp {
- __le16 offset;
- __le16 length;
- __le16 type;
- __le16 status;
- u8 data[];
-} __packed; /* NVM_ACCESS_CMD_RESP_API_S_VER_2 */
-
-/* MVM_ALIVE 0x1 */
-
-/* alive response is_valid values */
-#define ALIVE_RESP_UCODE_OK BIT(0)
-#define ALIVE_RESP_RFKILL BIT(1)
-
-/* alive response ver_type values */
-enum {
- FW_TYPE_HW = 0,
- FW_TYPE_PROT = 1,
- FW_TYPE_AP = 2,
- FW_TYPE_WOWLAN = 3,
- FW_TYPE_TIMING = 4,
- FW_TYPE_WIPAN = 5
-};
-
-/* alive response ver_subtype values */
-enum {
- FW_SUBTYPE_FULL_FEATURE = 0,
- FW_SUBTYPE_BOOTSRAP = 1, /* Not valid */
- FW_SUBTYPE_REDUCED = 2,
- FW_SUBTYPE_ALIVE_ONLY = 3,
- FW_SUBTYPE_WOWLAN = 4,
- FW_SUBTYPE_AP_SUBTYPE = 5,
- FW_SUBTYPE_WIPAN = 6,
- FW_SUBTYPE_INITIALIZE = 9
-};
-
-#define IWL_ALIVE_STATUS_ERR 0xDEAD
-#define IWL_ALIVE_STATUS_OK 0xCAFE
-
-#define IWL_ALIVE_FLG_RFKILL BIT(0)
-
-struct iwl_lmac_alive {
- __le32 ucode_minor;
- __le32 ucode_major;
- u8 ver_subtype;
- u8 ver_type;
- u8 mac;
- u8 opt;
- __le32 timestamp;
- __le32 error_event_table_ptr; /* SRAM address for error log */
- __le32 log_event_table_ptr; /* SRAM address for LMAC event log */
- __le32 cpu_register_ptr;
- __le32 dbgm_config_ptr;
- __le32 alive_counter_ptr;
- __le32 scd_base_ptr; /* SRAM address for SCD */
- __le32 st_fwrd_addr; /* pointer to Store and forward */
- __le32 st_fwrd_size;
-} __packed; /* UCODE_ALIVE_NTFY_API_S_VER_3 */
-
-struct iwl_umac_alive {
- __le32 umac_minor; /* UMAC version: minor */
- __le32 umac_major; /* UMAC version: major */
- __le32 error_info_addr; /* SRAM address for UMAC error log */
- __le32 dbg_print_buff_addr;
-} __packed; /* UMAC_ALIVE_DATA_API_S_VER_2 */
-
-struct mvm_alive_resp_v3 {
- __le16 status;
- __le16 flags;
- struct iwl_lmac_alive lmac_data;
- struct iwl_umac_alive umac_data;
-} __packed; /* ALIVE_RES_API_S_VER_3 */
-
-struct mvm_alive_resp {
- __le16 status;
- __le16 flags;
- struct iwl_lmac_alive lmac_data[2];
- struct iwl_umac_alive umac_data;
-} __packed; /* ALIVE_RES_API_S_VER_4 */
-
-/* Error response/notification */
-enum {
- FW_ERR_UNKNOWN_CMD = 0x0,
- FW_ERR_INVALID_CMD_PARAM = 0x1,
- FW_ERR_SERVICE = 0x2,
- FW_ERR_ARC_MEMORY = 0x3,
- FW_ERR_ARC_CODE = 0x4,
- FW_ERR_WATCH_DOG = 0x5,
- FW_ERR_WEP_GRP_KEY_INDX = 0x10,
- FW_ERR_WEP_KEY_SIZE = 0x11,
- FW_ERR_OBSOLETE_FUNC = 0x12,
- FW_ERR_UNEXPECTED = 0xFE,
- FW_ERR_FATAL = 0xFF
-};
-
-/**
- * struct iwl_error_resp - FW error indication
- * ( REPLY_ERROR = 0x2 )
- * @error_type: one of FW_ERR_*
- * @cmd_id: the command ID for which the error occurred
- * @reserved1: reserved
- * @bad_cmd_seq_num: sequence number of the erroneous command
- * @error_service: which service created the error, applicable only if
- * error_type = 2, otherwise 0
- * @timestamp: TSF in usecs.
- */
-struct iwl_error_resp {
- __le32 error_type;
- u8 cmd_id;
- u8 reserved1;
- __le16 bad_cmd_seq_num;
- __le32 error_service;
- __le64 timestamp;
-} __packed;
-
-
-/* Common PHY, MAC and Bindings definitions */
-#define MAX_MACS_IN_BINDING (3)
-#define MAX_BINDINGS (4)
-
-/**
- * enum iwl_mvm_id_and_color - ID and color fields in context dword
- * @FW_CTXT_ID_POS: position of the ID
- * @FW_CTXT_ID_MSK: mask of the ID
- * @FW_CTXT_COLOR_POS: position of the color
- * @FW_CTXT_COLOR_MSK: mask of the color
- * @FW_CTXT_INVALID: value used to indicate unused/invalid
- */
-enum iwl_mvm_id_and_color {
- FW_CTXT_ID_POS = 0,
- FW_CTXT_ID_MSK = 0xff << FW_CTXT_ID_POS,
- FW_CTXT_COLOR_POS = 8,
- FW_CTXT_COLOR_MSK = 0xff << FW_CTXT_COLOR_POS,
- FW_CTXT_INVALID = 0xffffffff,
-};
-
-#define FW_CMD_ID_AND_COLOR(_id, _color) ((_id << FW_CTXT_ID_POS) |\
- (_color << FW_CTXT_COLOR_POS))
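/*
 * Editorial sketch, not part of this patch: packing and unpacking the
 * ID-and-color dword with the definitions above. The id/color values
 * (5 and 1) are arbitrary illustrations.
 */
static u32 example_id_and_color(void)
{
	u32 dword = FW_CMD_ID_AND_COLOR(5, 1);
	u32 id = (dword & FW_CTXT_ID_MSK) >> FW_CTXT_ID_POS;		/* 5 */
	u32 color = (dword & FW_CTXT_COLOR_MSK) >> FW_CTXT_COLOR_POS;	/* 1 */

	return id == 5 && color == 1 ? dword : FW_CTXT_INVALID;
}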
-
-/* Possible actions on PHYs, MACs and Bindings */
-enum iwl_phy_ctxt_action {
- FW_CTXT_ACTION_STUB = 0,
- FW_CTXT_ACTION_ADD,
- FW_CTXT_ACTION_MODIFY,
- FW_CTXT_ACTION_REMOVE,
- FW_CTXT_ACTION_NUM
-}; /* COMMON_CONTEXT_ACTION_API_E_VER_1 */
-
-/* Time Events */
-
-/* Time Event types, according to MAC type */
-enum iwl_time_event_type {
- /* BSS Station Events */
- TE_BSS_STA_AGGRESSIVE_ASSOC,
- TE_BSS_STA_ASSOC,
- TE_BSS_EAP_DHCP_PROT,
- TE_BSS_QUIET_PERIOD,
-
- /* P2P Device Events */
- TE_P2P_DEVICE_DISCOVERABLE,
- TE_P2P_DEVICE_LISTEN,
- TE_P2P_DEVICE_ACTION_SCAN,
- TE_P2P_DEVICE_FULL_SCAN,
-
- /* P2P Client Events */
- TE_P2P_CLIENT_AGGRESSIVE_ASSOC,
- TE_P2P_CLIENT_ASSOC,
- TE_P2P_CLIENT_QUIET_PERIOD,
-
- /* P2P GO Events */
- TE_P2P_GO_ASSOC_PROT,
- TE_P2P_GO_REPETITIVET_NOA,
- TE_P2P_GO_CT_WINDOW,
-
- /* WiDi Sync Events */
- TE_WIDI_TX_SYNC,
-
- /* Channel Switch NoA */
- TE_CHANNEL_SWITCH_PERIOD,
-
- TE_MAX
-}; /* MAC_EVENT_TYPE_API_E_VER_1 */
-
-
-
-/* Time event - defines for command API v1 */
-
-/*
- * @TE_V1_FRAG_NONE: fragmentation of the time event is NOT allowed.
- * @TE_V1_FRAG_SINGLE: fragmentation of the time event is allowed, but only
- * the first fragment is scheduled.
- * @TE_V1_FRAG_DUAL: fragmentation of the time event is allowed, but only
- * the first 2 fragments are scheduled.
- * @TE_V1_FRAG_ENDLESS: fragmentation of the time event is allowed, and any
- * number of fragments are valid.
- *
- * Other than the constants defined above, specifying a fragmentation value 'x'
- * means that the event can be fragmented but only the first 'x' will be
- * scheduled.
- */
-enum {
- TE_V1_FRAG_NONE = 0,
- TE_V1_FRAG_SINGLE = 1,
- TE_V1_FRAG_DUAL = 2,
- TE_V1_FRAG_ENDLESS = 0xffffffff
-};
-
-/* If a Time Event can be fragmented, this is the max number of fragments */
-#define TE_V1_FRAG_MAX_MSK 0x0fffffff
-/* Repeat the time event endlessly (until removed) */
-#define TE_V1_REPEAT_ENDLESS 0xffffffff
-/* If a Time Event has bounded repetitions, this is the maximal value */
-#define TE_V1_REPEAT_MAX_MSK_V1 0x0fffffff
-
-/* Time Event dependencies: none, on another TE, or in a specific time */
-enum {
- TE_V1_INDEPENDENT = 0,
- TE_V1_DEP_OTHER = BIT(0),
- TE_V1_DEP_TSF = BIT(1),
- TE_V1_EVENT_SOCIOPATHIC = BIT(2),
-}; /* MAC_EVENT_DEPENDENCY_POLICY_API_E_VER_2 */
-
-/*
- * @TE_V1_NOTIF_NONE: no notifications
- * @TE_V1_NOTIF_HOST_EVENT_START: request/receive notification on event start
- * @TE_V1_NOTIF_HOST_EVENT_END: request/receive notification on event end
- * @TE_V1_NOTIF_INTERNAL_EVENT_START: internal FW use
- * @TE_V1_NOTIF_INTERNAL_EVENT_END: internal FW use.
- * @TE_V1_NOTIF_HOST_FRAG_START: request/receive notification on frag start
- * @TE_V1_NOTIF_HOST_FRAG_END: request/receive notification on frag end
- * @TE_V1_NOTIF_INTERNAL_FRAG_START: internal FW use.
- * @TE_V1_NOTIF_INTERNAL_FRAG_END: internal FW use.
- *
- * Supported Time event notifications configuration.
- * A notification (both event and fragment) includes a status indicating whether
- * the FW was able to schedule the event or not. For fragment start/end
- * notification the status is always success. There is no start/end fragment
- * notification for monolithic events.
- */
-enum {
- TE_V1_NOTIF_NONE = 0,
- TE_V1_NOTIF_HOST_EVENT_START = BIT(0),
- TE_V1_NOTIF_HOST_EVENT_END = BIT(1),
- TE_V1_NOTIF_INTERNAL_EVENT_START = BIT(2),
- TE_V1_NOTIF_INTERNAL_EVENT_END = BIT(3),
- TE_V1_NOTIF_HOST_FRAG_START = BIT(4),
- TE_V1_NOTIF_HOST_FRAG_END = BIT(5),
- TE_V1_NOTIF_INTERNAL_FRAG_START = BIT(6),
- TE_V1_NOTIF_INTERNAL_FRAG_END = BIT(7),
-}; /* MAC_EVENT_ACTION_API_E_VER_2 */
-
-/* Time event - defines for command API */
-
-/*
- * @TE_V2_FRAG_NONE: fragmentation of the time event is NOT allowed.
- * @TE_V2_FRAG_SINGLE: fragmentation of the time event is allowed, but only
- * the first fragment is scheduled.
- * @TE_V2_FRAG_DUAL: fragmentation of the time event is allowed, but only
- * the first 2 fragments are scheduled.
- * @TE_V2_FRAG_ENDLESS: fragmentation of the time event is allowed, and any
- * number of fragments are valid.
- *
- * Other than the constants defined above, specifying a fragmentation value 'x'
- * means that the event can be fragmented but only the first 'x' will be
- * scheduled.
- */
-enum {
- TE_V2_FRAG_NONE = 0,
- TE_V2_FRAG_SINGLE = 1,
- TE_V2_FRAG_DUAL = 2,
- TE_V2_FRAG_MAX = 0xfe,
- TE_V2_FRAG_ENDLESS = 0xff
-};
-
-/* Repeat the time event endlessly (until removed) */
-#define TE_V2_REPEAT_ENDLESS 0xff
-/* If a Time Event has bounded repetitions, this is the maximal value */
-#define TE_V2_REPEAT_MAX 0xfe
-
-#define TE_V2_PLACEMENT_POS 12
-#define TE_V2_ABSENCE_POS 15
-
-/**
- * enum iwl_time_event_policy - Time event policy values
- * A notification (both event and fragment) includes a status indicating whether
- * the FW was able to schedule the event or not. For fragment start/end
- * notification the status is always success. There is no start/end fragment
- * notification for monolithic events.
- *
- * @TE_V2_DEFAULT_POLICY: independent, social, present, unnoticeable
- * @TE_V2_NOTIF_HOST_EVENT_START: request/receive notification on event start
- * @TE_V2_NOTIF_HOST_EVENT_END: request/receive notification on event end
- * @TE_V2_NOTIF_INTERNAL_EVENT_START: internal FW use
- * @TE_V2_NOTIF_INTERNAL_EVENT_END: internal FW use.
- * @TE_V2_NOTIF_HOST_FRAG_START: request/receive notification on frag start
- * @TE_V2_NOTIF_HOST_FRAG_END: request/receive notification on frag end
- * @TE_V2_NOTIF_INTERNAL_FRAG_START: internal FW use.
- * @TE_V2_NOTIF_INTERNAL_FRAG_END: internal FW use.
- * @T2_V2_START_IMMEDIATELY: start time event immediately
- * @TE_V2_DEP_OTHER: depends on another time event
- * @TE_V2_DEP_TSF: depends on a specific time
- * @TE_V2_EVENT_SOCIOPATHIC: can't co-exist with other events of the same MAC
- * @TE_V2_ABSENCE: whether we are present or absent during the Time Event.
- */
-enum iwl_time_event_policy {
- TE_V2_DEFAULT_POLICY = 0x0,
-
- /* notifications (event start/stop, fragment start/stop) */
- TE_V2_NOTIF_HOST_EVENT_START = BIT(0),
- TE_V2_NOTIF_HOST_EVENT_END = BIT(1),
- TE_V2_NOTIF_INTERNAL_EVENT_START = BIT(2),
- TE_V2_NOTIF_INTERNAL_EVENT_END = BIT(3),
-
- TE_V2_NOTIF_HOST_FRAG_START = BIT(4),
- TE_V2_NOTIF_HOST_FRAG_END = BIT(5),
- TE_V2_NOTIF_INTERNAL_FRAG_START = BIT(6),
- TE_V2_NOTIF_INTERNAL_FRAG_END = BIT(7),
- T2_V2_START_IMMEDIATELY = BIT(11),
-
- /* placement characteristics */
- TE_V2_DEP_OTHER = BIT(TE_V2_PLACEMENT_POS),
- TE_V2_DEP_TSF = BIT(TE_V2_PLACEMENT_POS + 1),
- TE_V2_EVENT_SOCIOPATHIC = BIT(TE_V2_PLACEMENT_POS + 2),
-
- /* are we present or absent during the Time Event. */
- TE_V2_ABSENCE = BIT(TE_V2_ABSENCE_POS),
-};
-
-/**
- * struct iwl_time_event_cmd - configuring Time Events
- * with struct MAC_TIME_EVENT_DATA_API_S_VER_2 (version 1 also exists;
- * which one is used is determined by IWL_UCODE_TLV_FLAGS)
- * ( TIME_EVENT_CMD = 0x29 )
- * @id_and_color: ID and color of the relevant MAC,
- * &enum iwl_mvm_id_and_color
- * @action: action to perform, one of &enum iwl_phy_ctxt_action
- * @id: this field has two meanings, depending on the action:
- * If the action is ADD, then it means the type of event to add.
- * For all other actions it is the unique event ID assigned when the
- * event was added by the FW.
- * @apply_time: When to start the Time Event (in GP2)
- * @max_delay: maximum delay to event's start (apply time), in TU
- * @depends_on: the unique ID of the event we depend on (if any)
- * @interval: interval between repetitions, in TU
- * @duration: duration of event in TU
- * @repeat: how many repetitions to do, can be TE_REPEAT_ENDLESS
- * @max_frags: maximal number of fragments the Time Event can be divided to
- * @policy: defines whether uCode shall notify the host or other uCode modules
- * on event and/or fragment start and/or end;
- * placement using one of TE_INDEPENDENT, TE_DEP_OTHER, TE_DEP_TSF or
- * TE_EVENT_SOCIOPATHIC;
- * presence using TE_ABSENCE; notifications using the TE_NOTIF_* values,
- * see &enum iwl_time_event_policy
- */
-struct iwl_time_event_cmd {
- /* COMMON_INDEX_HDR_API_S_VER_1 */
- __le32 id_and_color;
- __le32 action;
- __le32 id;
- /* MAC_TIME_EVENT_DATA_API_S_VER_2 */
- __le32 apply_time;
- __le32 max_delay;
- __le32 depends_on;
- __le32 interval;
- __le32 duration;
- u8 repeat;
- u8 max_frags;
- __le16 policy;
-} __packed; /* MAC_TIME_EVENT_CMD_API_S_VER_2 */
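/*
 * Editorial sketch, not part of this patch: adding a BSS-association
 * time event that asks for host notifications on start and end. The
 * MAC id/color and the duration are arbitrary; real code would fill
 * them from the MAC context being protected.
 */
static void example_fill_time_event(struct iwl_time_event_cmd *cmd)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->id_and_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(0, 0));
	cmd->action = cpu_to_le32(FW_CTXT_ACTION_ADD);
	cmd->id = cpu_to_le32(TE_BSS_STA_ASSOC);	/* event type, since ADD */
	cmd->duration = cpu_to_le32(100);		/* in TU */
	cmd->repeat = 1;
	cmd->max_frags = TE_V2_FRAG_NONE;
	cmd->policy = cpu_to_le16(TE_V2_NOTIF_HOST_EVENT_START |
				  TE_V2_NOTIF_HOST_EVENT_END);
}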
-
-/**
- * struct iwl_time_event_resp - response structure to iwl_time_event_cmd
- * @status: bit 0 indicates success, all others specify errors
- * @id: the Time Event type
- * @unique_id: the unique ID assigned (in ADD) or given (others) to the TE
- * @id_and_color: ID and color of the relevant MAC,
- * &enum iwl_mvm_id_and_color
- */
-struct iwl_time_event_resp {
- __le32 status;
- __le32 id;
- __le32 unique_id;
- __le32 id_and_color;
-} __packed; /* MAC_TIME_EVENT_RSP_API_S_VER_1 */
-
-/**
- * struct iwl_time_event_notif - notifications of time event start/stop
- * ( TIME_EVENT_NOTIFICATION = 0x2a )
- * @timestamp: action timestamp in GP2
- * @session_id: session's unique id
- * @unique_id: unique id of the Time Event itself
- * @id_and_color: ID and color of the relevant MAC
- * @action: &enum iwl_time_event_policy
- * @status: true if scheduled, false otherwise (not executed)
- */
-struct iwl_time_event_notif {
- __le32 timestamp;
- __le32 session_id;
- __le32 unique_id;
- __le32 id_and_color;
- __le32 action;
- __le32 status;
-} __packed; /* MAC_TIME_EVENT_NTFY_API_S_VER_1 */
-
-
-/* Bindings and Time Quota */
-
-/**
- * struct iwl_binding_cmd_v1 - configuring bindings
- * ( BINDING_CONTEXT_CMD = 0x2b )
- * @id_and_color: ID and color of the relevant Binding,
- * &enum iwl_mvm_id_and_color
- * @action: action to perform, one of FW_CTXT_ACTION_*
- * @macs: array of MAC id and colors which belong to the binding,
- * &enum iwl_mvm_id_and_color
- * @phy: PHY id and color which belongs to the binding,
- * &enum iwl_mvm_id_and_color
- */
-struct iwl_binding_cmd_v1 {
- /* COMMON_INDEX_HDR_API_S_VER_1 */
- __le32 id_and_color;
- __le32 action;
- /* BINDING_DATA_API_S_VER_1 */
- __le32 macs[MAX_MACS_IN_BINDING];
- __le32 phy;
-} __packed; /* BINDING_CMD_API_S_VER_1 */
-
-/**
- * struct iwl_binding_cmd - configuring bindings
- * ( BINDING_CONTEXT_CMD = 0x2b )
- * @id_and_color: ID and color of the relevant Binding,
- * &enum iwl_mvm_id_and_color
- * @action: action to perform, one of FW_CTXT_ACTION_*
- * @macs: array of MAC id and colors which belong to the binding
- * &enum iwl_mvm_id_and_color
- * @phy: PHY id and color which belongs to the binding
- * &enum iwl_mvm_id_and_color
- * @lmac_id: the lmac id the binding belongs to
- */
-struct iwl_binding_cmd {
- /* COMMON_INDEX_HDR_API_S_VER_1 */
- __le32 id_and_color;
- __le32 action;
- /* BINDING_DATA_API_S_VER_1 */
- __le32 macs[MAX_MACS_IN_BINDING];
- __le32 phy;
- __le32 lmac_id;
-} __packed; /* BINDING_CMD_API_S_VER_2 */
-
-#define IWL_BINDING_CMD_SIZE_V1 sizeof(struct iwl_binding_cmd_v1)
-#define IWL_LMAC_24G_INDEX 0
-#define IWL_LMAC_5G_INDEX 1
-
-/* The maximal number of fragments in the FW's schedule session */
-#define IWL_MVM_MAX_QUOTA 128
-
-/**
- * struct iwl_time_quota_data - configuration of time quota per binding
- * @id_and_color: ID and color of the relevant Binding,
- * &enum iwl_mvm_id_and_color
- * @quota: absolute time quota in TU. The scheduler will try to divide the
- * remaining quota (after Time Events) according to this quota.
- * @max_duration: max uninterrupted context duration in TU
- */
-struct iwl_time_quota_data {
- __le32 id_and_color;
- __le32 quota;
- __le32 max_duration;
-} __packed; /* TIME_QUOTA_DATA_API_S_VER_1 */
-
-/**
- * struct iwl_time_quota_cmd - configuration of time quota between bindings
- * ( TIME_QUOTA_CMD = 0x2c )
- * @quotas: allocations per binding
- * Note: on non-CDB the fourth one is the auxiliary MAC and is
- * essentially zero.
- * On CDB the fourth one is a regular binding.
- */
-struct iwl_time_quota_cmd {
- struct iwl_time_quota_data quotas[MAX_BINDINGS];
-} __packed; /* TIME_QUOTA_ALLOCATION_CMD_API_S_VER_1 */
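/*
 * Editorial sketch, not part of this patch: splitting the schedule
 * session equally between two active bindings. Marking unused entries
 * with FW_CTXT_INVALID is an assumed convention; the binding ids are
 * arbitrary.
 */
static void example_fill_time_quota(struct iwl_time_quota_cmd *cmd)
{
	int i;

	memset(cmd, 0, sizeof(*cmd));
	for (i = 0; i < MAX_BINDINGS; i++)
		cmd->quotas[i].id_and_color = cpu_to_le32(FW_CTXT_INVALID);

	cmd->quotas[0].id_and_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(0, 0));
	cmd->quotas[0].quota = cpu_to_le32(IWL_MVM_MAX_QUOTA / 2);
	cmd->quotas[1].id_and_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(1, 0));
	cmd->quotas[1].quota = cpu_to_le32(IWL_MVM_MAX_QUOTA / 2);
}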
-
-
-/* PHY context */
-
-/* Supported bands */
-#define PHY_BAND_5 (0)
-#define PHY_BAND_24 (1)
-
-/* Supported channel width, vary if there is VHT support */
-#define PHY_VHT_CHANNEL_MODE20 (0x0)
-#define PHY_VHT_CHANNEL_MODE40 (0x1)
-#define PHY_VHT_CHANNEL_MODE80 (0x2)
-#define PHY_VHT_CHANNEL_MODE160 (0x3)
-
-/*
- * Control channel position:
- * For legacy, a set bit means upper channel, otherwise lower.
- * For VHT, bit 2 marks whether the control channel is lower/upper relative
- * to the center frequency; bits 1:0 mark the distance from it. For 20 MHz,
- * the offset is 0.
- * center_freq
- * |
- * 40Mhz |_______|_______|
- * 80Mhz |_______|_______|_______|_______|
- * 160Mhz |_______|_______|_______|_______|_______|_______|_______|_______|
- * code 011 010 001 000 | 100 101 110 111
- */
-#define PHY_VHT_CTRL_POS_1_BELOW (0x0)
-#define PHY_VHT_CTRL_POS_2_BELOW (0x1)
-#define PHY_VHT_CTRL_POS_3_BELOW (0x2)
-#define PHY_VHT_CTRL_POS_4_BELOW (0x3)
-#define PHY_VHT_CTRL_POS_1_ABOVE (0x4)
-#define PHY_VHT_CTRL_POS_2_ABOVE (0x5)
-#define PHY_VHT_CTRL_POS_3_ABOVE (0x6)
-#define PHY_VHT_CTRL_POS_4_ABOVE (0x7)
-
-/*
- * @band: PHY_BAND_*
- * @channel: channel number
- * @width: PHY_[VHT|LEGACY]_CHANNEL_*
- * @ctrl_pos: PHY_[VHT|LEGACY]_CTRL_*
- */
-struct iwl_fw_channel_info {
- u8 band;
- u8 channel;
- u8 width;
- u8 ctrl_pos;
-} __packed;
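/*
 * Editorial sketch, not part of this patch: describing an 80 MHz VHT
 * channel on the 5 GHz band whose control channel sits one 20 MHz slot
 * below the center frequency (code 000 in the diagram above). Channel
 * 42 is an arbitrary pick.
 */
static void example_fill_channel_info(struct iwl_fw_channel_info *ci)
{
	ci->band = PHY_BAND_5;
	ci->channel = 42;
	ci->width = PHY_VHT_CHANNEL_MODE80;
	ci->ctrl_pos = PHY_VHT_CTRL_POS_1_BELOW;
}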
-
-#define PHY_RX_CHAIN_DRIVER_FORCE_POS (0)
-#define PHY_RX_CHAIN_DRIVER_FORCE_MSK \
- (0x1 << PHY_RX_CHAIN_DRIVER_FORCE_POS)
-#define PHY_RX_CHAIN_VALID_POS (1)
-#define PHY_RX_CHAIN_VALID_MSK \
- (0x7 << PHY_RX_CHAIN_VALID_POS)
-#define PHY_RX_CHAIN_FORCE_SEL_POS (4)
-#define PHY_RX_CHAIN_FORCE_SEL_MSK \
- (0x7 << PHY_RX_CHAIN_FORCE_SEL_POS)
-#define PHY_RX_CHAIN_FORCE_MIMO_SEL_POS (7)
-#define PHY_RX_CHAIN_FORCE_MIMO_SEL_MSK \
- (0x7 << PHY_RX_CHAIN_FORCE_MIMO_SEL_POS)
-#define PHY_RX_CHAIN_CNT_POS (10)
-#define PHY_RX_CHAIN_CNT_MSK \
- (0x3 << PHY_RX_CHAIN_CNT_POS)
-#define PHY_RX_CHAIN_MIMO_CNT_POS (12)
-#define PHY_RX_CHAIN_MIMO_CNT_MSK \
- (0x3 << PHY_RX_CHAIN_MIMO_CNT_POS)
-#define PHY_RX_CHAIN_MIMO_FORCE_POS (14)
-#define PHY_RX_CHAIN_MIMO_FORCE_MSK \
- (0x1 << PHY_RX_CHAIN_MIMO_FORCE_POS)
-
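/*
 * Editorial sketch, not part of this patch: composing an rxchain_info
 * dword from the masks above. The exact semantics assumed here (which
 * chains are valid, forced selection, active chain count) are
 * illustrative only.
 */
static u32 example_rxchain_info(void)
{
	u32 v = 0;

	v |= 0x3 << PHY_RX_CHAIN_VALID_POS;	/* chains A and B valid */
	v |= 0x1 << PHY_RX_CHAIN_FORCE_SEL_POS;	/* force selection of chain A */
	v |= 0x1 << PHY_RX_CHAIN_CNT_POS;	/* one active receive chain */
	return v;
}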
-/* TODO: fix the value, make it depend on firmware at runtime? */
-#define NUM_PHY_CTX 3
-
-/* TODO: complete missing documentation */
-/**
- * struct iwl_phy_context_cmd - config of the PHY context
- * ( PHY_CONTEXT_CMD = 0x8 )
- * @id_and_color: ID and color of the relevant Binding
- * @action: action to perform, one of FW_CTXT_ACTION_*
- * @apply_time: 0 means immediate apply and context switch.
- * other value means apply new params after X usecs
- * @tx_param_color: ???
- * @ci: channel info
- * @txchain_info: ???
- * @rxchain_info: ???
- * @acquisition_data: ???
- * @dsp_cfg_flags: set to 0
- */
-struct iwl_phy_context_cmd {
- /* COMMON_INDEX_HDR_API_S_VER_1 */
- __le32 id_and_color;
- __le32 action;
- /* PHY_CONTEXT_DATA_API_S_VER_1 */
- __le32 apply_time;
- __le32 tx_param_color;
- struct iwl_fw_channel_info ci;
- __le32 txchain_info;
- __le32 rxchain_info;
- __le32 acquisition_data;
- __le32 dsp_cfg_flags;
-} __packed; /* PHY_CONTEXT_CMD_API_VER_1 */
-
-/*
- * Aux ROC command
- *
- * Command requests the firmware to create a time event for a certain duration
- * and remain on the given channel. This is done by using the Aux framework in
- * the FW.
- * The command was first used for Hot Spot issues, but can be used
- * regardless of Hot Spot.
- *
- * ( HOT_SPOT_CMD 0x53 )
- *
- * @id_and_color: ID and color of the MAC
- * @action: action to perform, one of FW_CTXT_ACTION_*
- * @event_unique_id: If the action FW_CTXT_ACTION_REMOVE then the
- * event_unique_id should be the id of the time event assigned by ucode.
- * Otherwise ignore the event_unique_id.
- * @sta_id_and_color: station id and color, resumed during "Remain On Channel"
- * activity.
- * @channel_info: channel info
- * @node_addr: Our MAC Address
- * @reserved: reserved for alignment
- * @apply_time: GP2 value to start (should always be the current GP2 value)
- * @apply_time_max_delay: Maximum apply time delay value in TU. Defines max
- * time by which start of the event is allowed to be postponed.
- * @duration: event duration in TU. To calculate event duration:
- * timeEventDuration = min(duration, remainingQuota)
- */
-struct iwl_hs20_roc_req {
- /* COMMON_INDEX_HDR_API_S_VER_1 hdr */
- __le32 id_and_color;
- __le32 action;
- __le32 event_unique_id;
- __le32 sta_id_and_color;
- struct iwl_fw_channel_info channel_info;
- u8 node_addr[ETH_ALEN];
- __le16 reserved;
- __le32 apply_time;
- __le32 apply_time_max_delay;
- __le32 duration;
-} __packed; /* HOT_SPOT_CMD_API_S_VER_1 */
-
-/*
- * values for AUX ROC result values
- */
-enum iwl_mvm_hot_spot {
- HOT_SPOT_RSP_STATUS_OK,
- HOT_SPOT_RSP_STATUS_TOO_MANY_EVENTS,
- HOT_SPOT_MAX_NUM_OF_SESSIONS,
-};
-
-/*
- * Aux ROC command response
- *
- * In response to iwl_hs20_roc_req the FW sends this command to notify the
- * driver of the uid of the time event.
- *
- * ( HOT_SPOT_CMD 0x53 )
- *
- * @event_unique_id: Unique ID of time event assigned by ucode
- * @status: Return status 0 is success, all the rest used for specific errors
- */
-struct iwl_hs20_roc_res {
- __le32 event_unique_id;
- __le32 status;
-} __packed; /* HOT_SPOT_RSP_API_S_VER_1 */
-
-/**
- * struct iwl_radio_version_notif - information on the radio version
- * ( RADIO_VERSION_NOTIFICATION = 0x68 )
- * @radio_flavor: radio flavor
- * @radio_step: radio version step
- * @radio_dash: radio version dash
- */
-struct iwl_radio_version_notif {
- __le32 radio_flavor;
- __le32 radio_step;
- __le32 radio_dash;
-} __packed; /* RADIO_VERSION_NOTOFICATION_S_VER_1 */
-
-enum iwl_card_state_flags {
- CARD_ENABLED = 0x00,
- HW_CARD_DISABLED = 0x01,
- SW_CARD_DISABLED = 0x02,
- CT_KILL_CARD_DISABLED = 0x04,
- HALT_CARD_DISABLED = 0x08,
- CARD_DISABLED_MSK = 0x0f,
- CARD_IS_RX_ON = 0x10,
-};
-
-/**
- * struct iwl_card_state_notif - card state notification
- * ( CARD_STATE_NOTIFICATION = 0xa1 )
- * @flags: &enum iwl_card_state_flags
- */
-struct iwl_card_state_notif {
- __le32 flags;
-} __packed; /* CARD_STATE_NTFY_API_S_VER_1 */
-
-/**
- * struct iwl_missed_beacons_notif - information on missed beacons
- * ( MISSED_BEACONS_NOTIFICATION = 0xa2 )
- * @mac_id: interface ID
- * @consec_missed_beacons_since_last_rx: number of consecutive missed
- * beacons since last RX.
- * @consec_missed_beacons: number of consecutive missed beacons
- * @num_expected_beacons: number of expected beacons
- * @num_recvd_beacons: number of received beacons
- */
-struct iwl_missed_beacons_notif {
- __le32 mac_id;
- __le32 consec_missed_beacons_since_last_rx;
- __le32 consec_missed_beacons;
- __le32 num_expected_beacons;
- __le32 num_recvd_beacons;
-} __packed; /* MISSED_BEACON_NTFY_API_S_VER_3 */
-
-/**
- * struct iwl_mfuart_load_notif - mfuart image version & status
- * ( MFUART_LOAD_NOTIFICATION = 0xb1 )
- * @installed_ver: installed image version
- * @external_ver: external image version
- * @status: MFUART loading status
- * @duration: MFUART loading time
- * @image_size: MFUART image size in bytes
- */
-struct iwl_mfuart_load_notif {
- __le32 installed_ver;
- __le32 external_ver;
- __le32 status;
- __le32 duration;
- /* image size valid only in v2 of the command */
- __le32 image_size;
-} __packed; /* MFU_LOADER_NTFY_API_S_VER_2 */
-
-/**
- * struct iwl_mfu_assert_dump_notif - mfuart dump logs
- * ( MFU_ASSERT_DUMP_NTF = 0xfe )
- * @assert_id: mfuart assert id that caused the notif
- * @curr_reset_num: number of asserts since uptime
- * @index_num: current chunk id
- * @parts_num: total number of chunks
- * @data_size: number of data bytes sent
- * @data: data buffer
- */
-struct iwl_mfu_assert_dump_notif {
- __le32 assert_id;
- __le32 curr_reset_num;
- __le16 index_num;
- __le16 parts_num;
- __le32 data_size;
- __le32 data[0];
-} __packed; /* MFU_DUMP_ASSERT_API_S_VER_1 */
-
-#define MAX_PORT_ID_NUM 2
-#define MAX_MCAST_FILTERING_ADDRESSES 256
-
-/**
- * struct iwl_mcast_filter_cmd - configure multicast filter.
- * @filter_own: Set 1 to filter out multicast packets sent by station itself
- * @port_id: Multicast MAC addresses array specifier. This is a somewhat
- * unusual way to identify the network interface, adopted in the
- * host-device IF. It is used by the FW as an index into an array of
- * addresses. This array has MAX_PORT_ID_NUM members.
- * @count: Number of MAC addresses in the array
- * @pass_all: Set 1 to pass all multicast packets.
- * @bssid: current association BSSID.
- * @reserved: reserved
- * @addr_list: placeholder for the array of MAC addresses.
- * IMPORTANT: add padding if necessary to ensure DWORD alignment.
- */
-struct iwl_mcast_filter_cmd {
- u8 filter_own;
- u8 port_id;
- u8 count;
- u8 pass_all;
- u8 bssid[6];
- u8 reserved[2];
- u8 addr_list[0];
-} __packed; /* MCAST_FILTERING_CMD_API_S_VER_1 */
-
-#define MAX_BCAST_FILTERS 8
-#define MAX_BCAST_FILTER_ATTRS 2
-
-/**
- * enum iwl_mvm_bcast_filter_attr_offset - written by fw for each Rx packet
- * @BCAST_FILTER_OFFSET_PAYLOAD_START: offset is from payload start.
- * @BCAST_FILTER_OFFSET_IP_END: offset is from ip header end (i.e.
- * start of ip payload).
- */
-enum iwl_mvm_bcast_filter_attr_offset {
- BCAST_FILTER_OFFSET_PAYLOAD_START = 0,
- BCAST_FILTER_OFFSET_IP_END = 1,
-};
-
-/**
- * struct iwl_fw_bcast_filter_attr - broadcast filter attribute
- * @offset_type: &enum iwl_mvm_bcast_filter_attr_offset.
- * @offset: starting offset of this pattern.
- * @reserved1: reserved
- * @val: value to match - big endian (MSB is the first
- * byte to match from offset pos).
- * @mask: mask to match (big endian).
- */
-struct iwl_fw_bcast_filter_attr {
- u8 offset_type;
- u8 offset;
- __le16 reserved1;
- __be32 val;
- __be32 mask;
-} __packed; /* BCAST_FILTER_ATT_S_VER_1 */
-
-/**
- * enum iwl_mvm_bcast_filter_frame_type - filter frame type
- * @BCAST_FILTER_FRAME_TYPE_ALL: consider all frames.
- * @BCAST_FILTER_FRAME_TYPE_IPV4: consider only ipv4 frames
- */
-enum iwl_mvm_bcast_filter_frame_type {
- BCAST_FILTER_FRAME_TYPE_ALL = 0,
- BCAST_FILTER_FRAME_TYPE_IPV4 = 1,
-};
-
-/**
- * struct iwl_fw_bcast_filter - broadcast filter
- * @discard: discard frame (1) or let it pass (0).
- * @frame_type: &enum iwl_mvm_bcast_filter_frame_type.
- * @reserved1: reserved
- * @num_attrs: number of valid attributes in this filter.
- * @attrs: attributes of this filter. a filter is considered matched
- * only when all its attributes are matched (i.e. AND relationship)
- */
-struct iwl_fw_bcast_filter {
- u8 discard;
- u8 frame_type;
- u8 num_attrs;
- u8 reserved1;
- struct iwl_fw_bcast_filter_attr attrs[MAX_BCAST_FILTER_ATTRS];
-} __packed; /* BCAST_FILTER_S_VER_1 */
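/*
 * Editorial sketch, not part of this patch: a single-attribute filter
 * that discards IPv4 frames whose first payload dword matches a given
 * big-endian value. The matched value is arbitrary.
 */
static void example_fill_bcast_filter(struct iwl_fw_bcast_filter *f)
{
	memset(f, 0, sizeof(*f));
	f->discard = 1;					/* drop on match */
	f->frame_type = BCAST_FILTER_FRAME_TYPE_IPV4;
	f->num_attrs = 1;
	f->attrs[0].offset_type = BCAST_FILTER_OFFSET_PAYLOAD_START;
	f->attrs[0].offset = 0;
	f->attrs[0].val = cpu_to_be32(0x12345678);
	f->attrs[0].mask = cpu_to_be32(0xffffffff);	/* match all 32 bits */
}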
-
-#define BA_WINDOW_STREAMS_MAX 16
-#define BA_WINDOW_STATUS_TID_MSK 0x000F
-#define BA_WINDOW_STATUS_STA_ID_POS 4
-#define BA_WINDOW_STATUS_STA_ID_MSK 0x01F0
-#define BA_WINDOW_STATUS_VALID_MSK BIT(9)
-
-/**
- * struct iwl_ba_window_status_notif - reordering window's status notification
- * @bitmap: bitmap of received frames [start_seq_num + 0]..[start_seq_num + 63]
- * @ra_tid: bit 3:0 - TID, bit 8:4 - STA_ID, bit 9 - valid
- * @start_seq_num: the start sequence number of the bitmap
- * @mpdu_rx_count: the number of received MPDUs since entering D0i3
- */
-struct iwl_ba_window_status_notif {
- __le64 bitmap[BA_WINDOW_STREAMS_MAX];
- __le16 ra_tid[BA_WINDOW_STREAMS_MAX];
- __le32 start_seq_num[BA_WINDOW_STREAMS_MAX];
- __le16 mpdu_rx_count[BA_WINDOW_STREAMS_MAX];
-} __packed; /* BA_WINDOW_STATUS_NTFY_API_S_VER_1 */
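/*
 * Editorial sketch, not part of this patch: unpacking one ra_tid entry
 * with the BA_WINDOW_STATUS_* masks above. The helper name is
 * hypothetical.
 */
static bool example_decode_ra_tid(const struct iwl_ba_window_status_notif *n,
				  int stream, u8 *tid, u8 *sta_id)
{
	u16 ra_tid = le16_to_cpu(n->ra_tid[stream]);

	if (!(ra_tid & BA_WINDOW_STATUS_VALID_MSK))
		return false;
	*tid = ra_tid & BA_WINDOW_STATUS_TID_MSK;
	*sta_id = (ra_tid & BA_WINDOW_STATUS_STA_ID_MSK) >>
		  BA_WINDOW_STATUS_STA_ID_POS;
	return true;
}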
-
-/**
- * struct iwl_fw_bcast_mac - per-mac broadcast filtering configuration.
- * @default_discard: default action for this mac (discard (1) / pass (0)).
- * @reserved1: reserved
- * @attached_filters: bitmap of relevant filters for this mac.
- */
-struct iwl_fw_bcast_mac {
- u8 default_discard;
- u8 reserved1;
- __le16 attached_filters;
-} __packed; /* BCAST_MAC_CONTEXT_S_VER_1 */
-
-/**
- * struct iwl_bcast_filter_cmd - broadcast filtering configuration
- * @disable: enable (0) / disable (1)
- * @max_bcast_filters: max number of filters (MAX_BCAST_FILTERS)
- * @max_macs: max number of macs (NUM_MAC_INDEX_DRIVER)
- * @reserved1: reserved
- * @filters: broadcast filters
- * @macs: broadcast filtering configuration per-mac
- */
-struct iwl_bcast_filter_cmd {
- u8 disable;
- u8 max_bcast_filters;
- u8 max_macs;
- u8 reserved1;
- struct iwl_fw_bcast_filter filters[MAX_BCAST_FILTERS];
- struct iwl_fw_bcast_mac macs[NUM_MAC_INDEX_DRIVER];
-} __packed; /* BCAST_FILTERING_HCMD_API_S_VER_1 */
-
-/*
- * enum iwl_mvm_marker_id - maker ids
- *
- * The ids for the different types of markers to insert into the usniffer logs
- */
-enum iwl_mvm_marker_id {
- MARKER_ID_TX_FRAME_LATENCY = 1,
-}; /* MARKER_ID_API_E_VER_1 */
-
-/**
- * struct iwl_mvm_marker - mark info into the usniffer logs
- *
- * (MARKER_CMD = 0xcb)
- *
- * Mark the UTC time stamp into the usniffer logs together with additional
- * metadata, so the usniffer output can be parsed.
- * In the command response the ucode will return the GP2 time.
- *
- * @dw_len: the total length of the marker in dwords, including the header.
- * @marker_id: A unique marker id (iwl_mvm_marker_id).
- * @reserved: reserved.
- * @timestamp: in milliseconds since 1970-01-01 00:00:00 UTC
- * @metadata: additional metadata that will be written to the usniffer log
- */
-struct iwl_mvm_marker {
- u8 dw_len;
- u8 marker_id;
- __le16 reserved;
- __le64 timestamp;
- __le32 metadata[0];
-} __packed; /* MARKER_API_S_VER_1 */
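/*
 * Editorial sketch, not part of this patch: computing dw_len for a
 * marker carrying n_meta metadata dwords, assuming dw_len counts the
 * whole marker (fixed header of 3 dwords plus the metadata).
 */
static u8 example_marker_dw_len(u8 n_meta)
{
	return sizeof(struct iwl_mvm_marker) / sizeof(__le32) + n_meta;
}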
-
-/*
- * enum iwl_dc2dc_config_id - flag ids
- *
- * Ids of dc2dc configuration flags
- */
-enum iwl_dc2dc_config_id {
- DCDC_LOW_POWER_MODE_MSK_SET = 0x1, /* not used */
- DCDC_FREQ_TUNE_SET = 0x2,
-}; /* MARKER_ID_API_E_VER_1 */
-
-/**
- * struct iwl_dc2dc_config_cmd - configure dc2dc values
- *
- * (DC2DC_CONFIG_CMD = 0x83)
- *
- * Set/Get & configure dc2dc values.
- * The command always returns the current dc2dc values.
- *
- * @flags: set/get dc2dc
- * @enable_low_power_mode: not used.
- * @dc2dc_freq_tune0: frequency divider - digital domain
- * @dc2dc_freq_tune1: frequency divider - analog domain
- */
-struct iwl_dc2dc_config_cmd {
- __le32 flags;
- __le32 enable_low_power_mode; /* not used */
- __le32 dc2dc_freq_tune0;
- __le32 dc2dc_freq_tune1;
-} __packed; /* DC2DC_CONFIG_CMD_API_S_VER_1 */
-
-/**
- * struct iwl_dc2dc_config_resp - response for iwl_dc2dc_config_cmd
- *
- * Current dc2dc values returned by the FW.
- *
- * @dc2dc_freq_tune0: frequency divider - digital domain
- * @dc2dc_freq_tune1: frequency divider - analog domain
- */
-struct iwl_dc2dc_config_resp {
- __le32 dc2dc_freq_tune0;
- __le32 dc2dc_freq_tune1;
-} __packed; /* DC2DC_CONFIG_RESP_API_S_VER_1 */
-
-/***********************************
- * Smart Fifo API
- ***********************************/
-/* Smart Fifo state */
-enum iwl_sf_state {
- SF_LONG_DELAY_ON = 0, /* should never be called by driver */
- SF_FULL_ON,
- SF_UNINIT,
- SF_INIT_OFF,
- SF_HW_NUM_STATES
-};
-
-/* Smart Fifo possible scenario */
-enum iwl_sf_scenario {
- SF_SCENARIO_SINGLE_UNICAST,
- SF_SCENARIO_AGG_UNICAST,
- SF_SCENARIO_MULTICAST,
- SF_SCENARIO_BA_RESP,
- SF_SCENARIO_TX_RESP,
- SF_NUM_SCENARIO
-};
-
-#define SF_TRANSIENT_STATES_NUMBER 2 /* SF_LONG_DELAY_ON and SF_FULL_ON */
-#define SF_NUM_TIMEOUT_TYPES 2 /* Aging timer and Idle timer */
-
-/* smart FIFO default values */
-#define SF_W_MARK_SISO 6144
-#define SF_W_MARK_MIMO2 8192
-#define SF_W_MARK_MIMO3 6144
-#define SF_W_MARK_LEGACY 4096
-#define SF_W_MARK_SCAN 4096
-
-/* SF Scenarios timers for default configuration (aligned to 32 uSec) */
-#define SF_SINGLE_UNICAST_IDLE_TIMER_DEF 160 /* 150 uSec */
-#define SF_SINGLE_UNICAST_AGING_TIMER_DEF 400 /* 0.4 mSec */
-#define SF_AGG_UNICAST_IDLE_TIMER_DEF 160 /* 150 uSec */
-#define SF_AGG_UNICAST_AGING_TIMER_DEF 400 /* 0.4 mSec */
-#define SF_MCAST_IDLE_TIMER_DEF 160 /* 150 uSec */
-#define SF_MCAST_AGING_TIMER_DEF 400 /* 0.4 mSec */
-#define SF_BA_IDLE_TIMER_DEF 160 /* 150 uSec */
-#define SF_BA_AGING_TIMER_DEF 400 /* 0.4 mSec */
-#define SF_TX_RE_IDLE_TIMER_DEF 160 /* 150 uSec */
-#define SF_TX_RE_AGING_TIMER_DEF 400 /* 0.4 mSec */
-
-/* SF Scenarios timers for BSS MAC configuration (aligned to 32 uSec) */
-#define SF_SINGLE_UNICAST_IDLE_TIMER 320 /* 300 uSec */
-#define SF_SINGLE_UNICAST_AGING_TIMER 2016 /* 2 mSec */
-#define SF_AGG_UNICAST_IDLE_TIMER 320 /* 300 uSec */
-#define SF_AGG_UNICAST_AGING_TIMER 2016 /* 2 mSec */
-#define SF_MCAST_IDLE_TIMER 2016 /* 2 mSec */
-#define SF_MCAST_AGING_TIMER 10016 /* 10 mSec */
-#define SF_BA_IDLE_TIMER 320 /* 300 uSec */
-#define SF_BA_AGING_TIMER 2016 /* 2 mSec */
-#define SF_TX_RE_IDLE_TIMER 320 /* 300 uSec */
-#define SF_TX_RE_AGING_TIMER 2016 /* 2 mSec */
-
-#define SF_LONG_DELAY_AGING_TIMER 1000000 /* 1 Sec */
-
-#define SF_CFG_DUMMY_NOTIF_OFF BIT(16)
-
-/**
- * struct iwl_sf_cfg_cmd - Smart Fifo configuration command.
- * @state: smart fifo state, types listed in &enum iwl_sf_state.
- * @watermark: Minimum allowed available free space in RXF for transient state.
- * @long_delay_timeouts: aging and idle timer values for each scenario
- * in long delay state.
- * @full_on_timeouts: timer values for each scenario in full on state.
- */
-struct iwl_sf_cfg_cmd {
- __le32 state;
- __le32 watermark[SF_TRANSIENT_STATES_NUMBER];
- __le32 long_delay_timeouts[SF_NUM_SCENARIO][SF_NUM_TIMEOUT_TYPES];
- __le32 full_on_timeouts[SF_NUM_SCENARIO][SF_NUM_TIMEOUT_TYPES];
-} __packed; /* SF_CFG_API_S_VER_2 */
-
-/***********************************
- * Location Aware Regulatory (LAR) API - MCC updates
- ***********************************/
-
-/**
- * struct iwl_mcc_update_cmd_v1 - Request the device to update geographic
- * regulatory profile according to the given MCC (Mobile Country Code).
- * The MCC is a two-letter code, ASCII uppercase [A-Z], or '00' for the world domain.
- * 'ZZ' MCC will be used to switch to NVM default profile; in this case, the
- * MCC in the cmd response will be the relevant MCC in the NVM.
- * @mcc: given mobile country code
- * @source_id: the source from where we got the MCC, see iwl_mcc_source
- * @reserved: reserved for alignment
- */
-struct iwl_mcc_update_cmd_v1 {
- __le16 mcc;
- u8 source_id;
- u8 reserved;
-} __packed; /* LAR_UPDATE_MCC_CMD_API_S_VER_1 */
-
-/**
- * struct iwl_mcc_update_cmd - Request the device to update geographic
- * regulatory profile according to the given MCC (Mobile Country Code).
- * The MCC is a two-letter code, ASCII uppercase [A-Z], or '00' for the world domain.
- * 'ZZ' MCC will be used to switch to NVM default profile; in this case, the
- * MCC in the cmd response will be the relevant MCC in the NVM.
- * @mcc: given mobile country code
- * @source_id: the source from where we got the MCC, see iwl_mcc_source
- * @reserved: reserved for alignment
- * @key: integrity key for MCC API OEM testing
- * @reserved2: reserved
- */
-struct iwl_mcc_update_cmd {
- __le16 mcc;
- u8 source_id;
- u8 reserved;
- __le32 key;
- u8 reserved2[20];
-} __packed; /* LAR_UPDATE_MCC_CMD_API_S_VER_2 */
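/*
 * Editorial sketch, not part of this patch: requesting a regulatory
 * update for the US domain. Packing the first letter of the MCC into
 * the high byte follows the usual driver convention, but is an
 * assumption here; MCC_SOURCE_WIFI comes from &enum iwl_mcc_source
 * further below.
 */
static void example_fill_mcc_update(struct iwl_mcc_update_cmd *cmd)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->mcc = cpu_to_le16(('U' << 8) | 'S');
	cmd->source_id = MCC_SOURCE_WIFI;
}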
-
-/**
- * struct iwl_mcc_update_resp_v1 - response to MCC_UPDATE_CMD.
- * Contains the new channel control profile map, if changed, and the new MCC
- * (mobile country code).
- * The new MCC may be different than what was requested in MCC_UPDATE_CMD.
- * @status: see &enum iwl_mcc_update_status
- * @mcc: the new applied MCC
- * @cap: capabilities for all channels which matches the MCC
- * @source_id: the MCC source, see iwl_mcc_source
- * @n_channels: number of channels in @channels (may be 14, 39, 50 or 51
- * channels, depending on platform)
- * @channels: channel control data map, DWORD for each channel. Only the first
- * 16 bits are used.
- */
-struct iwl_mcc_update_resp_v1 {
- __le32 status;
- __le16 mcc;
- u8 cap;
- u8 source_id;
- __le32 n_channels;
- __le32 channels[0];
-} __packed; /* LAR_UPDATE_MCC_CMD_RESP_S_VER_1 */
-
-/**
- * struct iwl_mcc_update_resp - response to MCC_UPDATE_CMD.
- * Contains the new channel control profile map, if changed, and the new MCC
- * (mobile country code).
- * The new MCC may be different than what was requested in MCC_UPDATE_CMD.
- * @status: see &enum iwl_mcc_update_status
- * @mcc: the new applied MCC
- * @cap: capabilities for all channels which matches the MCC
- * @source_id: the MCC source, see iwl_mcc_source
- * @time: time elapsed from the MCC test start (in 30 seconds TU)
- * @reserved: reserved.
- * @n_channels: number of channels in @channels (may be 14, 39, 50 or 51
- * channels, depending on platform)
- * @channels: channel control data map, DWORD for each channel. Only the first
- * 16 bits are used.
- */
-struct iwl_mcc_update_resp {
- __le32 status;
- __le16 mcc;
- u8 cap;
- u8 source_id;
- __le16 time;
- __le16 reserved;
- __le32 n_channels;
- __le32 channels[0];
-} __packed; /* LAR_UPDATE_MCC_CMD_RESP_S_VER_2 */
-
-/**
- * struct iwl_mcc_chub_notif - chub notifies of mcc change
- * (MCC_CHUB_UPDATE_CMD = 0xc9)
- * The Chub (Communication Hub, CommsHUB) is a HW component that connects
- * the cellular and connectivity cores; it gets updates of the MCC and
- * notifies the ucode directly of any MCC change.
- * The ucode requests the driver to request the device to update geographic
- * regulatory profile according to the given MCC (Mobile Country Code).
- * The MCC is a two-letter code, ASCII uppercase [A-Z], or '00' for the world domain.
- * 'ZZ' MCC will be used to switch to NVM default profile; in this case, the
- * MCC in the cmd response will be the relevant MCC in the NVM.
- * @mcc: given mobile country code
- * @source_id: identity of the change originator, see iwl_mcc_source
- * @reserved1: reserved for alignment
- */
-struct iwl_mcc_chub_notif {
- __le16 mcc;
- u8 source_id;
- u8 reserved1;
-} __packed; /* LAR_MCC_NOTIFY_S */
-
-enum iwl_mcc_update_status {
- MCC_RESP_NEW_CHAN_PROFILE,
- MCC_RESP_SAME_CHAN_PROFILE,
- MCC_RESP_INVALID,
- MCC_RESP_NVM_DISABLED,
- MCC_RESP_ILLEGAL,
- MCC_RESP_LOW_PRIORITY,
- MCC_RESP_TEST_MODE_ACTIVE,
- MCC_RESP_TEST_MODE_NOT_ACTIVE,
- MCC_RESP_TEST_MODE_DENIAL_OF_SERVICE,
-};
-
-enum iwl_mcc_source {
- MCC_SOURCE_OLD_FW = 0,
- MCC_SOURCE_ME = 1,
- MCC_SOURCE_BIOS = 2,
- MCC_SOURCE_3G_LTE_HOST = 3,
- MCC_SOURCE_3G_LTE_DEVICE = 4,
- MCC_SOURCE_WIFI = 5,
- MCC_SOURCE_RESERVED = 6,
- MCC_SOURCE_DEFAULT = 7,
- MCC_SOURCE_UNINITIALIZED = 8,
- MCC_SOURCE_MCC_API = 9,
- MCC_SOURCE_GET_CURRENT = 0x10,
- MCC_SOURCE_GETTING_MCC_TEST_MODE = 0x11,
-};
-
-/* DTS measurements */
-
-enum iwl_dts_measurement_flags {
- DTS_TRIGGER_CMD_FLAGS_TEMP = BIT(0),
- DTS_TRIGGER_CMD_FLAGS_VOLT = BIT(1),
-};
-
-/**
- * struct iwl_dts_measurement_cmd - request DTS temp and/or voltage measurements
- *
- * @flags: indicates which measurements we want as specified in
- * &enum iwl_dts_measurement_flags
- */
-struct iwl_dts_measurement_cmd {
- __le32 flags;
-} __packed; /* TEMPERATURE_MEASUREMENT_TRIGGER_CMD_S */
-
-/**
- * enum iwl_dts_control_measurement_mode - DTS measurement type
- * @DTS_AUTOMATIC: Automatic mode (full SW control). Provide temperature
- *	read back (latest value, not waiting for a new value). Use automatic
- *	SW DTS configuration.
- * @DTS_REQUEST_READ: Request DTS read. Configure DTS with manual settings,
- *	trigger DTS reading and provide temperature read back when available.
- * @DTS_OVER_WRITE: over-write the DTS temperatures in the SW until next read
- * @DTS_DIRECT_WITHOUT_MEASURE: DTS returns its latest temperature result,
- *	without a measurement trigger.
- */
-enum iwl_dts_control_measurement_mode {
- DTS_AUTOMATIC = 0,
- DTS_REQUEST_READ = 1,
- DTS_OVER_WRITE = 2,
- DTS_DIRECT_WITHOUT_MEASURE = 3,
-};
-
-/**
- * enum iwl_dts_used - DTS to use or used for measurement in the DTS request
- * @DTS_USE_TOP: Top
- * @DTS_USE_CHAIN_A: chain A
- * @DTS_USE_CHAIN_B: chain B
- * @DTS_USE_CHAIN_C: chain C
- * @XTAL_TEMPERATURE: read temperature from xtal
- */
-enum iwl_dts_used {
- DTS_USE_TOP = 0,
- DTS_USE_CHAIN_A = 1,
- DTS_USE_CHAIN_B = 2,
- DTS_USE_CHAIN_C = 3,
- XTAL_TEMPERATURE = 4,
-};
-
-/**
- * enum iwl_dts_bit_mode - bit-mode to use in DTS request read mode
- * @DTS_BIT6_MODE: bit 6 mode
- * @DTS_BIT8_MODE: bit 8 mode
- */
-enum iwl_dts_bit_mode {
- DTS_BIT6_MODE = 0,
- DTS_BIT8_MODE = 1,
-};
-
-/**
- * struct iwl_ext_dts_measurement_cmd - request extended DTS temp measurements
- * @control_mode: see &enum iwl_dts_control_measurement_mode
- * @temperature: used when the over-write DTS mode is selected
- * @sensor: set temperature sensor to use. See &enum iwl_dts_used
- * @avg_factor: average factor to DTS in request DTS read mode
- * @bit_mode: value defines the DTS bit mode to use. See &enum iwl_dts_bit_mode
- * @step_duration: step duration for the DTS
- */
-struct iwl_ext_dts_measurement_cmd {
- __le32 control_mode;
- __le32 temperature;
- __le32 sensor;
- __le32 avg_factor;
- __le32 bit_mode;
- __le32 step_duration;
-} __packed; /* XVT_FW_DTS_CONTROL_MEASUREMENT_REQUEST_API_S */
-
-/**
- * struct iwl_dts_measurement_notif_v1 - measurements notification
- *
- * @temp: the measured temperature
- * @voltage: the measured voltage
- */
-struct iwl_dts_measurement_notif_v1 {
- __le32 temp;
- __le32 voltage;
-} __packed; /* TEMPERATURE_MEASUREMENT_TRIGGER_NTFY_S_VER_1 */
-
-/**
- * struct iwl_dts_measurement_notif_v2 - measurements notification
- *
- * @temp: the measured temperature
- * @voltage: the measured voltage
- * @threshold_idx: the trip index that was crossed
- */
-struct iwl_dts_measurement_notif_v2 {
- __le32 temp;
- __le32 voltage;
- __le32 threshold_idx;
-} __packed; /* TEMPERATURE_MEASUREMENT_TRIGGER_NTFY_S_VER_2 */
-
-/**
- * struct ct_kill_notif - CT-kill entry notification
- *
- * @temperature: the current temperature in celsius
- * @reserved: reserved
- */
-struct ct_kill_notif {
- __le16 temperature;
- __le16 reserved;
-} __packed; /* GRP_PHY_CT_KILL_NTF */
-
-/**
- * enum iwl_mvm_ctdp_cmd_operation - CTDP command operations
- * @CTDP_CMD_OPERATION_START: update the current budget
- * @CTDP_CMD_OPERATION_STOP: stop cTDP
- * @CTDP_CMD_OPERATION_REPORT: get the average budget
- */
-enum iwl_mvm_ctdp_cmd_operation {
- CTDP_CMD_OPERATION_START = 0x1,
- CTDP_CMD_OPERATION_STOP = 0x2,
- CTDP_CMD_OPERATION_REPORT = 0x4,
-}; /* CTDP_CMD_OPERATION_TYPE_E */
-
-/**
- * struct iwl_mvm_ctdp_cmd - track and manage the FW power consumption budget
- *
- * @operation: see &enum iwl_mvm_ctdp_cmd_operation
- * @budget: the budget in milliwatts
- * @window_size: defined in API but not used
- */
-struct iwl_mvm_ctdp_cmd {
- __le32 operation;
- __le32 budget;
- __le32 window_size;
-} __packed;
-
-#define IWL_MAX_DTS_TRIPS 8
-
-/**
- * struct temp_report_ths_cmd - set temperature thresholds
- *
- * @num_temps: number of temperature thresholds passed
- * @thresholds: array with the thresholds to be configured
- */
-struct temp_report_ths_cmd {
- __le32 num_temps;
- __le16 thresholds[IWL_MAX_DTS_TRIPS];
-} __packed; /* GRP_PHY_TEMP_REPORTING_THRESHOLDS_CMD */
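/*
 * Editorial sketch, not part of this patch: configuring two temperature
 * trip points. The values are arbitrary and assumed to be in celsius,
 * matching &struct ct_kill_notif above.
 */
static void example_fill_temp_ths(struct temp_report_ths_cmd *cmd)
{
	memset(cmd, 0, sizeof(*cmd));
	cmd->num_temps = cpu_to_le32(2);
	cmd->thresholds[0] = cpu_to_le16(75);
	cmd->thresholds[1] = cpu_to_le16(95);
}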
-
-/***********************************
- * TDLS API
- ***********************************/
-
-/* Type of TDLS request */
-enum iwl_tdls_channel_switch_type {
- TDLS_SEND_CHAN_SW_REQ = 0,
- TDLS_SEND_CHAN_SW_RESP_AND_MOVE_CH,
- TDLS_MOVE_CH,
-}; /* TDLS_STA_CHANNEL_SWITCH_CMD_TYPE_API_E_VER_1 */
-
-/**
- * struct iwl_tdls_channel_switch_timing - Switch timing in TDLS channel-switch
- * @frame_timestamp: GP2 timestamp of channel-switch request/response packet
- * received from peer
- * @max_offchan_duration: how many microseconds out of a DTIM are given
- * to the TDLS off-channel communication. For instance, if the DTIM is
- * 200TU and the TDLS peer is to be given 25% of the time, the value
- * given will be 50TU, or 50 * 1024 if translated into microseconds.
- * @switch_time: switch time the peer sent in its channel switch timing IE
- * @switch_timeout: switch timeout the peer sent in its channel switch timing IE
- */
-struct iwl_tdls_channel_switch_timing {
- __le32 frame_timestamp; /* GP2 time of peer packet Rx */
- __le32 max_offchan_duration; /* given in micro-seconds */
- __le32 switch_time; /* given in micro-seconds */
- __le32 switch_timeout; /* given in micro-seconds */
-} __packed; /* TDLS_STA_CHANNEL_SWITCH_TIMING_DATA_API_S_VER_1 */
-
-#define IWL_TDLS_CH_SW_FRAME_MAX_SIZE 200
-
-/**
- * struct iwl_tdls_channel_switch_frame - TDLS channel switch frame template
- *
- * A template representing a TDLS channel-switch request or response frame
- *
- * @switch_time_offset: offset to the channel switch timing IE in the template
- * @tx_cmd: Tx parameters for the frame
- * @data: frame data
- */
-struct iwl_tdls_channel_switch_frame {
- __le32 switch_time_offset;
- struct iwl_tx_cmd tx_cmd;
- u8 data[IWL_TDLS_CH_SW_FRAME_MAX_SIZE];
-} __packed; /* TDLS_STA_CHANNEL_SWITCH_FRAME_API_S_VER_1 */
-
-/**
- * struct iwl_tdls_channel_switch_cmd - TDLS channel switch command
- *
- * The command is sent to initiate a channel switch and also in response to
- * incoming TDLS channel-switch request/response packets from remote peers.
- *
- * @switch_type: see &enum iwl_tdls_channel_switch_type
- * @peer_sta_id: station id of TDLS peer
- * @ci: channel we switch to
- * @timing: timing related data for command
- * @frame: channel-switch request/response template, depending on switch_type
- */
-struct iwl_tdls_channel_switch_cmd {
- u8 switch_type;
- __le32 peer_sta_id;
- struct iwl_fw_channel_info ci;
- struct iwl_tdls_channel_switch_timing timing;
- struct iwl_tdls_channel_switch_frame frame;
-} __packed; /* TDLS_STA_CHANNEL_SWITCH_CMD_API_S_VER_1 */
-
-/**
- * struct iwl_tdls_channel_switch_notif - TDLS channel switch start notification
- *
- * @status: non-zero on success
- * @offchannel_duration: duration given in microseconds
- * @sta_id: station id of the peer we are performing the channel switch with
- */
-struct iwl_tdls_channel_switch_notif {
- __le32 status;
- __le32 offchannel_duration;
- __le32 sta_id;
-} __packed; /* TDLS_STA_CHANNEL_SWITCH_NTFY_API_S_VER_1 */
-
-/**
- * struct iwl_tdls_sta_info - TDLS station info
- *
- * @sta_id: station id of the TDLS peer
- * @tx_to_peer_tid: TID reserved vs. the peer for FW based Tx
- * @tx_to_peer_ssn: initial SSN the FW should use for Tx on its TID vs the peer
- * @is_initiator: 1 if the peer is the TDLS link initiator, 0 otherwise
- */
-struct iwl_tdls_sta_info {
- u8 sta_id;
- u8 tx_to_peer_tid;
- __le16 tx_to_peer_ssn;
- __le32 is_initiator;
-} __packed; /* TDLS_STA_INFO_VER_1 */
-
-/**
- * struct iwl_tdls_config_cmd - TDLS basic config command
- *
- * @id_and_color: MAC id and color being configured
- * @tdls_peer_count: amount of currently connected TDLS peers
- * @tx_to_ap_tid: TID reserved vs. the AP for FW based Tx
- * @tx_to_ap_ssn: initial SSN the FW should use for Tx on its TID vs. the AP
- * @sta_info: per-station info. Only the first tdls_peer_count entries are set
- * @pti_req_data_offset: offset of network-level data for the PTI template
- * @pti_req_tx_cmd: Tx parameters for PTI request template
- * @pti_req_template: PTI request template data
- */
-struct iwl_tdls_config_cmd {
- __le32 id_and_color; /* mac id and color */
- u8 tdls_peer_count;
- u8 tx_to_ap_tid;
- __le16 tx_to_ap_ssn;
- struct iwl_tdls_sta_info sta_info[IWL_MVM_TDLS_STA_COUNT];
-
- __le32 pti_req_data_offset;
- struct iwl_tx_cmd pti_req_tx_cmd;
- u8 pti_req_template[0];
-} __packed; /* TDLS_CONFIG_CMD_API_S_VER_1 */
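
Since @pti_req_template is a zero-length trailing array, the host command
carrying this structure must be sized as the fixed part plus the template
bytes. A minimal allocation sketch (not part of this patch; template and
template_len are hypothetical):

	struct iwl_tdls_config_cmd *cmd;
	size_t len = sizeof(*cmd) + template_len; /* fixed part + PTI template */

	cmd = kzalloc(len, GFP_KERNEL);
	if (!cmd)
		return -ENOMEM;
	memcpy(cmd->pti_req_template, template, template_len);
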
-
-/**
- * struct iwl_tdls_config_sta_info_res - TDLS per-station config information
- *
- * @sta_id: station id of the TDLS peer
- * @tx_to_peer_last_seq: last sequence number used by FW during FW-based Tx to
- * the peer
- */
-struct iwl_tdls_config_sta_info_res {
- __le16 sta_id;
- __le16 tx_to_peer_last_seq;
-} __packed; /* TDLS_STA_INFO_RSP_VER_1 */
-
-/**
- * struct iwl_tdls_config_res - TDLS config information from FW
- *
- * @tx_to_ap_last_seq: last sequence number used by FW during FW-based Tx to AP
- * @sta_info: per-station TDLS config information
- */
-struct iwl_tdls_config_res {
- __le32 tx_to_ap_last_seq;
- struct iwl_tdls_config_sta_info_res sta_info[IWL_MVM_TDLS_STA_COUNT];
-} __packed; /* TDLS_CONFIG_RSP_API_S_VER_1 */
-
-#define TX_FIFO_MAX_NUM_9000 8
-#define TX_FIFO_MAX_NUM 15
-#define RX_FIFO_MAX_NUM 2
-#define TX_FIFO_INTERNAL_MAX_NUM 6
-
-/**
- * struct iwl_shared_mem_cfg_v2 - Shared memory configuration information
- *
- * @shared_mem_addr: shared memory addr (pre 8000 HW set to 0x0 as MARBH is not
- * accessible)
- * @shared_mem_size: shared memory size
- * @sample_buff_addr: internal sample (mon/adc) buff addr (pre 8000 HW set to
- * 0x0 as accessible only via DBGM RDAT)
- * @sample_buff_size: internal sample buff size
- * @txfifo_addr: start addr of TXF0 (excluding the context table 0.5KB), (pre
- * 8000 HW set to 0x0 as not accessible)
- * @txfifo_size: size of TXF0 ... TXF7
- * @rxfifo_size: RXF1, RXF2 sizes. If there is no RXF2, it'll have a value of 0
- * @page_buff_addr: used by UMAC and performance debug (page miss analysis),
- * when paging is not supported this should be 0
- * @page_buff_size: size of %page_buff_addr
- * @rxfifo_addr: Start address of rxFifo
- * @internal_txfifo_addr: start address of internalFifo
- * @internal_txfifo_size: internal fifos' size
- *
- * NOTE: on firmware that doesn't have IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG
- * set, the last 3 members don't exist.
- */
-struct iwl_shared_mem_cfg_v2 {
- __le32 shared_mem_addr;
- __le32 shared_mem_size;
- __le32 sample_buff_addr;
- __le32 sample_buff_size;
- __le32 txfifo_addr;
- __le32 txfifo_size[TX_FIFO_MAX_NUM_9000];
- __le32 rxfifo_size[RX_FIFO_MAX_NUM];
- __le32 page_buff_addr;
- __le32 page_buff_size;
- __le32 rxfifo_addr;
- __le32 internal_txfifo_addr;
- __le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM];
-} __packed; /* SHARED_MEM_ALLOC_API_S_VER_2 */
-
-/**
- * struct iwl_shared_mem_lmac_cfg - LMAC shared memory configuration
- *
- * @txfifo_addr: start addr of TXF0 (excluding the context table 0.5KB)
- * @txfifo_size: size of TX FIFOs
- * @rxfifo1_addr: RXF1 addr
- * @rxfifo1_size: RXF1 size
- */
-struct iwl_shared_mem_lmac_cfg {
- __le32 txfifo_addr;
- __le32 txfifo_size[TX_FIFO_MAX_NUM];
- __le32 rxfifo1_addr;
- __le32 rxfifo1_size;
-
-} __packed; /* SHARED_MEM_ALLOC_LMAC_API_S_VER_1 */
-
-/**
- * struct iwl_shared_mem_cfg - Shared memory configuration information
- *
- * @shared_mem_addr: shared memory address
- * @shared_mem_size: shared memory size
- * @sample_buff_addr: internal sample (mon/adc) buff addr
- * @sample_buff_size: internal sample buff size
- * @rxfifo2_addr: start addr of RXF2
- * @rxfifo2_size: size of RXF2
- * @page_buff_addr: used by UMAC and performance debug (page miss analysis),
- * when paging is not supported this should be 0
- * @page_buff_size: size of %page_buff_addr
- * @lmac_num: number of LMACs (1 or 2)
- * @lmac_smem: per-LMAC smem data
- */
-struct iwl_shared_mem_cfg {
- __le32 shared_mem_addr;
- __le32 shared_mem_size;
- __le32 sample_buff_addr;
- __le32 sample_buff_size;
- __le32 rxfifo2_addr;
- __le32 rxfifo2_size;
- __le32 page_buff_addr;
- __le32 page_buff_size;
- __le32 lmac_num;
- struct iwl_shared_mem_lmac_cfg lmac_smem[2];
-} __packed; /* SHARED_MEM_ALLOC_API_S_VER_3 */
-
-/**
- * struct iwl_mu_group_mgmt_cmd - VHT MU-MIMO group configuration
- *
- * @reserved: reserved
- * @membership_status: a bitmap of MU groups
- * @user_position: the position of the station in a group. If the station is
- * in the group, bits (group * 2) and (group * 2 + 1) hold its position
- */
-struct iwl_mu_group_mgmt_cmd {
- __le32 reserved;
- __le32 membership_status[2];
- __le32 user_position[4];
-} __packed; /* MU_GROUP_ID_MNG_TABLE_API_S_VER_1 */
-
-/**
- * struct iwl_mu_group_mgmt_notif - VHT MU-MIMO group id notification
- *
- * @membership_status: a bitmap of MU groups
- * @user_position: the position of the station in a group. If the station is
- * in the group, bits (group * 2) and (group * 2 + 1) hold its position
- */
-struct iwl_mu_group_mgmt_notif {
- __le32 membership_status[2];
- __le32 user_position[4];
-} __packed; /* MU_GROUP_MNG_NTFY_API_S_VER_1 */
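
To make the packing above concrete: the two __le32 words of
@membership_status give 64 one-bit group memberships, and the four words of
@user_position give 64 two-bit positions. A decoding sketch (helper names
are illustrative, not part of this patch):

	static bool mu_group_member(const struct iwl_mu_group_mgmt_notif *n,
				    int group)
	{
		/* one membership bit per group, 32 groups per __le32 */
		return le32_to_cpu(n->membership_status[group / 32]) &
		       BIT(group % 32);
	}

	static u8 mu_user_position(const struct iwl_mu_group_mgmt_notif *n,
				   int group)
	{
		/* two position bits per group, 16 groups per __le32 */
		return (le32_to_cpu(n->user_position[group / 16]) >>
			((group % 16) * 2)) & 0x3;
	}
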
-
-#define MAX_STORED_BEACON_SIZE 600
-
-/**
- * struct iwl_stored_beacon_notif - Stored beacon notification
- *
- * @system_time: system time on air rise
- * @tsf: TSF on air rise
- * @beacon_timestamp: beacon on air rise
- * @band: band, matches &RX_RES_PHY_FLAGS_BAND_24 definition
- * @channel: channel this beacon was received on
- * @rates: rate in ucode internal format
- * @byte_count: frame's byte count
- * @data: beacon data, length in @byte_count
- */
-struct iwl_stored_beacon_notif {
- __le32 system_time;
- __le64 tsf;
- __le32 beacon_timestamp;
- __le16 band;
- __le16 channel;
- __le32 rates;
- __le32 byte_count;
- u8 data[MAX_STORED_BEACON_SIZE];
-} __packed; /* WOWLAN_STROED_BEACON_INFO_S_VER_2 */
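
Note that @data is a fixed-size buffer while @byte_count gives the actual
frame length, so a consumer would clamp before copying. A one-line sketch
(notif is a hypothetical pointer to the notification):

	u32 len = min_t(u32, le32_to_cpu(notif->byte_count),
			MAX_STORED_BEACON_SIZE);
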
-
-#define LQM_NUMBER_OF_STATIONS_IN_REPORT 16
-
-enum iwl_lqm_cmd_operatrions {
- LQM_CMD_OPERATION_START_MEASUREMENT = 0x01,
- LQM_CMD_OPERATION_STOP_MEASUREMENT = 0x02,
-};
-
-enum iwl_lqm_status {
- LQM_STATUS_SUCCESS = 0,
- LQM_STATUS_TIMEOUT = 1,
- LQM_STATUS_ABORT = 2,
-};
-
-/**
- * struct iwl_link_qual_msrmnt_cmd - Link Quality Measurement command
- * @cmd_operation: command operation to be performed (start or stop)
- * as defined above.
- * @mac_id: MAC ID the measurement applies to.
- * @measurement_time: time of the total measurement to be performed, in uSec.
- * @timeout: maximum time allowed until a response is sent, in uSec.
- */
-struct iwl_link_qual_msrmnt_cmd {
- __le32 cmd_operation;
- __le32 mac_id;
- __le32 measurement_time;
- __le32 timeout;
-} __packed /* LQM_CMD_API_S_VER_1 */;
-
-/**
- * struct iwl_link_qual_msrmnt_notif - Link Quality Measurement notification
- *
- * @frequent_stations_air_time: an array containing the total air time
- * (in uSec) used by the most frequently transmitting stations.
- * @number_of_stations: the number of unique stations included in the array
- * (a number between 0 and 16)
- * @total_air_time_other_stations: the total air time (uSec) used by all the
- * stations which are not included in the above report.
- * @time_in_measurement_window: the total time in uSec in which a measurement
- * took place.
- * @tx_frame_dropped: the number of TX frames dropped due to retry limit during
- * measurement
- * @mac_id: MAC ID the measurement applies to.
- * @status: return status; may be one of the LQM_STATUS_* values defined above.
- * @reserved: reserved.
- */
-struct iwl_link_qual_msrmnt_notif {
- __le32 frequent_stations_air_time[LQM_NUMBER_OF_STATIONS_IN_REPORT];
- __le32 number_of_stations;
- __le32 total_air_time_other_stations;
- __le32 time_in_measurement_window;
- __le32 tx_frame_dropped;
- __le32 mac_id;
- __le32 status;
- u8 reserved[12];
-} __packed; /* LQM_MEASUREMENT_COMPLETE_NTF_API_S_VER1 */
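
As an example of consuming this notification (not part of this patch), a
rough medium-utilization percentage can be derived from the air-time fields;
the helper name is an assumption:

	static u32 lqm_airtime_percent(const struct iwl_link_qual_msrmnt_notif *n)
	{
		u64 airtime = le32_to_cpu(n->total_air_time_other_stations);
		u32 window = le32_to_cpu(n->time_in_measurement_window);
		int i;

		for (i = 0; i < LQM_NUMBER_OF_STATIONS_IN_REPORT; i++)
			airtime += le32_to_cpu(n->frequent_stations_air_time[i]);

		return window ? div64_u64(airtime * 100, window) : 0;
	}
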
-
-/**
- * struct iwl_channel_switch_noa_notif - Channel switch NOA notification
- *
- * @id_and_color: ID and color of the MAC
- */
-struct iwl_channel_switch_noa_notif {
- __le32 id_and_color;
-} __packed; /* CHANNEL_SWITCH_START_NTFY_API_S_VER_1 */
-
-/* Operation types for the debug mem access */
-enum {
- DEBUG_MEM_OP_READ = 0,
- DEBUG_MEM_OP_WRITE = 1,
- DEBUG_MEM_OP_WRITE_BYTES = 2,
-};
-
-#define DEBUG_MEM_MAX_SIZE_DWORDS 32
-
-/**
- * struct iwl_dbg_mem_access_cmd - Request the device to read/write memory
- * @op: DEBUG_MEM_OP_*
- * @addr: address to read/write from/to
- * @len: in dwords, to read/write
- * @data: for write operations, contains the source buffer
- */
-struct iwl_dbg_mem_access_cmd {
- __le32 op;
- __le32 addr;
- __le32 len;
- __le32 data[];
-} __packed; /* DEBUG_(U|L)MAC_RD_WR_CMD_API_S_VER_1 */
-
-/* Status responses for the debug mem access */
-enum {
- DEBUG_MEM_STATUS_SUCCESS = 0x0,
- DEBUG_MEM_STATUS_FAILED = 0x1,
- DEBUG_MEM_STATUS_LOCKED = 0x2,
- DEBUG_MEM_STATUS_HIDDEN = 0x3,
- DEBUG_MEM_STATUS_LENGTH = 0x4,
-};
-
-/**
- * struct iwl_dbg_mem_access_rsp - Response to debug mem commands
- * @status: DEBUG_MEM_STATUS_*
- * @len: read dwords (0 for write operations)
- * @data: contains the read DWs
- */
-struct iwl_dbg_mem_access_rsp {
- __le32 status;
- __le32 len;
- __le32 data[];
-} __packed; /* DEBUG_(U|L)MAC_RD_WR_RSP_API_S_VER_1 */
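
A host-side read sketch pairing the command and response structures above
(not part of this patch; DEBUG_MEM_ACCESS_CMD is a placeholder, since the
real command id is not defined in this file):

	static int dbg_mem_read(struct iwl_mvm *mvm, u32 addr, u32 dwords)
	{
		struct iwl_dbg_mem_access_cmd mem_cmd = {
			.op = cpu_to_le32(DEBUG_MEM_OP_READ),
			.addr = cpu_to_le32(addr),
			.len = cpu_to_le32(min_t(u32, dwords,
						 DEBUG_MEM_MAX_SIZE_DWORDS)),
		};
		struct iwl_host_cmd cmd = {
			.id = DEBUG_MEM_ACCESS_CMD, /* placeholder id */
			.flags = CMD_WANT_SKB,
			.data = { &mem_cmd, },
			.len = { sizeof(mem_cmd), },
		};
		struct iwl_dbg_mem_access_rsp *rsp;
		int ret;

		ret = iwl_mvm_send_cmd(mvm, &cmd);
		if (ret)
			return ret;

		rsp = (void *)((struct iwl_rx_packet *)cmd.resp_pkt)->data;
		if (le32_to_cpu(rsp->status) != DEBUG_MEM_STATUS_SUCCESS)
			ret = -EIO;
		/* on success, rsp->len dwords of rsp->data hold the read words */

		iwl_free_resp(&cmd);
		return ret;
	}
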
-
-/**
- * struct iwl_nvm_access_complete_cmd - NVM_ACCESS commands are completed
- * @reserved: reserved
- */
-struct iwl_nvm_access_complete_cmd {
- __le32 reserved;
-} __packed; /* NVM_ACCESS_COMPLETE_CMD_API_S_VER_1 */
-
-/**
- * enum iwl_extended_cfg_flags - commands driver may send before
- * finishing init flow
- * @IWL_INIT_DEBUG_CFG: driver is going to send debug config command
- * @IWL_INIT_NVM: driver is going to send NVM_ACCESS commands
- * @IWL_INIT_PHY: driver is going to send PHY_DB commands
- */
-enum iwl_extended_cfg_flags {
- IWL_INIT_DEBUG_CFG,
- IWL_INIT_NVM,
- IWL_INIT_PHY,
-};
-
-/**
- * struct iwl_extended_cfg_cmd - mark what commands ucode should wait for
- * before finishing init flows
- * @init_flags: values from iwl_extended_cfg_flags
- */
-struct iwl_init_extended_cfg_cmd {
- __le32 init_flags;
-} __packed; /* INIT_EXTENDED_CFG_CMD_API_S_VER_1 */
-
-/*
- * struct iwl_nvm_get_info - request to get NVM data
- */
-struct iwl_nvm_get_info {
- __le32 reserved;
-} __packed; /* GRP_REGULATORY_NVM_GET_INFO_CMD_S_VER_1 */
-
-/**
- * struct iwl_nvm_get_info_general - general NVM data
- * @flags: 1 - empty, 0 - valid
- * @nvm_version: nvm version
- * @board_type: board type
- * @reserved: reserved
- */
-struct iwl_nvm_get_info_general {
- __le32 flags;
- __le16 nvm_version;
- u8 board_type;
- u8 reserved;
-} __packed; /* GRP_REGULATORY_NVM_GET_INFO_GENERAL_S_VER_1 */
-
-/**
- * struct iwl_nvm_get_info_sku - mac information
- * @enable_24g: band 2.4G enabled
- * @enable_5g: band 5G enabled
- * @enable_11n: 11n enabled
- * @enable_11ac: 11ac enabled
- * @mimo_disable: MIMO disabled
- * @ext_crypto: Extended crypto enabled
- */
-struct iwl_nvm_get_info_sku {
- __le32 enable_24g;
- __le32 enable_5g;
- __le32 enable_11n;
- __le32 enable_11ac;
- __le32 mimo_disable;
- __le32 ext_crypto;
-} __packed; /* GRP_REGULATORY_NVM_GET_INFO_MAC_SKU_SECTION_S_VER_1 */
-
-/**
- * struct iwl_nvm_get_info_phy - phy information
- * @tx_chains: BIT 0 chain A, BIT 1 chain B
- * @rx_chains: BIT 0 chain A, BIT 1 chain B
- */
-struct iwl_nvm_get_info_phy {
- __le32 tx_chains;
- __le32 rx_chains;
-} __packed; /* GRP_REGULATORY_NVM_GET_INFO_PHY_SKU_SECTION_S_VER_1 */
-
-#define IWL_NUM_CHANNELS (51)
-
-/**
- * struct iwl_nvm_get_info_regulatory - regulatory information
- * @lar_enabled: is LAR enabled
- * @channel_profile: regulatory data of this channel
- * @reserved: reserved
- */
-struct iwl_nvm_get_info_regulatory {
- __le32 lar_enabled;
- __le16 channel_profile[IWL_NUM_CHANNELS];
- __le16 reserved;
-} __packed; /* GRP_REGULATORY_NVM_GET_INFO_REGULATORY_S_VER_1 */
-
-/**
- * struct iwl_nvm_get_info_rsp - response to get NVM data
- * @general: general NVM data
- * @mac_sku: data relating to MAC sku
- * @phy_sku: data relating to PHY sku
- * @regulatory: regulatory data
- */
-struct iwl_nvm_get_info_rsp {
- struct iwl_nvm_get_info_general general;
- struct iwl_nvm_get_info_sku mac_sku;
- struct iwl_nvm_get_info_phy phy_sku;
- struct iwl_nvm_get_info_regulatory regulatory;
-} __packed; /* GRP_REGULATORY_NVM_GET_INFO_CMD_RSP_S_VER_1 */
-
-/**
- * struct iwl_mvm_antenna_coupling_notif - antenna coupling notification
- * @isolation: antenna isolation value
- */
-struct iwl_mvm_antenna_coupling_notif {
- __le32 isolation;
-} __packed;
+#include "fw/api/tdls.h"
+#include "fw/api/mac-cfg.h"
+#include "fw/api/offload.h"
+#include "fw/api/context.h"
+#include "fw/api/time-event.h"
+#include "fw/api/datapath.h"
+#include "fw/api/phy.h"
+#include "fw/api/config.h"
+#include "fw/api/alive.h"
+#include "fw/api/binding.h"
+#include "fw/api/cmdhdr.h"
+#include "fw/api/coex.h"
+#include "fw/api/commands.h"
+#include "fw/api/d3.h"
+#include "fw/api/filter.h"
+#include "fw/api/led.h"
+#include "fw/api/mac.h"
+#include "fw/api/nvm-reg.h"
+#include "fw/api/phy-ctxt.h"
+#include "fw/api/power.h"
+#include "fw/api/rs.h"
+#include "fw/api/rx.h"
+#include "fw/api/scan.h"
+#include "fw/api/sf.h"
+#include "fw/api/sta.h"
+#include "fw/api/stats.h"
+#include "fw/api/tof.h"
+#include "fw/api/tx.h"
#endif /* __fw_api_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 82863e9..8348549 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -78,7 +78,7 @@
#include "iwl-eeprom-parse.h"
#include "mvm.h"
-#include "fw-dbg.h"
+#include "fw/dbg.h"
#include "iwl-phy-db.h"
#define MVM_UCODE_ALIVE_TIMEOUT HZ
@@ -144,134 +144,6 @@ static int iwl_mvm_send_dqa_cmd(struct iwl_mvm *mvm)
return ret;
}
-void iwl_free_fw_paging(struct iwl_mvm *mvm)
-{
- int i;
-
- if (!mvm->fw_paging_db[0].fw_paging_block)
- return;
-
- for (i = 0; i < NUM_OF_FW_PAGING_BLOCKS; i++) {
- struct iwl_fw_paging *paging = &mvm->fw_paging_db[i];
-
- if (!paging->fw_paging_block) {
- IWL_DEBUG_FW(mvm,
- "Paging: block %d already freed, continue to next page\n",
- i);
-
- continue;
- }
- dma_unmap_page(mvm->trans->dev, paging->fw_paging_phys,
- paging->fw_paging_size, DMA_BIDIRECTIONAL);
-
- __free_pages(paging->fw_paging_block,
- get_order(paging->fw_paging_size));
- paging->fw_paging_block = NULL;
- }
- kfree(mvm->trans->paging_download_buf);
- mvm->trans->paging_download_buf = NULL;
- mvm->trans->paging_db = NULL;
-
- memset(mvm->fw_paging_db, 0, sizeof(mvm->fw_paging_db));
-}
-
-static int iwl_fill_paging_mem(struct iwl_mvm *mvm, const struct fw_img *image)
-{
- int sec_idx, idx;
- u32 offset = 0;
-
- /*
- * find where the paging image starts:
- * if CPU2 exists and is in paging format, then the image looks like:
- * CPU1 sections (2 or more)
- * CPU1_CPU2_SEPARATOR_SECTION delimiter - separates CPU1 from CPU2
- * CPU2 sections (not paged)
- * PAGING_SEPARATOR_SECTION delimiter - separates the non-paged CPU2
- * sections from the CPU2 paging section
- * CPU2 paging CSS
- * CPU2 paging image (including instruction and data)
- */
- for (sec_idx = 0; sec_idx < image->num_sec; sec_idx++) {
- if (image->sec[sec_idx].offset == PAGING_SEPARATOR_SECTION) {
- sec_idx++;
- break;
- }
- }
-
- /*
- * If paging is enabled there should be at least 2 more sections left
- * (one for CSS and one for Paging data)
- */
- if (sec_idx >= image->num_sec - 1) {
- IWL_ERR(mvm, "Paging: Missing CSS and/or paging sections\n");
- iwl_free_fw_paging(mvm);
- return -EINVAL;
- }
-
- /* copy the CSS block to the dram */
- IWL_DEBUG_FW(mvm, "Paging: load paging CSS to FW, sec = %d\n",
- sec_idx);
-
- memcpy(page_address(mvm->fw_paging_db[0].fw_paging_block),
- image->sec[sec_idx].data,
- mvm->fw_paging_db[0].fw_paging_size);
- dma_sync_single_for_device(mvm->trans->dev,
- mvm->fw_paging_db[0].fw_paging_phys,
- mvm->fw_paging_db[0].fw_paging_size,
- DMA_BIDIRECTIONAL);
-
- IWL_DEBUG_FW(mvm,
- "Paging: copied %d CSS bytes to first block\n",
- mvm->fw_paging_db[0].fw_paging_size);
-
- sec_idx++;
-
- /*
- * copy the paging blocks to the DRAM.
- * The loop index starts from 1 since the CSS block (index 0) was
- * already copied to DRAM.
- * The loop stops at num_of_paging_blk since the last block is not full.
- */
- for (idx = 1; idx < mvm->num_of_paging_blk; idx++) {
- struct iwl_fw_paging *block = &mvm->fw_paging_db[idx];
-
- memcpy(page_address(block->fw_paging_block),
- image->sec[sec_idx].data + offset,
- block->fw_paging_size);
- dma_sync_single_for_device(mvm->trans->dev,
- block->fw_paging_phys,
- block->fw_paging_size,
- DMA_BIDIRECTIONAL);
-
-
- IWL_DEBUG_FW(mvm,
- "Paging: copied %d paging bytes to block %d\n",
- mvm->fw_paging_db[idx].fw_paging_size,
- idx);
-
- offset += mvm->fw_paging_db[idx].fw_paging_size;
- }
-
- /* copy the last paging block */
- if (mvm->num_of_pages_in_last_blk > 0) {
- struct iwl_fw_paging *block = &mvm->fw_paging_db[idx];
-
- memcpy(page_address(block->fw_paging_block),
- image->sec[sec_idx].data + offset,
- FW_PAGING_SIZE * mvm->num_of_pages_in_last_blk);
- dma_sync_single_for_device(mvm->trans->dev,
- block->fw_paging_phys,
- block->fw_paging_size,
- DMA_BIDIRECTIONAL);
-
- IWL_DEBUG_FW(mvm,
- "Paging: copied %d pages in the last block %d\n",
- mvm->num_of_pages_in_last_blk, idx);
- }
-
- return 0;
-}
-
void iwl_mvm_mfu_assert_dump_notif(struct iwl_mvm *mvm,
struct iwl_rx_cmd_buffer *rxb)
{
@@ -293,178 +165,6 @@ void iwl_mvm_mfu_assert_dump_notif(struct iwl_mvm *mvm,
le32_to_cpu(dump_data[i]));
}
-static int iwl_alloc_fw_paging_mem(struct iwl_mvm *mvm,
- const struct fw_img *image)
-{
- struct page *block;
- dma_addr_t phys = 0;
- int blk_idx, order, num_of_pages, size, dma_enabled;
-
- if (mvm->fw_paging_db[0].fw_paging_block)
- return 0;
-
- dma_enabled = is_device_dma_capable(mvm->trans->dev);
-
- /* ensure BLOCK_2_EXP_SIZE is power of 2 of PAGING_BLOCK_SIZE */
- BUILD_BUG_ON(BIT(BLOCK_2_EXP_SIZE) != PAGING_BLOCK_SIZE);
-
- num_of_pages = image->paging_mem_size / FW_PAGING_SIZE;
- mvm->num_of_paging_blk =
- DIV_ROUND_UP(num_of_pages, NUM_OF_PAGE_PER_GROUP);
- mvm->num_of_pages_in_last_blk =
- num_of_pages -
- NUM_OF_PAGE_PER_GROUP * (mvm->num_of_paging_blk - 1);
-
- IWL_DEBUG_FW(mvm,
- "Paging: allocating mem for %d paging blocks, each block holds 8 pages, last block holds %d pages\n",
- mvm->num_of_paging_blk,
- mvm->num_of_pages_in_last_blk);
-
- /*
- * Allocate CSS and paging blocks in dram.
- */
- for (blk_idx = 0; blk_idx < mvm->num_of_paging_blk + 1; blk_idx++) {
- /* For CSS allocate 4KB, for others PAGING_BLOCK_SIZE (32K) */
- size = blk_idx ? PAGING_BLOCK_SIZE : FW_PAGING_SIZE;
- order = get_order(size);
- block = alloc_pages(GFP_KERNEL, order);
- if (!block) {
- /* free all the previous pages since we failed */
- iwl_free_fw_paging(mvm);
- return -ENOMEM;
- }
-
- mvm->fw_paging_db[blk_idx].fw_paging_block = block;
- mvm->fw_paging_db[blk_idx].fw_paging_size = size;
-
- if (dma_enabled) {
- phys = dma_map_page(mvm->trans->dev, block, 0,
- PAGE_SIZE << order,
- DMA_BIDIRECTIONAL);
- if (dma_mapping_error(mvm->trans->dev, phys)) {
- /*
- * free the previous pages and the current one
- * since we failed to map_page.
- */
- iwl_free_fw_paging(mvm);
- return -ENOMEM;
- }
- mvm->fw_paging_db[blk_idx].fw_paging_phys = phys;
- } else {
- mvm->fw_paging_db[blk_idx].fw_paging_phys =
- PAGING_ADDR_SIG |
- blk_idx << BLOCK_2_EXP_SIZE;
- }
-
- if (!blk_idx)
- IWL_DEBUG_FW(mvm,
- "Paging: allocated 4K(CSS) bytes (order %d) for firmware paging.\n",
- order);
- else
- IWL_DEBUG_FW(mvm,
- "Paging: allocated 32K bytes (order %d) for firmware paging.\n",
- order);
- }
-
- return 0;
-}
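
A worked example of the block math above, assuming a 480KB paging image,
FW_PAGING_SIZE of 4KB and NUM_OF_PAGE_PER_GROUP of 8:

	/*
	 *   num_of_pages             = 491520 / 4096        = 120
	 *   num_of_paging_blk        = DIV_ROUND_UP(120, 8) = 15
	 *   num_of_pages_in_last_blk = 120 - 8 * (15 - 1)   = 8
	 */
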
-
-static int iwl_save_fw_paging(struct iwl_mvm *mvm,
- const struct fw_img *fw)
-{
- int ret;
-
- ret = iwl_alloc_fw_paging_mem(mvm, fw);
- if (ret)
- return ret;
-
- return iwl_fill_paging_mem(mvm, fw);
-}
-
-/* send paging cmd to FW in case CPU2 has paging image */
-static int iwl_send_paging_cmd(struct iwl_mvm *mvm, const struct fw_img *fw)
-{
- struct iwl_fw_paging_cmd paging_cmd = {
- .flags = cpu_to_le32(PAGING_CMD_IS_SECURED |
- PAGING_CMD_IS_ENABLED |
- (mvm->num_of_pages_in_last_blk <<
- PAGING_CMD_NUM_OF_PAGES_IN_LAST_GRP_POS)),
- .block_size = cpu_to_le32(BLOCK_2_EXP_SIZE),
- .block_num = cpu_to_le32(mvm->num_of_paging_blk),
- };
- int blk_idx;
-
- /* loop over all paging blocks + CSS block */
- for (blk_idx = 0; blk_idx < mvm->num_of_paging_blk + 1; blk_idx++) {
- dma_addr_t addr = mvm->fw_paging_db[blk_idx].fw_paging_phys;
- __le32 phy_addr;
-
- addr = addr >> PAGE_2_EXP_SIZE;
- phy_addr = cpu_to_le32(addr);
- paging_cmd.device_phy_addr[blk_idx] = phy_addr;
- }
-
- return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(FW_PAGING_BLOCK_CMD,
- IWL_ALWAYS_LONG_GROUP, 0),
- 0, sizeof(paging_cmd), &paging_cmd);
-}
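
The loop above packs each DMA address into 32 bits by shifting out the page
offset; assuming PAGE_2_EXP_SIZE is 12 (4KB pages), for example:

	dma_addr_t addr = 0x12345000;
	__le32 packed = cpu_to_le32(addr >> 12); /* 0x12345 */
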
-
-/*
- * Send paging item cmd to FW in case CPU2 has paging image
- */
-static int iwl_trans_get_paging_item(struct iwl_mvm *mvm)
-{
- int ret;
- struct iwl_fw_get_item_cmd fw_get_item_cmd = {
- .item_id = cpu_to_le32(IWL_FW_ITEM_ID_PAGING),
- };
-
- struct iwl_fw_get_item_resp *item_resp;
- struct iwl_host_cmd cmd = {
- .id = iwl_cmd_id(FW_GET_ITEM_CMD, IWL_ALWAYS_LONG_GROUP, 0),
- .flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
- .data = { &fw_get_item_cmd, },
- };
-
- cmd.len[0] = sizeof(struct iwl_fw_get_item_cmd);
-
- ret = iwl_mvm_send_cmd(mvm, &cmd);
- if (ret) {
- IWL_ERR(mvm,
- "Paging: Failed to send FW_GET_ITEM_CMD cmd (err = %d)\n",
- ret);
- return ret;
- }
-
- item_resp = (void *)((struct iwl_rx_packet *)cmd.resp_pkt)->data;
- if (item_resp->item_id != cpu_to_le32(IWL_FW_ITEM_ID_PAGING)) {
- IWL_ERR(mvm,
- "Paging: got wrong item in FW_GET_ITEM_CMD resp (item_id = %u)\n",
- le32_to_cpu(item_resp->item_id));
- ret = -EIO;
- goto exit;
- }
-
- /* Add an extra page for headers */
- mvm->trans->paging_download_buf = kzalloc(PAGING_BLOCK_SIZE +
- FW_PAGING_SIZE,
- GFP_KERNEL);
- if (!mvm->trans->paging_download_buf) {
- ret = -ENOMEM;
- goto exit;
- }
- mvm->trans->paging_req_addr = le32_to_cpu(item_resp->item_val);
- mvm->trans->paging_db = mvm->fw_paging_db;
- IWL_DEBUG_FW(mvm,
- "Paging: got paging request address (paging_req_addr 0x%08x)\n",
- mvm->trans->paging_req_addr);
-
-exit:
- iwl_free_resp(&cmd);
-
- return ret;
-}
-
static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait,
struct iwl_rx_packet *pkt, void *data)
{
@@ -544,48 +244,6 @@ static bool iwl_wait_phy_db_entry(struct iwl_notif_wait_data *notif_wait,
return false;
}
-static int iwl_mvm_init_paging(struct iwl_mvm *mvm)
-{
- const struct fw_img *fw = &mvm->fw->img[mvm->cur_ucode];
- int ret;
-
- /*
- * Configure and operate fw paging mechanism.
- * The driver configures the paging flow only once.
- * The CPU2 paging image is included in the IWL_UCODE_INIT image.
- */
- if (!fw->paging_mem_size)
- return 0;
-
- /*
- * When DMA is not enabled, the driver needs to copy / write
- * the downloaded / uploaded page to / from the SMEM.
- * This gets the location where the pages are stored.
- */
- if (!is_device_dma_capable(mvm->trans->dev)) {
- ret = iwl_trans_get_paging_item(mvm);
- if (ret) {
- IWL_ERR(mvm, "failed to get FW paging item\n");
- return ret;
- }
- }
-
- ret = iwl_save_fw_paging(mvm, fw);
- if (ret) {
- IWL_ERR(mvm, "failed to save the FW paging image\n");
- return ret;
- }
-
- ret = iwl_send_paging_cmd(mvm, fw);
- if (ret) {
- IWL_ERR(mvm, "failed to send the paging cmd\n");
- iwl_free_fw_paging(mvm);
- return ret;
- }
-
- return 0;
-}
static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm,
enum iwl_ucode_type ucode_type)
{
@@ -593,7 +251,7 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm,
struct iwl_mvm_alive_data alive_data;
const struct fw_img *fw;
int ret, i;
- enum iwl_ucode_type old_type = mvm->cur_ucode;
+ enum iwl_ucode_type old_type = mvm->fwrt.cur_fw_img;
static const u16 alive_cmd[] = { MVM_ALIVE };
struct iwl_sf_region st_fwrd_space;
@@ -606,7 +264,7 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm,
fw = iwl_get_ucode_image(mvm->fw, ucode_type);
if (WARN_ON(!fw))
return -EINVAL;
- mvm->cur_ucode = ucode_type;
+ iwl_fw_set_current_image(&mvm->fwrt, ucode_type);
clear_bit(IWL_MVM_STATUS_FIRMWARE_RUNNING, &mvm->status);
iwl_init_notification_wait(&mvm->notif_wait, &alive_wait,
@@ -615,7 +273,7 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm,
ret = iwl_trans_start_fw(mvm->trans, fw, ucode_type == IWL_UCODE_INIT);
if (ret) {
- mvm->cur_ucode = old_type;
+ iwl_fw_set_current_image(&mvm->fwrt, old_type);
iwl_remove_notification(&mvm->notif_wait, &alive_wait);
return ret;
}
@@ -639,13 +297,13 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm,
"SecBoot CPU1 Status: 0x%x, CPU2 Status: 0x%x\n",
iwl_read_prph(trans, SB_CPU_1_STATUS),
iwl_read_prph(trans, SB_CPU_2_STATUS));
- mvm->cur_ucode = old_type;
+ iwl_fw_set_current_image(&mvm->fwrt, old_type);
return ret;
}
if (!alive_data.valid) {
IWL_ERR(mvm, "Loaded ucode is not valid!\n");
- mvm->cur_ucode = old_type;
+ iwl_fw_set_current_image(&mvm->fwrt, old_type);
return -EIO;
}
@@ -673,10 +331,7 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm,
*/
memset(&mvm->queue_info, 0, sizeof(mvm->queue_info));
- if (iwl_mvm_is_dqa_supported(mvm))
- mvm->queue_info[IWL_MVM_DQA_CMD_QUEUE].hw_queue_refcount = 1;
- else
- mvm->queue_info[IWL_MVM_CMD_QUEUE].hw_queue_refcount = 1;
+ mvm->queue_info[IWL_MVM_DQA_CMD_QUEUE].hw_queue_refcount = 1;
for (i = 0; i < IEEE80211_MAX_QUEUES; i++)
atomic_set(&mvm->mac80211_queue_stop_count[i], 0);
@@ -733,7 +388,7 @@ static int iwl_run_unified_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
}
if (IWL_MVM_PARSE_NVM && read_nvm) {
- ret = iwl_nvm_init(mvm, true);
+ ret = iwl_nvm_init(mvm);
if (ret) {
IWL_ERR(mvm, "Failed to read NVM: %d\n", ret);
goto error;
@@ -757,8 +412,10 @@ static int iwl_run_unified_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
/* Read the NVM only at driver load time, no need to do this twice */
if (!IWL_MVM_PARSE_NVM && read_nvm) {
- ret = iwl_mvm_nvm_get_from_fw(mvm);
- if (ret) {
+ mvm->nvm_data = iwl_fw_get_nvm(&mvm->fwrt);
+ if (IS_ERR(mvm->nvm_data)) {
+ ret = PTR_ERR(mvm->nvm_data);
+ mvm->nvm_data = NULL;
IWL_ERR(mvm, "Failed to read NVM: %d\n", ret);
return ret;
}
@@ -774,7 +431,7 @@ error:
static int iwl_send_phy_cfg_cmd(struct iwl_mvm *mvm)
{
struct iwl_phy_cfg_cmd phy_cfg_cmd;
- enum iwl_ucode_type ucode_type = mvm->cur_ucode;
+ enum iwl_ucode_type ucode_type = mvm->fwrt.cur_fw_img;
/* Set parameters */
phy_cfg_cmd.phy_cfg = cpu_to_le32(iwl_mvm_get_phy_config(mvm));
@@ -799,7 +456,7 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
};
int ret;
- if (iwl_mvm_has_new_tx_api(mvm))
+ if (iwl_mvm_has_unified_ucode(mvm))
return iwl_run_unified_mvm_ucode(mvm, true);
lockdep_assert_held(&mvm->mutex);
@@ -818,22 +475,21 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
ret = iwl_mvm_load_ucode_wait_alive(mvm, IWL_UCODE_INIT);
if (ret) {
IWL_ERR(mvm, "Failed to start INIT ucode: %d\n", ret);
- goto error;
+ goto remove_notif;
}
if (mvm->cfg->device_family < IWL_DEVICE_FAMILY_8000) {
ret = iwl_mvm_send_bt_init_conf(mvm);
if (ret)
- goto error;
+ goto remove_notif;
}
/* Read the NVM only at driver load time, no need to do this twice */
if (read_nvm) {
- /* Read nvm */
- ret = iwl_nvm_init(mvm, true);
+ ret = iwl_nvm_init(mvm);
if (ret) {
IWL_ERR(mvm, "Failed to read NVM: %d\n", ret);
- goto error;
+ goto remove_notif;
}
}
@@ -841,8 +497,7 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
if (mvm->nvm_file_name)
iwl_mvm_load_nvm_to_nic(mvm);
- ret = iwl_nvm_check_version(mvm->nvm_data, mvm->trans);
- WARN_ON(ret);
+ WARN_ON(iwl_nvm_check_version(mvm->nvm_data, mvm->trans));
/*
* abort after reading the nvm in case RF Kill is on, we will complete
@@ -851,9 +506,7 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
if (iwl_mvm_is_radio_hw_killed(mvm)) {
IWL_DEBUG_RF_KILL(mvm,
"jump over all phy activities due to RF kill\n");
- iwl_remove_notification(&mvm->notif_wait, &calib_wait);
- ret = 1;
- goto out;
+ goto remove_notif;
}
mvm->calibrating = true;
@@ -861,17 +514,13 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
/* Send TX valid antennas before triggering calibrations */
ret = iwl_send_tx_ant_cfg(mvm, iwl_mvm_get_valid_tx_ant(mvm));
if (ret)
- goto error;
+ goto remove_notif;
- /*
- * Send phy configurations command to init uCode
- * to start the 16.0 uCode init image internal calibrations.
- */
ret = iwl_send_phy_cfg_cmd(mvm);
if (ret) {
IWL_ERR(mvm, "Failed to run INIT calibrations: %d\n",
ret);
- goto error;
+ goto remove_notif;
}
/*
@@ -879,15 +528,21 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
* just wait for the calibration complete notification.
*/
ret = iwl_wait_notification(&mvm->notif_wait, &calib_wait,
- MVM_UCODE_CALIB_TIMEOUT);
+ MVM_UCODE_CALIB_TIMEOUT);
+ if (!ret)
+ goto out;
- if (ret && iwl_mvm_is_radio_hw_killed(mvm)) {
+ if (iwl_mvm_is_radio_hw_killed(mvm)) {
IWL_DEBUG_RF_KILL(mvm, "RFKILL while calibrating.\n");
- ret = 1;
+ ret = 0;
+ } else {
+ IWL_ERR(mvm, "Failed to run INIT calibrations: %d\n",
+ ret);
}
+
goto out;
-error:
+remove_notif:
iwl_remove_notification(&mvm->notif_wait, &calib_wait);
out:
mvm->calibrating = false;
@@ -910,95 +565,6 @@ out:
return ret;
}
-static void iwl_mvm_parse_shared_mem_a000(struct iwl_mvm *mvm,
- struct iwl_rx_packet *pkt)
-{
- struct iwl_shared_mem_cfg *mem_cfg = (void *)pkt->data;
- int i, lmac;
- int lmac_num = le32_to_cpu(mem_cfg->lmac_num);
-
- if (WARN_ON(lmac_num > ARRAY_SIZE(mem_cfg->lmac_smem)))
- return;
-
- mvm->smem_cfg.num_lmacs = lmac_num;
- mvm->smem_cfg.num_txfifo_entries =
- ARRAY_SIZE(mem_cfg->lmac_smem[0].txfifo_size);
- mvm->smem_cfg.rxfifo2_size = le32_to_cpu(mem_cfg->rxfifo2_size);
-
- for (lmac = 0; lmac < lmac_num; lmac++) {
- struct iwl_shared_mem_lmac_cfg *lmac_cfg =
- &mem_cfg->lmac_smem[lmac];
-
- for (i = 0; i < ARRAY_SIZE(lmac_cfg->txfifo_size); i++)
- mvm->smem_cfg.lmac[lmac].txfifo_size[i] =
- le32_to_cpu(lmac_cfg->txfifo_size[i]);
- mvm->smem_cfg.lmac[lmac].rxfifo1_size =
- le32_to_cpu(lmac_cfg->rxfifo1_size);
- }
-}
-
-static void iwl_mvm_parse_shared_mem(struct iwl_mvm *mvm,
- struct iwl_rx_packet *pkt)
-{
- struct iwl_shared_mem_cfg_v2 *mem_cfg = (void *)pkt->data;
- int i;
-
- mvm->smem_cfg.num_lmacs = 1;
-
- mvm->smem_cfg.num_txfifo_entries = ARRAY_SIZE(mem_cfg->txfifo_size);
- for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++)
- mvm->smem_cfg.lmac[0].txfifo_size[i] =
- le32_to_cpu(mem_cfg->txfifo_size[i]);
-
- mvm->smem_cfg.lmac[0].rxfifo1_size =
- le32_to_cpu(mem_cfg->rxfifo_size[0]);
- mvm->smem_cfg.rxfifo2_size = le32_to_cpu(mem_cfg->rxfifo_size[1]);
-
- /* new API has more data, from the rxfifo_addr field onward */
- if (fw_has_capa(&mvm->fw->ucode_capa,
- IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) {
- BUILD_BUG_ON(sizeof(mvm->smem_cfg.internal_txfifo_size) !=
- sizeof(mem_cfg->internal_txfifo_size));
-
- for (i = 0;
- i < ARRAY_SIZE(mvm->smem_cfg.internal_txfifo_size);
- i++)
- mvm->smem_cfg.internal_txfifo_size[i] =
- le32_to_cpu(mem_cfg->internal_txfifo_size[i]);
- }
-}
-
-static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm)
-{
- struct iwl_host_cmd cmd = {
- .flags = CMD_WANT_SKB,
- .data = { NULL, },
- .len = { 0, },
- };
- struct iwl_rx_packet *pkt;
-
- lockdep_assert_held(&mvm->mutex);
-
- if (fw_has_capa(&mvm->fw->ucode_capa,
- IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG))
- cmd.id = iwl_cmd_id(SHARED_MEM_CFG_CMD, SYSTEM_GROUP, 0);
- else
- cmd.id = SHARED_MEM_CFG;
-
- if (WARN_ON(iwl_mvm_send_cmd(mvm, &cmd)))
- return;
-
- pkt = cmd.resp_pkt;
- if (iwl_mvm_has_new_tx_api(mvm))
- iwl_mvm_parse_shared_mem_a000(mvm, pkt);
- else
- iwl_mvm_parse_shared_mem(mvm, pkt);
-
- IWL_DEBUG_INFO(mvm, "SHARED MEM CFG: got memory offsets/sizes\n");
-
- iwl_free_resp(&cmd);
-}
-
static int iwl_mvm_config_ltr(struct iwl_mvm *mvm)
{
struct iwl_ltr_config_cmd cmd = {
@@ -1048,8 +614,8 @@ static union acpi_object *iwl_mvm_sar_find_wifi_pkg(struct iwl_mvm *mvm,
union acpi_object *data,
int data_size)
{
+ union acpi_object *wifi_pkg = NULL;
int i;
- union acpi_object *wifi_pkg;
/*
* We need at least two packages, one for the revision and one
@@ -1431,6 +997,17 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
{
return 0;
}
+
+int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a,
+ int prof_b)
+{
+ return -ENOENT;
+}
+
+int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm)
+{
+ return -ENOENT;
+}
#endif /* CONFIG_ACPI */
static int iwl_mvm_sar_init(struct iwl_mvm *mvm)
@@ -1467,7 +1044,7 @@ static int iwl_mvm_load_rt_fw(struct iwl_mvm *mvm)
{
int ret;
- if (iwl_mvm_has_new_tx_api(mvm))
+ if (iwl_mvm_has_unified_ucode(mvm))
return iwl_run_unified_mvm_ucode(mvm, false);
ret = iwl_run_init_mvm_ucode(mvm, false);
@@ -1477,9 +1054,6 @@ static int iwl_mvm_load_rt_fw(struct iwl_mvm *mvm)
if (ret) {
IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", ret);
- /* this can't happen */
- if (WARN_ON(ret > 0))
- ret = -ERFKILL;
return ret;
}
@@ -1497,7 +1071,7 @@ static int iwl_mvm_load_rt_fw(struct iwl_mvm *mvm)
if (ret)
return ret;
- return iwl_mvm_init_paging(mvm);
+ return iwl_init_paging(&mvm->fwrt, mvm->fwrt.cur_fw_img);
}
int iwl_mvm_up(struct iwl_mvm *mvm)
@@ -1518,24 +1092,24 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
goto error;
}
- iwl_mvm_get_shared_mem_conf(mvm);
+ iwl_get_shared_mem_conf(&mvm->fwrt);
ret = iwl_mvm_sf_update(mvm, NULL, false);
if (ret)
IWL_ERR(mvm, "Failed to initialize Smart Fifo\n");
- mvm->fw_dbg_conf = FW_DBG_INVALID;
+ mvm->fwrt.dump.conf = FW_DBG_INVALID;
/* if we have a destination, assume EARLY START */
if (mvm->fw->dbg_dest_tlv)
- mvm->fw_dbg_conf = FW_DBG_START_FROM_ALIVE;
- iwl_mvm_start_fw_dbg_conf(mvm, FW_DBG_START_FROM_ALIVE);
+ mvm->fwrt.dump.conf = FW_DBG_START_FROM_ALIVE;
+ iwl_fw_start_dbg_conf(&mvm->fwrt, FW_DBG_START_FROM_ALIVE);
ret = iwl_send_tx_ant_cfg(mvm, iwl_mvm_get_valid_tx_ant(mvm));
if (ret)
goto error;
- /* Send phy db control command and then phy db calibration*/
- if (!iwl_mvm_has_new_tx_api(mvm)) {
+ if (!iwl_mvm_has_unified_ucode(mvm)) {
+ /* Send phy db control command and then phy db calibration */
ret = iwl_send_phy_db_data(mvm->phy_db);
if (ret)
goto error;
@@ -1551,7 +1125,8 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
/* Init RSS configuration */
/* TODO - remove a000 disablement when we have RXQ config API */
- if (iwl_mvm_has_new_rx_api(mvm) && !iwl_mvm_has_new_tx_api(mvm)) {
+ if (iwl_mvm_has_new_rx_api(mvm) &&
+ mvm->trans->cfg->device_family != IWL_DEVICE_FAMILY_A000) {
ret = iwl_send_rss_cfg_cmd(mvm);
if (ret) {
IWL_ERR(mvm, "Failed to configure RSS queues: %d\n",
@@ -1569,14 +1144,9 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
/* reset quota debouncing buffer - 0xff will yield invalid data */
memset(&mvm->last_quota_cmd, 0xff, sizeof(mvm->last_quota_cmd));
- /* Enable DQA-mode if required */
- if (iwl_mvm_is_dqa_supported(mvm)) {
- ret = iwl_mvm_send_dqa_cmd(mvm);
- if (ret)
- goto error;
- } else {
- IWL_DEBUG_FW(mvm, "Working in non-DQA mode\n");
- }
+ ret = iwl_mvm_send_dqa_cmd(mvm);
+ if (ret)
+ goto error;
/* Add auxiliary station for scanning */
ret = iwl_mvm_add_aux_sta(mvm);
@@ -1611,7 +1181,12 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
}
/* TODO: read the budget from BIOS / Platform NVM */
- if (iwl_mvm_is_ctdp_supported(mvm) && mvm->cooling_dev.cur_state > 0) {
+
+ /*
+ * In case there is no budget from BIOS / Platform NVM the default
+ * budget should be 2000mW (cooling state 0).
+ */
+ if (iwl_mvm_is_ctdp_supported(mvm)) {
ret = iwl_mvm_ctdp_command(mvm, CTDP_CMD_OPERATION_START,
mvm->cooling_dev.cur_state);
if (ret)
@@ -1657,6 +1232,8 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
if (ret)
goto error;
+ iwl_mvm_leds_sync(mvm);
+
IWL_DEBUG_INFO(mvm, "RT uCode started.\n");
return 0;
error:
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/led.c b/drivers/net/wireless/intel/iwlwifi/mvm/led.c
index 3cac427..005e2e7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/led.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/led.c
@@ -6,6 +6,7 @@
* GPL LICENSE SUMMARY
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -31,6 +32,7 @@
* BSD LICENSE
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2017 Intel Deutschland GmbH
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -66,26 +68,45 @@
#include "iwl-csr.h"
#include "mvm.h"
-/* Set led register on */
-static void iwl_mvm_led_enable(struct iwl_mvm *mvm)
+static void iwl_mvm_send_led_fw_cmd(struct iwl_mvm *mvm, bool on)
{
- iwl_write32(mvm->trans, CSR_LED_REG, CSR_LED_REG_TURN_ON);
+ struct iwl_led_cmd led_cmd = {
+ .status = cpu_to_le32(on),
+ };
+ struct iwl_host_cmd cmd = {
+ .id = WIDE_ID(LONG_GROUP, LEDS_CMD),
+ .len = { sizeof(led_cmd), },
+ .data = { &led_cmd, },
+ .flags = CMD_ASYNC,
+ };
+ int err;
+
+ if (!iwl_mvm_firmware_running(mvm))
+ return;
+
+ err = iwl_mvm_send_cmd(mvm, &cmd);
+
+ if (err)
+ IWL_WARN(mvm, "LED command failed: %d\n", err);
}
-/* Set led register off */
-static void iwl_mvm_led_disable(struct iwl_mvm *mvm)
+static void iwl_mvm_led_set(struct iwl_mvm *mvm, bool on)
{
- iwl_write32(mvm->trans, CSR_LED_REG, CSR_LED_REG_TURN_OFF);
+ if (mvm->cfg->device_family >= IWL_DEVICE_FAMILY_8000) {
+ iwl_mvm_send_led_fw_cmd(mvm, on);
+ return;
+ }
+
+ iwl_write32(mvm->trans, CSR_LED_REG,
+ on ? CSR_LED_REG_TURN_ON : CSR_LED_REG_TURN_OFF);
}
static void iwl_led_brightness_set(struct led_classdev *led_cdev,
enum led_brightness brightness)
{
struct iwl_mvm *mvm = container_of(led_cdev, struct iwl_mvm, led);
- if (brightness > 0)
- iwl_mvm_led_enable(mvm);
- else
- iwl_mvm_led_disable(mvm);
+
+ iwl_mvm_led_set(mvm, brightness > 0);
}
int iwl_mvm_leds_init(struct iwl_mvm *mvm)
@@ -127,10 +148,24 @@ int iwl_mvm_leds_init(struct iwl_mvm *mvm)
return 0;
}
+void iwl_mvm_leds_sync(struct iwl_mvm *mvm)
+{
+ if (!(mvm->init_status & IWL_MVM_INIT_STATUS_LEDS_INIT_COMPLETE))
+ return;
+
+ /*
+ * if we control through the register, we're doing it
+ * even when the firmware isn't up, so no need to sync
+ */
+ if (mvm->cfg->device_family < IWL_DEVICE_FAMILY_8000)
+ return;
+
+ iwl_mvm_led_set(mvm, mvm->led.brightness > 0);
+}
+
void iwl_mvm_leds_exit(struct iwl_mvm *mvm)
{
- if (iwlwifi_mod_params.led_mode == IWL_LED_DISABLE ||
- !(mvm->init_status & IWL_MVM_INIT_STATUS_LEDS_INIT_COMPLETE))
+ if (!(mvm->init_status & IWL_MVM_INIT_STATUS_LEDS_INIT_COMPLETE))
return;
led_classdev_unregister(&mvm->led);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index dc631b2..a2bf530 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -7,7 +7,7 @@
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
- * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
+ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -34,7 +34,7 @@
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
- * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
+ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -72,7 +72,6 @@
#include "fw-api.h"
#include "mvm.h"
#include "time-event.h"
-#include "fw-dbg.h"
const u8 iwl_mvm_ac_to_tx_fifo[] = {
IWL_MVM_TX_FIFO_VO,
@@ -81,6 +80,13 @@ const u8 iwl_mvm_ac_to_tx_fifo[] = {
IWL_MVM_TX_FIFO_BK,
};
+const u8 iwl_mvm_ac_to_gen2_tx_fifo[] = {
+ IWL_GEN2_EDCA_TX_FIFO_VO,
+ IWL_GEN2_EDCA_TX_FIFO_VI,
+ IWL_GEN2_EDCA_TX_FIFO_BE,
+ IWL_GEN2_EDCA_TX_FIFO_BK,
+};
+
struct iwl_mvm_mac_iface_iterator_data {
struct iwl_mvm *mvm;
struct ieee80211_vif *vif;
@@ -235,32 +241,17 @@ static void iwl_mvm_iface_hw_queues_iter(void *_data, u8 *mac,
data->used_hw_queues |= iwl_mvm_mac_get_queues_mask(vif);
}
-static void iwl_mvm_mac_sta_hw_queues_iter(void *_data,
- struct ieee80211_sta *sta)
-{
- struct iwl_mvm_hw_queues_iface_iterator_data *data = _data;
- struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
-
- /* Mark the queues used by the sta */
- data->used_hw_queues |= mvmsta->tfd_queue_msk;
-}
-
unsigned long iwl_mvm_get_used_hw_queues(struct iwl_mvm *mvm,
struct ieee80211_vif *exclude_vif)
{
- u8 sta_id;
struct iwl_mvm_hw_queues_iface_iterator_data data = {
.exclude_vif = exclude_vif,
.used_hw_queues =
BIT(IWL_MVM_OFFCHANNEL_QUEUE) |
- BIT(mvm->aux_queue),
+ BIT(mvm->aux_queue) |
+ BIT(IWL_MVM_DQA_GCAST_QUEUE),
};
- if (iwl_mvm_is_dqa_supported(mvm))
- data.used_hw_queues |= BIT(IWL_MVM_DQA_GCAST_QUEUE);
- else
- data.used_hw_queues |= BIT(IWL_MVM_CMD_QUEUE);
-
lockdep_assert_held(&mvm->mutex);
/* mark all VIF used hw queues */
@@ -268,26 +259,6 @@ unsigned long iwl_mvm_get_used_hw_queues(struct iwl_mvm *mvm,
mvm->hw, IEEE80211_IFACE_ITER_RESUME_ALL,
iwl_mvm_iface_hw_queues_iter, &data);
- /*
- * for DQA, the hw_queue in mac80211 is never really used for
- * real traffic (only the few queue IDs covered above), so
- * we can reuse the real HW queue IDs the stations use
- */
- if (iwl_mvm_is_dqa_supported(mvm))
- return data.used_hw_queues;
-
- /* don't assign the same hw queues as TDLS stations */
- ieee80211_iterate_stations_atomic(mvm->hw,
- iwl_mvm_mac_sta_hw_queues_iter,
- &data);
-
- /*
- * Some TDLS stations may be removed but are in the process of being
- * drained. Don't touch their queues.
- */
- for_each_set_bit(sta_id, mvm->sta_drained, IWL_MVM_STATION_COUNT)
- data.used_hw_queues |= mvm->tfd_drained[sta_id];
-
return data.used_hw_queues;
}
@@ -338,8 +309,7 @@ void iwl_mvm_mac_ctxt_recalc_tsf_id(struct iwl_mvm *mvm,
NUM_TSF_IDS);
}
-static int iwl_mvm_mac_ctxt_allocate_resources(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif)
+int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
struct iwl_mvm_mac_iface_iterator_data data = {
@@ -355,6 +325,8 @@ static int iwl_mvm_mac_ctxt_allocate_resources(struct iwl_mvm *mvm,
int ret, i, queue_limit;
unsigned long used_hw_queues;
+ lockdep_assert_held(&mvm->mutex);
+
/*
* Allocate a MAC ID and a TSF for this MAC, along with the queues
* and other resources.
@@ -438,19 +410,14 @@ static int iwl_mvm_mac_ctxt_allocate_resources(struct iwl_mvm *mvm,
return 0;
}
- if (iwl_mvm_is_dqa_supported(mvm)) {
- /*
- * queues in mac80211 are almost entirely independent of
- * the ones here - no real limit
- */
- queue_limit = IEEE80211_MAX_QUEUES;
- BUILD_BUG_ON(IEEE80211_MAX_QUEUES >
- BITS_PER_BYTE *
- sizeof(mvm->hw_queue_to_mac80211[0]));
- } else {
- /* need to not use too many in this case */
- queue_limit = mvm->first_agg_queue;
- }
+ /*
+ * queues in mac80211 are almost entirely independent of
+ * the ones here - no real limit
+ */
+ queue_limit = IEEE80211_MAX_QUEUES;
+ BUILD_BUG_ON(IEEE80211_MAX_QUEUES >
+ BITS_PER_BYTE *
+ sizeof(mvm->hw_queue_to_mac80211[0]));
/*
* Find available queues, and allocate them to the ACs. When in
@@ -472,27 +439,12 @@ static int iwl_mvm_mac_ctxt_allocate_resources(struct iwl_mvm *mvm,
/* Allocate the CAB queue for softAP and GO interfaces */
if (vif->type == NL80211_IFTYPE_AP) {
- u8 queue;
-
- if (!iwl_mvm_is_dqa_supported(mvm)) {
- queue = find_first_zero_bit(&used_hw_queues,
- mvm->first_agg_queue);
-
- if (queue >= mvm->first_agg_queue) {
- IWL_ERR(mvm, "Failed to allocate cab queue\n");
- ret = -EIO;
- goto exit_fail;
- }
- } else {
- queue = IWL_MVM_DQA_GCAST_QUEUE;
- }
-
/*
* For TVQM this will be overwritten later with the FW assigned
* queue value (when the queue is enabled).
*/
- mvmvif->cab_queue = queue;
- vif->cab_queue = queue;
+ mvmvif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE;
+ vif->cab_queue = IWL_MVM_DQA_GCAST_QUEUE;
} else {
vif->cab_queue = IEEE80211_INVAL_HW_QUEUE;
}
@@ -513,78 +465,6 @@ exit_fail:
return ret;
}
-int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
-{
- unsigned int wdg_timeout =
- iwl_mvm_get_wd_timeout(mvm, vif, false, false);
- u32 ac;
- int ret;
-
- lockdep_assert_held(&mvm->mutex);
-
- ret = iwl_mvm_mac_ctxt_allocate_resources(mvm, vif);
- if (ret)
- return ret;
-
- /* If DQA is supported - queues will be enabled when needed */
- if (iwl_mvm_is_dqa_supported(mvm))
- return 0;
-
- switch (vif->type) {
- case NL80211_IFTYPE_P2P_DEVICE:
- iwl_mvm_enable_ac_txq(mvm, IWL_MVM_OFFCHANNEL_QUEUE,
- IWL_MVM_OFFCHANNEL_QUEUE,
- IWL_MVM_TX_FIFO_VO, 0, wdg_timeout);
- break;
- case NL80211_IFTYPE_AP:
- iwl_mvm_enable_ac_txq(mvm, vif->cab_queue, vif->cab_queue,
- IWL_MVM_TX_FIFO_MCAST, 0, wdg_timeout);
- /* fall through */
- default:
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
- iwl_mvm_enable_ac_txq(mvm, vif->hw_queue[ac],
- vif->hw_queue[ac],
- iwl_mvm_ac_to_tx_fifo[ac], 0,
- wdg_timeout);
- break;
- }
-
- return 0;
-}
-
-void iwl_mvm_mac_ctxt_release(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
-{
- int ac;
-
- lockdep_assert_held(&mvm->mutex);
-
- /*
- * If DQA is supported - queues were already disabled, since in
- * DQA-mode the queues are a property of the STA and not of the
- * vif, and at this point the STA was already deleted
- */
- if (iwl_mvm_is_dqa_supported(mvm))
- return;
-
- switch (vif->type) {
- case NL80211_IFTYPE_P2P_DEVICE:
- iwl_mvm_disable_txq(mvm, IWL_MVM_OFFCHANNEL_QUEUE,
- IWL_MVM_OFFCHANNEL_QUEUE,
- IWL_MAX_TID_COUNT, 0);
-
- break;
- case NL80211_IFTYPE_AP:
- iwl_mvm_disable_txq(mvm, vif->cab_queue, vif->cab_queue,
- IWL_MAX_TID_COUNT, 0);
- /* fall through */
- default:
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
- iwl_mvm_disable_txq(mvm, vif->hw_queue[ac],
- vif->hw_queue[ac],
- IWL_MAX_TID_COUNT, 0);
- }
-}
-
static void iwl_mvm_ack_rates(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
enum nl80211_band band,
@@ -775,7 +655,7 @@ static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm,
cmd->filter_flags = cpu_to_le32(MAC_FILTER_ACCEPT_GRP);
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
- u8 txf = iwl_mvm_ac_to_tx_fifo[i];
+ u8 txf = iwl_mvm_mac_ac_to_tx_fifo(mvm, i);
cmd->ac[txf].cw_min =
cpu_to_le16(mvmvif->queue_params[i].cw_min);
@@ -908,18 +788,12 @@ static int iwl_mvm_mac_ctxt_cmd_listener(struct iwl_mvm *mvm,
{
struct iwl_mac_ctx_cmd cmd = {};
u32 tfd_queue_msk = 0;
- int ret, i;
+ int ret;
WARN_ON(vif->type != NL80211_IFTYPE_MONITOR);
iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action);
- if (!iwl_mvm_is_dqa_supported(mvm)) {
- for (i = 0; i < IEEE80211_NUM_ACS; i++)
- if (vif->hw_queue[i] != IEEE80211_INVAL_HW_QUEUE)
- tfd_queue_msk |= BIT(vif->hw_queue[i]);
- }
-
cmd.filter_flags = cpu_to_le32(MAC_FILTER_IN_PROMISC |
MAC_FILTER_IN_CONTROL_AND_MGMT |
MAC_FILTER_IN_BEACON |
@@ -1049,83 +923,40 @@ static u32 iwl_mvm_find_ie_offset(u8 *beacon, u8 eid, u32 frame_size)
return ie - beacon;
}
-static int iwl_mvm_mac_ctxt_send_beacon(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif,
- struct sk_buff *beacon)
+static u8 iwl_mvm_mac_ctxt_get_lowest_rate(struct ieee80211_tx_info *info,
+ struct ieee80211_vif *vif)
{
- struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- struct iwl_host_cmd cmd = {
- .id = BEACON_TEMPLATE_CMD,
- .flags = CMD_ASYNC,
- };
- union {
- struct iwl_mac_beacon_cmd_v6 beacon_cmd_v6;
- struct iwl_mac_beacon_cmd_v7 beacon_cmd;
- } u = {};
- struct iwl_mac_beacon_cmd beacon_cmd = {};
- struct ieee80211_tx_info *info;
- u32 beacon_skb_len;
- u32 rate, tx_flags;
+ u8 rate;
- if (WARN_ON(!beacon))
- return -EINVAL;
+ if (info->band == NL80211_BAND_5GHZ || vif->p2p)
+ rate = IWL_FIRST_OFDM_RATE;
+ else
+ rate = IWL_FIRST_CCK_RATE;
- beacon_skb_len = beacon->len;
-
- if (fw_has_capa(&mvm->fw->ucode_capa,
- IWL_UCODE_TLV_CAPA_CSA_AND_TBTT_OFFLOAD)) {
- u32 csa_offset, ecsa_offset;
-
- csa_offset = iwl_mvm_find_ie_offset(beacon->data,
- WLAN_EID_CHANNEL_SWITCH,
- beacon_skb_len);
- ecsa_offset =
- iwl_mvm_find_ie_offset(beacon->data,
- WLAN_EID_EXT_CHANSWITCH_ANN,
- beacon_skb_len);
-
- if (iwl_mvm_has_new_tx_api(mvm)) {
- beacon_cmd.data.template_id =
- cpu_to_le32((u32)mvmvif->id);
- beacon_cmd.data.ecsa_offset = cpu_to_le32(ecsa_offset);
- beacon_cmd.data.csa_offset = cpu_to_le32(csa_offset);
- beacon_cmd.byte_cnt = cpu_to_le16((u16)beacon_skb_len);
- if (vif->type == NL80211_IFTYPE_AP)
- iwl_mvm_mac_ctxt_set_tim(mvm,
- &beacon_cmd.data.tim_idx,
- &beacon_cmd.data.tim_size,
- beacon->data,
- beacon_skb_len);
- cmd.len[0] = sizeof(beacon_cmd);
- cmd.data[0] = &beacon_cmd;
- goto send;
+ return rate;
+}
- } else {
- u.beacon_cmd.data.ecsa_offset =
- cpu_to_le32(ecsa_offset);
- u.beacon_cmd.data.csa_offset = cpu_to_le32(csa_offset);
- cmd.len[0] = sizeof(u.beacon_cmd);
- cmd.data[0] = &u;
- }
- } else {
- cmd.len[0] = sizeof(u.beacon_cmd_v6);
- cmd.data[0] = &u;
- }
+static void iwl_mvm_mac_ctxt_set_tx(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ struct sk_buff *beacon,
+ struct iwl_tx_cmd *tx)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct ieee80211_tx_info *info;
+ u8 rate;
+ u32 tx_flags;
- /* TODO: for now the beacon template id is set to be the mac context id.
- * Might be better to handle it as another resource ... */
- u.beacon_cmd_v6.template_id = cpu_to_le32((u32)mvmvif->id);
info = IEEE80211_SKB_CB(beacon);
/* Set up TX command fields */
- u.beacon_cmd_v6.tx.len = cpu_to_le16((u16)beacon_skb_len);
- u.beacon_cmd_v6.tx.sta_id = mvmvif->bcast_sta.sta_id;
- u.beacon_cmd_v6.tx.life_time = cpu_to_le32(TX_CMD_LIFE_TIME_INFINITE);
+ tx->len = cpu_to_le16((u16)beacon->len);
+ tx->sta_id = mvmvif->bcast_sta.sta_id;
+ tx->life_time = cpu_to_le32(TX_CMD_LIFE_TIME_INFINITE);
tx_flags = TX_CMD_FLG_SEQ_CTL | TX_CMD_FLG_TSF;
tx_flags |=
iwl_mvm_bt_coex_tx_prio(mvm, (void *)beacon->data, info, 0) <<
TX_CMD_FLG_BT_PRIO_POS;
- u.beacon_cmd_v6.tx.tx_flags = cpu_to_le32(tx_flags);
+ tx->tx_flags = cpu_to_le32(tx_flags);
if (!fw_has_capa(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_CAPA_BEACON_ANT_SELECTION)) {
@@ -1134,37 +965,141 @@ static int iwl_mvm_mac_ctxt_send_beacon(struct iwl_mvm *mvm,
mvm->mgmt_last_antenna_idx);
}
- u.beacon_cmd_v6.tx.rate_n_flags =
+ tx->rate_n_flags =
cpu_to_le32(BIT(mvm->mgmt_last_antenna_idx) <<
RATE_MCS_ANT_POS);
- if (info->band == NL80211_BAND_5GHZ || vif->p2p) {
- rate = IWL_FIRST_OFDM_RATE;
- } else {
- rate = IWL_FIRST_CCK_RATE;
- u.beacon_cmd_v6.tx.rate_n_flags |=
- cpu_to_le32(RATE_MCS_CCK_MSK);
- }
- u.beacon_cmd_v6.tx.rate_n_flags |=
- cpu_to_le32(iwl_mvm_mac80211_idx_to_hwrate(rate));
+ rate = iwl_mvm_mac_ctxt_get_lowest_rate(info, vif);
- /* Set up TX beacon command fields */
- if (vif->type == NL80211_IFTYPE_AP)
- iwl_mvm_mac_ctxt_set_tim(mvm, &u.beacon_cmd_v6.tim_idx,
- &u.beacon_cmd_v6.tim_size,
- beacon->data,
- beacon_skb_len);
+ tx->rate_n_flags |= cpu_to_le32(iwl_mvm_mac80211_idx_to_hwrate(rate));
+ if (rate == IWL_FIRST_CCK_RATE)
+ tx->rate_n_flags |= cpu_to_le32(RATE_MCS_CCK_MSK);
+
+}
-send:
- /* Submit command */
+static int iwl_mvm_mac_ctxt_send_beacon_cmd(struct iwl_mvm *mvm,
+ struct sk_buff *beacon,
+ void *data, int len)
+{
+ struct iwl_host_cmd cmd = {
+ .id = BEACON_TEMPLATE_CMD,
+ .flags = CMD_ASYNC,
+ };
+
+ cmd.len[0] = len;
+ cmd.data[0] = data;
cmd.dataflags[0] = 0;
- cmd.len[1] = beacon_skb_len;
+ cmd.len[1] = beacon->len;
cmd.data[1] = beacon->data;
cmd.dataflags[1] = IWL_HCMD_DFL_DUP;
return iwl_mvm_send_cmd(mvm, &cmd);
}
+static int iwl_mvm_mac_ctxt_send_beacon_v6(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ struct sk_buff *beacon)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct iwl_mac_beacon_cmd_v6 beacon_cmd = {};
+
+ iwl_mvm_mac_ctxt_set_tx(mvm, vif, beacon, &beacon_cmd.tx);
+
+ beacon_cmd.template_id = cpu_to_le32((u32)mvmvif->id);
+
+ if (vif->type == NL80211_IFTYPE_AP)
+ iwl_mvm_mac_ctxt_set_tim(mvm, &beacon_cmd.tim_idx,
+ &beacon_cmd.tim_size,
+ beacon->data, beacon->len);
+
+ return iwl_mvm_mac_ctxt_send_beacon_cmd(mvm, beacon, &beacon_cmd,
+ sizeof(beacon_cmd));
+}
+
+static int iwl_mvm_mac_ctxt_send_beacon_v7(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ struct sk_buff *beacon)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct iwl_mac_beacon_cmd_v7 beacon_cmd = {};
+
+ iwl_mvm_mac_ctxt_set_tx(mvm, vif, beacon, &beacon_cmd.tx);
+
+ beacon_cmd.template_id = cpu_to_le32((u32)mvmvif->id);
+
+ if (vif->type == NL80211_IFTYPE_AP)
+ iwl_mvm_mac_ctxt_set_tim(mvm, &beacon_cmd.tim_idx,
+ &beacon_cmd.tim_size,
+ beacon->data, beacon->len);
+
+ beacon_cmd.csa_offset =
+ cpu_to_le32(iwl_mvm_find_ie_offset(beacon->data,
+ WLAN_EID_CHANNEL_SWITCH,
+ beacon->len));
+ beacon_cmd.ecsa_offset =
+ cpu_to_le32(iwl_mvm_find_ie_offset(beacon->data,
+ WLAN_EID_EXT_CHANSWITCH_ANN,
+ beacon->len));
+
+ return iwl_mvm_mac_ctxt_send_beacon_cmd(mvm, beacon, &beacon_cmd,
+ sizeof(beacon_cmd));
+}
+
+static int iwl_mvm_mac_ctxt_send_beacon_v9(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ struct sk_buff *beacon)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(beacon);
+ struct iwl_mac_beacon_cmd beacon_cmd = {};
+ u8 rate = iwl_mvm_mac_ctxt_get_lowest_rate(info, vif);
+ u16 flags;
+
+ flags = iwl_mvm_mac80211_idx_to_hwrate(rate);
+
+ if (rate == IWL_FIRST_CCK_RATE)
+ flags |= IWL_MAC_BEACON_CCK;
+
+ beacon_cmd.flags = cpu_to_le16(flags);
+ beacon_cmd.byte_cnt = cpu_to_le16((u16)beacon->len);
+ beacon_cmd.template_id = cpu_to_le32((u32)mvmvif->id);
+
+ if (vif->type == NL80211_IFTYPE_AP)
+ iwl_mvm_mac_ctxt_set_tim(mvm, &beacon_cmd.tim_idx,
+ &beacon_cmd.tim_size,
+ beacon->data, beacon->len);
+
+ beacon_cmd.csa_offset =
+ cpu_to_le32(iwl_mvm_find_ie_offset(beacon->data,
+ WLAN_EID_CHANNEL_SWITCH,
+ beacon->len));
+ beacon_cmd.ecsa_offset =
+ cpu_to_le32(iwl_mvm_find_ie_offset(beacon->data,
+ WLAN_EID_EXT_CHANSWITCH_ANN,
+ beacon->len));
+
+ return iwl_mvm_mac_ctxt_send_beacon_cmd(mvm, beacon, &beacon_cmd,
+ sizeof(beacon_cmd));
+}
+
+static int iwl_mvm_mac_ctxt_send_beacon(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ struct sk_buff *beacon)
+{
+ if (WARN_ON(!beacon))
+ return -EINVAL;
+
+ if (!fw_has_capa(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_CSA_AND_TBTT_OFFLOAD))
+ return iwl_mvm_mac_ctxt_send_beacon_v6(mvm, vif, beacon);
+
+ if (fw_has_api(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_API_NEW_BEACON_TEMPLATE))
+ return iwl_mvm_mac_ctxt_send_beacon_v9(mvm, vif, beacon);
+
+ return iwl_mvm_mac_ctxt_send_beacon_v7(mvm, vif, beacon);
+}
+
/* The beacon template for the AP/GO/IBSS has changed and needs update */
int iwl_mvm_mac_ctxt_beacon_changed(struct iwl_mvm *mvm,
struct ieee80211_vif *vif)
@@ -1559,12 +1494,14 @@ static void iwl_mvm_beacon_loss_iterator(void *_data, u8 *mac,
/* TODO: implement start trigger */
- if (!iwl_fw_dbg_trigger_check_stop(mvm, vif, trigger))
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt,
+ ieee80211_vif_to_wdev(vif),
+ trigger))
return;
if (rx_missed_bcon_since_rx >= stop_trig_missed_bcon_since_rx ||
rx_missed_bcon >= stop_trig_missed_bcon)
- iwl_mvm_fw_dbg_collect_trig(mvm, trigger, NULL);
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trigger, NULL);
}
void iwl_mvm_rx_missed_beacons_notif(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index ce901be..15f2d82 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -87,7 +87,6 @@
#include "fw/error-dump.h"
#include "iwl-prph.h"
#include "iwl-nvm-parse.h"
-#include "fw-dbg.h"
static const struct ieee80211_iface_limit iwl_mvm_limits[] = {
{
@@ -446,8 +445,18 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
ieee80211_hw_set(hw, NEEDS_UNIQUE_STA_ADDR);
if (iwl_mvm_has_new_rx_api(mvm))
ieee80211_hw_set(hw, SUPPORTS_REORDERING_BUFFER);
- if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_STA_PM_NOTIF))
+
+ if (fw_has_capa(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_CAPA_STA_PM_NOTIF)) {
ieee80211_hw_set(hw, AP_LINK_PS);
+ } else if (WARN_ON(iwl_mvm_has_new_tx_api(mvm))) {
+ /*
+ * we absolutely need this for the new TX API since that comes
+ * with many more queues than the current code can deal with
+ * for station powersave
+ */
+ return -EINVAL;
+ }
if (mvm->trans->num_rx_queues > 1)
ieee80211_hw_set(hw, USES_RSS);
@@ -455,10 +464,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
if (mvm->trans->max_skb_frags)
hw->netdev_features = NETIF_F_HIGHDMA | NETIF_F_SG;
- if (!iwl_mvm_is_dqa_supported(mvm))
- hw->queues = mvm->first_agg_queue;
- else
- hw->queues = IEEE80211_MAX_QUEUES;
+ hw->queues = IEEE80211_MAX_QUEUES;
hw->offchannel_tx_hw_queue = IWL_MVM_OFFCHANNEL_QUEUE;
hw->radiotap_mcs_details |= IEEE80211_RADIOTAP_MCS_HAVE_FEC |
IEEE80211_RADIOTAP_MCS_HAVE_STBC;
@@ -799,17 +805,16 @@ static void iwl_mvm_mac_tx(struct ieee80211_hw *hw,
goto drop;
}
- if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
+ if (info->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
!test_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status) &&
!test_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status))
goto drop;
- /* treat non-bufferable MMPDUs as broadcast if sta is sleeping */
- if (unlikely(info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER &&
- ieee80211_is_mgmt(hdr->frame_control) &&
- !ieee80211_is_deauth(hdr->frame_control) &&
- !ieee80211_is_disassoc(hdr->frame_control) &&
- !ieee80211_is_action(hdr->frame_control)))
+	/* treat non-bufferable MMPDUs on AP/IBSS interfaces as broadcast */
+ if ((info->control.vif->type == NL80211_IFTYPE_AP ||
+ info->control.vif->type == NL80211_IFTYPE_ADHOC) &&
+ ieee80211_is_mgmt(hdr->frame_control) &&
+ !ieee80211_is_bufferable_mmpdu(hdr->frame_control))
sta = NULL;
if (sta) {
@@ -845,11 +850,11 @@ static inline bool iwl_enable_tx_ampdu(const struct iwl_cfg *cfg)
return true;
}
-#define CHECK_BA_TRIGGER(_mvm, _trig, _tid_bm, _tid, _fmt...) \
- do { \
- if (!(le16_to_cpu(_tid_bm) & BIT(_tid))) \
- break; \
- iwl_mvm_fw_dbg_collect_trig(_mvm, _trig, _fmt); \
+#define CHECK_BA_TRIGGER(_mvm, _trig, _tid_bm, _tid, _fmt...) \
+ do { \
+ if (!(le16_to_cpu(_tid_bm) & BIT(_tid))) \
+ break; \
+ iwl_fw_dbg_collect_trig(&(_mvm)->fwrt, _trig, _fmt); \
} while (0)
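Only the collection call changes in this macro; the do { ... } while (0) wrapper stays because it makes the macro expand to a single statement (safe under an unbraced if/else) and lets break act as an early exit. A generic illustration of the idiom, unrelated to the driver:

	#define EXAMPLE_LOG_IF_SET(_bm, _bit, _msg)		\
		do {						\
			if (!((_bm) & BIT(_bit)))		\
				break;	/* early exit */	\
			pr_info("%s\n", _msg);			\
		} while (0)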
static void
@@ -866,7 +871,8 @@ iwl_mvm_ampdu_check_trigger(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_BA);
ba_trig = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, vif, trig))
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt,
+ ieee80211_vif_to_wdev(vif), trig))
return;
switch (action) {
@@ -1029,8 +1035,8 @@ static void iwl_mvm_restart_cleanup(struct iwl_mvm *mvm)
* on D3->D0 transition
*/
if (!test_and_clear_bit(IWL_MVM_STATUS_D3_RECONFIG, &mvm->status)) {
- mvm->fw_dump_desc = &iwl_mvm_dump_desc_assert;
- iwl_mvm_fw_error_dump(mvm);
+ mvm->fwrt.dump.desc = &iwl_dump_desc_assert;
+ iwl_fw_error_dump(&mvm->fwrt);
}
/* cleanup all stale references (scan, roc), but keep the
@@ -1059,9 +1065,7 @@ static void iwl_mvm_restart_cleanup(struct iwl_mvm *mvm)
iwl_mvm_reset_phy_ctxts(mvm);
memset(mvm->fw_key_table, 0, sizeof(mvm->fw_key_table));
- memset(mvm->sta_drained, 0, sizeof(mvm->sta_drained));
memset(mvm->sta_deferred_frames, 0, sizeof(mvm->sta_deferred_frames));
- memset(mvm->tfd_drained, 0, sizeof(mvm->tfd_drained));
memset(&mvm->last_bt_notif, 0, sizeof(mvm->last_bt_notif));
memset(&mvm->last_bt_ci_cmd, 0, sizeof(mvm->last_bt_ci_cmd));
@@ -1072,7 +1076,7 @@ static void iwl_mvm_restart_cleanup(struct iwl_mvm *mvm)
mvm->vif_count = 0;
mvm->rx_ba_sessions = 0;
- mvm->fw_dbg_conf = FW_DBG_INVALID;
+ mvm->fwrt.dump.conf = FW_DBG_INVALID;
/* keep statistics ticking */
iwl_mvm_accu_radio_stats(mvm);
@@ -1255,16 +1259,16 @@ static void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
* Lock and clear the firmware running bit here already, so that
* new commands coming in elsewhere, e.g. from debugfs, will not
* be able to proceed. This is important here because one of those
- * debugfs files causes the fw_dump_wk to be triggered, and if we
+ * debugfs files causes the firmware dump to be triggered, and if we
* don't stop debugfs accesses before canceling that it could be
* retriggered after we flush it but before we've cleared the bit.
*/
clear_bit(IWL_MVM_STATUS_FIRMWARE_RUNNING, &mvm->status);
- cancel_delayed_work_sync(&mvm->fw_dump_wk);
+ iwl_fw_cancel_dump(&mvm->fwrt);
cancel_delayed_work_sync(&mvm->cs_tx_unblock_dwork);
cancel_delayed_work_sync(&mvm->scan_timeout_dwork);
- iwl_mvm_free_fw_dump_desc(mvm);
+ iwl_fw_free_dump_desc(&mvm->fwrt);
mutex_lock(&mvm->mutex);
__iwl_mvm_mac_stop(mvm);
@@ -1370,17 +1374,15 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
goto out_release;
}
- if (iwl_mvm_is_dqa_supported(mvm)) {
- /*
- * Only queue for this station is the mcast queue,
- * which shouldn't be in TFD mask anyway
- */
- ret = iwl_mvm_allocate_int_sta(mvm, &mvmvif->mcast_sta,
- 0, vif->type,
- IWL_STA_MULTICAST);
- if (ret)
- goto out_release;
- }
+ /*
+	 * The only queue for this station is the mcast queue,
+ * which shouldn't be in TFD mask anyway
+ */
+ ret = iwl_mvm_allocate_int_sta(mvm, &mvmvif->mcast_sta,
+ 0, vif->type,
+ IWL_STA_MULTICAST);
+ if (ret)
+ goto out_release;
iwl_mvm_vif_dbgfs_register(mvm, vif);
goto out_unlock;
@@ -1426,7 +1428,7 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
if (ret)
goto out_unref_phy;
- ret = iwl_mvm_add_bcast_sta(mvm, vif);
+ ret = iwl_mvm_add_p2p_bcast_sta(mvm, vif);
if (ret)
goto out_unbind;
@@ -1454,8 +1456,6 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
out_release:
if (vif->type != NL80211_IFTYPE_P2P_DEVICE)
mvm->vif_count--;
-
- iwl_mvm_mac_ctxt_release(mvm, vif);
out_unlock:
mutex_unlock(&mvm->mutex);
@@ -1467,40 +1467,6 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
static void iwl_mvm_prepare_mac_removal(struct iwl_mvm *mvm,
struct ieee80211_vif *vif)
{
- u32 tfd_msk = iwl_mvm_mac_get_queues_mask(vif);
-
- if (tfd_msk && !iwl_mvm_is_dqa_supported(mvm)) {
- /*
- * mac80211 first removes all the stations of the vif and
- * then removes the vif. When it removes a station it also
- * flushes the AMPDU session. So by now, all the AMPDU sessions
- * of all the stations of this vif are closed, and the queues
- * of these AMPDU sessions are properly closed.
- * We still need to take care of the shared queues of the vif.
- * Flush them here.
- * For DQA mode there is no need - broacast and multicast queue
- * are flushed separately.
- */
- mutex_lock(&mvm->mutex);
- iwl_mvm_flush_tx_path(mvm, tfd_msk, 0);
- mutex_unlock(&mvm->mutex);
-
- /*
- * There are transports that buffer a few frames in the host.
- * For these, the flush above isn't enough since while we were
- * flushing, the transport might have sent more frames to the
- * device. To solve this, wait here until the transport is
- * empty. Technically, this could have replaced the flush
- * above, but flush is much faster than draining. So flush
- * first, and drain to make sure we have no frames in the
- * transport anymore.
- * If a station still had frames on the shared queues, it is
- * already marked as draining, so to complete the draining, we
- * just need to wait until the transport is empty.
- */
- iwl_trans_wait_tx_queues_empty(mvm->trans, tfd_msk);
- }
-
if (vif->type == NL80211_IFTYPE_P2P_DEVICE) {
/*
* Flush the ROC worker which will flush the OFFCHANNEL queue.
@@ -1508,14 +1474,6 @@ static void iwl_mvm_prepare_mac_removal(struct iwl_mvm *mvm,
* queue are sent in ROC session.
*/
flush_work(&mvm->roc_done_wk);
- } else {
- /*
- * By now, all the AC queues are empty. The AGG queues are
- * empty too. We already got all the Tx responses for all the
- * packets in the queues. The drain work can have been
- * triggered. Flush it.
- */
- flush_work(&mvm->sta_drained_wk);
}
}
@@ -1556,7 +1514,7 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw,
if (vif->type == NL80211_IFTYPE_P2P_DEVICE) {
mvm->p2p_device_vif = NULL;
- iwl_mvm_rm_bcast_sta(mvm, vif);
+ iwl_mvm_rm_p2p_bcast_sta(mvm, vif);
iwl_mvm_binding_remove_vif(mvm, vif);
iwl_mvm_phy_ctxt_unref(mvm, mvmvif->phy_ctxt);
mvmvif->phy_ctxt = NULL;
@@ -1569,7 +1527,6 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw,
iwl_mvm_mac_ctxt_remove(mvm, vif);
out_release:
- iwl_mvm_mac_ctxt_release(mvm, vif);
mutex_unlock(&mvm->mutex);
}
@@ -2067,8 +2024,7 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm,
* We received a beacon from the associated AP so
* remove the session protection.
*/
- iwl_mvm_remove_time_event(mvm, mvmvif,
- &mvmvif->time_event_data);
+ iwl_mvm_stop_session_protection(mvm, vif);
iwl_mvm_sf_update(mvm, vif, false);
WARN_ON(iwl_mvm_enable_beacon_filter(mvm, vif, 0));
@@ -2405,15 +2361,18 @@ static void __iwl_mvm_mac_sta_notify(struct ieee80211_hw *hw,
unsigned long txqs = 0, tids = 0;
int tid;
+ /*
+ * If we have TVQM then we get too high queue numbers - luckily
+ * we really shouldn't get here with that because such hardware
+ * should have firmware supporting buffer station offload.
+ */
+ if (WARN_ON(iwl_mvm_has_new_tx_api(mvm)))
+ return;
+
spin_lock_bh(&mvmsta->lock);
for (tid = 0; tid < IWL_MAX_TID_COUNT; tid++) {
struct iwl_mvm_tid_data *tid_data = &mvmsta->tid_data[tid];
- if (!iwl_mvm_is_dqa_supported(mvm) &&
- tid_data->state != IWL_AGG_ON &&
- tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA)
- continue;
-
if (tid_data->txq_id == IWL_MVM_INVALID_QUEUE)
continue;
@@ -2427,9 +2386,6 @@ static void __iwl_mvm_mac_sta_notify(struct ieee80211_hw *hw,
switch (cmd) {
case STA_NOTIFY_SLEEP:
- if (atomic_read(&mvm->pending_frames[mvmsta->sta_id]) > 0)
- ieee80211_sta_block_awake(hw, sta, true);
-
for_each_set_bit(tid, &tids, IWL_MAX_TID_COUNT)
ieee80211_sta_set_buffered(sta, tid, true);
@@ -2572,7 +2528,8 @@ iwl_mvm_tdls_check_trigger(struct iwl_mvm *mvm,
trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_TDLS);
tdls_trig = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, vif, trig))
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt,
+ ieee80211_vif_to_wdev(vif), trig))
return;
if (!(tdls_trig->action_bitmap & BIT(action)))
@@ -2582,9 +2539,9 @@ iwl_mvm_tdls_check_trigger(struct iwl_mvm *mvm,
memcmp(tdls_trig->peer, peer_addr, ETH_ALEN) != 0)
return;
- iwl_mvm_fw_dbg_collect_trig(mvm, trig,
- "TDLS event occurred, peer %pM, action %d",
- peer_addr, action);
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trig,
+ "TDLS event occurred, peer %pM, action %d",
+ peer_addr, action);
}
static void iwl_mvm_purge_deferred_tx_frames(struct iwl_mvm *mvm,
@@ -2631,9 +2588,6 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw,
if (WARN_ON_ONCE(!mvmvif->phy_ctxt))
return -EINVAL;
- /* if a STA is being removed, reuse its ID */
- flush_work(&mvm->sta_drained_wk);
-
/*
* If we are in a STA removal flow and in DQA mode:
*
@@ -2648,8 +2602,7 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw,
* make sure the worker is no longer handling frames for this STA.
*/
if (old_state == IEEE80211_STA_NONE &&
- new_state == IEEE80211_STA_NOTEXIST &&
- iwl_mvm_is_dqa_supported(mvm)) {
+ new_state == IEEE80211_STA_NOTEXIST) {
iwl_mvm_purge_deferred_tx_frames(mvm, mvm_sta);
flush_work(&mvm->add_stream_wk);
@@ -3892,7 +3845,9 @@ static int iwl_mvm_pre_channel_switch(struct ieee80211_hw *hw,
IWL_DEBUG_MAC80211(mvm, "pre CSA to freq %d\n",
chsw->chandef.center_freq1);
- iwl_fw_dbg_trigger_simple_stop(mvm, vif, FW_DBG_TRIGGER_CHANNEL_SWITCH);
+ iwl_fw_dbg_trigger_simple_stop(&mvm->fwrt,
+ ieee80211_vif_to_wdev(vif),
+ FW_DBG_TRIGGER_CHANNEL_SWITCH);
switch (vif->type) {
case NL80211_IFTYPE_AP:
@@ -3931,11 +3886,16 @@ static int iwl_mvm_pre_channel_switch(struct ieee80211_hw *hw,
/* Schedule the time event to a bit before beacon 1,
* to make sure we're in the new channel when the
- * GO/AP arrives.
+	 * GO/AP arrives. In case count <= 1, immediately schedule the
+	 * TE (this might result in some packet loss or connection
+	 * loss).
*/
- apply_time = chsw->device_timestamp +
- ((vif->bss_conf.beacon_int * (chsw->count - 1) -
- IWL_MVM_CHANNEL_SWITCH_TIME_CLIENT) * 1024);
+ if (chsw->count <= 1)
+ apply_time = 0;
+ else
+ apply_time = chsw->device_timestamp +
+ ((vif->bss_conf.beacon_int * (chsw->count - 1) -
+ IWL_MVM_CHANNEL_SWITCH_TIME_CLIENT) * 1024);
if (chsw->block_tx)
iwl_mvm_csa_client_absent(mvm, vif);
@@ -4029,8 +3989,7 @@ static void iwl_mvm_mac_flush(struct ieee80211_hw *hw,
return;
/* Make sure we're done with the deferred traffic before flushing */
- if (iwl_mvm_is_dqa_supported(mvm))
- flush_work(&mvm->add_stream_wk);
+ flush_work(&mvm->add_stream_wk);
mutex_lock(&mvm->mutex);
mvmvif = iwl_mvm_vif_from_mac80211(vif);
@@ -4167,11 +4126,11 @@ static void iwl_mvm_event_mlme_callback(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
const struct ieee80211_event *event)
{
-#define CHECK_MLME_TRIGGER(_cnt, _fmt...) \
- do { \
- if ((trig_mlme->_cnt) && --(trig_mlme->_cnt)) \
- break; \
- iwl_mvm_fw_dbg_collect_trig(mvm, trig, _fmt); \
+#define CHECK_MLME_TRIGGER(_cnt, _fmt...) \
+ do { \
+ if ((trig_mlme->_cnt) && --(trig_mlme->_cnt)) \
+ break; \
+ iwl_fw_dbg_collect_trig(&(mvm)->fwrt, trig, _fmt); \
} while (0)
struct iwl_fw_dbg_trigger_tlv *trig;
@@ -4182,7 +4141,8 @@ static void iwl_mvm_event_mlme_callback(struct iwl_mvm *mvm,
trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_MLME);
trig_mlme = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, vif, trig))
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt,
+ ieee80211_vif_to_wdev(vif), trig))
return;
if (event->u.mlme.data == ASSOC_EVENT) {
@@ -4223,16 +4183,17 @@ static void iwl_mvm_event_bar_rx_callback(struct iwl_mvm *mvm,
trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_BA);
ba_trig = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, vif, trig))
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt,
+ ieee80211_vif_to_wdev(vif), trig))
return;
if (!(le16_to_cpu(ba_trig->rx_bar) & BIT(event->u.ba.tid)))
return;
- iwl_mvm_fw_dbg_collect_trig(mvm, trig,
- "BAR received from %pM, tid %d, ssn %d",
- event->u.ba.sta->addr, event->u.ba.tid,
- event->u.ba.ssn);
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trig,
+ "BAR received from %pM, tid %d, ssn %d",
+ event->u.ba.sta->addr, event->u.ba.tid,
+ event->u.ba.ssn);
}
static void
@@ -4248,15 +4209,16 @@ iwl_mvm_event_frame_timeout_callback(struct iwl_mvm *mvm,
trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_BA);
ba_trig = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, vif, trig))
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt,
+ ieee80211_vif_to_wdev(vif), trig))
return;
if (!(le16_to_cpu(ba_trig->frame_timeout) & BIT(event->u.ba.tid)))
return;
- iwl_mvm_fw_dbg_collect_trig(mvm, trig,
- "Frame from %pM timed out, tid %d",
- event->u.ba.sta->addr, event->u.ba.tid);
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trig,
+ "Frame from %pM timed out, tid %d",
+ event->u.ba.sta->addr, event->u.ba.tid);
}
static void iwl_mvm_mac_event_callback(struct ieee80211_hw *hw,
@@ -4290,7 +4252,8 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
lockdep_assert_held(&mvm->mutex);
/* TODO - remove a000 disablement when we have RXQ config API */
- if (!iwl_mvm_has_new_rx_api(mvm) || iwl_mvm_has_new_tx_api(mvm))
+ if (!iwl_mvm_has_new_rx_api(mvm) ||
+ mvm->trans->cfg->device_family == IWL_DEVICE_FAMILY_A000)
return;
notif->cookie = mvm->queue_sync_cookie;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index ddd8719..83303ba 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -87,6 +87,8 @@
#include "fw-api.h"
#include "constants.h"
#include "tof.h"
+#include "fw/runtime.h"
+#include "fw/dbg.h"
#define IWL_MVM_MAX_ADDRESSES 5
/* RSSI offset for WkP */
@@ -119,6 +121,9 @@
*/
#define IWL_MVM_CS_UNBLOCK_TX_TIMEOUT 3
+/* offchannel queue towards mac80211 */
+#define IWL_MVM_OFFCHANNEL_QUEUE 0
+
extern const struct ieee80211_ops iwl_mvm_hw_ops;
/**
@@ -137,34 +142,6 @@ struct iwl_mvm_mod_params {
};
extern struct iwl_mvm_mod_params iwlmvm_mod_params;
-/**
- * struct iwl_mvm_dump_ptrs - set of pointers needed for the fw-error-dump
- *
- * @op_mode_ptr: pointer to the buffer coming from the mvm op_mode
- * @trans_ptr: pointer to struct %iwl_trans_dump_data which contains the
- * transport's data.
- * @trans_len: length of the valid data in trans_ptr
- * @op_mode_len: length of the valid data in op_mode_ptr
- */
-struct iwl_mvm_dump_ptrs {
- struct iwl_trans_dump_data *trans_ptr;
- void *op_mode_ptr;
- u32 op_mode_len;
-};
-
-/**
- * struct iwl_mvm_dump_desc - describes the dump
- * @len: length of trig_desc->data
- * @trig_desc: the description of the dump
- */
-struct iwl_mvm_dump_desc {
- size_t len;
- /* must be last */
- struct iwl_fw_error_dump_trigger_desc trig_desc;
-};
-
-extern const struct iwl_mvm_dump_desc iwl_mvm_dump_desc_assert;
-
struct iwl_mvm_phy_ctxt {
u16 id;
u16 color;
@@ -606,19 +583,6 @@ enum iwl_mvm_tdls_cs_state {
IWL_MVM_TDLS_SW_ACTIVE,
};
-#define MAX_NUM_LMAC 2
-struct iwl_mvm_shared_mem_cfg {
- int num_lmacs;
- int num_txfifo_entries;
- struct {
- u32 txfifo_size[TX_FIFO_MAX_NUM];
- u32 rxfifo1_size;
- } lmac[MAX_NUM_LMAC];
- u32 rxfifo2_size;
- u32 internal_txfifo_addr;
- u32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM];
-};
-
/**
* struct iwl_mvm_reorder_buffer - per ra/tid/queue reorder buffer
* @head_sn: reorder window head sn
@@ -766,7 +730,6 @@ struct iwl_mvm {
*/
struct iwl_mvm_vif *bf_allowed_vif;
- enum iwl_ucode_type cur_ucode;
bool hw_registered;
bool calibrating;
u32 error_event_table[2];
@@ -815,10 +778,7 @@ struct iwl_mvm {
/* NVM sections */
struct iwl_nvm_section nvm_sections[NVM_MAX_NUM_SECTIONS];
- /* Paging section */
- struct iwl_fw_paging fw_paging_db[NUM_OF_FW_PAGING_BLOCKS];
- u16 num_of_paging_blk;
- u16 num_of_pages_in_last_blk;
+ struct iwl_fw_runtime fwrt;
/* EEPROM MAC addresses */
struct mac_address addresses[IWL_MVM_MAX_ADDRESSES];
@@ -826,11 +786,7 @@ struct iwl_mvm {
/* data related to data path */
struct iwl_rx_phy_info last_phy_info;
struct ieee80211_sta __rcu *fw_id_to_mac_id[IWL_MVM_STATION_COUNT];
- struct work_struct sta_drained_wk;
unsigned long sta_deferred_frames[BITS_TO_LONGS(IWL_MVM_STATION_COUNT)];
- unsigned long sta_drained[BITS_TO_LONGS(IWL_MVM_STATION_COUNT)];
- atomic_t pending_frames[IWL_MVM_STATION_COUNT];
- u32 tfd_drained[IWL_MVM_STATION_COUNT];
u8 rx_ba_sessions;
/* configured by mac80211 */
@@ -847,9 +803,6 @@ struct iwl_mvm {
/* max number of simultaneous scans the FW supports */
unsigned int max_scans;
- /* ts of the beginning of a non-collect fw dbg data period */
- unsigned long fw_dbg_non_collect_ts_start[FW_DBG_TRIGGER_MAX - 1];
-
/* UMAC scan tracking */
u32 scan_uid_status[IWL_MVM_MAX_UMAC_SCANS];
@@ -925,10 +878,6 @@ struct iwl_mvm {
/* -1 for always, 0 for never, >0 for that many times */
s8 fw_restart;
- u8 fw_dbg_conf;
- struct delayed_work fw_dump_wk;
- const struct iwl_mvm_dump_desc *fw_dump_desc;
- const struct iwl_fw_dbg_trigger_tlv *fw_dump_trig;
#ifdef CONFIG_IWLWIFI_LEDS
struct led_classdev led;
@@ -975,8 +924,6 @@ struct iwl_mvm {
struct iwl_bt_coex_profile_notif last_bt_notif;
struct iwl_bt_coex_ci_cmd last_bt_ci_cmd;
- u32 last_ant_isol;
- u8 last_corun_lut;
u8 bt_tx_prio;
enum iwl_bt_force_ant_mode bt_force_ant_mode;
@@ -1010,9 +957,6 @@ struct iwl_mvm {
u16 probe_queue;
u16 p2p_dev_queue;
- u8 first_agg_queue;
- u8 last_agg_queue;
-
/* Indicate if device power save is allowed */
u8 ps_disabled; /* u8 instead of bool to ease debugfs_create_* usage */
unsigned int max_amsdu_len; /* used for debugfs only */
@@ -1055,7 +999,6 @@ struct iwl_mvm {
} peer;
} tdls_cs;
- struct iwl_mvm_shared_mem_cfg smem_cfg;
u32 ciphers[IWL_MVM_NUM_CIPHERS];
struct ieee80211_cipher_scheme cs[IWL_UCODE_MAX_CS];
@@ -1095,7 +1038,6 @@ struct iwl_mvm {
* @IWL_MVM_STATUS_IN_D0I3: NIC is in D0i3
* @IWL_MVM_STATUS_ROC_AUX_RUNNING: AUX remain-on-channel is running
* @IWL_MVM_STATUS_D3_RECONFIG: D3 reconfiguration is being done
- * @IWL_MVM_STATUS_DUMPING_FW_LOG: FW log is being dumped
* @IWL_MVM_STATUS_FIRMWARE_RUNNING: firmware is running
*/
enum iwl_mvm_status {
@@ -1107,7 +1049,6 @@ enum iwl_mvm_status {
IWL_MVM_STATUS_IN_D0I3,
IWL_MVM_STATUS_ROC_AUX_RUNNING,
IWL_MVM_STATUS_D3_RECONFIG,
- IWL_MVM_STATUS_DUMPING_FW_LOG,
IWL_MVM_STATUS_FIRMWARE_RUNNING,
};
@@ -1180,12 +1121,6 @@ static inline bool iwl_mvm_is_d0i3_supported(struct iwl_mvm *mvm)
IWL_UCODE_TLV_CAPA_D0I3_SUPPORT);
}
-static inline bool iwl_mvm_is_dqa_supported(struct iwl_mvm *mvm)
-{
- return fw_has_capa(&mvm->fw->ucode_capa,
- IWL_UCODE_TLV_CAPA_DQA_SUPPORT);
-}
-
static inline bool iwl_mvm_enter_d0i3_on_suspend(struct iwl_mvm *mvm)
{
/* For now we only use this mode to differentiate between
@@ -1238,13 +1173,6 @@ static inline bool iwl_mvm_is_wifi_mcc_supported(struct iwl_mvm *mvm)
IWL_UCODE_TLV_CAPA_LAR_MULTI_MCC);
}
-static inline bool iwl_mvm_bt_is_plcr_supported(struct iwl_mvm *mvm)
-{
- return fw_has_capa(&mvm->fw->ucode_capa,
- IWL_UCODE_TLV_CAPA_BT_COEX_PLCR) &&
- IWL_MVM_BT_COEX_CORUNNING;
-}
-
static inline bool iwl_mvm_bt_is_rrc_supported(struct iwl_mvm *mvm)
{
return fw_has_capa(&mvm->fw->ucode_capa,
@@ -1287,6 +1215,12 @@ static inline bool iwl_mvm_has_new_tx_api(struct iwl_mvm *mvm)
return mvm->trans->cfg->use_tfh;
}
+static inline bool iwl_mvm_has_unified_ucode(struct iwl_mvm *mvm)
+{
+ /* TODO - better define this */
+ return mvm->trans->cfg->device_family >= IWL_DEVICE_FAMILY_A000;
+}
+
static inline bool iwl_mvm_is_cdb_supported(struct iwl_mvm *mvm)
{
/*
@@ -1308,6 +1242,12 @@ static inline bool iwl_mvm_has_new_rx_stats_api(struct iwl_mvm *mvm)
IWL_UCODE_TLV_API_NEW_RX_STATS);
}
+static inline bool iwl_mvm_has_new_ats_coex_api(struct iwl_mvm *mvm)
+{
+ return fw_has_api(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_API_COEX_ATS_EXTERNAL);
+}
+
static inline struct agg_tx_status *
iwl_mvm_get_agg_status(struct iwl_mvm *mvm, void *tx_resp)
{
@@ -1340,6 +1280,14 @@ static inline bool iwl_mvm_is_ctdp_supported(struct iwl_mvm *mvm)
}
extern const u8 iwl_mvm_ac_to_tx_fifo[];
+extern const u8 iwl_mvm_ac_to_gen2_tx_fifo[];
+
+static inline u8 iwl_mvm_mac_ac_to_tx_fifo(struct iwl_mvm *mvm,
+ enum ieee80211_ac_numbers ac)
+{
+ return iwl_mvm_has_new_tx_api(mvm) ?
+ iwl_mvm_ac_to_gen2_tx_fifo[ac] : iwl_mvm_ac_to_tx_fifo[ac];
+}
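With this wrapper, call sites resolve the right FIFO without checking the device generation themselves; an illustrative use:

	u8 fifo = iwl_mvm_mac_ac_to_tx_fifo(mvm, IEEE80211_AC_VO);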
struct iwl_rate_info {
u8 plcp; /* uCode API: IWL_RATE_6M_PLCP, etc. */
@@ -1425,8 +1373,7 @@ int iwl_mvm_request_statistics(struct iwl_mvm *mvm, bool clear);
void iwl_mvm_accu_radio_stats(struct iwl_mvm *mvm);
/* NVM */
-int iwl_nvm_init(struct iwl_mvm *mvm, bool read_nvm_from_nic);
-int iwl_mvm_nvm_get_from_fw(struct iwl_mvm *mvm);
+int iwl_nvm_init(struct iwl_mvm *mvm);
int iwl_mvm_load_nvm_to_nic(struct iwl_mvm *mvm);
int iwl_mvm_read_external_nvm(struct iwl_mvm *mvm);
@@ -1510,7 +1457,6 @@ u8 iwl_mvm_get_ctrl_pos(struct cfg80211_chan_def *chandef);
/* MAC (virtual interface) programming */
int iwl_mvm_mac_ctxt_init(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
-void iwl_mvm_mac_ctxt_release(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
int iwl_mvm_mac_ctxt_add(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
int iwl_mvm_mac_ctxt_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
bool force_assoc_off, const u8 *bssid_override);
@@ -1573,9 +1519,6 @@ void iwl_mvm_rx_umac_scan_complete_notif(struct iwl_mvm *mvm,
void iwl_mvm_rx_umac_scan_iter_complete_notif(struct iwl_mvm *mvm,
struct iwl_rx_cmd_buffer *rxb);
-/* Paging */
-void iwl_free_fw_paging(struct iwl_mvm *mvm);
-
/* MVM debugfs */
#ifdef CONFIG_IWLWIFI_DEBUGFS
int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir);
@@ -1619,6 +1562,7 @@ void iwl_mvm_power_uapsd_misbehaving_ap_notif(struct iwl_mvm *mvm,
#ifdef CONFIG_IWLWIFI_LEDS
int iwl_mvm_leds_init(struct iwl_mvm *mvm);
void iwl_mvm_leds_exit(struct iwl_mvm *mvm);
+void iwl_mvm_leds_sync(struct iwl_mvm *mvm);
#else
static inline int iwl_mvm_leds_init(struct iwl_mvm *mvm)
{
@@ -1627,6 +1571,9 @@ static inline int iwl_mvm_leds_init(struct iwl_mvm *mvm)
static inline void iwl_mvm_leds_exit(struct iwl_mvm *mvm)
{
}
+static inline void iwl_mvm_leds_sync(struct iwl_mvm *mvm)
+{
+}
#endif
/* D3 (WoWLAN, NetDetect) */
@@ -1764,10 +1711,6 @@ bool iwl_mvm_enable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue,
int iwl_mvm_tvqm_enable_txq(struct iwl_mvm *mvm, int mac80211_queue,
u8 sta_id, u8 tid, unsigned int timeout);
-/*
- * Disable a TXQ.
- * Note that in non-DQA mode the %mac80211_queue and %tid params are ignored.
- */
int iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue,
u8 tid, u8 flags);
int iwl_mvm_find_free_queue(struct iwl_mvm *mvm, u8 sta_id, u8 minq, u8 maxq);
@@ -1777,33 +1720,15 @@ int iwl_mvm_find_free_queue(struct iwl_mvm *mvm, u8 sta_id, u8 minq, u8 maxq);
*/
static inline u32 iwl_mvm_flushable_queues(struct iwl_mvm *mvm)
{
- u32 cmd_queue = iwl_mvm_is_dqa_supported(mvm) ? IWL_MVM_DQA_CMD_QUEUE :
- IWL_MVM_CMD_QUEUE;
-
return ((BIT(mvm->cfg->base_params->num_of_queues) - 1) &
- ~BIT(cmd_queue));
-}
-
-static inline
-void iwl_mvm_enable_ac_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue,
- u8 fifo, u16 ssn, unsigned int wdg_timeout)
-{
- struct iwl_trans_txq_scd_cfg cfg = {
- .fifo = fifo,
- .tid = IWL_MAX_TID_COUNT,
- .aggregate = false,
- .frame_limit = IWL_FRAME_LIMIT,
- };
-
- iwl_mvm_enable_txq(mvm, queue, mac80211_queue, ssn, &cfg, wdg_timeout);
+ ~BIT(IWL_MVM_DQA_CMD_QUEUE));
}
static inline void iwl_mvm_stop_device(struct iwl_mvm *mvm)
{
- if (!iwl_mvm_has_new_tx_api(mvm))
- iwl_free_fw_paging(mvm);
+ iwl_free_fw_paging(&mvm->fwrt);
clear_bit(IWL_MVM_STATUS_FIRMWARE_RUNNING, &mvm->status);
- mvm->fw_dbg_conf = FW_DBG_INVALID;
+ iwl_fw_dump_conf_clear(&mvm->fwrt);
iwl_trans_stop_device(mvm->trans);
}
@@ -1826,6 +1751,7 @@ void iwl_mvm_thermal_exit(struct iwl_mvm *mvm);
void iwl_mvm_set_hw_ctkill_state(struct iwl_mvm *mvm, bool state);
int iwl_mvm_get_temp(struct iwl_mvm *mvm, s32 *temp);
void iwl_mvm_ct_kill_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb);
+void iwl_mvm_enter_ctkill(struct iwl_mvm *mvm);
int iwl_mvm_send_temp_report_ths_cmd(struct iwl_mvm *mvm);
int iwl_mvm_ctdp_command(struct iwl_mvm *mvm, u32 op, u32 budget);
@@ -1899,21 +1825,7 @@ int iwl_mvm_send_lqm_cmd(struct ieee80211_vif *vif,
u32 duration, u32 timeout);
bool iwl_mvm_lqm_active(struct iwl_mvm *mvm);
-#ifdef CONFIG_ACPI
int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b);
int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm);
-#else
-static inline
-int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b)
-{
- return -ENOENT;
-}
-
-static inline
-int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm)
-{
- return -ENOENT;
-}
-#endif /* CONFIG_ACPI */
#endif /* __IWL_MVM_H__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
index dac7e54..422aa6b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
@@ -292,7 +292,8 @@ static struct iwl_nvm_data *
iwl_parse_nvm_sections(struct iwl_mvm *mvm)
{
struct iwl_nvm_section *sections = mvm->nvm_sections;
- const __le16 *hw, *sw, *calib, *regulatory, *mac_override, *phy_sku;
+ const __be16 *hw;
+ const __le16 *sw, *calib, *regulatory, *mac_override, *phy_sku;
bool lar_enabled;
/* Checking for required sections */
@@ -326,10 +327,7 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm)
}
}
- if (WARN_ON(!mvm->cfg))
- return NULL;
-
- hw = (const __le16 *)sections[mvm->cfg->nvm_hw_section_num].data;
+ hw = (const __be16 *)sections[mvm->cfg->nvm_hw_section_num].data;
sw = (const __le16 *)sections[NVM_SECTION_TYPE_SW].data;
calib = (const __le16 *)sections[NVM_SECTION_TYPE_CALIBRATION].data;
regulatory = (const __le16 *)sections[NVM_SECTION_TYPE_REGULATORY].data;
@@ -546,101 +544,7 @@ int iwl_mvm_load_nvm_to_nic(struct iwl_mvm *mvm)
return ret;
}
-int iwl_mvm_nvm_get_from_fw(struct iwl_mvm *mvm)
-{
- struct iwl_nvm_get_info cmd = {};
- struct iwl_nvm_get_info_rsp *rsp;
- struct iwl_trans *trans = mvm->trans;
- struct iwl_host_cmd hcmd = {
- .flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
- .data = { &cmd, },
- .len = { sizeof(cmd) },
- .id = WIDE_ID(REGULATORY_AND_NVM_GROUP, NVM_GET_INFO)
- };
- int ret;
- bool lar_fw_supported = !iwlwifi_mod_params.lar_disable &&
- fw_has_capa(&mvm->fw->ucode_capa,
- IWL_UCODE_TLV_CAPA_LAR_SUPPORT);
-
- lockdep_assert_held(&mvm->mutex);
-
- ret = iwl_mvm_send_cmd(mvm, &hcmd);
- if (ret)
- return ret;
-
- if (WARN(iwl_rx_packet_payload_len(hcmd.resp_pkt) != sizeof(*rsp),
- "Invalid payload len in NVM response from FW %d",
- iwl_rx_packet_payload_len(hcmd.resp_pkt))) {
- ret = -EINVAL;
- goto out;
- }
-
- rsp = (void *)hcmd.resp_pkt->data;
- if (le32_to_cpu(rsp->general.flags)) {
- IWL_ERR(mvm, "Invalid NVM data from FW\n");
- ret = -EINVAL;
- goto out;
- }
-
- mvm->nvm_data = kzalloc(sizeof(*mvm->nvm_data) +
- sizeof(struct ieee80211_channel) *
- IWL_NUM_CHANNELS, GFP_KERNEL);
- if (!mvm->nvm_data) {
- ret = -ENOMEM;
- goto out;
- }
-
- iwl_set_hw_address_from_csr(trans, mvm->nvm_data);
- /* TODO: if platform NVM has MAC address - override it here */
-
- if (!is_valid_ether_addr(mvm->nvm_data->hw_addr)) {
- IWL_ERR(trans, "no valid mac address was found\n");
- ret = -EINVAL;
- goto err_free;
- }
-
- IWL_INFO(trans, "base HW address: %pM\n", mvm->nvm_data->hw_addr);
-
- /* Initialize general data */
- mvm->nvm_data->nvm_version = le16_to_cpu(rsp->general.nvm_version);
-
- /* Initialize MAC sku data */
- mvm->nvm_data->sku_cap_11ac_enable =
- le32_to_cpu(rsp->mac_sku.enable_11ac);
- mvm->nvm_data->sku_cap_11n_enable =
- le32_to_cpu(rsp->mac_sku.enable_11n);
- mvm->nvm_data->sku_cap_band_24GHz_enable =
- le32_to_cpu(rsp->mac_sku.enable_24g);
- mvm->nvm_data->sku_cap_band_52GHz_enable =
- le32_to_cpu(rsp->mac_sku.enable_5g);
- mvm->nvm_data->sku_cap_mimo_disabled =
- le32_to_cpu(rsp->mac_sku.mimo_disable);
-
- /* Initialize PHY sku data */
- mvm->nvm_data->valid_tx_ant = (u8)le32_to_cpu(rsp->phy_sku.tx_chains);
- mvm->nvm_data->valid_rx_ant = (u8)le32_to_cpu(rsp->phy_sku.rx_chains);
-
- /* Initialize regulatory data */
- mvm->nvm_data->lar_enabled =
- le32_to_cpu(rsp->regulatory.lar_enabled) && lar_fw_supported;
-
- iwl_init_sbands(trans->dev, trans->cfg, mvm->nvm_data,
- rsp->regulatory.channel_profile,
- mvm->nvm_data->valid_tx_ant & mvm->fw->valid_tx_ant,
- mvm->nvm_data->valid_rx_ant & mvm->fw->valid_rx_ant,
- rsp->regulatory.lar_enabled && lar_fw_supported);
-
- iwl_free_resp(&hcmd);
- return 0;
-
-err_free:
- kfree(mvm->nvm_data);
-out:
- iwl_free_resp(&hcmd);
- return ret;
-}
-
-int iwl_nvm_init(struct iwl_mvm *mvm, bool read_nvm_from_nic)
+int iwl_nvm_init(struct iwl_mvm *mvm)
{
int ret, section;
u32 size_read = 0;
@@ -651,63 +555,61 @@ int iwl_nvm_init(struct iwl_mvm *mvm, bool read_nvm_from_nic)
return -EINVAL;
/* load NVM values from nic */
- if (read_nvm_from_nic) {
- /* Read From FW NVM */
- IWL_DEBUG_EEPROM(mvm->trans->dev, "Read from NVM\n");
-
- nvm_buffer = kmalloc(mvm->cfg->base_params->eeprom_size,
- GFP_KERNEL);
- if (!nvm_buffer)
- return -ENOMEM;
- for (section = 0; section < NVM_MAX_NUM_SECTIONS; section++) {
- /* we override the constness for initial read */
- ret = iwl_nvm_read_section(mvm, section, nvm_buffer,
- size_read);
- if (ret < 0)
- continue;
- size_read += ret;
- temp = kmemdup(nvm_buffer, ret, GFP_KERNEL);
- if (!temp) {
- ret = -ENOMEM;
- break;
- }
+ /* Read From FW NVM */
+ IWL_DEBUG_EEPROM(mvm->trans->dev, "Read from NVM\n");
+
+ nvm_buffer = kmalloc(mvm->cfg->base_params->eeprom_size,
+ GFP_KERNEL);
+ if (!nvm_buffer)
+ return -ENOMEM;
+ for (section = 0; section < NVM_MAX_NUM_SECTIONS; section++) {
+ /* we override the constness for initial read */
+ ret = iwl_nvm_read_section(mvm, section, nvm_buffer,
+ size_read);
+ if (ret < 0)
+ continue;
+ size_read += ret;
+ temp = kmemdup(nvm_buffer, ret, GFP_KERNEL);
+ if (!temp) {
+ ret = -ENOMEM;
+ break;
+ }
- iwl_mvm_nvm_fixups(mvm, section, temp, ret);
+ iwl_mvm_nvm_fixups(mvm, section, temp, ret);
- mvm->nvm_sections[section].data = temp;
- mvm->nvm_sections[section].length = ret;
+ mvm->nvm_sections[section].data = temp;
+ mvm->nvm_sections[section].length = ret;
#ifdef CONFIG_IWLWIFI_DEBUGFS
- switch (section) {
- case NVM_SECTION_TYPE_SW:
- mvm->nvm_sw_blob.data = temp;
- mvm->nvm_sw_blob.size = ret;
- break;
- case NVM_SECTION_TYPE_CALIBRATION:
- mvm->nvm_calib_blob.data = temp;
- mvm->nvm_calib_blob.size = ret;
- break;
- case NVM_SECTION_TYPE_PRODUCTION:
- mvm->nvm_prod_blob.data = temp;
- mvm->nvm_prod_blob.size = ret;
- break;
- case NVM_SECTION_TYPE_PHY_SKU:
- mvm->nvm_phy_sku_blob.data = temp;
- mvm->nvm_phy_sku_blob.size = ret;
+ switch (section) {
+ case NVM_SECTION_TYPE_SW:
+ mvm->nvm_sw_blob.data = temp;
+ mvm->nvm_sw_blob.size = ret;
+ break;
+ case NVM_SECTION_TYPE_CALIBRATION:
+ mvm->nvm_calib_blob.data = temp;
+ mvm->nvm_calib_blob.size = ret;
+ break;
+ case NVM_SECTION_TYPE_PRODUCTION:
+ mvm->nvm_prod_blob.data = temp;
+ mvm->nvm_prod_blob.size = ret;
+ break;
+ case NVM_SECTION_TYPE_PHY_SKU:
+ mvm->nvm_phy_sku_blob.data = temp;
+ mvm->nvm_phy_sku_blob.size = ret;
+ break;
+ default:
+ if (section == mvm->cfg->nvm_hw_section_num) {
+ mvm->nvm_hw_blob.data = temp;
+ mvm->nvm_hw_blob.size = ret;
break;
- default:
- if (section == mvm->cfg->nvm_hw_section_num) {
- mvm->nvm_hw_blob.data = temp;
- mvm->nvm_hw_blob.size = ret;
- break;
- }
}
-#endif
}
- if (!size_read)
- IWL_ERR(mvm, "OTP is blank\n");
- kfree(nvm_buffer);
+#endif
}
+ if (!size_read)
+ IWL_ERR(mvm, "OTP is blank\n");
+ kfree(nvm_buffer);
/* Only if PNVM selected in the mod param - load external NVM */
if (mvm->nvm_file_name) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 9c175d5..2318789 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -82,11 +82,10 @@
#include "iwl-io.h"
#include "iwl-prph.h"
#include "rs.h"
-#include "fw-api-scan.h"
+#include "fw/api/scan.h"
#include "time-event.h"
-#include "fw-dbg.h"
#include "fw-api.h"
-#include "fw-api-scan.h"
+#include "fw/api/scan.h"
#define DRV_DESCRIPTION "The new Intel(R) wireless AGN driver for Linux"
MODULE_DESCRIPTION(DRV_DESCRIPTION);
@@ -257,8 +256,6 @@ static const struct iwl_rx_handlers iwl_mvm_rx_handlers[] = {
RX_HANDLER_ASYNC_LOCKED),
RX_HANDLER(STATISTICS_NOTIFICATION, iwl_mvm_rx_statistics,
RX_HANDLER_ASYNC_LOCKED),
- RX_HANDLER(ANTENNA_COUPLING_NOTIFICATION,
- iwl_mvm_rx_ant_coupling_notif, RX_HANDLER_ASYNC_LOCKED),
RX_HANDLER(BA_WINDOW_STATUS_NOTIFICATION_ID,
iwl_mvm_window_status_notif, RX_HANDLER_SYNC),
@@ -326,7 +323,6 @@ static const struct iwl_hcmd_names iwl_mvm_legacy_names[] = {
HCMD_NAME(INIT_COMPLETE_NOTIF),
HCMD_NAME(PHY_CONTEXT_CMD),
HCMD_NAME(DBG_CFG),
- HCMD_NAME(ANTENNA_COUPLING_NOTIFICATION),
HCMD_NAME(SCAN_CFG_CMD),
HCMD_NAME(SCAN_REQ_UMAC),
HCMD_NAME(SCAN_ABORT_UMAC),
@@ -351,13 +347,13 @@ static const struct iwl_hcmd_names iwl_mvm_legacy_names[] = {
HCMD_NAME(BINDING_CONTEXT_CMD),
HCMD_NAME(TIME_QUOTA_CMD),
HCMD_NAME(NON_QOS_TX_COUNTER_CMD),
+ HCMD_NAME(LEDS_CMD),
HCMD_NAME(LQ_CMD),
HCMD_NAME(FW_PAGING_BLOCK_CMD),
HCMD_NAME(SCAN_OFFLOAD_REQUEST_CMD),
HCMD_NAME(SCAN_OFFLOAD_ABORT_CMD),
HCMD_NAME(HOT_SPOT_CMD),
HCMD_NAME(SCAN_OFFLOAD_PROFILES_QUERY_CMD),
- HCMD_NAME(BT_COEX_UPDATE_CORUN_LUT),
HCMD_NAME(BT_COEX_UPDATE_REDUCED_TXP),
HCMD_NAME(BT_COEX_CI),
HCMD_NAME(PHY_CONFIGURATION_CMD),
@@ -388,6 +384,7 @@ static const struct iwl_hcmd_names iwl_mvm_legacy_names[] = {
HCMD_NAME(SCAN_ITERATION_COMPLETE_UMAC),
HCMD_NAME(REPLY_RX_PHY_CMD),
HCMD_NAME(REPLY_RX_MPDU_CMD),
+ HCMD_NAME(FRAME_RELEASE),
HCMD_NAME(BA_NOTIF),
HCMD_NAME(MCC_UPDATE_CMD),
HCMD_NAME(MCC_CHUB_UPDATE_CMD),
@@ -510,8 +507,6 @@ static u32 calc_min_backoff(struct iwl_trans *trans, const struct iwl_cfg *cfg)
return 0;
}
-static void iwl_mvm_fw_error_dump_wk(struct work_struct *work);
-
static void iwl_mvm_tx_unblock_dwork(struct work_struct *work)
{
struct iwl_mvm *mvm =
@@ -535,6 +530,34 @@ unlock:
mutex_unlock(&mvm->mutex);
}
+static int iwl_mvm_fwrt_dump_start(void *ctx)
+{
+ struct iwl_mvm *mvm = ctx;
+ int ret;
+
+ ret = iwl_mvm_ref_sync(mvm, IWL_MVM_REF_FW_DBG_COLLECT);
+ if (ret)
+ return ret;
+
+ mutex_lock(&mvm->mutex);
+
+ return 0;
+}
+
+static void iwl_mvm_fwrt_dump_end(void *ctx)
+{
+ struct iwl_mvm *mvm = ctx;
+
+ mutex_unlock(&mvm->mutex);
+
+ iwl_mvm_unref(mvm, IWL_MVM_REF_FW_DBG_COLLECT);
+}
+
+static const struct iwl_fw_runtime_ops iwl_mvm_fwrt_ops = {
+ .dump_start = iwl_mvm_fwrt_dump_start,
+ .dump_end = iwl_mvm_fwrt_dump_end,
+};
+
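These two hooks are how the shared fw/ runtime borrows the op mode's firmware reference and mutex around dump collection without knowing anything about struct iwl_mvm. A hypothetical sketch of how the runtime might bracket a dump with them — the real flow lives in fw/dbg.c, and the ops/ops_ctx field names are assumptions here:

	static void example_collect_dump(struct iwl_fw_runtime *fwrt)
	{
		/* let the op mode take a fw reference and its mutex */
		if (fwrt->ops->dump_start(fwrt->ops_ctx))
			return;

		/* ... read SRAM and peripheral registers, build the dump ... */

		/* drop the mutex and the reference again */
		fwrt->ops->dump_end(fwrt->ops_ctx);
	}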
static struct iwl_op_mode *
iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
const struct iwl_fw *fw, struct dentry *dbgfs_dir)
@@ -580,6 +603,8 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
mvm->fw = fw;
mvm->hw = hw;
+ iwl_fw_runtime_init(&mvm->fwrt, trans, fw, &iwl_mvm_fwrt_ops, mvm);
+
mvm->init_status = 0;
if (iwl_mvm_has_new_rx_api(mvm)) {
@@ -596,32 +621,15 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
mvm->fw_restart = iwlwifi_mod_params.fw_restart ? -1 : 0;
- if (!iwl_mvm_is_dqa_supported(mvm)) {
- mvm->last_agg_queue = mvm->cfg->base_params->num_of_queues - 1;
-
- if (mvm->cfg->base_params->num_of_queues == 16) {
- mvm->aux_queue = 11;
- mvm->first_agg_queue = 12;
- BUILD_BUG_ON(BITS_PER_BYTE *
- sizeof(mvm->hw_queue_to_mac80211[0]) < 12);
- } else {
- mvm->aux_queue = 15;
- mvm->first_agg_queue = 16;
- BUILD_BUG_ON(BITS_PER_BYTE *
- sizeof(mvm->hw_queue_to_mac80211[0]) < 16);
- }
- } else {
- mvm->aux_queue = IWL_MVM_DQA_AUX_QUEUE;
- mvm->probe_queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
- mvm->p2p_dev_queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE;
- mvm->first_agg_queue = IWL_MVM_DQA_MIN_DATA_QUEUE;
- mvm->last_agg_queue = IWL_MVM_DQA_MAX_DATA_QUEUE;
- }
+ mvm->aux_queue = IWL_MVM_DQA_AUX_QUEUE;
+ mvm->probe_queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
+ mvm->p2p_dev_queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE;
+
mvm->sf_state = SF_UNINIT;
- if (iwl_mvm_has_new_tx_api(mvm))
- mvm->cur_ucode = IWL_UCODE_REGULAR;
+ if (iwl_mvm_has_unified_ucode(mvm))
+ iwl_fw_set_current_image(&mvm->fwrt, IWL_UCODE_REGULAR);
else
- mvm->cur_ucode = IWL_UCODE_INIT;
+ iwl_fw_set_current_image(&mvm->fwrt, IWL_UCODE_INIT);
mvm->drop_bcn_ap_mode = true;
mutex_init(&mvm->mutex);
@@ -635,9 +643,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
INIT_WORK(&mvm->async_handlers_wk, iwl_mvm_async_handlers_wk);
INIT_WORK(&mvm->roc_done_wk, iwl_mvm_roc_done_wk);
- INIT_WORK(&mvm->sta_drained_wk, iwl_mvm_sta_drained_wk);
INIT_WORK(&mvm->d0i3_exit_work, iwl_mvm_d0i3_exit_work);
- INIT_DELAYED_WORK(&mvm->fw_dump_wk, iwl_mvm_fw_error_dump_wk);
INIT_DELAYED_WORK(&mvm->tdls_cs.dwork, iwl_mvm_tdls_ch_switch_work);
INIT_DELAYED_WORK(&mvm->scan_timeout_dwork, iwl_mvm_scan_timeout_wk);
INIT_WORK(&mvm->add_stream_wk, iwl_mvm_add_new_dqa_stream_wk);
@@ -688,10 +694,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
trans_cfg.command_groups = iwl_mvm_groups;
trans_cfg.command_groups_size = ARRAY_SIZE(iwl_mvm_groups);
- if (iwl_mvm_is_dqa_supported(mvm))
- trans_cfg.cmd_queue = IWL_MVM_DQA_CMD_QUEUE;
- else
- trans_cfg.cmd_queue = IWL_MVM_CMD_QUEUE;
+ trans_cfg.cmd_queue = IWL_MVM_DQA_CMD_QUEUE;
trans_cfg.cmd_fifo = IWL_MVM_TX_FIFO_CMD;
trans_cfg.scd_set_active = true;
@@ -749,7 +752,6 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
iwl_mvm_stop_device(mvm);
iwl_mvm_unref(mvm, IWL_MVM_REF_INIT_UCODE);
mutex_unlock(&mvm->mutex);
- /* returns 0 if successful, 1 if success but in rfkill */
if (err < 0) {
IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", err);
goto out_free;
@@ -800,7 +802,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
iwl_mvm_leds_exit(mvm);
iwl_mvm_thermal_exit(mvm);
out_free:
- flush_delayed_work(&mvm->fw_dump_wk);
+ iwl_fw_flush_dump(&mvm->fwrt);
if (iwlmvm_mod_params.init_dbg)
return op_mode;
@@ -920,7 +922,7 @@ static inline void iwl_mvm_rx_check_trigger(struct iwl_mvm *mvm,
trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_FW_NOTIF);
cmds_trig = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, NULL, trig))
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt, NULL, trig))
return;
for (i = 0; i < ARRAY_SIZE(cmds_trig->cmds); i++) {
@@ -932,9 +934,9 @@ static inline void iwl_mvm_rx_check_trigger(struct iwl_mvm *mvm,
cmds_trig->cmds[i].group_id != pkt->hdr.group_id)
continue;
- iwl_mvm_fw_dbg_collect_trig(mvm, trig,
- "CMD 0x%02x.%02x received",
- pkt->hdr.group_id, pkt->hdr.cmd);
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trig,
+ "CMD 0x%02x.%02x received",
+ pkt->hdr.group_id, pkt->hdr.cmd);
break;
}
}
@@ -980,8 +982,10 @@ static void iwl_mvm_rx_common(struct iwl_mvm *mvm,
list_add_tail(&entry->list, &mvm->async_handlers_list);
spin_unlock(&mvm->async_handlers_lock);
schedule_work(&mvm->async_handlers_wk);
- break;
+ return;
}
+
+ iwl_fwrt_handle_notification(&mvm->fwrt, rxb);
}
static void iwl_mvm_rx(struct iwl_op_mode *op_mode,
@@ -1131,7 +1135,7 @@ static bool iwl_mvm_set_hw_rfkill_state(struct iwl_op_mode *op_mode, bool state)
* Stop the device if we run OPERATIONAL firmware or if we are in the
* middle of the calibrations.
*/
- return state && (mvm->cur_ucode != IWL_UCODE_INIT || calibrating);
+ return state && (mvm->fwrt.cur_fw_img != IWL_UCODE_INIT || calibrating);
}
static void iwl_mvm_free_skb(struct iwl_op_mode *op_mode, struct sk_buff *skb)
@@ -1160,57 +1164,6 @@ static void iwl_mvm_reprobe_wk(struct work_struct *wk)
module_put(THIS_MODULE);
}
-static void iwl_mvm_fw_error_dump_wk(struct work_struct *work)
-{
- struct iwl_mvm *mvm =
- container_of(work, struct iwl_mvm, fw_dump_wk.work);
-
- if (iwl_mvm_ref_sync(mvm, IWL_MVM_REF_FW_DBG_COLLECT))
- return;
-
- mutex_lock(&mvm->mutex);
-
- if (mvm->cfg->device_family == IWL_DEVICE_FAMILY_7000) {
- /* stop recording */
- iwl_set_bits_prph(mvm->trans, MON_BUFF_SAMPLE_CTL, 0x100);
-
- iwl_mvm_fw_error_dump(mvm);
-
- /* start recording again if the firmware is not crashed */
- if (!test_bit(STATUS_FW_ERROR, &mvm->trans->status) &&
- mvm->fw->dbg_dest_tlv) {
- iwl_clear_bits_prph(mvm->trans,
- MON_BUFF_SAMPLE_CTL, 0x100);
- iwl_clear_bits_prph(mvm->trans,
- MON_BUFF_SAMPLE_CTL, 0x1);
- iwl_set_bits_prph(mvm->trans, MON_BUFF_SAMPLE_CTL, 0x1);
- }
- } else {
- u32 in_sample = iwl_read_prph(mvm->trans, DBGC_IN_SAMPLE);
- u32 out_ctrl = iwl_read_prph(mvm->trans, DBGC_OUT_CTRL);
-
- /* stop recording */
- iwl_write_prph(mvm->trans, DBGC_IN_SAMPLE, 0);
- udelay(100);
- iwl_write_prph(mvm->trans, DBGC_OUT_CTRL, 0);
- /* wait before we collect the data till the DBGC stop */
- udelay(500);
-
- iwl_mvm_fw_error_dump(mvm);
-
- /* start recording again if the firmware is not crashed */
- if (!test_bit(STATUS_FW_ERROR, &mvm->trans->status) &&
- mvm->fw->dbg_dest_tlv) {
- iwl_write_prph(mvm->trans, DBGC_IN_SAMPLE, in_sample);
- iwl_write_prph(mvm->trans, DBGC_OUT_CTRL, out_ctrl);
- }
- }
-
- mutex_unlock(&mvm->mutex);
-
- iwl_mvm_unref(mvm, IWL_MVM_REF_FW_DBG_COLLECT);
-}
-
void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
{
iwl_abort_notification_waits(&mvm->notif_wait);
@@ -1234,7 +1187,7 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
* can't recover this since we're already half suspended.
*/
if (!mvm->fw_restart && fw_error) {
- iwl_mvm_fw_dbg_collect_desc(mvm, &iwl_mvm_dump_desc_assert,
+ iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert,
NULL);
} else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
struct iwl_mvm_reprobe *reprobe;
@@ -1260,7 +1213,7 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
reprobe->dev = mvm->trans->dev;
INIT_WORK(&reprobe->work, iwl_mvm_reprobe_wk);
schedule_work(&reprobe->work);
- } else if (mvm->cur_ucode == IWL_UCODE_REGULAR &&
+ } else if (mvm->fwrt.cur_fw_img == IWL_UCODE_REGULAR &&
mvm->hw_registered) {
/* don't let the transport/FW power down */
iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN);
@@ -1439,7 +1392,7 @@ int iwl_mvm_enter_d0i3(struct iwl_op_mode *op_mode)
IWL_DEBUG_RPM(mvm, "MVM entering D0i3\n");
- if (WARN_ON_ONCE(mvm->cur_ucode != IWL_UCODE_REGULAR))
+ if (WARN_ON_ONCE(mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR))
return -EINVAL;
set_bit(IWL_MVM_STATUS_IN_D0I3, &mvm->status);
@@ -1665,7 +1618,7 @@ int _iwl_mvm_exit_d0i3(struct iwl_mvm *mvm)
IWL_DEBUG_RPM(mvm, "MVM exiting D0i3\n");
- if (WARN_ON_ONCE(mvm->cur_ucode != IWL_UCODE_REGULAR))
+ if (WARN_ON_ONCE(mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR))
return -EINVAL;
mutex_lock(&mvm->d0i3_suspend_mutex);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
index fb9eaf0..7ee8e90 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
@@ -251,7 +251,7 @@ int iwl_mvm_phy_ctxt_changed(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt,
struct cfg80211_chan_def *chandef,
u8 chains_static, u8 chains_dynamic)
{
- enum iwl_phy_ctxt_action action = FW_CTXT_ACTION_MODIFY;
+ enum iwl_ctxt_action action = FW_CTXT_ACTION_MODIFY;
lockdep_assert_held(&mvm->mutex);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/power.c b/drivers/net/wireless/intel/iwlwifi/mvm/power.c
index e684811..c11fe26 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/power.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/power.c
@@ -7,7 +7,7 @@
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
- * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
+ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -34,7 +34,7 @@
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
- * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
+ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -75,7 +75,7 @@
#include "iwl-debug.h"
#include "mvm.h"
#include "iwl-modparams.h"
-#include "fw-api-power.h"
+#include "fw/api/power.h"
#define POWER_KEEP_ALIVE_PERIOD_SEC 25
@@ -186,7 +186,7 @@ static void iwl_mvm_power_configure_uapsd(struct iwl_mvm *mvm,
if (!mvmvif->queue_params[ac].uapsd)
continue;
- if (mvm->cur_ucode != IWL_UCODE_WOWLAN)
+ if (mvm->fwrt.cur_fw_img != IWL_UCODE_WOWLAN)
cmd->flags |=
cpu_to_le16(POWER_FLAGS_ADVANCE_PM_ENA_MSK);
@@ -220,14 +220,15 @@ static void iwl_mvm_power_configure_uapsd(struct iwl_mvm *mvm,
BIT(IEEE80211_AC_BK))) {
cmd->flags |= cpu_to_le16(POWER_FLAGS_SNOOZE_ENA_MSK);
cmd->snooze_interval = cpu_to_le16(IWL_MVM_PS_SNOOZE_INTERVAL);
- cmd->snooze_window = (mvm->cur_ucode == IWL_UCODE_WOWLAN) ?
- cpu_to_le16(IWL_MVM_WOWLAN_PS_SNOOZE_WINDOW) :
- cpu_to_le16(IWL_MVM_PS_SNOOZE_WINDOW);
+ cmd->snooze_window =
+ (mvm->fwrt.cur_fw_img == IWL_UCODE_WOWLAN) ?
+ cpu_to_le16(IWL_MVM_WOWLAN_PS_SNOOZE_WINDOW) :
+ cpu_to_le16(IWL_MVM_PS_SNOOZE_WINDOW);
}
cmd->uapsd_max_sp = mvm->hw->uapsd_max_sp_len;
- if (mvm->cur_ucode == IWL_UCODE_WOWLAN || cmd->flags &
+ if (mvm->fwrt.cur_fw_img == IWL_UCODE_WOWLAN || cmd->flags &
cpu_to_le16(POWER_FLAGS_SNOOZE_ENA_MSK)) {
cmd->rx_data_timeout_uapsd =
cpu_to_le32(IWL_MVM_WOWLAN_PS_RX_DATA_TIMEOUT);
@@ -502,7 +503,7 @@ static int iwl_mvm_power_send_cmd(struct iwl_mvm *mvm,
struct iwl_mac_power_cmd cmd = {};
iwl_mvm_power_build_cmd(mvm, vif, &cmd,
- mvm->cur_ucode != IWL_UCODE_WOWLAN);
+ mvm->fwrt.cur_fw_img != IWL_UCODE_WOWLAN);
iwl_mvm_power_log(mvm, &cmd);
#ifdef CONFIG_IWLWIFI_DEBUGFS
memcpy(&iwl_mvm_vif_from_mac80211(vif)->mac_pwr_cmd, &cmd, sizeof(cmd));
@@ -525,8 +526,8 @@ int iwl_mvm_power_update_device(struct iwl_mvm *mvm)
cmd.flags |= cpu_to_le16(DEVICE_POWER_FLAGS_POWER_SAVE_ENA_MSK);
#ifdef CONFIG_IWLWIFI_DEBUGFS
- if ((mvm->cur_ucode == IWL_UCODE_WOWLAN) ? mvm->disable_power_off_d3 :
- mvm->disable_power_off)
+ if ((mvm->fwrt.cur_fw_img == IWL_UCODE_WOWLAN) ?
+ mvm->disable_power_off_d3 : mvm->disable_power_off)
cmd.flags &=
cpu_to_le16(~DEVICE_POWER_FLAGS_POWER_SAVE_ENA_MSK);
#endif
@@ -933,7 +934,7 @@ static int iwl_mvm_power_set_ba(struct iwl_mvm *mvm,
if (!mvmvif->bf_data.bf_enabled)
return 0;
- if (mvm->cur_ucode == IWL_UCODE_WOWLAN)
+ if (mvm->fwrt.cur_fw_img == IWL_UCODE_WOWLAN)
cmd.ba_escape_timer = cpu_to_le32(IWL_BA_ESCAPE_TIMER_D3);
mvmvif->bf_data.ba_enabled = !(!mvmvif->pm_enabled ||
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index 8999a11..ba7bd04 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -622,7 +622,9 @@ static int rs_tl_turn_on_agg_for_tid(struct iwl_mvm *mvm,
IWL_DEBUG_HT(mvm, "Starting Tx agg: STA: %pM tid: %d\n",
sta->addr, tid);
- ret = ieee80211_start_tx_ba_session(sta, tid, 5000);
+
+ /* start BA session until the peer sends del BA */
+ ret = ieee80211_start_tx_ba_session(sta, tid, 0);
if (ret == -EAGAIN) {
/*
* driver and mac80211 is out of sync
@@ -636,15 +638,31 @@ static int rs_tl_turn_on_agg_for_tid(struct iwl_mvm *mvm,
return ret;
}
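The third argument of ieee80211_start_tx_ba_session() is the session's inactivity timeout in TUs: the old value of 5000 let mac80211 tear the session down after roughly five idle seconds, while 0 keeps it open until one side sends a DELBA. For contrast, the previous behaviour would be requested as:

	/* illustrative: nonzero timeout (TUs) re-enables the idle teardown */
	ret = ieee80211_start_tx_ba_session(sta, tid, 5000);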
-static void rs_tl_turn_on_agg(struct iwl_mvm *mvm, u8 tid,
- struct iwl_lq_sta *lq_data,
+static void rs_tl_turn_on_agg(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta,
+ u8 tid, struct iwl_lq_sta *lq_sta,
struct ieee80211_sta *sta)
{
- if (tid < IWL_MAX_TID_COUNT)
- rs_tl_turn_on_agg_for_tid(mvm, lq_data, tid, sta);
- else
+ struct iwl_mvm_tid_data *tid_data;
+
+ /*
+ * In AP mode, tid can be equal to IWL_MAX_TID_COUNT
+ * when the frame is not QoS
+ */
+ if (WARN_ON_ONCE(tid > IWL_MAX_TID_COUNT)) {
IWL_ERR(mvm, "tid exceeds max TID count: %d/%d\n",
tid, IWL_MAX_TID_COUNT);
+ return;
+ } else if (tid == IWL_MAX_TID_COUNT) {
+ return;
+ }
+
+ tid_data = &mvmsta->tid_data[tid];
+ if ((tid_data->state == IWL_AGG_OFF) &&
+ (lq_sta->tx_agg_tid_en & BIT(tid)) &&
+ (tid_data->tx_count_last >= IWL_MVM_RS_AGG_START_THRESHOLD)) {
+ IWL_DEBUG_RATE(mvm, "try to aggregate tid %d\n", tid);
+ rs_tl_turn_on_agg_for_tid(mvm, lq_sta, tid, sta);
+ }
}
static inline int get_num_of_ant_from_rate(u32 rate_n_flags)
@@ -753,8 +771,38 @@ static int rs_collect_tpc_data(struct iwl_mvm *mvm,
window);
}
+static void rs_update_tid_tpt_stats(struct iwl_mvm *mvm,
+ struct iwl_mvm_sta *mvmsta,
+ u8 tid, int successes)
+{
+ struct iwl_mvm_tid_data *tid_data;
+
+ if (tid >= IWL_MAX_TID_COUNT)
+ return;
+
+ tid_data = &mvmsta->tid_data[tid];
+
+ /*
+	 * Measure if there are enough successful transmits per second.
+	 * These statistics are used only to decide if we can start a
+	 * BA session, so they should be updated only when A-MPDU is
+ * off.
+ */
+ if (tid_data->state != IWL_AGG_OFF)
+ return;
+
+ if (time_is_before_jiffies(tid_data->tpt_meas_start + HZ) ||
+ (tid_data->tx_count >= IWL_MVM_RS_AGG_START_THRESHOLD)) {
+ tid_data->tx_count_last = tid_data->tx_count;
+ tid_data->tx_count = 0;
+ tid_data->tpt_meas_start = jiffies;
+ } else {
+ tid_data->tx_count += successes;
+ }
+}
+
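rs_update_tid_tpt_stats() keeps a rolling one-second measurement window: when the window expires (or enough frames were already counted), the running counter is published into tx_count_last, which rs_tl_turn_on_agg() above compares against IWL_MVM_RS_AGG_START_THRESHOLD before opening a BA session. A self-contained sketch of the same windowing pattern, with hypothetical names:

	struct tpt_window {
		unsigned long start;	/* jiffies when the window opened */
		u32 count;		/* successes accumulated so far */
		u32 last;		/* published result of the last window */
	};

	static void tpt_account(struct tpt_window *w, u32 successes)
	{
		if (time_is_before_jiffies(w->start + HZ)) {
			/* window over: publish the count and restart */
			w->last = w->count;
			w->count = 0;
			w->start = jiffies;
		} else {
			w->count += successes;
		}
	}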
static int rs_collect_tlc_data(struct iwl_mvm *mvm,
- struct iwl_lq_sta *lq_sta,
+ struct iwl_mvm_sta *mvmsta, u8 tid,
struct iwl_scale_tbl_info *tbl,
int scale_index, int attempts, int successes)
{
@@ -764,12 +812,14 @@ static int rs_collect_tlc_data(struct iwl_mvm *mvm,
return -EINVAL;
if (tbl->column != RS_COLUMN_INVALID) {
- struct lq_sta_pers *pers = &lq_sta->pers;
+ struct lq_sta_pers *pers = &mvmsta->lq_sta.pers;
pers->tx_stats[tbl->column][scale_index].total += attempts;
pers->tx_stats[tbl->column][scale_index].success += successes;
}
+ rs_update_tid_tpt_stats(mvm, mvmsta, tid, successes);
+
/* Select window for current tx bit rate */
window = &(tbl->win[scale_index]);
return _rs_collect_tx_data(mvm, tbl, scale_index, attempts, successes,
@@ -1211,12 +1261,7 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
if (time_after(jiffies,
(unsigned long)(lq_sta->last_tx +
(IWL_MVM_RS_IDLE_TIMEOUT * HZ)))) {
- int t;
-
IWL_DEBUG_RATE(mvm, "Tx idle for too long. reinit rs\n");
- for (t = 0; t < IWL_MAX_TID_COUNT; t++)
- ieee80211_stop_tx_ba_session(sta, t);
-
iwl_mvm_rs_rate_init(mvm, sta, info->band, false);
return;
}
@@ -1312,7 +1357,7 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
if (info->status.ampdu_ack_len == 0)
info->status.ampdu_len = 1;
- rs_collect_tlc_data(mvm, lq_sta, curr_tbl, tx_resp_rate.index,
+ rs_collect_tlc_data(mvm, mvmsta, tid, curr_tbl, tx_resp_rate.index,
info->status.ampdu_len,
info->status.ampdu_ack_len);
@@ -1351,7 +1396,7 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
tx_resp_rate.index, 1,
i < retries ? 0 : legacy_success,
reduced_txp);
- rs_collect_tlc_data(mvm, lq_sta, tmp_tbl,
+ rs_collect_tlc_data(mvm, mvmsta, tid, tmp_tbl,
tx_resp_rate.index, 1,
i < retries ? 0 : legacy_success);
}
@@ -1673,14 +1718,14 @@ static void rs_set_amsdu_len(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
struct iwl_scale_tbl_info *tbl,
enum rs_action scale_action)
{
- struct iwl_mvm_sta *sta_priv = iwl_mvm_sta_from_mac80211(sta);
+ struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
if ((!is_vht(&tbl->rate) && !is_ht(&tbl->rate)) ||
tbl->rate.index < IWL_RATE_MCS_5_INDEX ||
scale_action == RS_ACTION_DOWNSCALE)
- sta_priv->tlc_amsdu = false;
+ mvmsta->tlc_amsdu = false;
else
- sta_priv->tlc_amsdu = true;
+ mvmsta->tlc_amsdu = true;
}
/*
@@ -2228,11 +2273,10 @@ static void rs_rate_scale_perform(struct iwl_mvm *mvm,
u16 high_low;
s32 sr;
u8 prev_agg = lq_sta->is_agg;
- struct iwl_mvm_sta *sta_priv = iwl_mvm_sta_from_mac80211(sta);
- struct iwl_mvm_tid_data *tid_data;
+ struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
struct rs_rate *rate;
- lq_sta->is_agg = !!sta_priv->agg_tids;
+ lq_sta->is_agg = !!mvmsta->agg_tids;
/*
* Select rate-scale / modulation-mode table to work with in
@@ -2480,44 +2524,12 @@ lq_update:
}
}
+ if (!ndp)
+ rs_tl_turn_on_agg(mvm, mvmsta, tid, lq_sta, sta);
+
if (done_search && lq_sta->rs_state == RS_STATE_SEARCH_CYCLE_ENDED) {
- /* If the "active" (non-search) mode was legacy,
- * and we've tried switching antennas,
- * but we haven't been able to try HT modes (not available),
- * stay with best antenna legacy modulation for a while
- * before next round of mode comparisons. */
tbl1 = &(lq_sta->lq_info[lq_sta->active_tbl]);
- if (is_legacy(&tbl1->rate)) {
- IWL_DEBUG_RATE(mvm, "LQ: STAY in legacy table\n");
-
- if (tid != IWL_MAX_TID_COUNT) {
- tid_data = &sta_priv->tid_data[tid];
- if (tid_data->state != IWL_AGG_OFF) {
- IWL_DEBUG_RATE(mvm,
- "Stop aggregation on tid %d\n",
- tid);
- ieee80211_stop_tx_ba_session(sta, tid);
- }
- }
- rs_set_stay_in_table(mvm, 1, lq_sta);
- } else {
- /* If we're in an HT mode, and all 3 mode switch actions
- * have been tried and compared, stay in this best modulation
- * mode for a while before next round of mode comparisons. */
- if ((lq_sta->last_tpt > IWL_AGG_TPT_THREHOLD) &&
- (lq_sta->tx_agg_tid_en & (1 << tid)) &&
- (tid != IWL_MAX_TID_COUNT)) {
- tid_data = &sta_priv->tid_data[tid];
- if (tid_data->state == IWL_AGG_OFF && !ndp) {
- IWL_DEBUG_RATE(mvm,
- "try to aggregate tid %d\n",
- tid);
- rs_tl_turn_on_agg(mvm, tid,
- lq_sta, sta);
- }
- }
- rs_set_stay_in_table(mvm, 0, lq_sta);
- }
+ rs_set_stay_in_table(mvm, is_legacy(&tbl1->rate), lq_sta);
}
}
@@ -2900,10 +2912,10 @@ static void rs_get_rate(void *mvm_r, struct ieee80211_sta *sta, void *mvm_sta,
static void *rs_alloc_sta(void *mvm_rate, struct ieee80211_sta *sta,
gfp_t gfp)
{
- struct iwl_mvm_sta *sta_priv = iwl_mvm_sta_from_mac80211(sta);
+ struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
struct iwl_op_mode *op_mode = (struct iwl_op_mode *)mvm_rate;
struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
- struct iwl_lq_sta *lq_sta = &sta_priv->lq_sta;
+ struct iwl_lq_sta *lq_sta = &mvmsta->lq_sta;
IWL_DEBUG_RATE(mvm, "create station rate scale window\n");
@@ -2917,7 +2929,7 @@ static void *rs_alloc_sta(void *mvm_rate, struct ieee80211_sta *sta,
memset(lq_sta->pers.chain_signal, 0, sizeof(lq_sta->pers.chain_signal));
lq_sta->pers.last_rssi = S8_MIN;
- return &sta_priv->lq_sta;
+ return &mvmsta->lq_sta;
}
static int rs_vht_highest_rx_mcs_index(struct ieee80211_sta_vht_cap *vht_cap,
@@ -3109,8 +3121,8 @@ void iwl_mvm_rs_rate_init(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
struct ieee80211_hw *hw = mvm->hw;
struct ieee80211_sta_ht_cap *ht_cap = &sta->ht_cap;
struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap;
- struct iwl_mvm_sta *sta_priv = iwl_mvm_sta_from_mac80211(sta);
- struct iwl_lq_sta *lq_sta = &sta_priv->lq_sta;
+ struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
+ struct iwl_lq_sta *lq_sta = &mvmsta->lq_sta;
struct ieee80211_supported_band *sband;
unsigned long supp; /* must be unsigned long for for_each_set_bit */
@@ -3119,8 +3131,8 @@ void iwl_mvm_rs_rate_init(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
sband = hw->wiphy->bands[band];
- lq_sta->lq.sta_id = sta_priv->sta_id;
- sta_priv->tlc_amsdu = false;
+ lq_sta->lq.sta_id = mvmsta->sta_id;
+ mvmsta->tlc_amsdu = false;
for (j = 0; j < LQ_SIZE; j++)
rs_rate_scale_clear_tbl_windows(mvm, &lq_sta->lq_info[j]);
@@ -3130,7 +3142,7 @@ void iwl_mvm_rs_rate_init(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
IWL_DEBUG_RATE(mvm,
"LQ: *** rate scale station global init for station %d ***\n",
- sta_priv->sta_id);
+ mvmsta->sta_id);
/* TODO: what is a good starting rate for STA? About middle? Maybe not
* the lowest or the highest rate.. Could consider using RSSI from
* previous packets? Need to have IEEE 802.1X auth succeed immediately
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
index 622d543..184c749 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
@@ -67,7 +67,6 @@
#include "iwl-trans.h"
#include "mvm.h"
#include "fw-api.h"
-#include "fw-dbg.h"
/*
* iwl_mvm_rx_rx_phy_cmd - REPLY_RX_PHY_CMD handler
@@ -397,10 +396,12 @@ void iwl_mvm_rx_rx_mpdu(struct iwl_mvm *mvm, struct napi_struct *napi,
rssi = le32_to_cpu(rssi_trig->rssi);
trig_check =
- iwl_fw_dbg_trigger_check_stop(mvm, mvmsta->vif,
+ iwl_fw_dbg_trigger_check_stop(&mvm->fwrt,
+ ieee80211_vif_to_wdev(mvmsta->vif),
trig);
if (trig_check && rx_status->signal < rssi)
- iwl_mvm_fw_dbg_collect_trig(mvm, trig, NULL);
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trig,
+ NULL);
}
if (ieee80211_is_data(hdr->frame_control))
@@ -624,7 +625,7 @@ iwl_mvm_rx_stats_check_trigger(struct iwl_mvm *mvm, struct iwl_rx_packet *pkt)
trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_STATS);
trig_stats = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, NULL, trig))
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt, NULL, trig))
return;
trig_offset = le32_to_cpu(trig_stats->stop_offset);
@@ -636,7 +637,7 @@ iwl_mvm_rx_stats_check_trigger(struct iwl_mvm *mvm, struct iwl_rx_packet *pkt)
if (le32_to_cpup((__le32 *) (pkt->data + trig_offset)) < trig_thold)
return;
- iwl_mvm_fw_dbg_collect_trig(mvm, trig, NULL);
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trig, NULL);
}
void iwl_mvm_handle_rx_statistics(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 71c8b80..67ffd97 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -63,7 +63,6 @@
#include "iwl-trans.h"
#include "mvm.h"
#include "fw-api.h"
-#include "fw-dbg.h"
static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb,
int queue, struct ieee80211_sta *sta)
@@ -854,7 +853,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
rcu_read_lock();
- if (le16_to_cpu(desc->status) & IWL_RX_MPDU_STATUS_SRC_STA_FOUND) {
+ if (desc->status & cpu_to_le16(IWL_RX_MPDU_STATUS_SRC_STA_FOUND)) {
u8 id = desc->sta_id_flags & IWL_RX_MPDU_SIF_STA_ID_MASK;
if (!WARN_ON_ONCE(id >= ARRAY_SIZE(mvm->fw_id_to_mac_id))) {
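Worth noting about the status check rewrite above: swapping the constant instead of the field moves the byte swap to compile time, so big-endian hosts no longer pay a per-packet le16_to_cpu(). A user-space sketch of the pattern, with stand-in conversion macros (the kernel's real ones come via <asm/byteorder.h>); the flag value here is illustrative:

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the kernel's conversion helpers: identity on a
 * little-endian host, a swap on a big-endian one. */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define cpu_to_le16(x) ((uint16_t)__builtin_bswap16(x))
#else
#define cpu_to_le16(x) ((uint16_t)(x))
#endif
#define le16_to_cpu(x) cpu_to_le16(x)

#define STA_FOUND 0x0008 /* illustrative flag value */

int main(void)
{
	uint16_t status = cpu_to_le16(STA_FOUND); /* as the device wrote it */

	/* Swap the variable on every packet at runtime... */
	if (le16_to_cpu(status) & STA_FOUND)
		puts("found (runtime swap of the field)");

	/* ...or let the compiler fold the swap into the constant. */
	if (status & cpu_to_le16(STA_FOUND))
		puts("found (compile-time swap of the constant)");

	return 0;
}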
@@ -908,10 +907,12 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
rssi = le32_to_cpu(rssi_trig->rssi);
trig_check =
- iwl_fw_dbg_trigger_check_stop(mvm, mvmsta->vif,
+ iwl_fw_dbg_trigger_check_stop(&mvm->fwrt,
+ ieee80211_vif_to_wdev(mvmsta->vif),
trig);
if (trig_check && rx_status->signal < rssi)
- iwl_mvm_fw_dbg_collect_trig(mvm, trig, NULL);
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trig,
+ NULL);
}
if (ieee80211_is_data(hdr->frame_control))
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 35e813b..5098361 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -69,7 +69,7 @@
#include <net/mac80211.h>
#include "mvm.h"
-#include "fw-api-scan.h"
+#include "fw/api/scan.h"
#include "iwl-io.h"
#define IWL_DENSE_EBS_SCAN_RATIO 5
@@ -743,7 +743,7 @@ static inline bool iwl_mvm_scan_use_ebs(struct iwl_mvm *mvm,
* 4. it's not a p2p find operation.
*/
return ((capa->flags & IWL_UCODE_TLV_FLAGS_EBS_SUPPORT) &&
- mvm->last_ebs_successful &&
+ mvm->last_ebs_successful && IWL_MVM_ENABLE_EBS &&
vif->type != NL80211_IFTYPE_P2P_DEVICE);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 027ee5e..411a205 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -297,60 +297,6 @@ unlock:
rcu_read_unlock();
}
-static int iwl_mvm_tdls_sta_init(struct iwl_mvm *mvm,
- struct ieee80211_sta *sta)
-{
- unsigned long used_hw_queues;
- struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
- unsigned int wdg_timeout =
- iwl_mvm_get_wd_timeout(mvm, NULL, true, false);
- u32 ac;
-
- lockdep_assert_held(&mvm->mutex);
-
- used_hw_queues = iwl_mvm_get_used_hw_queues(mvm, NULL);
-
- /* Find available queues, and allocate them to the ACs */
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- u8 queue = find_first_zero_bit(&used_hw_queues,
- mvm->first_agg_queue);
-
- if (queue >= mvm->first_agg_queue) {
- IWL_ERR(mvm, "Failed to allocate STA queue\n");
- return -EBUSY;
- }
-
- __set_bit(queue, &used_hw_queues);
- mvmsta->hw_queue[ac] = queue;
- }
-
- /* Found a place for all queues - enable them */
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- iwl_mvm_enable_ac_txq(mvm, mvmsta->hw_queue[ac],
- mvmsta->hw_queue[ac],
- iwl_mvm_ac_to_tx_fifo[ac], 0,
- wdg_timeout);
- mvmsta->tfd_queue_msk |= BIT(mvmsta->hw_queue[ac]);
- }
-
- return 0;
-}
-
-static void iwl_mvm_tdls_sta_deinit(struct iwl_mvm *mvm,
- struct ieee80211_sta *sta)
-{
- struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
- unsigned long sta_msk;
- int i;
-
- lockdep_assert_held(&mvm->mutex);
-
- /* disable the TDLS STA-specific queues */
- sta_msk = mvmsta->tfd_queue_msk;
- for_each_set_bit(i, &sta_msk, sizeof(sta_msk) * BITS_PER_BYTE)
- iwl_mvm_disable_txq(mvm, i, i, IWL_MAX_TID_COUNT, 0);
-}
-
/* Disable aggregations for a bitmap of TIDs for a given station */
static int iwl_mvm_invalidate_sta_queue(struct iwl_mvm *mvm, int queue,
unsigned long disable_agg_tids,
@@ -758,7 +704,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm,
{
struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
struct iwl_trans_txq_scd_cfg cfg = {
- .fifo = iwl_mvm_ac_to_tx_fifo[ac],
+ .fifo = iwl_mvm_mac_ac_to_tx_fifo(mvm, ac),
.sta_id = mvmsta->sta_id,
.tid = tid,
.frame_limit = IWL_FRAME_LIMIT,
@@ -1316,7 +1262,7 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm,
u16 seq = IEEE80211_SEQ_TO_SN(tid_data->seq_number);
cfg.tid = i;
- cfg.fifo = iwl_mvm_ac_to_tx_fifo[ac];
+ cfg.fifo = iwl_mvm_mac_ac_to_tx_fifo(mvm, ac);
cfg.aggregate = (txq_id >= IWL_MVM_DQA_MIN_DATA_QUEUE ||
txq_id ==
IWL_MVM_DQA_BSS_CLIENT_QUEUE);
@@ -1330,8 +1276,50 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm,
mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_READY;
}
}
+}
- atomic_set(&mvm->pending_frames[mvm_sta->sta_id], 0);
+static int iwl_mvm_add_int_sta_common(struct iwl_mvm *mvm,
+ struct iwl_mvm_int_sta *sta,
+ const u8 *addr,
+ u16 mac_id, u16 color)
+{
+ struct iwl_mvm_add_sta_cmd cmd;
+ int ret;
+ u32 status;
+
+ lockdep_assert_held(&mvm->mutex);
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.sta_id = sta->sta_id;
+ cmd.mac_id_n_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(mac_id,
+ color));
+ if (fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_STA_TYPE))
+ cmd.station_type = sta->type;
+
+ if (!iwl_mvm_has_new_tx_api(mvm))
+ cmd.tfd_queue_msk = cpu_to_le32(sta->tfd_queue_msk);
+ cmd.tid_disable_tx = cpu_to_le16(0xffff);
+
+ if (addr)
+ memcpy(cmd.addr, addr, ETH_ALEN);
+
+ ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA,
+ iwl_mvm_add_sta_cmd_size(mvm),
+ &cmd, &status);
+ if (ret)
+ return ret;
+
+ switch (status & IWL_ADD_STA_STATUS_MASK) {
+ case ADD_STA_SUCCESS:
+ IWL_DEBUG_INFO(mvm, "Internal station added.\n");
+ return 0;
+ default:
+ ret = -EIO;
+ IWL_ERR(mvm, "Add internal station failed, status=0x%x\n",
+ status);
+ break;
+ }
+ return ret;
}
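The moved helper separates two failure planes: the return value of iwl_mvm_send_cmd_pdu_status() reports transport problems, while the masked status word reports the firmware's verdict. A reduced sketch of that split, with hypothetical stand-ins for the command plumbing and an illustrative mask:

#include <stdint.h>
#include <stdio.h>

#define ADD_STA_SUCCESS 0x1
#define STA_STATUS_MASK 0xff /* low bits carry the firmware status */

/* Hypothetical stand-in for iwl_mvm_send_cmd_pdu_status(). */
static int send_cmd_status(const void *cmd, uint32_t *status)
{
	(void)cmd;
	*status = ADD_STA_SUCCESS; /* pretend the firmware accepted it */
	return 0;
}

static int add_internal_sta(const void *cmd)
{
	uint32_t status;
	int ret = send_cmd_status(cmd, &status);

	if (ret)
		return ret; /* transport-level failure */

	switch (status & STA_STATUS_MASK) {
	case ADD_STA_SUCCESS:
		return 0;
	default:
		fprintf(stderr, "add failed, status=0x%x\n", status);
		return -5; /* -EIO: firmware rejected the command */
	}
}

int main(void)
{
	return add_internal_sta("dummy cmd");
}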
int iwl_mvm_add_sta(struct iwl_mvm *mvm,
@@ -1342,6 +1330,8 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm,
struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta);
struct iwl_mvm_rxq_dup_data *dup_data;
int i, ret, sta_id;
+ bool sta_update = false;
+ unsigned int sta_flags = 0;
lockdep_assert_held(&mvm->mutex);
@@ -1356,10 +1346,25 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm,
spin_lock_init(&mvm_sta->lock);
- /* In DQA mode, if this is a HW restart, re-alloc existing queues */
- if (iwl_mvm_is_dqa_supported(mvm) &&
- test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
+ /* if this is a HW restart re-alloc existing queues */
+ if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
+ struct iwl_mvm_int_sta tmp_sta = {
+ .sta_id = sta_id,
+ .type = mvm_sta->sta_type,
+ };
+
+ /*
+ * First add an empty station since allocating
+ * a queue requires a valid station
+ */
+ ret = iwl_mvm_add_int_sta_common(mvm, &tmp_sta, sta->addr,
+ mvmvif->id, mvmvif->color);
+ if (ret)
+ goto err;
+
iwl_mvm_realloc_queues_after_restart(mvm, mvm_sta);
+ sta_update = true;
+ sta_flags = iwl_mvm_has_new_tx_api(mvm) ? 0 : STA_MODIFY_QUEUES;
goto update_fw;
}
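The restart branch above encodes a strict ordering: a queue can only be allocated against a station the firmware already knows, so an empty station entry goes in first, queues are re-allocated, and only then is the full station sent as an update (with STA_MODIFY_QUEUES on devices without the new Tx API). A sketch of that sequence, with hypothetical stubs standing in for the driver calls:

#include <stdbool.h>
#include <stdio.h>

#define STA_MODIFY_QUEUES 0x1 /* illustrative flag value */

/* Hypothetical stubs for the driver steps shown in the hunk above. */
static int fw_add_empty_sta(int sta_id)
{
	printf("ADD_STA (empty) for sta %d\n", sta_id);
	return 0;
}

static void realloc_queues(int sta_id)
{
	printf("re-allocating queues for sta %d\n", sta_id);
}

static int fw_send_full_sta(int sta_id, bool update, unsigned int flags)
{
	printf("ADD_STA (%s, flags=0x%x) for sta %d\n",
	       update ? "update" : "new", flags, sta_id);
	return 0;
}

static int readd_sta_after_restart(int sta_id, bool new_tx_api)
{
	int ret = fw_add_empty_sta(sta_id);

	if (ret)
		return ret;

	/* Safe now: the firmware has a valid station for these queues. */
	realloc_queues(sta_id);

	/* The final ADD_STA is an update; devices without the new Tx API
	 * must also tell the firmware which queues changed. */
	return fw_send_full_sta(sta_id, true,
				new_tx_api ? 0 : STA_MODIFY_QUEUES);
}

int main(void)
{
	return readd_sta_after_restart(3, false);
}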
@@ -1376,33 +1381,15 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm,
mvm_sta->sta_type = sta->tdls ? IWL_STA_TDLS_LINK : IWL_STA_LINK;
/* HW restart, don't assume the memory has been zeroed */
- atomic_set(&mvm->pending_frames[sta_id], 0);
mvm_sta->tid_disable_agg = 0xffff; /* No aggs at first */
mvm_sta->tfd_queue_msk = 0;
- /*
- * Allocate new queues for a TDLS station, unless we're in DQA mode,
- * and then they'll be allocated dynamically
- */
- if (!iwl_mvm_is_dqa_supported(mvm) && sta->tdls) {
- ret = iwl_mvm_tdls_sta_init(mvm, sta);
- if (ret)
- return ret;
- } else if (!iwl_mvm_is_dqa_supported(mvm)) {
- for (i = 0; i < IEEE80211_NUM_ACS; i++)
- if (vif->hw_queue[i] != IEEE80211_INVAL_HW_QUEUE)
- mvm_sta->tfd_queue_msk |= BIT(vif->hw_queue[i]);
- }
-
/* for HW restart - reset everything but the sequence number */
for (i = 0; i <= IWL_MAX_TID_COUNT; i++) {
u16 seq = mvm_sta->tid_data[i].seq_number;
memset(&mvm_sta->tid_data[i], 0, sizeof(mvm_sta->tid_data[i]));
mvm_sta->tid_data[i].seq_number = seq;
- if (!iwl_mvm_is_dqa_supported(mvm))
- continue;
-
/*
* Mark all queues for this STA as unallocated and defer TX
* frames until the queue is allocated
@@ -1436,7 +1423,7 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm,
mvm_sta->dup_data = dup_data;
}
- if (iwl_mvm_is_dqa_supported(mvm) && !iwl_mvm_has_new_tx_api(mvm)) {
+ if (!iwl_mvm_has_new_tx_api(mvm)) {
ret = iwl_mvm_reserve_sta_stream(mvm, sta,
ieee80211_vif_type_p2p(vif));
if (ret)
@@ -1444,7 +1431,7 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm,
}
update_fw:
- ret = iwl_mvm_sta_send_to_fw(mvm, sta, false, 0);
+ ret = iwl_mvm_sta_send_to_fw(mvm, sta, sta_update, sta_flags);
if (ret)
goto err;
@@ -1462,8 +1449,6 @@ update_fw:
return 0;
err:
- if (!iwl_mvm_is_dqa_supported(mvm) && sta->tdls)
- iwl_mvm_tdls_sta_deinit(mvm, sta);
return ret;
}
@@ -1536,79 +1521,6 @@ static int iwl_mvm_rm_sta_common(struct iwl_mvm *mvm, u8 sta_id)
return 0;
}
-void iwl_mvm_sta_drained_wk(struct work_struct *wk)
-{
- struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, sta_drained_wk);
- u8 sta_id;
-
- /*
- * The mutex is needed because of the SYNC cmd, but not only: if the
- * work would run concurrently with iwl_mvm_rm_sta, it would run before
- * iwl_mvm_rm_sta sets the station as busy, and exit. Then
- * iwl_mvm_rm_sta would set the station as busy, and nobody will clean
- * that later.
- */
- mutex_lock(&mvm->mutex);
-
- for_each_set_bit(sta_id, mvm->sta_drained, IWL_MVM_STATION_COUNT) {
- int ret;
- struct ieee80211_sta *sta =
- rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id],
- lockdep_is_held(&mvm->mutex));
-
- /*
- * This station is in use or RCU-removed; the latter happens in
- * managed mode, where mac80211 removes the station before we
- * can remove it from firmware (we can only do that after the
- * MAC is marked unassociated), and possibly while the deauth
- * frame to disconnect from the AP is still queued. Then, the
- * station pointer is -ENOENT when the last skb is reclaimed.
- */
- if (!IS_ERR(sta) || PTR_ERR(sta) == -ENOENT)
- continue;
-
- if (PTR_ERR(sta) == -EINVAL) {
- IWL_ERR(mvm, "Drained sta %d, but it is internal?\n",
- sta_id);
- continue;
- }
-
- if (!sta) {
- IWL_ERR(mvm, "Drained sta %d, but it was NULL?\n",
- sta_id);
- continue;
- }
-
- WARN_ON(PTR_ERR(sta) != -EBUSY);
- /* This station was removed and we waited until it got drained,
- * we can now proceed and remove it.
- */
- ret = iwl_mvm_rm_sta_common(mvm, sta_id);
- if (ret) {
- IWL_ERR(mvm,
- "Couldn't remove sta %d after it was drained\n",
- sta_id);
- continue;
- }
- RCU_INIT_POINTER(mvm->fw_id_to_mac_id[sta_id], NULL);
- clear_bit(sta_id, mvm->sta_drained);
-
- if (mvm->tfd_drained[sta_id]) {
- unsigned long i, msk = mvm->tfd_drained[sta_id];
-
- for_each_set_bit(i, &msk, sizeof(msk) * BITS_PER_BYTE)
- iwl_mvm_disable_txq(mvm, i, i,
- IWL_MAX_TID_COUNT, 0);
-
- mvm->tfd_drained[sta_id] = 0;
- IWL_DEBUG_TDLS(mvm, "Drained sta %d, with queues %ld\n",
- sta_id, msk);
- }
- }
-
- mutex_unlock(&mvm->mutex);
-}
-
static void iwl_mvm_disable_sta_queues(struct iwl_mvm *mvm,
struct ieee80211_vif *vif,
struct iwl_mvm_sta *mvm_sta)
@@ -1632,10 +1544,11 @@ static void iwl_mvm_disable_sta_queues(struct iwl_mvm *mvm,
int iwl_mvm_wait_sta_queues_empty(struct iwl_mvm *mvm,
struct iwl_mvm_sta *mvm_sta)
{
- int i, ret;
+ int i;
for (i = 0; i < ARRAY_SIZE(mvm_sta->tid_data); i++) {
u16 txq_id;
+ int ret;
spin_lock_bh(&mvm_sta->lock);
txq_id = mvm_sta->tid_data[i].txq_id;
@@ -1646,10 +1559,10 @@ int iwl_mvm_wait_sta_queues_empty(struct iwl_mvm *mvm,
ret = iwl_trans_wait_txq_empty(mvm->trans, txq_id);
if (ret)
- break;
+ return ret;
}
- return ret;
+ return 0;
}
int iwl_mvm_rm_sta(struct iwl_mvm *mvm,
@@ -1666,79 +1579,65 @@ int iwl_mvm_rm_sta(struct iwl_mvm *mvm,
if (iwl_mvm_has_new_rx_api(mvm))
kfree(mvm_sta->dup_data);
- if ((vif->type == NL80211_IFTYPE_STATION &&
- mvmvif->ap_sta_id == sta_id) ||
- iwl_mvm_is_dqa_supported(mvm)){
- ret = iwl_mvm_drain_sta(mvm, mvm_sta, true);
- if (ret)
- return ret;
- /* flush its queues here since we are freeing mvm_sta */
- ret = iwl_mvm_flush_sta(mvm, mvm_sta, false, 0);
- if (ret)
- return ret;
- if (iwl_mvm_has_new_tx_api(mvm)) {
- ret = iwl_mvm_wait_sta_queues_empty(mvm, mvm_sta);
- } else {
- u32 q_mask = mvm_sta->tfd_queue_msk;
+ ret = iwl_mvm_drain_sta(mvm, mvm_sta, true);
+ if (ret)
+ return ret;
- ret = iwl_trans_wait_tx_queues_empty(mvm->trans,
- q_mask);
- }
- if (ret)
- return ret;
- ret = iwl_mvm_drain_sta(mvm, mvm_sta, false);
-
- /* If DQA is supported - the queues can be disabled now */
- if (iwl_mvm_is_dqa_supported(mvm)) {
- iwl_mvm_disable_sta_queues(mvm, vif, mvm_sta);
- /*
- * If pending_frames is set at this point - it must be
- * driver internal logic error, since queues are empty
- * and removed successuly.
- * warn on it but set it to 0 anyway to avoid station
- * not being removed later in the function
- */
- WARN_ON(atomic_xchg(&mvm->pending_frames[sta_id], 0));
- }
+ /* flush its queues here since we are freeing mvm_sta */
+ ret = iwl_mvm_flush_sta(mvm, mvm_sta, false, 0);
+ if (ret)
+ return ret;
+ if (iwl_mvm_has_new_tx_api(mvm)) {
+ ret = iwl_mvm_wait_sta_queues_empty(mvm, mvm_sta);
+ } else {
+ u32 q_mask = mvm_sta->tfd_queue_msk;
- /* If there is a TXQ still marked as reserved - free it */
- if (iwl_mvm_is_dqa_supported(mvm) &&
- mvm_sta->reserved_queue != IEEE80211_INVAL_HW_QUEUE) {
- u8 reserved_txq = mvm_sta->reserved_queue;
- enum iwl_mvm_queue_status *status;
-
- /*
- * If no traffic has gone through the reserved TXQ - it
- * is still marked as IWL_MVM_QUEUE_RESERVED, and
- * should be manually marked as free again
- */
- spin_lock_bh(&mvm->queue_info_lock);
- status = &mvm->queue_info[reserved_txq].status;
- if (WARN((*status != IWL_MVM_QUEUE_RESERVED) &&
- (*status != IWL_MVM_QUEUE_FREE),
- "sta_id %d reserved txq %d status %d",
- sta_id, reserved_txq, *status)) {
- spin_unlock_bh(&mvm->queue_info_lock);
- return -EINVAL;
- }
+ ret = iwl_trans_wait_tx_queues_empty(mvm->trans,
+ q_mask);
+ }
+ if (ret)
+ return ret;
- *status = IWL_MVM_QUEUE_FREE;
+ ret = iwl_mvm_drain_sta(mvm, mvm_sta, false);
+
+ iwl_mvm_disable_sta_queues(mvm, vif, mvm_sta);
+
+ /* If there is a TXQ still marked as reserved - free it */
+ if (mvm_sta->reserved_queue != IEEE80211_INVAL_HW_QUEUE) {
+ u8 reserved_txq = mvm_sta->reserved_queue;
+ enum iwl_mvm_queue_status *status;
+
+ /*
+ * If no traffic has gone through the reserved TXQ - it
+ * is still marked as IWL_MVM_QUEUE_RESERVED, and
+ * should be manually marked as free again
+ */
+ spin_lock_bh(&mvm->queue_info_lock);
+ status = &mvm->queue_info[reserved_txq].status;
+ if (WARN((*status != IWL_MVM_QUEUE_RESERVED) &&
+ (*status != IWL_MVM_QUEUE_FREE),
+ "sta_id %d reserved txq %d status %d",
+ sta_id, reserved_txq, *status)) {
spin_unlock_bh(&mvm->queue_info_lock);
+ return -EINVAL;
}
- if (vif->type == NL80211_IFTYPE_STATION &&
- mvmvif->ap_sta_id == sta_id) {
- /* if associated - we can't remove the AP STA now */
- if (vif->bss_conf.assoc)
- return ret;
+ *status = IWL_MVM_QUEUE_FREE;
+ spin_unlock_bh(&mvm->queue_info_lock);
+ }
- /* unassoc - go ahead - remove the AP STA now */
- mvmvif->ap_sta_id = IWL_MVM_INVALID_STA;
+ if (vif->type == NL80211_IFTYPE_STATION &&
+ mvmvif->ap_sta_id == sta_id) {
+ /* if associated - we can't remove the AP STA now */
+ if (vif->bss_conf.assoc)
+ return ret;
- /* clear d0i3_ap_sta_id if no longer relevant */
- if (mvm->d0i3_ap_sta_id == sta_id)
- mvm->d0i3_ap_sta_id = IWL_MVM_INVALID_STA;
- }
+ /* unassoc - go ahead - remove the AP STA now */
+ mvmvif->ap_sta_id = IWL_MVM_INVALID_STA;
+
+ /* clear d0i3_ap_sta_id if no longer relevant */
+ if (mvm->d0i3_ap_sta_id == sta_id)
+ mvm->d0i3_ap_sta_id = IWL_MVM_INVALID_STA;
}
/*
@@ -1755,32 +1654,10 @@ int iwl_mvm_rm_sta(struct iwl_mvm *mvm,
* calls the drain worker.
*/
spin_lock_bh(&mvm_sta->lock);
+ spin_unlock_bh(&mvm_sta->lock);
- /*
- * There are frames pending on the AC queues for this station.
- * We need to wait until all the frames are drained...
- */
- if (atomic_read(&mvm->pending_frames[sta_id])) {
- rcu_assign_pointer(mvm->fw_id_to_mac_id[sta_id],
- ERR_PTR(-EBUSY));
- spin_unlock_bh(&mvm_sta->lock);
-
- /* disable TDLS sta queues on drain complete */
- if (sta->tdls) {
- mvm->tfd_drained[sta_id] = mvm_sta->tfd_queue_msk;
- IWL_DEBUG_TDLS(mvm, "Draining TDLS sta %d\n", sta_id);
- }
-
- ret = iwl_mvm_drain_sta(mvm, mvm_sta, true);
- } else {
- spin_unlock_bh(&mvm_sta->lock);
-
- if (!iwl_mvm_is_dqa_supported(mvm) && sta->tdls)
- iwl_mvm_tdls_sta_deinit(mvm, sta);
-
- ret = iwl_mvm_rm_sta_common(mvm, mvm_sta->sta_id);
- RCU_INIT_POINTER(mvm->fw_id_to_mac_id[mvm_sta->sta_id], NULL);
- }
+ ret = iwl_mvm_rm_sta_common(mvm, mvm_sta->sta_id);
+ RCU_INIT_POINTER(mvm->fw_id_to_mac_id[mvm_sta->sta_id], NULL);
return ret;
}
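With the pending-frames bookkeeping gone, the lock/unlock pair retained above has an empty critical section; it presumably serves as a barrier, guaranteeing that any Tx path still holding mvm_sta->lock finishes before the station is torn down. A minimal pthread sketch of the idiom:

#include <pthread.h>

static pthread_mutex_t sta_lock = PTHREAD_MUTEX_INITIALIZER;

/* Empty critical section: by the time we acquire the lock, anyone who
 * held it before us (e.g. a Tx path still touching the station) has
 * finished with it. */
static void wait_for_lock_holders(void)
{
	pthread_mutex_lock(&sta_lock);
	pthread_mutex_unlock(&sta_lock);
}

int main(void)
{
	wait_for_lock_holders();
	/* ...now it is safe to tear the protected object down. */
	return 0;
}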
@@ -1823,50 +1700,6 @@ void iwl_mvm_dealloc_int_sta(struct iwl_mvm *mvm, struct iwl_mvm_int_sta *sta)
sta->sta_id = IWL_MVM_INVALID_STA;
}
-static int iwl_mvm_add_int_sta_common(struct iwl_mvm *mvm,
- struct iwl_mvm_int_sta *sta,
- const u8 *addr,
- u16 mac_id, u16 color)
-{
- struct iwl_mvm_add_sta_cmd cmd;
- int ret;
- u32 status;
-
- lockdep_assert_held(&mvm->mutex);
-
- memset(&cmd, 0, sizeof(cmd));
- cmd.sta_id = sta->sta_id;
- cmd.mac_id_n_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(mac_id,
- color));
- if (fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_STA_TYPE))
- cmd.station_type = sta->type;
-
- if (!iwl_mvm_has_new_tx_api(mvm))
- cmd.tfd_queue_msk = cpu_to_le32(sta->tfd_queue_msk);
- cmd.tid_disable_tx = cpu_to_le16(0xffff);
-
- if (addr)
- memcpy(cmd.addr, addr, ETH_ALEN);
-
- ret = iwl_mvm_send_cmd_pdu_status(mvm, ADD_STA,
- iwl_mvm_add_sta_cmd_size(mvm),
- &cmd, &status);
- if (ret)
- return ret;
-
- switch (status & IWL_ADD_STA_STATUS_MASK) {
- case ADD_STA_SUCCESS:
- IWL_DEBUG_INFO(mvm, "Internal station added.\n");
- return 0;
- default:
- ret = -EIO;
- IWL_ERR(mvm, "Add internal station failed, status=0x%x\n",
- status);
- break;
- }
- return ret;
-}
-
static void iwl_mvm_enable_aux_queue(struct iwl_mvm *mvm)
{
unsigned int wdg_timeout = iwlmvm_mod_params.tfd_q_hang_detect ?
@@ -1879,7 +1712,7 @@ static void iwl_mvm_enable_aux_queue(struct iwl_mvm *mvm)
IWL_MAX_TID_COUNT,
wdg_timeout);
mvm->aux_queue = queue;
- } else if (iwl_mvm_is_dqa_supported(mvm)) {
+ } else {
struct iwl_trans_txq_scd_cfg cfg = {
.fifo = IWL_MVM_TX_FIFO_MCAST,
.sta_id = mvm->aux_sta.sta_id,
@@ -1890,9 +1723,6 @@ static void iwl_mvm_enable_aux_queue(struct iwl_mvm *mvm)
iwl_mvm_enable_txq(mvm, mvm->aux_queue, mvm->aux_queue, 0, &cfg,
wdg_timeout);
- } else {
- iwl_mvm_enable_ac_txq(mvm, mvm->aux_queue, mvm->aux_queue,
- IWL_MVM_TX_FIFO_MCAST, 0, wdg_timeout);
}
}
@@ -1992,7 +1822,7 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
lockdep_assert_held(&mvm->mutex);
- if (iwl_mvm_is_dqa_supported(mvm) && !iwl_mvm_has_new_tx_api(mvm)) {
+ if (!iwl_mvm_has_new_tx_api(mvm)) {
if (vif->type == NL80211_IFTYPE_AP ||
vif->type == NL80211_IFTYPE_ADHOC)
queue = mvm->probe_queue;
@@ -2079,8 +1909,7 @@ int iwl_mvm_send_rm_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
lockdep_assert_held(&mvm->mutex);
- if (iwl_mvm_is_dqa_supported(mvm))
- iwl_mvm_free_bcast_sta_queues(mvm, vif);
+ iwl_mvm_free_bcast_sta_queues(mvm, vif);
ret = iwl_mvm_rm_sta_common(mvm, mvmvif->bcast_sta.sta_id);
if (ret)
@@ -2091,23 +1920,10 @@ int iwl_mvm_send_rm_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
int iwl_mvm_alloc_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- u32 qmask = 0;
lockdep_assert_held(&mvm->mutex);
- if (!iwl_mvm_is_dqa_supported(mvm)) {
- qmask = iwl_mvm_mac_get_queues_mask(vif);
-
- /*
- * The firmware defines the TFD queue mask to only be relevant
- * for *unicast* queues, so the multicast (CAB) queue shouldn't
- * be included. This only happens in NL80211_IFTYPE_AP vif type,
- * so the next line will only have an effect there.
- */
- qmask &= ~BIT(vif->cab_queue);
- }
-
- return iwl_mvm_allocate_int_sta(mvm, &mvmvif->bcast_sta, qmask,
+ return iwl_mvm_allocate_int_sta(mvm, &mvmvif->bcast_sta, 0,
ieee80211_vif_type_p2p(vif),
IWL_STA_GENERAL_PURPOSE);
}
@@ -2119,7 +1935,7 @@ int iwl_mvm_alloc_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
* @mvm: the mvm component
* @vif: the interface to which the broadcast station is added
* @bsta: the broadcast station to add. */
-int iwl_mvm_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
+int iwl_mvm_add_p2p_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
struct iwl_mvm_int_sta *bsta = &mvmvif->bcast_sta;
@@ -2150,7 +1966,7 @@ void iwl_mvm_dealloc_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
* Send the FW a request to remove the station from it's internal data
* structures, and in addition remove it from the local data structure.
*/
-int iwl_mvm_rm_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
+int iwl_mvm_rm_p2p_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
{
int ret;
@@ -2189,9 +2005,6 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
lockdep_assert_held(&mvm->mutex);
- if (!iwl_mvm_is_dqa_supported(mvm))
- return 0;
-
if (WARN_ON(vif->type != NL80211_IFTYPE_AP &&
vif->type != NL80211_IFTYPE_ADHOC))
return -ENOTSUPP;
@@ -2256,9 +2069,6 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
lockdep_assert_held(&mvm->mutex);
- if (!iwl_mvm_is_dqa_supported(mvm))
- return 0;
-
iwl_mvm_flush_sta(mvm, &mvmvif->mcast_sta, true, 0);
iwl_mvm_disable_txq(mvm, mvmvif->cab_queue, vif->cab_queue,
@@ -2508,8 +2318,6 @@ int iwl_mvm_sta_tx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
mvm_sta->tid_disable_agg &= ~BIT(tid);
} else {
/* In DQA-mode the queue isn't removed on agg termination */
- if (!iwl_mvm_is_dqa_supported(mvm))
- mvm_sta->tfd_queue_msk &= ~BIT(queue);
mvm_sta->tid_disable_agg |= BIT(tid);
}
@@ -2612,19 +2420,17 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
ret = -ENXIO;
goto release_locks;
}
- } else if (iwl_mvm_is_dqa_supported(mvm) &&
- unlikely(mvm->queue_info[txq_id].status ==
+ } else if (unlikely(mvm->queue_info[txq_id].status ==
IWL_MVM_QUEUE_SHARED)) {
ret = -ENXIO;
IWL_DEBUG_TX_QUEUES(mvm,
"Can't start tid %d agg on shared queue!\n",
tid);
goto release_locks;
- } else if (!iwl_mvm_is_dqa_supported(mvm) ||
- mvm->queue_info[txq_id].status != IWL_MVM_QUEUE_READY) {
+ } else if (mvm->queue_info[txq_id].status != IWL_MVM_QUEUE_READY) {
txq_id = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id,
- mvm->first_agg_queue,
- mvm->last_agg_queue);
+ IWL_MVM_DQA_MIN_DATA_QUEUE,
+ IWL_MVM_DQA_MAX_DATA_QUEUE);
if (txq_id < 0) {
ret = txq_id;
IWL_ERR(mvm, "Failed to allocate agg queue\n");
@@ -2742,37 +2548,34 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
queue_status = mvm->queue_info[queue].status;
spin_unlock_bh(&mvm->queue_info_lock);
- /* In DQA mode, the existing queue might need to be reconfigured */
- if (iwl_mvm_is_dqa_supported(mvm)) {
- /* Maybe there is no need to even alloc a queue... */
- if (mvm->queue_info[queue].status == IWL_MVM_QUEUE_READY)
- alloc_queue = false;
+ /* Maybe there is no need to even alloc a queue... */
+ if (mvm->queue_info[queue].status == IWL_MVM_QUEUE_READY)
+ alloc_queue = false;
+ /*
+ * Only reconfig the SCD for the queue if the window size has
+ * changed from current (become smaller)
+ */
+ if (!alloc_queue && buf_size < mvmsta->max_agg_bufsize) {
/*
- * Only reconfig the SCD for the queue if the window size has
- * changed from current (become smaller)
+ * If reconfiguring an existing queue, it first must be
+ * drained
*/
- if (!alloc_queue && buf_size < mvmsta->max_agg_bufsize) {
- /*
- * If reconfiguring an existing queue, it first must be
- * drained
- */
- ret = iwl_trans_wait_tx_queues_empty(mvm->trans,
- BIT(queue));
- if (ret) {
- IWL_ERR(mvm,
- "Error draining queue before reconfig\n");
- return ret;
- }
+ ret = iwl_trans_wait_tx_queues_empty(mvm->trans,
+ BIT(queue));
+ if (ret) {
+ IWL_ERR(mvm,
+ "Error draining queue before reconfig\n");
+ return ret;
+ }
- ret = iwl_mvm_reconfig_scd(mvm, queue, cfg.fifo,
- mvmsta->sta_id, tid,
- buf_size, ssn);
- if (ret) {
- IWL_ERR(mvm,
- "Error reconfiguring TXQ #%d\n", queue);
- return ret;
- }
+ ret = iwl_mvm_reconfig_scd(mvm, queue, cfg.fifo,
+ mvmsta->sta_id, tid,
+ buf_size, ssn);
+ if (ret) {
+ IWL_ERR(mvm,
+ "Error reconfiguring TXQ #%d\n", queue);
+ return ret;
}
}
@@ -2868,18 +2671,6 @@ int iwl_mvm_sta_tx_agg_stop(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
"ssn = %d, next_recl = %d\n",
tid_data->ssn, tid_data->next_reclaimed);
- /*
- * There are still packets for this RA / TID in the HW.
- * Not relevant for DQA mode, since there is no need to disable
- * the queue.
- */
- if (!iwl_mvm_is_dqa_supported(mvm) &&
- tid_data->ssn != tid_data->next_reclaimed) {
- tid_data->state = IWL_EMPTYING_HW_QUEUE_DELBA;
- err = 0;
- break;
- }
-
tid_data->ssn = 0xffff;
tid_data->state = IWL_AGG_OFF;
spin_unlock_bh(&mvmsta->lock);
@@ -2887,12 +2678,6 @@ int iwl_mvm_sta_tx_agg_stop(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
iwl_mvm_sta_tx_agg(mvm, sta, tid, txq_id, false);
-
- if (!iwl_mvm_is_dqa_supported(mvm)) {
- int mac_queue = vif->hw_queue[tid_to_mac80211_ac[tid]];
-
- iwl_mvm_disable_txq(mvm, txq_id, mac_queue, tid, 0);
- }
return 0;
case IWL_AGG_STARTING:
case IWL_EMPTYING_HW_QUEUE_ADDBA:
@@ -2962,13 +2747,6 @@ int iwl_mvm_sta_tx_agg_flush(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
iwl_mvm_drain_sta(mvm, mvmsta, false);
iwl_mvm_sta_tx_agg(mvm, sta, tid, txq_id, false);
-
- if (!iwl_mvm_is_dqa_supported(mvm)) {
- int mac_queue = vif->hw_queue[tid_to_mac80211_ac[tid]];
-
- iwl_mvm_disable_txq(mvm, tid_data->txq_id, mac_queue,
- tid, 0);
- }
}
return 0;
@@ -3587,15 +3365,6 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm,
u16 n_queued;
tid_data = &mvmsta->tid_data[tid];
- if (WARN(!iwl_mvm_is_dqa_supported(mvm) &&
- tid_data->state != IWL_AGG_ON &&
- tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA,
- "TID %d state is %d\n",
- tid, tid_data->state)) {
- spin_unlock_bh(&mvmsta->lock);
- ieee80211_sta_eosp(sta);
- return;
- }
n_queued = iwl_mvm_tid_queued(mvm, tid_data);
if (n_queued > remaining) {
@@ -3689,13 +3458,8 @@ void iwl_mvm_sta_modify_disable_tx_ap(struct iwl_mvm *mvm,
mvm_sta->disable_tx = disable;
- /*
- * Tell mac80211 to start/stop queuing tx for this station,
- * but don't stop queuing if there are still pending frames
- * for this station.
- */
- if (disable || !atomic_read(&mvm->pending_frames[mvm_sta->sta_id]))
- ieee80211_sta_block_awake(mvm->hw, sta, disable);
+ /* Tell mac80211 to start/stop queuing tx for this station */
+ ieee80211_sta_block_awake(mvm->hw, sta, disable);
iwl_mvm_sta_modify_disable_tx(mvm, mvm_sta, disable);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
index 05fecbe..d138938 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -222,16 +222,7 @@ struct iwl_mvm_vif;
* we remove the STA of the AP. The flush can be done synchronously against the
* fw.
* Drain means that the fw will drop all the frames sent to a specific station.
- * This is useful when a client (if we are IBSS / GO or AP) disassociates. In
- * that case, we need to drain all the frames for that client from the AC queues
- * that are shared with the other clients. Only then, we can remove the STA in
- * the fw. In order to do so, we track the non-AMPDU packets for each station.
- * If mac80211 removes a STA and if it still has non-AMPDU packets pending in
- * the queues, we mark this station as %EBUSY in %fw_id_to_mac_id, and drop all
- * the frames for this STA (%iwl_mvm_rm_sta). When the last frame is dropped
- * (we know about it with its Tx response), we remove the station in fw and set
- * it as %NULL in %fw_id_to_mac_id: this is the purpose of
- * %iwl_mvm_sta_drained_wk.
+ * This is useful when a client (if we are IBSS / GO or AP) disassociates.
*/
/**
@@ -325,6 +316,10 @@ enum iwl_mvm_agg_state {
* @is_tid_active: has this TID sent traffic in the last
* %IWL_MVM_DQA_QUEUE_TIMEOUT time period. If %txq_id is invalid, this
* field should be ignored.
+ * @tpt_meas_start: time when the throughput measurement started; reset every second
+ * @tx_count_last: number of frames transmitted during the last second
+ * @tx_count: number of frames transmitted since the last reset of
+ * tpt_meas_start
*/
struct iwl_mvm_tid_data {
struct sk_buff_head deferred_tx_frames;
@@ -339,6 +334,9 @@ struct iwl_mvm_tid_data {
u16 ssn;
u16 tx_time;
bool is_tid_active;
+ unsigned long tpt_meas_start;
+ u32 tx_count_last;
+ u32 tx_count;
};
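The three new fields form a one-second accounting window. The update site is not part of this hunk, so the following is only a sketch of how such a window is typically advanced on the Tx path, with user-space stand-ins for jiffies and HZ:

#include <stdint.h>
#include <stdio.h>

/* User-space stand-ins for the kernel's timekeeping. */
static unsigned long jiffies;
#define HZ 250
#define time_after(a, b) ((long)((b) - (a)) < 0)

struct tid_tpt {
	unsigned long tpt_meas_start; /* window start, in jiffies */
	uint32_t tx_count_last;       /* frames sent in the previous window */
	uint32_t tx_count;            /* frames sent in the current window */
};

static void account_tx_frame(struct tid_tpt *t)
{
	if (time_after(jiffies, t->tpt_meas_start + HZ)) {
		/* A second elapsed: close the window, open a new one. */
		t->tx_count_last = t->tx_count;
		t->tx_count = 0;
		t->tpt_meas_start = jiffies;
	}
	t->tx_count++;
}

int main(void)
{
	struct tid_tpt t = { .tpt_meas_start = jiffies };
	int i;

	for (i = 0; i < 1000; i++) {
		jiffies++; /* pretend a tick passes per frame */
		account_tx_frame(&t);
	}
	printf("frames in last full window: %u\n", t.tx_count_last);
	return 0;
}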
struct iwl_mvm_key_pn {
@@ -371,7 +369,6 @@ struct iwl_mvm_rxq_dup_data {
* struct iwl_mvm_sta - representation of a station in the driver
* @sta_id: the index of the station in the fw (will be replaced by id_n_color)
* @tfd_queue_msk: the tfd queues used by the station
- * @hw_queue: per-AC mapping of the TFD queues used by station
* @mac_id_n_color: the MAC context this station is linked to
* @tid_disable_agg: bitmap: if bit(tid) is set, the fw won't send ampdus for
* tid.
@@ -409,7 +406,6 @@ struct iwl_mvm_rxq_dup_data {
struct iwl_mvm_sta {
u32 sta_id;
u32 tfd_queue_msk;
- u8 hw_queue[IEEE80211_NUM_ACS];
u32 mac_id_n_color;
u16 tid_disable_agg;
u8 max_agg_bufsize;
@@ -533,9 +529,9 @@ void iwl_mvm_del_aux_sta(struct iwl_mvm *mvm);
int iwl_mvm_alloc_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
-int iwl_mvm_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
+int iwl_mvm_add_p2p_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
int iwl_mvm_send_rm_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
-int iwl_mvm_rm_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
+int iwl_mvm_rm_p2p_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
int iwl_mvm_allocate_int_sta(struct iwl_mvm *mvm,
@@ -548,7 +544,6 @@ int iwl_mvm_add_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
void iwl_mvm_dealloc_snif_sta(struct iwl_mvm *mvm);
-void iwl_mvm_sta_drained_wk(struct work_struct *wk);
void iwl_mvm_sta_modify_ps_wake(struct iwl_mvm *mvm,
struct ieee80211_sta *sta);
void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index 5a68272..4d03149 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -7,6 +7,7 @@
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -33,6 +34,7 @@
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2017 Intel Deutschland GmbH
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -73,7 +75,6 @@
#include "mvm.h"
#include "iwl-io.h"
#include "iwl-prph.h"
-#include "fw-dbg.h"
/*
* For the high priority TE use a time event type that has similar priority to
@@ -130,10 +131,7 @@ void iwl_mvm_roc_done_wk(struct work_struct *wk)
* issue as it will have to complete before the next command is
* executed, and a new time event means a new command.
*/
- if (iwl_mvm_is_dqa_supported(mvm))
- iwl_mvm_flush_sta(mvm, &mvm->aux_sta, true, CMD_ASYNC);
- else
- iwl_mvm_flush_tx_path(mvm, queues, CMD_ASYNC);
+ iwl_mvm_flush_sta(mvm, &mvm->aux_sta, true, CMD_ASYNC);
}
static void iwl_mvm_roc_finished(struct iwl_mvm *mvm)
@@ -248,7 +246,9 @@ static void iwl_mvm_te_check_trigger(struct iwl_mvm *mvm,
trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_TIME_EVENT);
te_trig = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, te_data->vif, trig))
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt,
+ ieee80211_vif_to_wdev(te_data->vif),
+ trig))
return;
for (i = 0; i < ARRAY_SIZE(te_trig->time_events); i++) {
@@ -263,11 +263,11 @@ static void iwl_mvm_te_check_trigger(struct iwl_mvm *mvm,
!(trig_status_bitmap & BIT(le32_to_cpu(notif->status))))
continue;
- iwl_mvm_fw_dbg_collect_trig(mvm, trig,
- "Time event %d Action 0x%x received status: %d",
- te_data->id,
- le32_to_cpu(notif->action),
- le32_to_cpu(notif->status));
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trig,
+ "Time event %d Action 0x%x received status: %d",
+ te_data->id,
+ le32_to_cpu(notif->action),
+ le32_to_cpu(notif->status));
break;
}
}
@@ -728,8 +728,21 @@ void iwl_mvm_stop_session_protection(struct iwl_mvm *mvm,
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
struct iwl_mvm_time_event_data *te_data = &mvmvif->time_event_data;
+ u32 id;
lockdep_assert_held(&mvm->mutex);
+
+ spin_lock_bh(&mvm->time_event_lock);
+ id = te_data->id;
+ spin_unlock_bh(&mvm->time_event_lock);
+
+ if (id != TE_BSS_STA_AGGRESSIVE_ASSOC) {
+ IWL_DEBUG_TE(mvm,
+ "don't remove TE with id=%u (not session protection)\n",
+ id);
+ return;
+ }
+
iwl_mvm_remove_time_event(mvm, mvmvif, te_data);
}
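The added check snapshots te_data->id under time_event_lock before deciding, since the notification path can rewrite it concurrently; only the session-protection event is removed. A small sketch of the snapshot-then-decide idiom (the id value is illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

static pthread_mutex_t te_lock = PTHREAD_MUTEX_INITIALIZER;
static uint32_t te_id; /* written by the notification path under te_lock */

#define TE_SESSION_PROTECTION 4 /* illustrative id value */

static bool should_remove_time_event(void)
{
	uint32_t id;

	/* Take a consistent snapshot, then decide outside the lock. */
	pthread_mutex_lock(&te_lock);
	id = te_id;
	pthread_mutex_unlock(&te_lock);

	return id == TE_SESSION_PROTECTION;
}

int main(void)
{
	return should_remove_time_event() ? 0 : 1;
}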
@@ -861,8 +874,23 @@ int iwl_mvm_schedule_csa_period(struct iwl_mvm *mvm,
lockdep_assert_held(&mvm->mutex);
if (te_data->running) {
- IWL_DEBUG_TE(mvm, "CS period is already scheduled\n");
- return -EBUSY;
+ u32 id;
+
+ spin_lock_bh(&mvm->time_event_lock);
+ id = te_data->id;
+ spin_unlock_bh(&mvm->time_event_lock);
+
+ if (id == TE_CHANNEL_SWITCH_PERIOD) {
+ IWL_DEBUG_TE(mvm, "CS period is already scheduled\n");
+ return -EBUSY;
+ }
+
+ /*
+ * Remove the session protection time event to allow the
+ * channel switch. If we got here, we just heard a beacon so
+ * the session protection is not needed anymore anyway.
+ */
+ iwl_mvm_remove_time_event(mvm, mvmvif, te_data);
}
time_cmd.action = cpu_to_le32(FW_CTXT_ACTION_ADD);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tof.c b/drivers/net/wireless/intel/iwlwifi/mvm/tof.c
index 634175b..2d0b8a39 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tof.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tof.c
@@ -61,7 +61,7 @@
*
*****************************************************************************/
#include "mvm.h"
-#include "fw-api-tof.h"
+#include "fw/api/tof.h"
#define IWL_MVM_TOF_RANGE_REQ_MAX_ID 256
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tof.h b/drivers/net/wireless/intel/iwlwifi/mvm/tof.h
index 8c3421c..2ff560a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tof.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tof.h
@@ -63,7 +63,7 @@
#ifndef __tof_h__
#define __tof_h__
-#include "fw-api-tof.h"
+#include "fw/api/tof.h"
struct iwl_mvm_tof_data {
struct iwl_tof_config_cmd tof_cfg;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
index 453a785..8876c2a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
@@ -71,7 +71,7 @@
#define IWL_MVM_TEMP_NOTIF_WAIT_TIMEOUT HZ
-static void iwl_mvm_enter_ctkill(struct iwl_mvm *mvm)
+void iwl_mvm_enter_ctkill(struct iwl_mvm *mvm)
{
struct iwl_mvm_tt_mgmt *tt = &mvm->thermal_throttle;
u32 duration = tt->params.ct_kill_duration;
@@ -629,7 +629,7 @@ static int iwl_mvm_tzone_get_temp(struct thermal_zone_device *device,
mutex_lock(&mvm->mutex);
if (!iwl_mvm_firmware_running(mvm) ||
- mvm->cur_ucode != IWL_UCODE_REGULAR) {
+ mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR) {
ret = -EIO;
goto out;
}
@@ -680,7 +680,7 @@ static int iwl_mvm_tzone_set_trip_temp(struct thermal_zone_device *device,
mutex_lock(&mvm->mutex);
if (!iwl_mvm_firmware_running(mvm) ||
- mvm->cur_ucode != IWL_UCODE_REGULAR) {
+ mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR) {
ret = -EIO;
goto out;
}
@@ -795,7 +795,7 @@ static int iwl_mvm_tcool_set_cur_state(struct thermal_cooling_device *cdev,
mutex_lock(&mvm->mutex);
if (!iwl_mvm_firmware_running(mvm) ||
- mvm->cur_ucode != IWL_UCODE_REGULAR) {
+ mvm->fwrt.cur_fw_img != IWL_UCODE_REGULAR) {
ret = -EIO;
goto unlock;
}
@@ -813,7 +813,7 @@ unlock:
return ret;
}
-static struct thermal_cooling_device_ops tcooling_ops = {
+static const struct thermal_cooling_device_ops tcooling_ops = {
.get_max_state = iwl_mvm_tcool_get_max_state,
.get_cur_state = iwl_mvm_tcool_get_cur_state,
.set_cur_state = iwl_mvm_tcool_set_cur_state,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 5fcc9dd..172b5e6 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -74,7 +74,6 @@
#include "iwl-eeprom-parse.h"
#include "mvm.h"
#include "sta.h"
-#include "fw-dbg.h"
static void
iwl_mvm_bar_check_trigger(struct iwl_mvm *mvm, const u8 *addr,
@@ -89,15 +88,15 @@ iwl_mvm_bar_check_trigger(struct iwl_mvm *mvm, const u8 *addr,
trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_BA);
ba_trig = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, NULL, trig))
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt, NULL, trig))
return;
if (!(le16_to_cpu(ba_trig->tx_bar) & BIT(tid)))
return;
- iwl_mvm_fw_dbg_collect_trig(mvm, trig,
- "BAR sent to %pM, tid %d, ssn %d",
- addr, tid, ssn);
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trig,
+ "BAR sent to %pM, tid %d, ssn %d",
+ addr, tid, ssn);
}
#define OPT_HDR(type, skb, off) \
@@ -559,9 +558,6 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm,
{
struct iwl_mvm_vif *mvmvif;
- if (!iwl_mvm_is_dqa_supported(mvm))
- return info->hw_queue;
-
mvmvif = iwl_mvm_vif_from_mac80211(info->control.vif);
switch (info->control.vif->type) {
@@ -660,8 +656,7 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
if (ap_sta_id != IWL_MVM_INVALID_STA)
sta_id = ap_sta_id;
- } else if (iwl_mvm_is_dqa_supported(mvm) &&
- info.control.vif->type == NL80211_IFTYPE_MONITOR) {
+ } else if (info.control.vif->type == NL80211_IFTYPE_MONITOR) {
queue = mvm->aux_queue;
}
}
@@ -680,17 +675,6 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
return -1;
}
- /*
- * Increase the pending frames counter, so that later when a reply comes
- * in and the counter is decreased - we don't start getting negative
- * values.
- * Note that we don't need to make sure it isn't agg'd, since we're
- * TXing non-sta
- * For DQA mode - we shouldn't increase it though
- */
- if (!iwl_mvm_is_dqa_supported(mvm))
- atomic_inc(&mvm->pending_frames[sta_id]);
-
return 0;
}
@@ -758,7 +742,7 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
max_amsdu_len = sta->max_amsdu_len;
/* the Tx FIFO to which this A-MSDU will be routed */
- txf = iwl_mvm_ac_to_tx_fifo[tid_to_mac80211_ac[tid]];
+ txf = iwl_mvm_mac_ac_to_tx_fifo(mvm, tid_to_mac80211_ac[tid]);
/*
* Don't send an AMSDU that will be longer than the TXF.
@@ -767,7 +751,8 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
* fifo to be able to send bursts.
*/
max_amsdu_len = min_t(unsigned int, max_amsdu_len,
- mvm->smem_cfg.lmac[0].txfifo_size[txf] - 256);
+ mvm->fwrt.smem_cfg.lmac[0].txfifo_size[txf] -
+ 256);
if (unlikely(dbg_max_amsdu_len))
max_amsdu_len = min_t(unsigned int, max_amsdu_len,
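The clamp above is easy to verify with concrete numbers: reserving 256 bytes keeps the Tx FIFO from ever being filled by a single A-MSDU, so it can keep bursting. For an illustrative 8192-byte FIFO and a peer advertising an 11454-byte A-MSDU limit:

#include <stdio.h>

#define min_t(type, a, b) ((type)(a) < (type)(b) ? (type)(a) : (type)(b))

int main(void)
{
	unsigned int max_amsdu_len = 11454; /* peer's advertised limit */
	unsigned int txf_size = 8192;       /* illustrative FIFO size */

	/* Never build an A-MSDU larger than the FIFO minus 256 bytes,
	 * so one frame can never fill the FIFO completely. */
	max_amsdu_len = min_t(unsigned int, max_amsdu_len, txf_size - 256);

	printf("effective A-MSDU cap: %u bytes\n", max_amsdu_len); /* 7936 */
	return 0;
}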
@@ -1000,22 +985,13 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
}
}
- if (iwl_mvm_is_dqa_supported(mvm) || is_ampdu)
- txq_id = mvmsta->tid_data[tid].txq_id;
-
- if (sta->tdls && !iwl_mvm_is_dqa_supported(mvm)) {
- /* default to TID 0 for non-QoS packets */
- u8 tdls_tid = tid == IWL_MAX_TID_COUNT ? 0 : tid;
-
- txq_id = mvmsta->hw_queue[tid_to_mac80211_ac[tdls_tid]];
- }
+ txq_id = mvmsta->tid_data[tid].txq_id;
WARN_ON_ONCE(info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM);
/* Check if TXQ needs to be allocated or re-activated */
if (unlikely(txq_id == IWL_MVM_INVALID_QUEUE ||
- !mvmsta->tid_data[tid].is_tid_active) &&
- iwl_mvm_is_dqa_supported(mvm)) {
+ !mvmsta->tid_data[tid].is_tid_active)) {
/* If TXQ needs to be allocated... */
if (txq_id == IWL_MVM_INVALID_QUEUE) {
iwl_mvm_tx_add_stream(mvm, mvmsta, tid, skb);
@@ -1042,7 +1018,7 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
txq_id);
}
- if (iwl_mvm_is_dqa_supported(mvm) && !iwl_mvm_has_new_tx_api(mvm)) {
+ if (!iwl_mvm_has_new_tx_api(mvm)) {
/* Keep track of the time of the last frame for this RA/TID */
mvm->queue_info[txq_id].last_frame_time[tid] = jiffies;
@@ -1076,10 +1052,6 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb,
spin_unlock(&mvmsta->lock);
- /* Increase pending frames count if this isn't AMPDU or DQA queue */
- if (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu)
- atomic_inc(&mvm->pending_frames[mvmsta->sta_id]);
-
return 0;
drop_unlock_sta:
@@ -1148,8 +1120,7 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm,
lockdep_assert_held(&mvmsta->lock);
if ((tid_data->state == IWL_AGG_ON ||
- tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA ||
- iwl_mvm_is_dqa_supported(mvm)) &&
+ tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA) &&
iwl_mvm_tid_queued(mvm, tid_data) == 0) {
/*
* Now that this aggregation or DQA queue is empty tell
@@ -1183,13 +1154,6 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm,
IWL_DEBUG_TX_QUEUES(mvm,
"Can continue DELBA flow ssn = next_recl = %d\n",
tid_data->next_reclaimed);
- if (!iwl_mvm_is_dqa_supported(mvm)) {
- u8 mac80211_ac = tid_to_mac80211_ac[tid];
-
- iwl_mvm_disable_txq(mvm, tid_data->txq_id,
- vif->hw_queue[mac80211_ac], tid,
- CMD_ASYNC);
- }
tid_data->state = IWL_AGG_OFF;
ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
break;
@@ -1301,7 +1265,7 @@ static void iwl_mvm_tx_status_check_trigger(struct iwl_mvm *mvm,
trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_TX_STATUS);
status_trig = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, NULL, trig))
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt, NULL, trig))
return;
for (i = 0; i < ARRAY_SIZE(status_trig->statuses); i++) {
@@ -1312,9 +1276,9 @@ static void iwl_mvm_tx_status_check_trigger(struct iwl_mvm *mvm,
if (status_trig->statuses[i].status != (status & TX_STATUS_MSK))
continue;
- iwl_mvm_fw_dbg_collect_trig(mvm, trig,
- "Tx status %d was received",
- status & TX_STATUS_MSK);
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trig,
+ "Tx status %d was received",
+ status & TX_STATUS_MSK);
break;
}
}
@@ -1373,6 +1337,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
while (!skb_queue_empty(&skbs)) {
struct sk_buff *skb = __skb_dequeue(&skbs);
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ bool flushed = false;
skb_freed++;
@@ -1386,11 +1351,15 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
case TX_STATUS_DIRECT_DONE:
info->flags |= IEEE80211_TX_STAT_ACK;
break;
+ case TX_STATUS_FAIL_FIFO_FLUSHED:
+ case TX_STATUS_FAIL_DRAIN_FLOW:
+ flushed = true;
+ break;
case TX_STATUS_FAIL_DEST_PS:
- /* In DQA, the FW should have stopped the queue and not
+ /* the FW should have stopped the queue and not
* return this status
*/
- WARN_ON(iwl_mvm_is_dqa_supported(mvm));
+ WARN_ON(1);
info->flags |= IEEE80211_TX_STAT_TX_FILTERED;
break;
default:
@@ -1408,7 +1377,7 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
/* Single frame failure in an AMPDU queue => send BAR */
if (info->flags & IEEE80211_TX_CTL_AMPDU &&
!(info->flags & IEEE80211_TX_STAT_ACK) &&
- !(info->flags & IEEE80211_TX_STAT_TX_FILTERED))
+ !(info->flags & IEEE80211_TX_STAT_TX_FILTERED) && !flushed)
info->flags |= IEEE80211_TX_STAT_AMPDU_NO_BACK;
info->flags &= ~IEEE80211_TX_CTL_AMPDU;
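The new flushed flag changes BAR policy: a frame dropped because its queue was flushed or drained was never lost on the air, so flagging AMPDU_NO_BACK (which would provoke a BAR to the peer) is avoided. A compact sketch of the resulting decision, with illustrative status names:

#include <stdbool.h>

enum tx_status {
	TX_OK,
	TX_FAIL_FIFO_FLUSHED,
	TX_FAIL_DRAIN_FLOW,
	TX_FAIL_OTHER,
};

/* Report "no block-ack" (and hence a BAR) only for genuine over-the-air
 * failures of an A-MPDU frame, not for frames we dropped ourselves. */
static bool needs_bar(enum tx_status status, bool was_ampdu, bool acked)
{
	bool flushed = (status == TX_FAIL_FIFO_FLUSHED ||
			status == TX_FAIL_DRAIN_FLOW);

	return was_ampdu && !acked && !flushed;
}

int main(void)
{
	return needs_bar(TX_FAIL_FIFO_FLUSHED, true, false); /* 0: no BAR */
}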
@@ -1446,26 +1415,21 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
ieee80211_tx_status(mvm->hw, skb);
}
- if (iwl_mvm_is_dqa_supported(mvm) || txq_id >= mvm->first_agg_queue) {
- /* If this is an aggregation queue, we use the ssn since:
- * ssn = wifi seq_num % 256.
- * The seq_ctl is the sequence control of the packet to which
- * this Tx response relates. But if there is a hole in the
- * bitmap of the BA we received, this Tx response may allow to
- * reclaim the hole and all the subsequent packets that were
- * already acked. In that case, seq_ctl != ssn, and the next
- * packet to be reclaimed will be ssn and not seq_ctl. In that
- * case, several packets will be reclaimed even if
- * frame_count = 1.
- *
- * The ssn is the index (% 256) of the latest packet that has
- * treated (acked / dropped) + 1.
- */
- next_reclaimed = ssn;
- } else {
- /* The next packet to be reclaimed is the one after this one */
- next_reclaimed = IEEE80211_SEQ_TO_SN(seq_ctl + 0x10);
- }
+ /* This is an aggregation queue or might become one, so we use
+ * the ssn since: ssn = wifi seq_num % 256.
+ * The seq_ctl is the sequence control of the packet to which
+ * this Tx response relates. But if there is a hole in the
+ * bitmap of the BA we received, this Tx response may allow us to
+ * reclaim the hole and all the subsequent packets that were
+ * already acked. In that case, seq_ctl != ssn, and the next
+ * packet to be reclaimed will be ssn and not seq_ctl. In that
+ * case, several packets will be reclaimed even if
+ * frame_count = 1.
+ *
+ * The ssn is the index (% 256) of the latest packet that was
+ * treated (acked / dropped), plus 1.
+ */
+ next_reclaimed = ssn;
IWL_DEBUG_TX_REPLY(mvm,
"TXQ %d status %s (0x%08x)\n",
@@ -1548,49 +1512,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm,
mvmsta = NULL;
}
- /*
- * If the txq is not an AMPDU queue, there is no chance we freed
- * several skbs. Check that out...
- */
- if (iwl_mvm_is_dqa_supported(mvm) || txq_id >= mvm->first_agg_queue)
- goto out;
-
- /* We can't free more than one frame at once on a shared queue */
- WARN_ON(skb_freed > 1);
-
- /* If we have still frames for this STA nothing to do here */
- if (!atomic_sub_and_test(skb_freed, &mvm->pending_frames[sta_id]))
- goto out;
-
- if (mvmsta && mvmsta->vif->type == NL80211_IFTYPE_AP) {
-
- /*
- * If there are no pending frames for this STA and
- * the tx to this station is not disabled, notify
- * mac80211 that this station can now wake up in its
- * STA table.
- * If mvmsta is not NULL, sta is valid.
- */
-
- spin_lock_bh(&mvmsta->lock);
-
- if (!mvmsta->disable_tx)
- ieee80211_sta_block_awake(mvm->hw, sta, false);
-
- spin_unlock_bh(&mvmsta->lock);
- }
-
- if (PTR_ERR(sta) == -EBUSY || PTR_ERR(sta) == -ENOENT) {
- /*
- * We are draining and this was the last packet - pre_rcu_remove
- * has been called already. We might be after the
- * synchronize_net already.
- * Don't rely on iwl_mvm_rm_sta to see the empty Tx queues.
- */
- set_bit(sta_id, mvm->sta_drained);
- schedule_work(&mvm->sta_drained_wk);
- }
-
out:
rcu_read_unlock();
}
@@ -1605,7 +1526,7 @@ static const char *iwl_get_agg_tx_status(u16 status)
AGG_TX_STATE_(BT_PRIO);
AGG_TX_STATE_(FEW_BYTES);
AGG_TX_STATE_(ABORT);
- AGG_TX_STATE_(LAST_SENT_TTL);
+ AGG_TX_STATE_(TX_ON_AIR_DROP);
AGG_TX_STATE_(LAST_SENT_TRY_CNT);
AGG_TX_STATE_(LAST_SENT_BT_KILL);
AGG_TX_STATE_(SCD_QUERY);
@@ -1654,9 +1575,8 @@ static void iwl_mvm_rx_tx_cmd_agg(struct iwl_mvm *mvm,
struct iwl_mvm_sta *mvmsta;
int queue = SEQ_TO_QUEUE(sequence);
- if (WARN_ON_ONCE(queue < mvm->first_agg_queue &&
- (!iwl_mvm_is_dqa_supported(mvm) ||
- (queue != IWL_MVM_DQA_BSS_CLIENT_QUEUE))))
+ if (WARN_ON_ONCE(queue < IWL_MVM_DQA_MIN_DATA_QUEUE &&
+ (queue != IWL_MVM_DQA_BSS_CLIENT_QUEUE)))
return;
if (WARN_ON_ONCE(tid == IWL_TID_NON_QOS))
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index fc5a490..2ea74ab 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -70,9 +70,8 @@
#include "iwl-io.h"
#include "iwl-prph.h"
#include "iwl-csr.h"
-#include "fw-dbg.h"
#include "mvm.h"
-#include "fw-api-rs.h"
+#include "fw/api/rs.h"
/*
* Will return 0 even if the cmd failed when RFKILL is asserted unless
@@ -464,8 +463,8 @@ static void iwl_mvm_dump_umac_error_log(struct iwl_mvm *mvm)
IWL_ERR(mvm,
"Not valid error log pointer 0x%08X for %s uCode\n",
base,
- (mvm->cur_ucode == IWL_UCODE_INIT)
- ? "Init" : "RT");
+ (mvm->fwrt.cur_fw_img == IWL_UCODE_INIT)
+ ? "Init" : "RT");
return;
}
@@ -500,7 +499,7 @@ static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u32 base)
struct iwl_error_event_table table;
u32 val;
- if (mvm->cur_ucode == IWL_UCODE_INIT) {
+ if (mvm->fwrt.cur_fw_img == IWL_UCODE_INIT) {
if (!base)
base = mvm->fw->init_errlog_ptr;
} else {
@@ -512,8 +511,8 @@ static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u32 base)
IWL_ERR(mvm,
"Not valid error log pointer 0x%08X for %s uCode\n",
base,
- (mvm->cur_ucode == IWL_UCODE_INIT)
- ? "Init" : "RT");
+ (mvm->fwrt.cur_fw_img == IWL_UCODE_INIT)
+ ? "Init" : "RT");
return;
}
@@ -1190,14 +1189,15 @@ void iwl_mvm_connection_loss(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_MLME);
trig_mlme = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, vif, trig))
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt,
+ ieee80211_vif_to_wdev(vif), trig))
goto out;
if (trig_mlme->stop_connection_loss &&
--trig_mlme->stop_connection_loss)
goto out;
- iwl_mvm_fw_dbg_collect_trig(mvm, trig, "%s", errmsg);
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trig, "%s", errmsg);
out:
ieee80211_connection_loss(vif);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
index eddaca7..3fc4343 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
@@ -244,7 +244,7 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans,
ctxt_info->hcmd_cfg.cmd_queue_addr =
cpu_to_le64(trans_pcie->txq[trans_pcie->cmd_queue]->dma_addr);
ctxt_info->hcmd_cfg.cmd_queue_size =
- TFD_QUEUE_CB_SIZE(TFD_QUEUE_SIZE_MAX);
+ TFD_QUEUE_CB_SIZE(TFD_CMD_SLOTS);
/* allocate ucode sections in dram and set addresses */
ret = iwl_pcie_ctxt_info_init_fw_sec(trans, fw, ctxt_info);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 84f4ba0..858765f 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -430,6 +430,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0x095B, 0x520A, iwl7265_2ac_cfg)},
{IWL_PCI_DEVICE(0x095A, 0x9000, iwl7265_2ac_cfg)},
{IWL_PCI_DEVICE(0x095A, 0x9400, iwl7265_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x095A, 0x9E10, iwl7265_2ac_cfg)},
/* 8000 Series */
{IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)},
@@ -710,12 +711,23 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
iwl_trans->cfg = cfg_7265d;
}
- if (iwl_trans->cfg->rf_id && cfg == &iwla000_2ac_cfg_hr_cdb) {
- if (iwl_trans->hw_rf_id == CSR_HW_RF_ID_TYPE_JF)
- cfg = &iwla000_2ac_cfg_jf;
- else if (iwl_trans->hw_rf_id == CSR_HW_RF_ID_TYPE_HR)
- cfg = &iwla000_2ac_cfg_hr;
-
+ if (iwl_trans->cfg->rf_id && cfg == &iwla000_2ac_cfg_hr_cdb &&
+ iwl_trans->hw_rev != CSR_HW_REV_TYPE_HR_CDB) {
+ u32 rf_id_chp = CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id);
+ u32 jf_chp_id = CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_JF);
+ u32 hr_chp_id = CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR);
+
+ if (rf_id_chp == jf_chp_id) {
+ if (iwl_trans->hw_rev == CSR_HW_REV_TYPE_QNJ)
+ cfg = &iwla000_2ax_cfg_qnj_jf_b0;
+ else
+ cfg = &iwla000_2ac_cfg_jf;
+ } else if (rf_id_chp == hr_chp_id) {
+ if (iwl_trans->hw_rev == CSR_HW_REV_TYPE_QNJ)
+ cfg = &iwla000_2ax_cfg_qnj_hr_a0;
+ else
+ cfg = &iwla000_2ac_cfg_hr;
+ }
iwl_trans->cfg = cfg;
}
#endif
@@ -825,11 +837,11 @@ static int iwl_pci_resume(struct device *device)
/*
* Enable rfkill interrupt (in order to keep track of the rfkill
* status). Must be locked to avoid processing a possible rfkill
- * interrupt while in iwl_trans_check_hw_rf_kill().
+ * interrupt while in iwl_pcie_check_hw_rf_kill().
*/
mutex_lock(&trans_pcie->mutex);
iwl_enable_rfkill_int(trans);
- iwl_trans_check_hw_rf_kill(trans);
+ iwl_pcie_check_hw_rf_kill(trans);
mutex_unlock(&trans_pcie->mutex);
return 0;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index a1ea9ef..4fb7647 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -661,10 +661,16 @@ static inline void iwl_pcie_sw_reset(struct iwl_trans *trans)
usleep_range(5000, 6000);
}
+static inline u8 iwl_pcie_get_cmd_index(struct iwl_txq *q, u32 index)
+{
+ return index & (q->n_window - 1);
+}
+
static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie,
struct iwl_txq *txq, int idx)
{
- return txq->tfds + trans_pcie->tfd_size * idx;
+ return txq->tfds + trans_pcie->tfd_size * iwl_pcie_get_cmd_index(txq,
+ idx);
}
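The new iwl_pcie_get_cmd_index() relies on n_window being a power of two: masking with n_window - 1 then equals index % n_window without a division. A tiny sketch:

#include <assert.h>
#include <stdint.h>

/* Wrap a free-running index into a ring of n_window entries;
 * only valid when n_window is a power of two. */
static uint8_t get_cmd_index(uint32_t n_window, uint32_t index)
{
	assert(n_window != 0 && (n_window & (n_window - 1)) == 0);
	return index & (n_window - 1);
}

int main(void)
{
	assert(get_cmd_index(32, 0) == 0);
	assert(get_cmd_index(32, 31) == 31);
	assert(get_cmd_index(32, 32) == 0);  /* wraps like 32 % 32 */
	assert(get_cmd_index(32, 100) == 4); /* 100 % 32 */
	return 0;
}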
static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
@@ -726,11 +732,6 @@ static inline bool iwl_queue_used(const struct iwl_txq *q, int i)
!(i < q->read_ptr && i >= q->write_ptr);
}
-static inline u8 get_cmd_index(struct iwl_txq *q, u32 index)
-{
- return index & (q->n_window - 1);
-}
-
static inline bool iwl_is_rfkill_set(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@ -793,7 +794,7 @@ void iwl_pcie_rx_allocator_work(struct work_struct *data);
void iwl_pcie_apm_config(struct iwl_trans *trans);
int iwl_pcie_prepare_card_hw(struct iwl_trans *trans);
void iwl_pcie_synchronize_irqs(struct iwl_trans *trans);
-bool iwl_trans_check_hw_rf_kill(struct iwl_trans *trans);
+bool iwl_pcie_check_hw_rf_kill(struct iwl_trans *trans);
void iwl_trans_pcie_handle_stop_rfkill(struct iwl_trans *trans,
bool was_in_rfkill);
void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq);
@@ -808,6 +809,8 @@ int iwl_pcie_alloc_dma_ptr(struct iwl_trans *trans,
struct iwl_dma_ptr *ptr, size_t size);
void iwl_pcie_free_dma_ptr(struct iwl_trans *trans, struct iwl_dma_ptr *ptr);
void iwl_pcie_apply_destination(struct iwl_trans *trans);
+void iwl_pcie_free_tso_page(struct iwl_trans_pcie *trans_pcie,
+ struct sk_buff *skb);
#ifdef CONFIG_INET
struct iwl_tso_hdr_page *get_page_hdr(struct iwl_trans *trans, size_t len);
#endif
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index 942736d..a06b661 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@ -1168,7 +1168,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
sequence = le16_to_cpu(pkt->hdr.sequence);
index = SEQ_TO_INDEX(sequence);
- cmd_index = get_cmd_index(txq, index);
+ cmd_index = iwl_pcie_get_cmd_index(txq, index);
if (rxq->id == 0)
iwl_op_mode_rx(trans->op_mode, &rxq->napi,
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index b84b782..c59f458 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -307,7 +307,7 @@ int iwl_trans_pcie_gen2_start_fw(struct iwl_trans *trans,
mutex_lock(&trans_pcie->mutex);
/* If platform's RF_KILL switch is NOT set to KILL */
- hw_rfkill = iwl_trans_check_hw_rf_kill(trans);
+ hw_rfkill = iwl_pcie_check_hw_rf_kill(trans);
if (hw_rfkill && !run_in_rfkill) {
ret = -ERFKILL;
goto out;
@@ -340,7 +340,7 @@ int iwl_trans_pcie_gen2_start_fw(struct iwl_trans *trans,
goto out;
/* re-check RF-Kill state since we may have missed the interrupt */
- hw_rfkill = iwl_trans_check_hw_rf_kill(trans);
+ hw_rfkill = iwl_pcie_check_hw_rf_kill(trans);
if (hw_rfkill && !run_in_rfkill)
ret = -ERFKILL;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 3927bbf..2e3e013 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -986,7 +986,7 @@ static int iwl_pcie_load_given_ucode_8000(struct iwl_trans *trans,
&first_ucode_section);
}
-bool iwl_trans_check_hw_rf_kill(struct iwl_trans *trans)
+bool iwl_pcie_check_hw_rf_kill(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
bool hw_rfkill = iwl_is_rfkill_set(trans);
@@ -1252,7 +1252,7 @@ static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
mutex_lock(&trans_pcie->mutex);
/* If platform's RF_KILL switch is NOT set to KILL */
- hw_rfkill = iwl_trans_check_hw_rf_kill(trans);
+ hw_rfkill = iwl_pcie_check_hw_rf_kill(trans);
if (hw_rfkill && !run_in_rfkill) {
ret = -ERFKILL;
goto out;
@@ -1300,7 +1300,7 @@ static int iwl_trans_pcie_start_fw(struct iwl_trans *trans,
ret = iwl_pcie_load_given_ucode(trans, fw);
/* re-check RF-Kill state since we may have missed the interrupt */
- hw_rfkill = iwl_trans_check_hw_rf_kill(trans);
+ hw_rfkill = iwl_pcie_check_hw_rf_kill(trans);
if (hw_rfkill && !run_in_rfkill)
ret = -ERFKILL;
@@ -1663,7 +1663,7 @@ static int _iwl_trans_pcie_start_hw(struct iwl_trans *trans, bool low_power)
trans_pcie->is_down = false;
/* ...rfkill can call stop_device and set it false if needed */
- iwl_trans_check_hw_rf_kill(trans);
+ iwl_pcie_check_hw_rf_kill(trans);
/* Make sure we sync here, because we'll need full access later */
if (low_power)
@@ -1847,8 +1847,8 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
* These bits say the device is running, and should keep running for
* at least a short while (at least as long as MAC_ACCESS_REQ stays 1),
* but they do not indicate that embedded SRAM is restored yet;
- * 3945 and 4965 have volatile SRAM, and must save/restore contents
- * to/from host DRAM when sleeping/waking for power-saving.
+ * HW with volatile SRAM must save/restore contents to/from
+ * host DRAM when sleeping/waking for power-saving.
* Each direction takes approximately 1/4 millisecond; with this
* overhead, it's a good idea to grab and hold MAC_ACCESS_REQUEST if a
* series of register accesses are expected (e.g. reading Event Log),
@@ -1856,8 +1856,9 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
*
* CSR_UCODE_DRV_GP1 register bit MAC_SLEEP == 0 indicates that
* SRAM is okay/restored. We don't check that here because this call
- * is just for hardware register access; but GP1 MAC_SLEEP check is a
- * good idea before accessing 3945/4965 SRAM (e.g. reading Event Log).
+ * is just for hardware register access; but a GP1 MAC_SLEEP
+ * check is a good idea before accessing SRAM on HW whose
+ * SRAM is volatile (e.g. reading the Event Log).
*
* 5000 series and later (including 1000 series) have non-volatile SRAM,
* and do not save/restore SRAM when power cycling.
@@ -2839,7 +2840,7 @@ static struct iwl_trans_dump_data
spin_lock_bh(&cmdq->lock);
ptr = cmdq->write_ptr;
for (i = 0; i < cmdq->n_window; i++) {
- u8 idx = get_cmd_index(cmdq, ptr);
+ u8 idx = iwl_pcie_get_cmd_index(cmdq, ptr);
u32 caplen, cmdlen;
cmdlen = iwl_trans_pcie_get_cmdlen(trans, cmdq->tfds +
@@ -3142,7 +3143,18 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
iwl_set_bit(trans, CSR_HOST_CHICKEN,
CSR_HOST_CHICKEN_PM_IDLE_SRC_DIS_SB_PME);
+#if IS_ENABLED(CONFIG_IWLMVM)
trans->hw_rf_id = iwl_read32(trans, CSR_HW_RF_ID);
+ if (trans->hw_rf_id == CSR_HW_RF_ID_TYPE_HR) {
+ u32 hw_status;
+
+ hw_status = iwl_read_prph(trans, UMAG_GEN_HW_STATUS);
+ if (hw_status & UMAG_GEN_HW_IS_FPGA)
+ trans->cfg = &iwla000_2ax_cfg_qnj_hr_f0;
+ else
+ trans->cfg = &iwla000_2ac_cfg_hr;
+ }
+#endif
iwl_pcie_set_interrupt_capa(pdev, trans);
trans->hw_id = (pdev->device << 16) + pdev->subsystem_device;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index a3795ba..d74613f 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -55,7 +55,7 @@
#include "iwl-csr.h"
#include "iwl-io.h"
#include "internal.h"
-#include "mvm/fw-api.h"
+#include "fw/api/tx.h"
/*
* iwl_pcie_gen2_tx_stop - Stop all Tx DMA channels
@@ -88,14 +88,14 @@ static void iwl_pcie_gen2_update_byte_tbl(struct iwl_txq *txq, u16 byte_cnt,
int num_tbs)
{
struct iwlagn_scd_bc_tbl *scd_bc_tbl = txq->bc_tbl.addr;
- int write_ptr = txq->write_ptr;
+ int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
u8 filled_tfd_size, num_fetch_chunks;
u16 len = byte_cnt;
__le16 bc_ent;
len = DIV_ROUND_UP(len, 4);
- if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX))
+ if (WARN_ON(len > 0xFFF || idx >= txq->n_window))
return;
filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
@@ -111,7 +111,7 @@ static void iwl_pcie_gen2_update_byte_tbl(struct iwl_txq *txq, u16 byte_cnt,
num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;
bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12));
- scd_bc_tbl->tfd_offset[write_ptr] = bc_ent;
+ scd_bc_tbl->tfd_offset[idx] = bc_ent;
}
/*
@@ -176,16 +176,12 @@ static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
* idx is bounded by n_window
*/
- int rd_ptr = txq->read_ptr;
- int idx = get_cmd_index(txq, rd_ptr);
+ int idx = iwl_pcie_get_cmd_index(txq, txq->read_ptr);
lockdep_assert_held(&txq->lock);
- /* We have only q->n_window txq->entries, but we use
- * TFD_QUEUE_SIZE_MAX tfds
- */
iwl_pcie_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
- iwl_pcie_get_tfd(trans_pcie, txq, rd_ptr));
+ iwl_pcie_get_tfd(trans_pcie, txq, idx));
/* free SKB */
if (txq->entries) {
@@ -373,8 +369,9 @@ struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans,
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
struct iwl_tfh_tfd *tfd =
- iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
+ iwl_pcie_get_tfd(trans_pcie, txq, idx);
dma_addr_t tb_phys;
bool amsdu;
int i, len, tb1_len, tb2_len, hdr_len;
@@ -386,10 +383,10 @@ struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans,
(*ieee80211_get_qos_ctl(hdr) &
IEEE80211_QOS_CTL_A_MSDU_PRESENT);
- tb_phys = iwl_pcie_get_first_tb_dma(txq, txq->write_ptr);
+ tb_phys = iwl_pcie_get_first_tb_dma(txq, idx);
/* The first TB points to bi-directional DMA data */
if (!amsdu)
- memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
+ memcpy(&txq->first_tb_bufs[idx], &dev_cmd->hdr,
IWL_FIRST_TB_SIZE);
iwl_pcie_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);
@@ -422,16 +419,16 @@ struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans,
hdr_len = ieee80211_hdrlen(hdr->frame_control);
if (amsdu) {
- if (!iwl_pcie_gen2_build_amsdu(trans, skb, tfd,
- tb1_len + IWL_FIRST_TB_SIZE,
- hdr_len, dev_cmd))
+ if (iwl_pcie_gen2_build_amsdu(trans, skb, tfd,
+ tb1_len + IWL_FIRST_TB_SIZE,
+ hdr_len, dev_cmd))
goto out_err;
/*
* building the A-MSDU might have changed this data, so memcpy
* it now
*/
- memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
+ memcpy(&txq->first_tb_bufs[idx], &dev_cmd->hdr,
IWL_FIRST_TB_SIZE);
return tfd;
}
@@ -484,6 +481,7 @@ int iwl_trans_pcie_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
struct iwl_tx_cmd_gen2 *tx_cmd = (void *)dev_cmd->payload;
struct iwl_cmd_meta *out_meta;
struct iwl_txq *txq = trans_pcie->txq[txq_id];
+ int idx;
void *tfd;
if (WARN_ONCE(!test_bit(txq_id, trans_pcie->queue_used),
@@ -497,16 +495,18 @@ int iwl_trans_pcie_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
spin_lock(&txq->lock);
+ idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
+
/* Set up driver data for this TFD */
- txq->entries[txq->write_ptr].skb = skb;
- txq->entries[txq->write_ptr].cmd = dev_cmd;
+ txq->entries[idx].skb = skb;
+ txq->entries[idx].cmd = dev_cmd;
dev_cmd->hdr.sequence =
cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
- INDEX_TO_SEQ(txq->write_ptr)));
+ INDEX_TO_SEQ(idx)));
/* Set up first empty entry in queue's array of Tx/cmd buffers */
- out_meta = &txq->entries[txq->write_ptr].meta;
+ out_meta = &txq->entries[idx].meta;
out_meta->flags = 0;
tfd = iwl_pcie_gen2_build_tfd(trans, txq, dev_cmd, skb, out_meta);
@@ -562,7 +562,7 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
unsigned long flags;
void *dup_buf = NULL;
dma_addr_t phys_addr;
- int idx, i, cmd_pos;
+ int i, cmd_pos, idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
u16 copy_size, cmd_size, tb0_size;
bool had_nocopy = false;
u8 group_id = iwl_cmd_groupid(cmd->id);
@@ -651,7 +651,6 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
goto free_dup_buf;
}
- idx = get_cmd_index(txq, txq->write_ptr);
out_cmd = txq->entries[idx].cmd;
out_meta = &txq->entries[idx].meta;
@@ -937,6 +936,15 @@ void iwl_pcie_gen2_txq_unmap(struct iwl_trans *trans, int txq_id)
IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
txq_id, txq->read_ptr);
+ if (txq_id != trans_pcie->cmd_queue) {
+ int idx = iwl_pcie_get_cmd_index(txq, txq->read_ptr);
+ struct sk_buff *skb = txq->entries[idx].skb;
+
+ if (WARN_ON_ONCE(!skb))
+ continue;
+
+ iwl_pcie_free_tso_page(trans_pcie, skb);
+ }
iwl_pcie_gen2_free_tfd(trans, txq);
txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr);
@@ -1033,6 +1041,7 @@ int iwl_trans_pcie_dyn_txq_alloc(struct iwl_trans *trans,
.flags = CMD_WANT_SKB,
};
int ret, qid;
+ u32 wr_ptr;
txq = kzalloc(sizeof(*txq), GFP_KERNEL);
if (!txq)
@@ -1060,7 +1069,7 @@ int iwl_trans_pcie_dyn_txq_alloc(struct iwl_trans *trans,
cmd->tfdq_addr = cpu_to_le64(txq->dma_addr);
cmd->byte_cnt_addr = cpu_to_le64(txq->bc_tbl.dma);
- cmd->cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(TFD_QUEUE_SIZE_MAX));
+ cmd->cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(TFD_TX_CMD_SLOTS));
ret = iwl_trans_send_cmd(trans, &hcmd);
if (ret)
@@ -1073,6 +1082,7 @@ int iwl_trans_pcie_dyn_txq_alloc(struct iwl_trans *trans,
rsp = (void *)hcmd.resp_pkt->data;
qid = le16_to_cpu(rsp->queue_number);
+ wr_ptr = le16_to_cpu(rsp->write_pointer);
if (qid >= ARRAY_SIZE(trans_pcie->txq)) {
WARN_ONCE(1, "queue index %d unsupported", qid);
@@ -1088,10 +1098,11 @@ int iwl_trans_pcie_dyn_txq_alloc(struct iwl_trans *trans,
txq->id = qid;
trans_pcie->txq[qid] = txq;
+ wr_ptr &= (TFD_QUEUE_SIZE_MAX - 1);
/* Place first TFD at index corresponding to start sequence number */
- txq->read_ptr = le16_to_cpu(rsp->write_pointer);
- txq->write_ptr = le16_to_cpu(rsp->write_pointer);
+ txq->read_ptr = wr_ptr;
+ txq->write_ptr = wr_ptr;
iwl_write_direct32(trans, HBUS_TARG_WRPTR,
(txq->write_ptr) | (qid << 16));
IWL_DEBUG_TX_QUEUES(trans, "Activate queue %d\n", qid);
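
For reference, the byte-count entry computed in iwl_pcie_gen2_update_byte_tbl() above packs two fields into one 16-bit word: the frame length in dwords in the low 12 bits (hence the len > 0xFFF guard) and the number of 64-byte fetch chunks in the top 4 bits. A hedged sketch of that packing; the field layout is read off the hunk above, not taken from firmware documentation:

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Pack byte count and TFD size into one byte-count-table entry:
 * low 12 bits = length in dwords, top 4 bits = number of 64-byte
 * fetch chunks minus one (layout as read from the hunk above).
 */
static uint16_t pack_bc_ent(uint16_t byte_cnt, uint16_t filled_tfd_size)
{
	uint16_t len = DIV_ROUND_UP(byte_cnt, 4);
	uint16_t num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;

	if (len > 0xFFF)	/* would overflow the 12-bit field */
		return 0;
	return len | (num_fetch_chunks << 12);
}

int main(void)
{
	printf("0x%04x\n", (unsigned)pack_bc_ent(1500, 200));
	return 0;
}
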
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 034bdb4..c645d10 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -43,8 +43,7 @@
#include "iwl-scd.h"
#include "iwl-op-mode.h"
#include "internal.h"
-/* FIXME: need to abstract out TX command (once we know what it looks like) */
-#include "dvm/commands.h"
+#include "fw/api/tx.h"
#define IWL_TX_CRC_SIZE 4
#define IWL_TX_DELIMITER_SIZE 4
@@ -107,7 +106,7 @@ static int iwl_queue_init(struct iwl_txq *q, int slots_num)
q->n_window = slots_num;
/* slots_num must be power-of-two size, otherwise
- * get_cmd_index is broken. */
+ * iwl_pcie_get_cmd_index is broken. */
if (WARN_ON(!is_power_of_2(slots_num)))
return -EINVAL;
@@ -429,7 +428,7 @@ void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
* idx is bounded by n_window
*/
int rd_ptr = txq->read_ptr;
- int idx = get_cmd_index(txq, rd_ptr);
+ int idx = iwl_pcie_get_cmd_index(txq, rd_ptr);
lockdep_assert_held(&txq->lock);
@@ -578,8 +577,8 @@ int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
return 0;
}
-static void iwl_pcie_free_tso_page(struct iwl_trans_pcie *trans_pcie,
- struct sk_buff *skb)
+void iwl_pcie_free_tso_page(struct iwl_trans_pcie *trans_pcie,
+ struct sk_buff *skb)
{
struct page **page_ptr;
@@ -1101,7 +1100,8 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
for (;
txq->read_ptr != tfd_num;
txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) {
- struct sk_buff *skb = txq->entries[txq->read_ptr].skb;
+ int idx = iwl_pcie_get_cmd_index(txq, txq->read_ptr);
+ struct sk_buff *skb = txq->entries[idx].skb;
if (WARN_ON_ONCE(!skb))
continue;
@@ -1110,7 +1110,7 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
__skb_queue_tail(skbs, skb);
- txq->entries[txq->read_ptr].skb = NULL;
+ txq->entries[idx].skb = NULL;
if (!trans->cfg->use_tfh)
iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq);
@@ -1560,7 +1560,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
goto free_dup_buf;
}
- idx = get_cmd_index(txq, txq->write_ptr);
+ idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
out_cmd = txq->entries[idx].cmd;
out_meta = &txq->entries[idx].meta;
@@ -1752,7 +1752,7 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
spin_lock_bh(&txq->lock);
- cmd_index = get_cmd_index(txq, index);
+ cmd_index = iwl_pcie_get_cmd_index(txq, index);
cmd = txq->entries[cmd_index].cmd;
meta = &txq->entries[cmd_index].meta;
group_id = cmd->hdr.group_id;
@@ -2370,7 +2370,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
tb1_len = ALIGN(len, 4);
/* Tell NIC about any 2-byte padding after MAC header */
if (tb1_len != len)
- tx_cmd->tx_flags |= TX_CMD_FLG_MH_PAD_MSK;
+ tx_cmd->tx_flags |= cpu_to_le32(TX_CMD_FLG_MH_PAD);
} else {
tb1_len = len;
}
diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c
index a3c066f..012930d 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_main.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_main.c
@@ -125,8 +125,8 @@ void hostap_remove_interface(struct net_device *dev, int rtnl_locked,
else
unregister_netdev(dev);
- /* dev->destructor = free_netdev() will free the device data, including
- * private data, when removing the device */
+ /* 'dev->needs_free_netdev = true' implies device data, including
+ * private data, will be freed when the device is removed */
}
diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
index c84fd84..56f6e3b 100644
--- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
+++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
@@ -210,7 +210,7 @@ struct ezusb_packet {
} __packed;
/* Table of devices that work or may work with this driver */
-static struct usb_device_id ezusb_table[] = {
+static const struct usb_device_id ezusb_table[] = {
{USB_DEVICE(USB_COMPAQ_VENDOR_ID, USB_COMPAQ_WL215_ID)},
{USB_DEVICE(USB_COMPAQ_VENDOR_ID, USB_HP_WL215_ID)},
{USB_DEVICE(USB_COMPAQ_VENDOR_ID, USB_COMPAQ_W200_ID)},
diff --git a/drivers/net/wireless/intersil/p54/p54usb.c b/drivers/net/wireless/intersil/p54/p54usb.c
index 043bd1c..b0b86f7 100644
--- a/drivers/net/wireless/intersil/p54/p54usb.c
+++ b/drivers/net/wireless/intersil/p54/p54usb.c
@@ -41,7 +41,7 @@ MODULE_FIRMWARE("isl3887usb");
* whenever you add a new device.
*/
-static struct usb_device_id p54u_table[] = {
+static const struct usb_device_id p54u_table[] = {
/* Version 1 devices (pci chip + net2280) */
{USB_DEVICE(0x0411, 0x0050)}, /* Buffalo WLI2-USB2-G54 */
{USB_DEVICE(0x045e, 0x00c2)}, /* Microsoft MN-710 */
diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c
index e53025e..16e54c7 100644
--- a/drivers/net/wireless/marvell/libertas/if_usb.c
+++ b/drivers/net/wireless/marvell/libertas/if_usb.c
@@ -52,7 +52,7 @@ static const struct lbs_fw_table fw_table[] = {
{ MODEL_8682, "libertas/usb8682.bin", NULL }
};
-static struct usb_device_id if_usb_table[] = {
+static const struct usb_device_id if_usb_table[] = {
/* Enter the device signature inside */
{ USB_DEVICE(0x1286, 0x2001), .driver_info = MODEL_8388 },
{ USB_DEVICE(0x05a3, 0x8388), .driver_info = MODEL_8388 },
diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c
index e0ade40..e9104ec 100644
--- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c
+++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c
@@ -31,7 +31,7 @@ module_param_named(fw_name, lbtf_fw_name, charp, 0644);
MODULE_FIRMWARE("lbtf_usb.bin");
-static struct usb_device_id if_usb_table[] = {
+static const struct usb_device_id if_usb_table[] = {
/* Enter the device signature inside */
{ USB_DEVICE(0x1286, 0x2001) },
{ USB_DEVICE(0x05a3, 0x8388) },
diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c
index 16c77c2..7252069 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n.c
@@ -572,6 +572,8 @@ int mwifiex_send_addba(struct mwifiex_private *priv, int tid, u8 *peer_mac)
mwifiex_dbg(priv->adapter, CMD, "cmd: %s: tid %d\n", __func__, tid);
+ memset(&add_ba_req, 0, sizeof(add_ba_req));
+
if ((GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_STA) &&
ISSUPP_TDLS_ENABLED(priv->adapter->fw_cap_info) &&
priv->adapter->is_hw_11ac_capable &&
diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index 06ad2d5..32c5074 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -889,23 +889,15 @@ mwifiex_init_new_priv_params(struct mwifiex_private *priv,
switch (type) {
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_ADHOC:
- priv->bss_num = mwifiex_get_unused_bss_num(adapter,
- MWIFIEX_BSS_TYPE_STA);
priv->bss_role = MWIFIEX_BSS_ROLE_STA;
break;
case NL80211_IFTYPE_P2P_CLIENT:
- priv->bss_num = mwifiex_get_unused_bss_num(adapter,
- MWIFIEX_BSS_TYPE_P2P);
priv->bss_role = MWIFIEX_BSS_ROLE_STA;
break;
case NL80211_IFTYPE_P2P_GO:
- priv->bss_num = mwifiex_get_unused_bss_num(adapter,
- MWIFIEX_BSS_TYPE_P2P);
priv->bss_role = MWIFIEX_BSS_ROLE_UAP;
break;
case NL80211_IFTYPE_AP:
- priv->bss_num = mwifiex_get_unused_bss_num(adapter,
- MWIFIEX_BSS_TYPE_UAP);
priv->bss_role = MWIFIEX_BSS_ROLE_UAP;
break;
default:
@@ -923,6 +915,8 @@ mwifiex_init_new_priv_params(struct mwifiex_private *priv,
adapter->rx_locked = false;
spin_unlock_irqrestore(&adapter->rx_proc_lock, flags);
+ mwifiex_set_mac_address(priv, dev);
+
return 0;
}
@@ -2012,6 +2006,8 @@ static int mwifiex_cfg80211_start_ap(struct wiphy *wiphy,
priv->state_11h.is_11h_active = false;
}
+ mwifiex_config_uap_11d(priv, &params->beacon);
+
if (mwifiex_config_start_uap(priv, bss_cfg)) {
mwifiex_dbg(priv->adapter, ERROR,
"Failed to start AP\n");
@@ -2963,6 +2959,8 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy,
}
mwifiex_init_priv_params(priv, dev);
+ mwifiex_set_mac_address(priv, dev);
+
priv->netdev = dev;
ret = mwifiex_send_cmd(priv, HostCmd_CMD_SET_BSS_MODE,
@@ -2990,7 +2988,6 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy,
dev_net_set(dev, wiphy_net(wiphy));
dev->ieee80211_ptr = &priv->wdev;
dev->ieee80211_ptr->iftype = priv->bss_mode;
- memcpy(dev->dev_addr, wiphy->perm_addr, ETH_ALEN);
SET_NETDEV_DEV(dev, wiphy_dev(wiphy));
dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
@@ -3123,11 +3120,7 @@ int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev)
priv->dfs_chan_sw_workqueue = NULL;
}
/* Clear the priv in adapter */
- priv->netdev->ieee80211_ptr = NULL;
priv->netdev = NULL;
- priv->wdev.iftype = NL80211_IFTYPE_UNSPECIFIED;
-
- priv->media_connected = false;
switch (priv->bss_mode) {
case NL80211_IFTYPE_UNSPECIFIED:
@@ -3395,11 +3388,8 @@ static int mwifiex_cfg80211_suspend(struct wiphy *wiphy,
for (i = 0; i < adapter->priv_num; i++) {
priv = adapter->priv[i];
- if (priv && priv->netdev) {
- mwifiex_stop_net_dev_queue(priv->netdev, adapter);
- if (netif_carrier_ok(priv->netdev))
- netif_carrier_off(priv->netdev);
- }
+ if (priv && priv->netdev)
+ netif_device_detach(priv->netdev);
}
for (i = 0; i < retry_num; i++) {
@@ -3470,11 +3460,8 @@ static int mwifiex_cfg80211_resume(struct wiphy *wiphy)
for (i = 0; i < adapter->priv_num; i++) {
priv = adapter->priv[i];
- if (priv && priv->netdev) {
- if (!netif_carrier_ok(priv->netdev))
- netif_carrier_on(priv->netdev);
- mwifiex_wake_up_net_dev_queue(priv->netdev, adapter);
- }
+ if (priv && priv->netdev)
+ netif_device_attach(priv->netdev);
}
if (!wiphy->wowlan_config)
@@ -4215,7 +4202,7 @@ int mwifiex_init_channel_scan_gap(struct mwifiex_adapter *adapter)
if (adapter->config_bands & BAND_A)
n_channels_a = mwifiex_band_5ghz.n_channels;
- adapter->num_in_chan_stats = max_t(u32, n_channels_bg, n_channels_a);
+ adapter->num_in_chan_stats = n_channels_bg + n_channels_a;
adapter->chan_stats = vmalloc(sizeof(*adapter->chan_stats) *
adapter->num_in_chan_stats);
diff --git a/drivers/net/wireless/marvell/mwifiex/cfp.c b/drivers/net/wireless/marvell/mwifiex/cfp.c
index 6e29943..bfe84e5 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfp.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfp.c
@@ -180,11 +180,9 @@ static struct region_code_mapping region_code_mapping_t[] = {
u8 *mwifiex_11d_code_2_region(u8 code)
{
u8 i;
- u8 size = sizeof(region_code_mapping_t)/
- sizeof(struct region_code_mapping);
/* Look for code in mapping table */
- for (i = 0; i < size; i++)
+ for (i = 0; i < ARRAY_SIZE(region_code_mapping_t); i++)
if (region_code_mapping_t[i].code == code)
return region_code_mapping_t[i].region;
diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
index 8dad528..0edc5d6 100644
--- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c
+++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
@@ -26,6 +26,8 @@
#include "11n.h"
#include "11ac.h"
+static void mwifiex_cancel_pending_ioctl(struct mwifiex_adapter *adapter);
+
/*
* This function initializes a command node.
*
@@ -427,7 +429,7 @@ int mwifiex_alloc_cmd_buffer(struct mwifiex_adapter *adapter)
* The function calls the completion callback for all the command
* buffers that still have response buffers associated with them.
*/
-int mwifiex_free_cmd_buffer(struct mwifiex_adapter *adapter)
+void mwifiex_free_cmd_buffer(struct mwifiex_adapter *adapter)
{
struct cmd_ctrl_node *cmd_array;
u32 i;
@@ -436,7 +438,7 @@ int mwifiex_free_cmd_buffer(struct mwifiex_adapter *adapter)
if (!adapter->cmd_pool) {
mwifiex_dbg(adapter, FATAL,
"info: FREE_CMD_BUF: cmd_pool is null\n");
- return 0;
+ return;
}
cmd_array = adapter->cmd_pool;
@@ -464,8 +466,6 @@ int mwifiex_free_cmd_buffer(struct mwifiex_adapter *adapter)
kfree(adapter->cmd_pool);
adapter->cmd_pool = NULL;
}
-
- return 0;
}
/*
@@ -666,7 +666,7 @@ int mwifiex_send_cmd(struct mwifiex_private *priv, u16 cmd_no,
cmd_no == HostCmd_CMD_802_11_SCAN_EXT) {
mwifiex_queue_scan_cmd(priv, cmd_node);
} else {
- mwifiex_insert_cmd_to_pending_q(adapter, cmd_node, true);
+ mwifiex_insert_cmd_to_pending_q(adapter, cmd_node);
queue_work(adapter->workqueue, &adapter->main_work);
if (cmd_node->wait_q_enabled)
ret = mwifiex_wait_queue_complete(adapter, cmd_node);
@@ -684,11 +684,12 @@ int mwifiex_send_cmd(struct mwifiex_private *priv, u16 cmd_no,
*/
void
mwifiex_insert_cmd_to_pending_q(struct mwifiex_adapter *adapter,
- struct cmd_ctrl_node *cmd_node, u32 add_tail)
+ struct cmd_ctrl_node *cmd_node)
{
struct host_cmd_ds_command *host_cmd = NULL;
u16 command;
unsigned long flags;
+ bool add_tail = true;
host_cmd = (struct host_cmd_ds_command *) (cmd_node->cmd_skb->data);
if (!host_cmd) {
@@ -1075,7 +1076,7 @@ mwifiex_cancel_all_pending_cmd(struct mwifiex_adapter *adapter)
* In case of scan commands, all pending commands in scan pending queue
* are cancelled.
*/
-void
+static void
mwifiex_cancel_pending_ioctl(struct mwifiex_adapter *adapter)
{
struct cmd_ctrl_node *cmd_node = NULL;
diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c
index f6f105a..6f4239b 100644
--- a/drivers/net/wireless/marvell/mwifiex/debugfs.c
+++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c
@@ -940,8 +940,6 @@ mwifiex_reset_write(struct file *file,
if (adapter->if_ops.card_reset) {
dev_info(adapter->dev, "Resetting per request\n");
- adapter->hw_status = MWIFIEX_HW_STATUS_RESET;
- mwifiex_cancel_all_pending_cmd(adapter);
adapter->if_ops.card_reset(adapter);
}
diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c
index 3ecb59f..e11919d 100644
--- a/drivers/net/wireless/marvell/mwifiex/init.c
+++ b/drivers/net/wireless/marvell/mwifiex/init.c
@@ -337,17 +337,9 @@ void mwifiex_wake_up_net_dev_queue(struct net_device *netdev,
struct mwifiex_adapter *adapter)
{
unsigned long dev_queue_flags;
- unsigned int i;
spin_lock_irqsave(&adapter->queue_lock, dev_queue_flags);
-
- for (i = 0; i < netdev->num_tx_queues; i++) {
- struct netdev_queue *txq = netdev_get_tx_queue(netdev, i);
-
- if (netif_tx_queue_stopped(txq))
- netif_tx_wake_queue(txq);
- }
-
+ netif_tx_wake_all_queues(netdev);
spin_unlock_irqrestore(&adapter->queue_lock, dev_queue_flags);
}
@@ -358,30 +350,20 @@ void mwifiex_stop_net_dev_queue(struct net_device *netdev,
struct mwifiex_adapter *adapter)
{
unsigned long dev_queue_flags;
- unsigned int i;
spin_lock_irqsave(&adapter->queue_lock, dev_queue_flags);
-
- for (i = 0; i < netdev->num_tx_queues; i++) {
- struct netdev_queue *txq = netdev_get_tx_queue(netdev, i);
-
- if (!netif_tx_queue_stopped(txq))
- netif_tx_stop_queue(txq);
- }
-
+ netif_tx_stop_all_queues(netdev);
spin_unlock_irqrestore(&adapter->queue_lock, dev_queue_flags);
}
/*
- * This function releases the lock variables and frees the locks and
- * associated locks.
+ * This function invalidates the list heads.
*/
-static void mwifiex_free_lock_list(struct mwifiex_adapter *adapter)
+static void mwifiex_invalidate_lists(struct mwifiex_adapter *adapter)
{
struct mwifiex_private *priv;
s32 i, j;
- /* Free lists */
list_del(&adapter->cmd_free_q);
list_del(&adapter->cmd_pending_q);
list_del(&adapter->scan_pending_q);
@@ -418,9 +400,11 @@ mwifiex_adapter_cleanup(struct mwifiex_adapter *adapter)
mwifiex_cancel_all_pending_cmd(adapter);
wake_up_interruptible(&adapter->cmd_wait_q.wait);
wake_up_interruptible(&adapter->hs_activate_wait_q);
+}
- /* Free lock variables */
- mwifiex_free_lock_list(adapter);
+void mwifiex_free_cmd_buffers(struct mwifiex_adapter *adapter)
+{
+ mwifiex_invalidate_lists(adapter);
/* Free command buffer */
mwifiex_dbg(adapter, INFO, "info: free cmd buffer\n");
diff --git a/drivers/net/wireless/marvell/mwifiex/join.c b/drivers/net/wireless/marvell/mwifiex/join.c
index b89596c..d87aeff 100644
--- a/drivers/net/wireless/marvell/mwifiex/join.c
+++ b/drivers/net/wireless/marvell/mwifiex/join.c
@@ -253,7 +253,7 @@ mwifiex_cmd_append_wps_ie(struct mwifiex_private *priv, u8 **buffer)
priv->wps_ie_len, *buffer);
/* Wrap the generic IE buffer with a pass through TLV type */
- ie_header.type = cpu_to_le16(TLV_TYPE_MGMT_IE);
+ ie_header.type = cpu_to_le16(TLV_TYPE_PASSTHROUGH);
ie_header.len = cpu_to_le16(priv->wps_ie_len);
memcpy(*buffer, &ie_header, sizeof(ie_header));
*buffer += sizeof(ie_header);
diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index f2600b8..ee40b73 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -46,7 +46,7 @@ MODULE_PARM_DESC(mfg_mode, "manufacturing mode enable:1, disable:0");
bool aggr_ctrl;
module_param(aggr_ctrl, bool, 0000);
-MODULE_PARM_DESC(aggr_ctrl, "usb tx aggreataon enable:1, disable:0");
+MODULE_PARM_DESC(aggr_ctrl, "usb tx aggregation enable:1, disable:0");
/*
* This function registers the device and performs all the necessary
@@ -588,7 +588,7 @@ static int _mwifiex_fw_dpc(const struct firmware *firmware, void *context)
if (mwifiex_init_channel_scan_gap(adapter)) {
mwifiex_dbg(adapter, ERROR,
"could not init channel stats table\n");
- goto err_init_fw;
+ goto err_init_chan_scan;
}
if (driver_mode) {
@@ -636,6 +636,7 @@ static int _mwifiex_fw_dpc(const struct firmware *firmware, void *context)
err_add_intf:
vfree(adapter->chan_stats);
+err_init_chan_scan:
wiphy_unregister(adapter->wiphy);
wiphy_free(adapter->wiphy);
err_init_fw:
@@ -653,6 +654,7 @@ err_dnld_fw:
if (adapter->hw_status == MWIFIEX_HW_STATUS_READY) {
pr_debug("info: %s: shutdown mwifiex\n", __func__);
mwifiex_shutdown_drv(adapter);
+ mwifiex_free_cmd_buffers(adapter);
}
init_failed = true;
@@ -665,8 +667,11 @@ done:
release_firmware(adapter->firmware);
adapter->firmware = NULL;
}
- if (init_failed)
+ if (init_failed) {
+ if (adapter->irq_wakeup >= 0)
+ device_init_wakeup(adapter->dev, false);
mwifiex_free_adapter(adapter);
+ }
/* Tell all current and future waiters we're finished */
complete_all(fw_done);
@@ -935,31 +940,44 @@ mwifiex_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
return 0;
}
-/*
- * CFG802.11 network device handler for setting MAC address.
- */
-static int
-mwifiex_set_mac_address(struct net_device *dev, void *addr)
+int mwifiex_set_mac_address(struct mwifiex_private *priv,
+ struct net_device *dev)
{
- struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
- struct sockaddr *hw_addr = addr;
int ret;
+ u64 mac_addr;
- memcpy(priv->curr_addr, hw_addr->sa_data, ETH_ALEN);
+ if (priv->bss_type != MWIFIEX_BSS_TYPE_P2P)
+ goto done;
+
+ mac_addr = ether_addr_to_u64(priv->curr_addr);
+ mac_addr |= BIT_ULL(MWIFIEX_MAC_LOCAL_ADMIN_BIT);
+ u64_to_ether_addr(mac_addr, priv->curr_addr);
/* Send request to firmware */
ret = mwifiex_send_cmd(priv, HostCmd_CMD_802_11_MAC_ADDRESS,
HostCmd_ACT_GEN_SET, 0, NULL, true);
- if (!ret)
- memcpy(priv->netdev->dev_addr, priv->curr_addr, ETH_ALEN);
- else
+ if (ret) {
mwifiex_dbg(priv->adapter, ERROR,
"set mac address failed: ret=%d\n", ret);
+ return ret;
+ }
+done:
memcpy(dev->dev_addr, priv->curr_addr, ETH_ALEN);
+ return 0;
+}
- return ret;
+/* CFG802.11 network device handler for setting MAC address.
+ */
+static int
+mwifiex_ndo_set_mac_address(struct net_device *dev, void *addr)
+{
+ struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
+ struct sockaddr *hw_addr = addr;
+
+ memcpy(priv->curr_addr, hw_addr->sa_data, ETH_ALEN);
+ return mwifiex_set_mac_address(priv, dev);
}
/*
@@ -1252,7 +1270,7 @@ static const struct net_device_ops mwifiex_netdev_ops = {
.ndo_open = mwifiex_open,
.ndo_stop = mwifiex_close,
.ndo_start_xmit = mwifiex_hard_start_xmit,
- .ndo_set_mac_address = mwifiex_set_mac_address,
+ .ndo_set_mac_address = mwifiex_ndo_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
.ndo_tx_timeout = mwifiex_tx_timeout,
.ndo_get_stats = mwifiex_get_stats,
@@ -1296,7 +1314,6 @@ void mwifiex_init_priv_params(struct mwifiex_private *priv,
priv->gen_idx = MWIFIEX_AUTO_IDX_MASK;
priv->num_tx_timeout = 0;
ether_addr_copy(priv->curr_addr, priv->adapter->perm_addr);
- memcpy(dev->dev_addr, priv->curr_addr, ETH_ALEN);
if (GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_STA ||
GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_UAP) {
@@ -1352,26 +1369,12 @@ static void mwifiex_main_work_queue(struct work_struct *work)
mwifiex_main_process(adapter);
}
-/*
- * This function gets called during PCIe function level reset. Required
- * code is extracted from mwifiex_remove_card()
- */
-int
-mwifiex_shutdown_sw(struct mwifiex_adapter *adapter)
+/* Common teardown code used for both device removal and reset */
+static void mwifiex_uninit_sw(struct mwifiex_adapter *adapter)
{
struct mwifiex_private *priv;
int i;
- if (!adapter)
- goto exit_return;
-
- wait_for_completion(adapter->fw_done);
- /* Caller should ensure we aren't suspending while this happens */
- reinit_completion(adapter->fw_done);
-
- priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY);
- mwifiex_deauthenticate(priv, NULL);
-
/* We can no longer handle interrupts once we start doing the teardown
* below.
*/
@@ -1380,6 +1383,7 @@ mwifiex_shutdown_sw(struct mwifiex_adapter *adapter)
adapter->surprise_removed = true;
mwifiex_terminate_workqueue(adapter);
+ adapter->int_status = 0;
/* Stop data */
for (i = 0; i < adapter->priv_num; i++) {
@@ -1393,12 +1397,9 @@ mwifiex_shutdown_sw(struct mwifiex_adapter *adapter)
}
mwifiex_dbg(adapter, CMD, "cmd: calling mwifiex_shutdown_drv...\n");
-
mwifiex_shutdown_drv(adapter);
- if (adapter->if_ops.down_dev)
- adapter->if_ops.down_dev(adapter);
-
mwifiex_dbg(adapter, CMD, "cmd: mwifiex_shutdown_drv done\n");
+
if (atomic_read(&adapter->rx_pending) ||
atomic_read(&adapter->tx_pending) ||
atomic_read(&adapter->cmd_pending)) {
@@ -1420,10 +1421,37 @@ mwifiex_shutdown_sw(struct mwifiex_adapter *adapter)
mwifiex_del_virtual_intf(adapter->wiphy, &priv->wdev);
rtnl_unlock();
}
+
+ wiphy_unregister(adapter->wiphy);
+ wiphy_free(adapter->wiphy);
+ adapter->wiphy = NULL;
+
vfree(adapter->chan_stats);
+ mwifiex_free_cmd_buffers(adapter);
+}
+
+/*
+ * This function gets called during PCIe function level reset.
+ */
+int mwifiex_shutdown_sw(struct mwifiex_adapter *adapter)
+{
+ struct mwifiex_private *priv;
+
+ if (!adapter)
+ return 0;
+
+ wait_for_completion(adapter->fw_done);
+ /* Caller should ensure we aren't suspending while this happens */
+ reinit_completion(adapter->fw_done);
+
+ priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY);
+ mwifiex_deauthenticate(priv, NULL);
+
+ mwifiex_uninit_sw(adapter);
+
+ if (adapter->if_ops.down_dev)
+ adapter->if_ops.down_dev(adapter);
- mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__);
-exit_return:
return 0;
}
EXPORT_SYMBOL_GPL(mwifiex_shutdown_sw);
@@ -1506,6 +1534,7 @@ err_kmalloc:
mwifiex_dbg(adapter, ERROR,
"info: %s: shutdown mwifiex\n", __func__);
mwifiex_shutdown_drv(adapter);
+ mwifiex_free_cmd_buffers(adapter);
}
complete_all(adapter->fw_done);
@@ -1605,10 +1634,8 @@ mwifiex_add_card(void *card, struct completion *fw_done,
adapter->cmd_wait_q.status = 0;
adapter->scan_wait_q_woken = false;
- if ((num_possible_cpus() > 1) || adapter->iface_type == MWIFIEX_USB) {
+ if ((num_possible_cpus() > 1) || adapter->iface_type == MWIFIEX_USB)
adapter->rx_work_enabled = true;
- pr_notice("rx work enabled, cpus %d\n", num_possible_cpus());
- }
adapter->workqueue =
alloc_workqueue("MWIFIEX_WORK_QUEUE",
@@ -1653,8 +1680,11 @@ err_registerdev:
if (adapter->hw_status == MWIFIEX_HW_STATUS_READY) {
pr_debug("info: %s: shutdown mwifiex\n", __func__);
mwifiex_shutdown_drv(adapter);
+ mwifiex_free_cmd_buffers(adapter);
}
err_kmalloc:
+ if (adapter->irq_wakeup >= 0)
+ device_init_wakeup(adapter->dev, false);
mwifiex_free_adapter(adapter);
err_init_sw:
@@ -1676,64 +1706,10 @@ EXPORT_SYMBOL_GPL(mwifiex_add_card);
*/
int mwifiex_remove_card(struct mwifiex_adapter *adapter)
{
- struct mwifiex_private *priv = NULL;
- int i;
-
if (!adapter)
- goto exit_remove;
-
- /* We can no longer handle interrupts once we start doing the teardown
- * below. */
- if (adapter->if_ops.disable_int)
- adapter->if_ops.disable_int(adapter);
-
- adapter->surprise_removed = true;
-
- mwifiex_terminate_workqueue(adapter);
-
- /* Stop data */
- for (i = 0; i < adapter->priv_num; i++) {
- priv = adapter->priv[i];
- if (priv && priv->netdev) {
- mwifiex_stop_net_dev_queue(priv->netdev, adapter);
- if (netif_carrier_ok(priv->netdev))
- netif_carrier_off(priv->netdev);
- }
- }
-
- mwifiex_dbg(adapter, CMD,
- "cmd: calling mwifiex_shutdown_drv...\n");
-
- mwifiex_shutdown_drv(adapter);
- mwifiex_dbg(adapter, CMD,
- "cmd: mwifiex_shutdown_drv done\n");
- if (atomic_read(&adapter->rx_pending) ||
- atomic_read(&adapter->tx_pending) ||
- atomic_read(&adapter->cmd_pending)) {
- mwifiex_dbg(adapter, ERROR,
- "rx_pending=%d, tx_pending=%d,\t"
- "cmd_pending=%d\n",
- atomic_read(&adapter->rx_pending),
- atomic_read(&adapter->tx_pending),
- atomic_read(&adapter->cmd_pending));
- }
-
- for (i = 0; i < adapter->priv_num; i++) {
- priv = adapter->priv[i];
-
- if (!priv)
- continue;
-
- rtnl_lock();
- if (priv->netdev &&
- priv->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED)
- mwifiex_del_virtual_intf(adapter->wiphy, &priv->wdev);
- rtnl_unlock();
- }
- vfree(adapter->chan_stats);
+ return 0;
- wiphy_unregister(adapter->wiphy);
- wiphy_free(adapter->wiphy);
+ mwifiex_uninit_sw(adapter);
if (adapter->irq_wakeup >= 0)
device_init_wakeup(adapter->dev, false);
@@ -1748,7 +1724,6 @@ int mwifiex_remove_card(struct mwifiex_adapter *adapter)
"info: free adapter\n");
mwifiex_free_adapter(adapter);
-exit_remove:
return 0;
}
EXPORT_SYMBOL_GPL(mwifiex_remove_card);
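
Bit 41 of a 48-bit MAC address viewed as a u64 is bit 1 of the first octet, i.e. the locally-administered bit, which is what the P2P path in mwifiex_set_mac_address() above flips before programming the firmware. A small userspace sketch of the same conversion; mac_to_u64()/u64_to_mac() are stand-ins for the kernel's ether_addr_to_u64()/u64_to_ether_addr():

#include <stdint.h>
#include <stdio.h>

#define MAC_LOCAL_ADMIN_BIT 41	/* bit 1 of octet 0 */

/* Stand-in for the kernel's ether_addr_to_u64(). */
static uint64_t mac_to_u64(const uint8_t a[6])
{
	uint64_t v = 0;
	for (int i = 0; i < 6; i++)
		v = (v << 8) | a[i];
	return v;
}

/* Stand-in for the kernel's u64_to_ether_addr(). */
static void u64_to_mac(uint64_t v, uint8_t a[6])
{
	for (int i = 5; i >= 0; i--) {
		a[i] = v & 0xff;
		v >>= 8;
	}
}

int main(void)
{
	uint8_t mac[6] = { 0x00, 0x50, 0x43, 0x12, 0x34, 0x56 };
	uint64_t v = mac_to_u64(mac) | (1ULL << MAC_LOCAL_ADMIN_BIT);

	u64_to_mac(v, mac);	/* first octet becomes 0x02 */
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	return 0;
}
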
diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h
index f8cf307..a76bd79 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.h
+++ b/drivers/net/wireless/marvell/mwifiex/main.h
@@ -165,6 +165,8 @@ enum {
/* Address alignment */
#define MWIFIEX_ALIGN_ADDR(p, a) (((long)(p) + (a) - 1) & ~((a) - 1))
+#define MWIFIEX_MAC_LOCAL_ADMIN_BIT 41
+
/**
*enum mwifiex_debug_level - marvell wifi debug level
*/
@@ -1077,9 +1079,9 @@ int mwifiex_get_debug_info(struct mwifiex_private *,
struct mwifiex_debug_info *);
int mwifiex_alloc_cmd_buffer(struct mwifiex_adapter *adapter);
-int mwifiex_free_cmd_buffer(struct mwifiex_adapter *adapter);
+void mwifiex_free_cmd_buffer(struct mwifiex_adapter *adapter);
+void mwifiex_free_cmd_buffers(struct mwifiex_adapter *adapter);
void mwifiex_cancel_all_pending_cmd(struct mwifiex_adapter *adapter);
-void mwifiex_cancel_pending_ioctl(struct mwifiex_adapter *adapter);
void mwifiex_cancel_pending_scan_cmd(struct mwifiex_adapter *adapter);
void mwifiex_cancel_scan(struct mwifiex_adapter *adapter);
@@ -1087,8 +1089,7 @@ void mwifiex_recycle_cmd_node(struct mwifiex_adapter *adapter,
struct cmd_ctrl_node *cmd_node);
void mwifiex_insert_cmd_to_pending_q(struct mwifiex_adapter *adapter,
- struct cmd_ctrl_node *cmd_node,
- u32 addtail);
+ struct cmd_ctrl_node *cmd_node);
int mwifiex_exec_next_cmd(struct mwifiex_adapter *adapter);
int mwifiex_process_cmdresp(struct mwifiex_adapter *adapter);
@@ -1563,6 +1564,9 @@ int mwifiex_config_start_uap(struct mwifiex_private *priv,
void mwifiex_uap_del_sta_data(struct mwifiex_private *priv,
struct mwifiex_sta_node *node);
+void mwifiex_config_uap_11d(struct mwifiex_private *priv,
+ struct cfg80211_beacon_data *beacon_data);
+
void mwifiex_init_11h_params(struct mwifiex_private *priv);
int mwifiex_is_11h_active(struct mwifiex_private *priv);
int mwifiex_11h_activate(struct mwifiex_private *priv, bool flag);
@@ -1672,6 +1676,8 @@ void mwifiex_process_tx_pause_event(struct mwifiex_private *priv,
void mwifiex_process_multi_chan_event(struct mwifiex_private *priv,
struct sk_buff *event_skb);
void mwifiex_multi_chan_resync(struct mwifiex_adapter *adapter);
+int mwifiex_set_mac_address(struct mwifiex_private *priv,
+ struct net_device *dev);
#ifdef CONFIG_DEBUG_FS
void mwifiex_debugfs_init(void);
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
index 21f2201..cd31494 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
@@ -1043,12 +1043,14 @@ static int mwifiex_pcie_delete_cmdrsp_buf(struct mwifiex_adapter *adapter)
mwifiex_unmap_pci_memory(adapter, card->cmdrsp_buf,
PCI_DMA_FROMDEVICE);
dev_kfree_skb_any(card->cmdrsp_buf);
+ card->cmdrsp_buf = NULL;
}
if (card && card->cmd_buf) {
mwifiex_unmap_pci_memory(adapter, card->cmd_buf,
PCI_DMA_TODEVICE);
dev_kfree_skb_any(card->cmd_buf);
+ card->cmd_buf = NULL;
}
return 0;
}
@@ -1983,7 +1985,8 @@ static int mwifiex_pcie_event_complete(struct mwifiex_adapter *adapter,
* (3) wifi image.
*
* This function bypass the header and bluetooth part, return
- * the offset of tail wifi-only part.
+ * the offset of the tail wifi-only part. If the image is already
+ * wifi-only, that is, it starts with CMD1, return 0.
*/
static int mwifiex_extract_wifi_fw(struct mwifiex_adapter *adapter,
@@ -1991,7 +1994,7 @@ static int mwifiex_extract_wifi_fw(struct mwifiex_adapter *adapter,
const struct mwifiex_fw_data *fwdata;
u32 offset = 0, data_len, dnld_cmd;
int ret = 0;
- bool cmd7_before = false;
+ bool cmd7_before = false, first_cmd = false;
while (1) {
/* Check for integer and buffer overflow */
@@ -2012,20 +2015,29 @@ static int mwifiex_extract_wifi_fw(struct mwifiex_adapter *adapter,
switch (dnld_cmd) {
case MWIFIEX_FW_DNLD_CMD_1:
- if (!cmd7_before) {
- mwifiex_dbg(adapter, ERROR,
- "no cmd7 before cmd1!\n");
+ if (offset + data_len < data_len) {
+ mwifiex_dbg(adapter, ERROR, "bad FW parse\n");
ret = -1;
goto done;
}
- if (offset + data_len < data_len) {
- mwifiex_dbg(adapter, ERROR, "bad FW parse\n");
+
+ /* Image starts with cmd1: already wifi-only firmware */
+ if (!first_cmd) {
+ mwifiex_dbg(adapter, MSG,
+ "input wifi-only firmware\n");
+ return 0;
+ }
+
+ if (!cmd7_before) {
+ mwifiex_dbg(adapter, ERROR,
+ "no cmd7 before cmd1!\n");
ret = -1;
goto done;
}
offset += data_len;
break;
case MWIFIEX_FW_DNLD_CMD_5:
+ first_cmd = true;
/* Check for integer overflow */
if (offset + data_len < data_len) {
mwifiex_dbg(adapter, ERROR, "bad FW parse\n");
@@ -2035,6 +2047,7 @@ static int mwifiex_extract_wifi_fw(struct mwifiex_adapter *adapter,
offset += data_len;
break;
case MWIFIEX_FW_DNLD_CMD_6:
+ first_cmd = true;
/* Check for integer overflow */
if (offset + data_len < data_len) {
mwifiex_dbg(adapter, ERROR, "bad FW parse\n");
@@ -2051,6 +2064,7 @@ static int mwifiex_extract_wifi_fw(struct mwifiex_adapter *adapter,
}
goto done;
case MWIFIEX_FW_DNLD_CMD_7:
+ first_cmd = true;
cmd7_before = true;
break;
default:
@@ -2428,7 +2442,7 @@ exit:
* In case of Rx packets received, the packets are uploaded from card to
* host and processed accordingly.
*/
-static int mwifiex_process_pcie_int(struct mwifiex_adapter *adapter)
+static int mwifiex_process_int_status(struct mwifiex_adapter *adapter)
{
int ret;
u32 pcie_ireg = 0;
@@ -2471,28 +2485,24 @@ static int mwifiex_process_pcie_int(struct mwifiex_adapter *adapter)
}
if (pcie_ireg & HOST_INTR_DNLD_DONE) {
- pcie_ireg &= ~HOST_INTR_DNLD_DONE;
mwifiex_dbg(adapter, INTR, "info: TX DNLD Done\n");
ret = mwifiex_pcie_send_data_complete(adapter);
if (ret)
return ret;
}
if (pcie_ireg & HOST_INTR_UPLD_RDY) {
- pcie_ireg &= ~HOST_INTR_UPLD_RDY;
mwifiex_dbg(adapter, INTR, "info: Rx DATA\n");
ret = mwifiex_pcie_process_recv_data(adapter);
if (ret)
return ret;
}
if (pcie_ireg & HOST_INTR_EVENT_RDY) {
- pcie_ireg &= ~HOST_INTR_EVENT_RDY;
mwifiex_dbg(adapter, INTR, "info: Rx EVENT\n");
ret = mwifiex_pcie_process_event_ready(adapter);
if (ret)
return ret;
}
if (pcie_ireg & HOST_INTR_CMD_DONE) {
- pcie_ireg &= ~HOST_INTR_CMD_DONE;
if (adapter->cmd_sent) {
mwifiex_dbg(adapter, INTR,
"info: CMD sent Interrupt\n");
@@ -2507,75 +2517,13 @@ static int mwifiex_process_pcie_int(struct mwifiex_adapter *adapter)
mwifiex_dbg(adapter, INTR,
"info: cmd_sent=%d data_sent=%d\n",
adapter->cmd_sent, adapter->data_sent);
- if (!card->msi_enable && adapter->ps_state != PS_STATE_SLEEP)
+ if (!card->msi_enable && !card->msix_enable &&
+ adapter->ps_state != PS_STATE_SLEEP)
mwifiex_pcie_enable_host_int(adapter);
return 0;
}
-static int mwifiex_process_msix_int(struct mwifiex_adapter *adapter)
-{
- int ret;
- u32 pcie_ireg;
- unsigned long flags;
-
- spin_lock_irqsave(&adapter->int_lock, flags);
- /* Clear out unused interrupts */
- pcie_ireg = adapter->int_status;
- adapter->int_status = 0;
- spin_unlock_irqrestore(&adapter->int_lock, flags);
-
- if (pcie_ireg & HOST_INTR_DNLD_DONE) {
- mwifiex_dbg(adapter, INTR,
- "info: TX DNLD Done\n");
- ret = mwifiex_pcie_send_data_complete(adapter);
- if (ret)
- return ret;
- }
- if (pcie_ireg & HOST_INTR_UPLD_RDY) {
- mwifiex_dbg(adapter, INTR,
- "info: Rx DATA\n");
- ret = mwifiex_pcie_process_recv_data(adapter);
- if (ret)
- return ret;
- }
- if (pcie_ireg & HOST_INTR_EVENT_RDY) {
- mwifiex_dbg(adapter, INTR,
- "info: Rx EVENT\n");
- ret = mwifiex_pcie_process_event_ready(adapter);
- if (ret)
- return ret;
- }
-
- if (pcie_ireg & HOST_INTR_CMD_DONE) {
- if (adapter->cmd_sent) {
- mwifiex_dbg(adapter, INTR,
- "info: CMD sent Interrupt\n");
- adapter->cmd_sent = false;
- }
- /* Handle command response */
- ret = mwifiex_pcie_process_cmd_complete(adapter);
- if (ret)
- return ret;
- }
-
- mwifiex_dbg(adapter, INTR,
- "info: cmd_sent=%d data_sent=%d\n",
- adapter->cmd_sent, adapter->data_sent);
-
- return 0;
-}
-
-static int mwifiex_process_int_status(struct mwifiex_adapter *adapter)
-{
- struct pcie_service_card *card = adapter->card;
-
- if (card->msix_enable)
- return mwifiex_process_msix_int(adapter);
- else
- return mwifiex_process_pcie_int(adapter);
-}
-
/*
* This function downloads data from driver to card.
*
@@ -2934,7 +2882,6 @@ static void mwifiex_pcie_free_buffers(struct mwifiex_adapter *adapter)
mwifiex_pcie_delete_evtbd_ring(adapter);
mwifiex_pcie_delete_rxbd_ring(adapter);
mwifiex_pcie_delete_txbd_ring(adapter);
- card->cmdrsp_buf = NULL;
}
/*
@@ -3036,15 +2983,14 @@ static void mwifiex_cleanup_pcie(struct mwifiex_adapter *adapter)
"Failed to write driver not-ready signature\n");
}
- mwifiex_pcie_free_buffers(adapter);
+ pci_disable_device(pdev);
- if (pdev) {
- pci_iounmap(pdev, card->pci_mmap);
- pci_iounmap(pdev, card->pci_mmap1);
- pci_disable_device(pdev);
- pci_release_region(pdev, 2);
- pci_release_region(pdev, 0);
- }
+ pci_iounmap(pdev, card->pci_mmap);
+ pci_iounmap(pdev, card->pci_mmap1);
+ pci_release_region(pdev, 2);
+ pci_release_region(pdev, 0);
+
+ mwifiex_pcie_free_buffers(adapter);
}
static int mwifiex_pcie_request_irq(struct mwifiex_adapter *adapter)
@@ -3220,7 +3166,6 @@ static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter)
static void mwifiex_pcie_up_dev(struct mwifiex_adapter *adapter)
{
struct pcie_service_card *card = adapter->card;
- int ret;
struct pci_dev *pdev = card->dev;
/* tx_buf_size might be changed to 3584 by firmware during
@@ -3228,11 +3173,9 @@ static void mwifiex_pcie_up_dev(struct mwifiex_adapter *adapter)
*/
adapter->tx_buf_size = card->pcie.tx_buf_size;
- ret = mwifiex_pcie_alloc_buffers(adapter);
- if (!ret)
- return;
+ mwifiex_pcie_alloc_buffers(adapter);
- pci_iounmap(pdev, card->pci_mmap1);
+ pci_set_master(pdev);
}
/* This function cleans up the PCI-E host memory space. */
@@ -3240,10 +3183,13 @@ static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter)
{
struct pcie_service_card *card = adapter->card;
const struct mwifiex_pcie_card_reg *reg = card->pcie.reg;
+ struct pci_dev *pdev = card->dev;
if (mwifiex_write_reg(adapter, reg->drv_rdy, 0x00000000))
mwifiex_dbg(adapter, ERROR, "Failed to write driver not-ready signature\n");
+ pci_clear_master(pdev);
+
adapter->seq_num = 0;
mwifiex_pcie_free_buffers(adapter);
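
The mwifiex_extract_wifi_fw() changes above walk a combined bluetooth+wifi image as a sequence of (command, length) blocks, guarding every advance with the offset + data_len < data_len overflow test and now returning 0 when the very first block is already CMD1 (a wifi-only image). A condensed sketch of that walk under assumed block framing; the header struct and command values below are illustrative, not the driver's:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative block header; the real layout comes from the firmware
 * file format, which this sketch does not claim to reproduce.
 */
struct fw_block {
	uint32_t cmd;
	uint32_t len;	/* total block length, header included */
};

#define CMD_WIFI	1	/* stand-ins for MWIFIEX_FW_DNLD_CMD_* */
#define CMD_BT		5

/* Return the offset of the wifi-only tail, 0 if the image already
 * starts with a wifi block, or -1 on a malformed image.
 */
static long find_wifi_tail(const uint8_t *img, size_t size)
{
	size_t offset = 0;
	int first_cmd = 1;

	while (offset + sizeof(struct fw_block) <= size) {
		struct fw_block b;

		memcpy(&b, img + offset, sizeof(b));
		if (b.cmd == CMD_WIFI)
			return first_cmd ? 0 : (long)offset;
		first_cmd = 0;
		/* Overflow and bounds check before advancing. */
		if (offset + b.len < b.len || offset + b.len > size ||
		    b.len < sizeof(b))
			return -1;
		offset += b.len;
	}
	return -1;
}

int main(void)
{
	uint8_t img[24] = { 0 };
	struct fw_block bt = { CMD_BT, 16 }, wifi = { CMD_WIFI, 8 };

	memcpy(img, &bt, sizeof(bt));
	memcpy(img + 16, &wifi, sizeof(wifi));
	printf("wifi tail at offset %ld\n", find_wifi_tail(img, sizeof(img)));
	return 0;
}
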
diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c
index ae9630b..c9d41ed 100644
--- a/drivers/net/wireless/marvell/mwifiex/scan.c
+++ b/drivers/net/wireless/marvell/mwifiex/scan.c
@@ -1534,8 +1534,7 @@ int mwifiex_scan_networks(struct mwifiex_private *priv,
list_del(&cmd_node->list);
spin_unlock_irqrestore(&adapter->scan_pending_q_lock,
flags);
- mwifiex_insert_cmd_to_pending_q(adapter, cmd_node,
- true);
+ mwifiex_insert_cmd_to_pending_q(adapter, cmd_node);
queue_work(adapter->workqueue, &adapter->main_work);
/* Perform internal scan synchronously */
@@ -1948,7 +1947,8 @@ mwifiex_active_scan_req_for_passive_chan(struct mwifiex_private *priv)
}
adapter->active_scan_triggered = true;
- ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac);
+ if (priv->scan_request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
+ ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac);
user_scan_cfg->num_ssids = priv->scan_request->n_ssids;
user_scan_cfg->ssid_list = priv->scan_request->ssids;
@@ -2033,7 +2033,7 @@ static void mwifiex_check_next_scan_command(struct mwifiex_private *priv)
struct cmd_ctrl_node, list);
list_del(&cmd_node->list);
spin_unlock_irqrestore(&adapter->scan_pending_q_lock, flags);
- mwifiex_insert_cmd_to_pending_q(adapter, cmd_node, true);
+ mwifiex_insert_cmd_to_pending_q(adapter, cmd_node);
}
return;
@@ -2492,6 +2492,12 @@ mwifiex_update_chan_statistics(struct mwifiex_private *priv,
sizeof(struct mwifiex_chan_stats);
for (i = 0 ; i < num_chan; i++) {
+ if (adapter->survey_idx >= adapter->num_in_chan_stats) {
+ mwifiex_dbg(adapter, WARN,
+ "FW reported too many channel results (max %d)\n",
+ adapter->num_in_chan_stats);
+ return;
+ }
chan_stats.chan_num = fw_chan_stats->chan_num;
chan_stats.bandcfg = fw_chan_stats->bandcfg;
chan_stats.flags = fw_chan_stats->flags;
@@ -2785,7 +2791,6 @@ static int mwifiex_scan_specific_ssid(struct mwifiex_private *priv,
if (!scan_cfg)
return -ENOMEM;
- ether_addr_copy(scan_cfg->random_mac, priv->random_mac);
scan_cfg->ssid_list = req_ssid;
scan_cfg->num_ssids = 1;
diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c
index f81a006..fd5183c 100644
--- a/drivers/net/wireless/marvell/mwifiex/sdio.c
+++ b/drivers/net/wireless/marvell/mwifiex/sdio.c
@@ -390,7 +390,8 @@ mwifiex_sdio_remove(struct sdio_func *func)
mwifiex_dbg(adapter, INFO, "info: SDIO func num=%d\n", func->num);
ret = mwifiex_sdio_read_fw_status(adapter, &firmware_stat);
- if (firmware_stat == FIRMWARE_READY_SDIO && !adapter->mfg_mode) {
+ if (!ret && firmware_stat == FIRMWARE_READY_SDIO &&
+ !adapter->mfg_mode) {
mwifiex_deauthenticate_all(adapter);
priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY);
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
index 534d94a..fb09014 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
@@ -189,9 +189,7 @@ static int mwifiex_cmd_tx_rate_cfg(struct mwifiex_private *priv,
if (pbitmap_rates != NULL) {
rate_scope->hr_dsss_rate_bitmap = cpu_to_le16(pbitmap_rates[0]);
rate_scope->ofdm_rate_bitmap = cpu_to_le16(pbitmap_rates[1]);
- for (i = 0;
- i < sizeof(rate_scope->ht_mcs_rate_bitmap) / sizeof(u16);
- i++)
+ for (i = 0; i < ARRAY_SIZE(rate_scope->ht_mcs_rate_bitmap); i++)
rate_scope->ht_mcs_rate_bitmap[i] =
cpu_to_le16(pbitmap_rates[2 + i]);
if (priv->adapter->fw_api_ver == MWIFIEX_FW_V15) {
@@ -206,9 +204,7 @@ static int mwifiex_cmd_tx_rate_cfg(struct mwifiex_private *priv,
cpu_to_le16(priv->bitmap_rates[0]);
rate_scope->ofdm_rate_bitmap =
cpu_to_le16(priv->bitmap_rates[1]);
- for (i = 0;
- i < sizeof(rate_scope->ht_mcs_rate_bitmap) / sizeof(u16);
- i++)
+ for (i = 0; i < ARRAY_SIZE(rate_scope->ht_mcs_rate_bitmap); i++)
rate_scope->ht_mcs_rate_bitmap[i] =
cpu_to_le16(priv->bitmap_rates[2 + i]);
if (priv->adapter->fw_api_ver == MWIFIEX_FW_V15) {
@@ -1755,7 +1751,7 @@ mwifiex_cmd_tdls_oper(struct mwifiex_private *priv,
struct mwifiex_ie_types_vhtcap *vht_capab;
struct mwifiex_ie_types_aid *aid;
struct mwifiex_ie_types_tdls_idle_timeout *timeout;
- u8 *pos, qos_info;
+ u8 *pos;
u16 config_len = 0;
struct station_parameters *params = priv->sta_params;
@@ -1789,12 +1785,11 @@ mwifiex_cmd_tdls_oper(struct mwifiex_private *priv,
put_unaligned_le16(params->capability, pos);
config_len += sizeof(params->capability);
- qos_info = params->uapsd_queues | (params->max_sp << 5);
- wmm_qos_info = (struct mwifiex_ie_types_qos_info *)(pos +
- config_len);
+ wmm_qos_info = (void *)(pos + config_len);
wmm_qos_info->header.type = cpu_to_le16(WLAN_EID_QOS_CAPA);
- wmm_qos_info->header.len = cpu_to_le16(sizeof(qos_info));
- wmm_qos_info->qos_info = qos_info;
+ wmm_qos_info->header.len =
+ cpu_to_le16(sizeof(wmm_qos_info->qos_info));
+ wmm_qos_info->qos_info = 0;
config_len += sizeof(struct mwifiex_ie_types_qos_info);
if (params->ht_capa) {
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
index 2945775..0fba5b1 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
@@ -298,9 +298,8 @@ static int mwifiex_ret_tx_rate_cfg(struct mwifiex_private *priv,
priv->bitmap_rates[1] =
le16_to_cpu(rate_scope->ofdm_rate_bitmap);
for (i = 0;
- i <
- sizeof(rate_scope->ht_mcs_rate_bitmap) /
- sizeof(u16); i++)
+ i < ARRAY_SIZE(rate_scope->ht_mcs_rate_bitmap);
+ i++)
priv->bitmap_rates[2 + i] =
le16_to_cpu(rate_scope->
ht_mcs_rate_bitmap[i]);
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
index 42997e0..a6077ab 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
@@ -654,9 +654,9 @@ int mwifiex_get_bss_info(struct mwifiex_private *priv,
*/
int mwifiex_disable_auto_ds(struct mwifiex_private *priv)
{
- struct mwifiex_ds_auto_ds auto_ds;
-
- auto_ds.auto_ds = DEEP_SLEEP_OFF;
+ struct mwifiex_ds_auto_ds auto_ds = {
+ .auto_ds = DEEP_SLEEP_OFF,
+ };
return mwifiex_send_cmd(priv, HostCmd_CMD_802_11_PS_MODE_ENH,
DIS_AUTO_PS, BITMAP_AUTO_DS, &auto_ds, true);
@@ -811,8 +811,8 @@ int mwifiex_drv_set_power(struct mwifiex_private *priv, u32 *ps_mode)
* is checked to determine WPA version. If buffer length is zero, the existing
* WPA IE is reset.
*/
-static int mwifiex_set_wpa_ie_helper(struct mwifiex_private *priv,
- u8 *ie_data_ptr, u16 ie_len)
+static int mwifiex_set_wpa_ie(struct mwifiex_private *priv,
+ u8 *ie_data_ptr, u16 ie_len)
{
if (ie_len) {
if (ie_len > sizeof(priv->wpa_ie)) {
@@ -1351,101 +1351,96 @@ static int
mwifiex_set_gen_ie_helper(struct mwifiex_private *priv, u8 *ie_data_ptr,
u16 ie_len)
{
- int ret = 0;
struct ieee_types_vendor_header *pvendor_ie;
const u8 wpa_oui[] = { 0x00, 0x50, 0xf2, 0x01 };
const u8 wps_oui[] = { 0x00, 0x50, 0xf2, 0x04 };
- u16 unparsed_len = ie_len;
- int find_wpa_ie = 0;
+ u16 unparsed_len = ie_len, cur_ie_len;
/* If the passed length is zero, reset the buffer */
if (!ie_len) {
priv->gen_ie_buf_len = 0;
priv->wps.session_enable = false;
-
return 0;
- } else if (!ie_data_ptr) {
+ } else if (!ie_data_ptr ||
+ ie_len <= sizeof(struct ieee_types_header)) {
return -1;
}
pvendor_ie = (struct ieee_types_vendor_header *) ie_data_ptr;
while (pvendor_ie) {
+ cur_ie_len = pvendor_ie->len + sizeof(struct ieee_types_header);
+
+ if (pvendor_ie->element_id == WLAN_EID_RSN) {
+ /* IE is a WPA/WPA2 IE so call set_wpa function */
+ mwifiex_set_wpa_ie(priv, (u8 *)pvendor_ie, cur_ie_len);
+ priv->wps.session_enable = false;
+ goto next_ie;
+ }
+
+ if (pvendor_ie->element_id == WLAN_EID_BSS_AC_ACCESS_DELAY) {
+ /* IE is a WAPI IE so call set_wapi function */
+ mwifiex_set_wapi_ie(priv, (u8 *)pvendor_ie,
+ cur_ie_len);
+ goto next_ie;
+ }
+
if (pvendor_ie->element_id == WLAN_EID_VENDOR_SPECIFIC) {
- /* Test to see if it is a WPA IE, if not, then it is a
- * gen IE
+ /* Test to see if it is a WPA IE, if not, then
+ * it is a gen IE
*/
if (!memcmp(pvendor_ie->oui, wpa_oui,
sizeof(wpa_oui))) {
- find_wpa_ie = 1;
- break;
+ /* IE is a WPA/WPA2 IE so call set_wpa function
+ */
+ mwifiex_set_wpa_ie(priv, (u8 *)pvendor_ie,
+ cur_ie_len);
+ priv->wps.session_enable = false;
+ goto next_ie;
}
- /* Test to see if it is a WPS IE, if so, enable
- * wps session flag
- */
if (!memcmp(pvendor_ie->oui, wps_oui,
sizeof(wps_oui))) {
+ /* Test to see if it is a WPS IE,
+ * if so, enable wps session flag
+ */
priv->wps.session_enable = true;
mwifiex_dbg(priv->adapter, MSG,
- "info: WPS Session Enabled.\n");
- ret = mwifiex_set_wps_ie(priv,
- (u8 *)pvendor_ie,
- unparsed_len);
+ "WPS Session Enabled.\n");
+ mwifiex_set_wps_ie(priv, (u8 *)pvendor_ie,
+ cur_ie_len);
+ goto next_ie;
}
}
- if (pvendor_ie->element_id == WLAN_EID_RSN) {
- find_wpa_ie = 1;
- break;
- }
+ /* Saved in gen_ie, such as the P2P IE, etc. */
- if (pvendor_ie->element_id == WLAN_EID_BSS_AC_ACCESS_DELAY) {
- /* IE is a WAPI IE so call set_wapi function */
- ret = mwifiex_set_wapi_ie(priv, (u8 *)pvendor_ie,
- unparsed_len);
- return ret;
+ /* Verify that the passed length is not larger than the
+ * available space remaining in the buffer
+ */
+ if (cur_ie_len <
+ (sizeof(priv->gen_ie_buf) - priv->gen_ie_buf_len)) {
+ /* Append the passed data to the end
+ * of the genIeBuffer
+ */
+ memcpy(priv->gen_ie_buf + priv->gen_ie_buf_len,
+ (u8 *)pvendor_ie, cur_ie_len);
+ /* Increment the stored buffer length by the
+ * size passed
+ */
+ priv->gen_ie_buf_len += cur_ie_len;
}
- unparsed_len -= (pvendor_ie->len +
- sizeof(struct ieee_types_header));
+next_ie:
+ unparsed_len -= cur_ie_len;
if (unparsed_len <= sizeof(struct ieee_types_header))
pvendor_ie = NULL;
else
pvendor_ie = (struct ieee_types_vendor_header *)
- (((u8 *)pvendor_ie) + pvendor_ie->len +
- sizeof(struct ieee_types_header));
+ (((u8 *)pvendor_ie) + cur_ie_len);
}
- if (find_wpa_ie) {
- /* IE is a WPA/WPA2 IE so call set_wpa function */
- ret = mwifiex_set_wpa_ie_helper(priv, (u8 *)pvendor_ie,
- unparsed_len);
- priv->wps.session_enable = false;
- return ret;
- }
-
- /*
- * Verify that the passed length is not larger than the
- * available space remaining in the buffer
- */
- if (ie_len < (sizeof(priv->gen_ie_buf) - priv->gen_ie_buf_len)) {
-
- /* Append the passed data to the end of the
- genIeBuffer */
- memcpy(priv->gen_ie_buf + priv->gen_ie_buf_len, ie_data_ptr,
- ie_len);
- /* Increment the stored buffer length by the
- size passed */
- priv->gen_ie_buf_len += ie_len;
- } else {
- /* Passed data does not fit in the remaining
- buffer space */
- ret = -1;
- }
-
- /* Return 0, or -1 for error case */
- return ret;
+ return 0;
}
/*
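Note on the reworked parser above: mwifiex_set_gen_ie_helper() now dispatches
each element in place and advances by that element's own length, instead of
breaking out of the loop on the first WPA/RSN match. The underlying walk is
the standard 802.11 TLV pattern; a minimal sketch, assuming the usual
two-byte element header (all names here are illustrative, not driver symbols):

	struct ie_hdr {
		u8 id;
		u8 len;
	} __packed;

	static void walk_ies(const u8 *buf, size_t buf_len)
	{
		const struct ie_hdr *ie = (const void *)buf;
		size_t cur_len;

		while (buf_len > sizeof(*ie)) {
			cur_len = sizeof(*ie) + ie->len;
			if (cur_len > buf_len)
				break;	/* truncated element: stop parsing */

			/* dispatch on ie->id here (RSN, WAPI, vendor...) */

			buf_len -= cur_len;
			ie = (const struct ie_hdr *)((const u8 *)ie + cur_len);
		}
	}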
diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c
index 39cd677..e76af286 100644
--- a/drivers/net/wireless/marvell/mwifiex/tdls.c
+++ b/drivers/net/wireless/marvell/mwifiex/tdls.c
@@ -130,7 +130,7 @@ mwifiex_tdls_append_rates_ie(struct mwifiex_private *priv,
if (skb_tailroom(skb) < rates_size + 4) {
mwifiex_dbg(priv->adapter, ERROR,
- "Insuffient space while adding rates\n");
+ "Insufficient space while adding rates\n");
return -ENOMEM;
}
diff --git a/drivers/net/wireless/marvell/mwifiex/uap_cmd.c b/drivers/net/wireless/marvell/mwifiex/uap_cmd.c
index 477c29c..18f7d9b 100644
--- a/drivers/net/wireless/marvell/mwifiex/uap_cmd.c
+++ b/drivers/net/wireless/marvell/mwifiex/uap_cmd.c
@@ -444,6 +444,28 @@ mwifiex_uap_bss_wep(u8 **tlv_buf, void *cmd_buf, u16 *param_size)
return;
}
+/* This function enables 11D if userspace has set the country IE.
+ */
+void mwifiex_config_uap_11d(struct mwifiex_private *priv,
+ struct cfg80211_beacon_data *beacon_data)
+{
+ enum state_11d_t state_11d;
+ const u8 *country_ie;
+
+ country_ie = cfg80211_find_ie(WLAN_EID_COUNTRY, beacon_data->tail,
+ beacon_data->tail_len);
+ if (country_ie) {
+ /* Send cmd to FW to enable 11D function */
+ state_11d = ENABLE_11D;
+ if (mwifiex_send_cmd(priv, HostCmd_CMD_802_11_SNMP_MIB,
+ HostCmd_ACT_GEN_SET, DOT11D_I,
+ &state_11d, true)) {
+ mwifiex_dbg(priv->adapter, ERROR,
+ "11D: failed to enable 11D\n");
+ }
+ }
+}
+
/* This function parses BSS related parameters from structure
* and prepares TLVs. These TLVs are appended to command buffer.
*/
@@ -848,8 +870,6 @@ void mwifiex_uap_set_channel(struct mwifiex_private *priv,
int mwifiex_config_start_uap(struct mwifiex_private *priv,
struct mwifiex_uap_bss_param *bss_cfg)
{
- enum state_11d_t state_11d;
-
if (mwifiex_send_cmd(priv, HostCmd_CMD_UAP_SYS_CONFIG,
HostCmd_ACT_GEN_SET,
UAP_BSS_PARAMS_I, bss_cfg, true)) {
@@ -858,16 +878,6 @@ int mwifiex_config_start_uap(struct mwifiex_private *priv,
return -1;
}
- /* Send cmd to FW to enable 11D function */
- state_11d = ENABLE_11D;
- if (mwifiex_send_cmd(priv, HostCmd_CMD_802_11_SNMP_MIB,
- HostCmd_ACT_GEN_SET, DOT11D_I,
- &state_11d, true)) {
- mwifiex_dbg(priv->adapter, ERROR,
- "11D: failed to enable 11D\n");
- return -1;
- }
-
if (mwifiex_send_cmd(priv, HostCmd_CMD_UAP_BSS_START,
HostCmd_ACT_GEN_SET, 0, NULL, true)) {
mwifiex_dbg(priv->adapter, ERROR,
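Net effect of the uap_cmd.c changes: 11D is no longer forced on at every BSS
start; it is enabled only when the beacon tail supplied by userspace actually
carries a country element. The new helper is meant to be invoked from the AP
start path with the beacon data; a hedged sketch of such a call site
(hypothetical, the caller is not part of this hunk):

	/* inside the cfg80211 start_ap handler, before the BSS is started */
	mwifiex_config_uap_11d(priv, &params->beacon);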
diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c
index cb1753e..f4f2b9b 100644
--- a/drivers/net/wireless/marvell/mwifiex/usb.c
+++ b/drivers/net/wireless/marvell/mwifiex/usb.c
@@ -24,7 +24,7 @@
static struct mwifiex_if_ops usb_ops;
-static struct usb_device_id mwifiex_usb_table[] = {
+static const struct usb_device_id mwifiex_usb_table[] = {
/* 8766 */
{USB_DEVICE(USB8XXX_VID, USB8766_PID_1)},
{USB_DEVICE_AND_INTERFACE_INFO(USB8XXX_VID, USB8766_PID_2,
@@ -1112,7 +1112,7 @@ static void mwifiex_usb_tx_aggr_tmo(unsigned long context)
if (err) {
mwifiex_dbg(adapter, ERROR,
"prepare tx aggr skb failed, err=%d\n", err);
- return;
+ goto unlock;
}
if (atomic_read(&port->tx_data_urb_pending) >=
@@ -1133,6 +1133,7 @@ static void mwifiex_usb_tx_aggr_tmo(unsigned long context)
done:
if (err == -1)
mwifiex_write_data_complete(adapter, skb_send, 0, -1);
+unlock:
spin_unlock_irqrestore(&port->tx_aggr_lock, flags);
}
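The usb.c change closes a lock leak: the aggregation timeout handler took
tx_aggr_lock and then returned straight from the error path, leaving the
spinlock held. Funnelling every exit through one unlock label is the usual
cure; a minimal sketch of the pattern:

	spin_lock_irqsave(&port->tx_aggr_lock, flags);

	err = prepare_work();
	if (err)
		goto unlock;	/* never return with the lock held */

	do_work();

unlock:
	spin_unlock_irqrestore(&port->tx_aggr_lock, flags);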
diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c
index 660267b..7f3e398 100644
--- a/drivers/net/wireless/mediatek/mt7601u/dma.c
+++ b/drivers/net/wireless/mediatek/mt7601u/dma.c
@@ -457,6 +457,9 @@ static void mt7601u_free_tx(struct mt7601u_dev *dev)
{
int i;
+ if (!dev->tx_q)
+ return;
+
for (i = 0; i < __MT_EP_OUT_MAX; i++)
mt7601u_free_tx_queue(&dev->tx_q[i]);
}
@@ -484,6 +487,8 @@ static int mt7601u_alloc_tx(struct mt7601u_dev *dev)
dev->tx_q = devm_kcalloc(dev->dev, __MT_EP_OUT_MAX,
sizeof(*dev->tx_q), GFP_KERNEL);
+ if (!dev->tx_q)
+ return -ENOMEM;
for (i = 0; i < __MT_EP_OUT_MAX; i++)
if (mt7601u_alloc_tx_queue(dev, &dev->tx_q[i]))
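The two dma.c hunks pair an allocation guard with a matching guard in the
teardown path, so a probe that fails before the queue array exists can still
run the common free path safely. A minimal sketch of the pattern
(illustrative names, not driver symbols):

	static int alloc_queues(struct dev_ctx *ctx)
	{
		ctx->tx_q = devm_kcalloc(ctx->dev, N_QUEUES,
					 sizeof(*ctx->tx_q), GFP_KERNEL);
		if (!ctx->tx_q)
			return -ENOMEM;
		return 0;
	}

	static void free_queues(struct dev_ctx *ctx)
	{
		int i;

		if (!ctx->tx_q)		/* allocation may never have run */
			return;

		for (i = 0; i < N_QUEUES; i++)
			free_one_queue(&ctx->tx_q[i]);
	}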
diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c
index 416c604..b9e4f67 100644
--- a/drivers/net/wireless/mediatek/mt7601u/usb.c
+++ b/drivers/net/wireless/mediatek/mt7601u/usb.c
@@ -19,7 +19,7 @@
#include "usb.h"
#include "trace.h"
-static struct usb_device_id mt7601u_device_table[] = {
+static const struct usb_device_id mt7601u_device_table[] = {
{ USB_DEVICE(0x0b05, 0x17d3) },
{ USB_DEVICE(0x0e8d, 0x760a) },
{ USB_DEVICE(0x0e8d, 0x760b) },
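Constifying the device tables (here and in mwifiex above) is a pure hardening
change: the table is only read after registration, so marking it const lets
it live in read-only data. The canonical shape, as a sketch:

	static const struct usb_device_id example_table[] = {
		{ USB_DEVICE(0x0b05, 0x17d3) },	/* VID, PID */
		{ }				/* terminating entry */
	};
	MODULE_DEVICE_TABLE(usb, example_table);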
diff --git a/drivers/net/wireless/quantenna/qtnfmac/Makefile b/drivers/net/wireless/quantenna/qtnfmac/Makefile
index 0d618e5..f236b7d 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/Makefile
+++ b/drivers/net/wireless/quantenna/qtnfmac/Makefile
@@ -25,7 +25,3 @@ qtnfmac_pearl_pcie-objs += \
pearl/pcie.o
qtnfmac_pearl_pcie-$(CONFIG_DEBUG_FS) += debug.o
-
-#
-
-ccflags-y += -D__CHECK_ENDIAN
diff --git a/drivers/net/wireless/quantenna/qtnfmac/bus.h b/drivers/net/wireless/quantenna/qtnfmac/bus.h
index dda0500..56e5fed 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/bus.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/bus.h
@@ -130,7 +130,6 @@ static __always_inline void qtnf_bus_unlock(struct qtnf_bus *bus)
/* interface functions from common layer */
-void qtnf_rx_frame(struct device *dev, struct sk_buff *rxp);
int qtnf_core_attach(struct qtnf_bus *bus);
void qtnf_core_detach(struct qtnf_bus *bus);
void qtnf_txflowblock(struct device *dev, bool state);
diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
index e3c0900..856fa6e 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
@@ -266,11 +266,19 @@ static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_ap_settings *settings)
{
struct qtnf_vif *vif = qtnf_netdev_get_priv(dev);
+ struct qtnf_wmac *mac = wiphy_priv(wiphy);
struct qtnf_bss_config *bss_cfg;
int ret;
- bss_cfg = &vif->bss_cfg;
+ if (!cfg80211_chandef_identical(&mac->chandef, &settings->chandef)) {
+ memcpy(&mac->chandef, &settings->chandef, sizeof(mac->chandef));
+ if (vif->vifid != 0)
+ pr_warn("%s: unexpected chan %u (%u MHz)\n", dev->name,
+ settings->chandef.chan->hw_value,
+ settings->chandef.chan->center_freq);
+ }
+ bss_cfg = &vif->bss_cfg;
memset(bss_cfg, 0, sizeof(*bss_cfg));
bss_cfg->bcn_period = settings->beacon_interval;
@@ -281,8 +289,6 @@ static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev,
bss_cfg->ssid_len = settings->ssid_len;
memcpy(&bss_cfg->ssid, settings->ssid, bss_cfg->ssid_len);
- memcpy(&bss_cfg->chandef, &settings->chandef,
- sizeof(struct cfg80211_chan_def));
memcpy(&bss_cfg->crypto, &settings->crypto,
sizeof(struct cfg80211_crypto_settings));
@@ -573,19 +579,33 @@ qtnf_del_station(struct wiphy *wiphy, struct net_device *dev,
return ret;
}
+static void qtnf_scan_timeout(unsigned long data)
+{
+ struct qtnf_wmac *mac = (struct qtnf_wmac *)data;
+
+ pr_warn("mac%d scan timed out\n", mac->macid);
+ qtnf_scan_done(mac, true);
+}
+
static int
qtnf_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request)
{
struct qtnf_wmac *mac = wiphy_priv(wiphy);
- int ret;
mac->scan_req = request;
- ret = qtnf_cmd_send_scan(mac);
- if (ret)
+ if (qtnf_cmd_send_scan(mac)) {
pr_err("MAC%u: failed to start scan\n", mac->macid);
+ mac->scan_req = NULL;
+ return -EFAULT;
+ }
- return ret;
+ mac->scan_timeout.data = (unsigned long)mac;
+ mac->scan_timeout.function = qtnf_scan_timeout;
+ mod_timer(&mac->scan_timeout,
+ jiffies + QTNF_SCAN_TIMEOUT_SEC * HZ);
+
+ return 0;
}
static int
@@ -593,6 +613,8 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_connect_params *sme)
{
struct qtnf_vif *vif = qtnf_netdev_get_priv(dev);
+ struct qtnf_wmac *mac = wiphy_priv(wiphy);
+ struct cfg80211_chan_def chandef;
struct qtnf_bss_config *bss_cfg;
int ret;
@@ -605,9 +627,20 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev,
bss_cfg = &vif->bss_cfg;
memset(bss_cfg, 0, sizeof(*bss_cfg));
+ if (sme->channel) {
+ /* FIXME: need to set proper nl80211_channel_type value */
+ cfg80211_chandef_create(&chandef, sme->channel,
+ NL80211_CHAN_HT20);
+ /* fall-back to minimal safe chandef description */
+ if (!cfg80211_chandef_valid(&chandef))
+ cfg80211_chandef_create(&chandef, sme->channel,
+ NL80211_CHAN_HT20);
+
+ memcpy(&mac->chandef, &chandef, sizeof(mac->chandef));
+ }
+
bss_cfg->ssid_len = sme->ssid_len;
memcpy(&bss_cfg->ssid, sme->ssid, bss_cfg->ssid_len);
- bss_cfg->chandef.chan = sme->channel;
bss_cfg->auth_type = sme->auth_type;
bss_cfg->privacy = sme->privacy;
bss_cfg->mfp = sme->mfp;
@@ -677,6 +710,175 @@ qtnf_disconnect(struct wiphy *wiphy, struct net_device *dev,
return 0;
}
+static int
+qtnf_dump_survey(struct wiphy *wiphy, struct net_device *dev,
+ int idx, struct survey_info *survey)
+{
+ struct qtnf_wmac *mac = wiphy_priv(wiphy);
+ struct ieee80211_supported_band *sband;
+ struct cfg80211_chan_def *chandef;
+ struct ieee80211_channel *chan;
+ struct qtnf_chan_stats stats;
+ struct qtnf_vif *vif;
+ int ret;
+
+ vif = qtnf_netdev_get_priv(dev);
+ chandef = &mac->chandef;
+
+ sband = wiphy->bands[NL80211_BAND_2GHZ];
+ if (sband && idx >= sband->n_channels) {
+ idx -= sband->n_channels;
+ sband = NULL;
+ }
+
+ if (!sband)
+ sband = wiphy->bands[NL80211_BAND_5GHZ];
+
+ if (!sband || idx >= sband->n_channels)
+ return -ENOENT;
+
+ chan = &sband->channels[idx];
+ memset(&stats, 0, sizeof(stats));
+
+ survey->channel = chan;
+ survey->filled = 0x0;
+
+ if (chandef->chan) {
+ if (chan->hw_value == chandef->chan->hw_value)
+ survey->filled = SURVEY_INFO_IN_USE;
+ }
+
+ ret = qtnf_cmd_get_chan_stats(mac, chan->hw_value, &stats);
+ switch (ret) {
+ case 0:
+ if (unlikely(stats.chan_num != chan->hw_value)) {
+ pr_err("received stats for channel %d instead of %d\n",
+ stats.chan_num, chan->hw_value);
+ ret = -EINVAL;
+ break;
+ }
+
+ survey->filled |= SURVEY_INFO_TIME |
+ SURVEY_INFO_TIME_SCAN |
+ SURVEY_INFO_TIME_BUSY |
+ SURVEY_INFO_TIME_RX |
+ SURVEY_INFO_TIME_TX |
+ SURVEY_INFO_NOISE_DBM;
+
+ survey->time_scan = stats.cca_try;
+ survey->time = stats.cca_try;
+ survey->time_tx = stats.cca_tx;
+ survey->time_rx = stats.cca_rx;
+ survey->time_busy = stats.cca_busy;
+ survey->noise = stats.chan_noise;
+ break;
+ case -ENOENT:
+ pr_debug("no stats for channel %u\n", chan->hw_value);
+ ret = 0;
+ break;
+ default:
+ pr_debug("failed to get chan(%d) stats from card\n",
+ chan->hw_value);
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int
+qtnf_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev,
+ struct cfg80211_chan_def *chandef)
+{
+ struct qtnf_wmac *mac = wiphy_priv(wiphy);
+ struct net_device *ndev = wdev->netdev;
+ struct qtnf_vif *vif;
+
+ if (!ndev)
+ return -ENODEV;
+
+ vif = qtnf_netdev_get_priv(wdev->netdev);
+
+ switch (vif->wdev.iftype) {
+ case NL80211_IFTYPE_STATION:
+ if (vif->sta_state == QTNF_STA_DISCONNECTED) {
+ pr_warn("%s: STA disconnected\n", ndev->name);
+ return -ENODATA;
+ }
+ break;
+ case NL80211_IFTYPE_AP:
+ if (!(vif->bss_status & QTNF_STATE_AP_START)) {
+ pr_warn("%s: AP not started\n", ndev->name);
+ return -ENODATA;
+ }
+ break;
+ default:
+ pr_err("unsupported vif type (%d)\n", vif->wdev.iftype);
+ return -ENODATA;
+ }
+
+ if (!cfg80211_chandef_valid(&mac->chandef)) {
+ pr_err("invalid channel settings on %s\n", ndev->name);
+ return -ENODATA;
+ }
+
+ memcpy(chandef, &mac->chandef, sizeof(*chandef));
+ return 0;
+}
+
+static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_csa_settings *params)
+{
+ struct qtnf_wmac *mac = wiphy_priv(wiphy);
+ struct qtnf_vif *vif = qtnf_netdev_get_priv(dev);
+ int ret;
+
+ pr_debug("%s: chan(%u) count(%u) radar(%u) block_tx(%u)\n", dev->name,
+ params->chandef.chan->hw_value, params->count,
+ params->radar_required, params->block_tx);
+
+ switch (vif->wdev.iftype) {
+ case NL80211_IFTYPE_AP:
+ if (!(vif->bss_status & QTNF_STATE_AP_START)) {
+ pr_warn("AP not started on %s\n", dev->name);
+ return -ENOTCONN;
+ }
+ break;
+ default:
+ pr_err("unsupported vif type (%d) on %s\n",
+ vif->wdev.iftype, dev->name);
+ return -EOPNOTSUPP;
+ }
+
+ if (vif->vifid != 0) {
+ if (!(mac->status & QTNF_MAC_CSA_ACTIVE))
+ return -EOPNOTSUPP;
+
+ if (!cfg80211_chandef_identical(&params->chandef,
+ &mac->csa_chandef))
+ return -EINVAL;
+
+ return 0;
+ }
+
+ if (!cfg80211_chandef_valid(&params->chandef)) {
+ pr_err("%s: invalid channel\n", dev->name);
+ return -EINVAL;
+ }
+
+ if (cfg80211_chandef_identical(&params->chandef, &mac->chandef)) {
+ pr_err("%s: switch request to the same channel\n", dev->name);
+ return -EALREADY;
+ }
+
+ ret = qtnf_cmd_send_chan_switch(mac, params);
+ if (ret)
+ pr_warn("%s: failed to switch to channel (%u)\n",
+ dev->name, params->chandef.chan->hw_value);
+
+ return ret;
+}
+
static struct cfg80211_ops qtn_cfg80211_ops = {
.add_virtual_intf = qtnf_add_virtual_intf,
.change_virtual_intf = qtnf_change_virtual_intf,
@@ -697,69 +899,49 @@ static struct cfg80211_ops qtn_cfg80211_ops = {
.set_default_mgmt_key = qtnf_set_default_mgmt_key,
.scan = qtnf_scan,
.connect = qtnf_connect,
- .disconnect = qtnf_disconnect
+ .disconnect = qtnf_disconnect,
+ .dump_survey = qtnf_dump_survey,
+ .get_channel = qtnf_get_channel,
+ .channel_switch = qtnf_channel_switch
};
-static void qtnf_cfg80211_reg_notifier(struct wiphy *wiphy,
+static void qtnf_cfg80211_reg_notifier(struct wiphy *wiphy_in,
struct regulatory_request *req)
{
- struct qtnf_wmac *mac = wiphy_priv(wiphy);
- struct qtnf_bus *bus;
- struct qtnf_vif *vif;
- struct qtnf_wmac *chan_mac;
- int i;
+ struct qtnf_wmac *mac = wiphy_priv(wiphy_in);
+ struct qtnf_bus *bus = mac->bus;
+ struct wiphy *wiphy;
+ unsigned int mac_idx;
enum nl80211_band band;
-
- bus = mac->bus;
+ int ret;
pr_debug("MAC%u: initiator=%d alpha=%c%c\n", mac->macid, req->initiator,
req->alpha2[0], req->alpha2[1]);
- vif = qtnf_mac_get_base_vif(mac);
- if (!vif) {
- pr_err("MAC%u: primary VIF is not configured\n", mac->macid);
- return;
- }
-
- /* ignore non-ISO3166 country codes */
- for (i = 0; i < sizeof(req->alpha2); i++) {
- if (req->alpha2[i] < 'A' || req->alpha2[i] > 'Z') {
- pr_err("MAC%u: not an ISO3166 code\n", mac->macid);
- return;
- }
- }
- if (!strncasecmp(req->alpha2, bus->hw_info.alpha2_code,
- sizeof(req->alpha2))) {
- pr_warn("MAC%u: unchanged country code\n", mac->macid);
- return;
- }
-
- if (qtnf_cmd_send_regulatory_config(mac, req->alpha2)) {
- pr_err("MAC%u: failed to configure regulatory\n", mac->macid);
+ ret = qtnf_cmd_reg_notify(bus, req);
+ if (ret) {
+ if (ret != -EOPNOTSUPP && ret != -EALREADY)
+ pr_err("failed to update reg domain to %c%c\n",
+ req->alpha2[0], req->alpha2[1]);
return;
}
- for (i = 0; i < bus->hw_info.num_mac; i++) {
- chan_mac = bus->mac[i];
-
- if (!chan_mac)
+ for (mac_idx = 0; mac_idx < QTNF_MAX_MAC; ++mac_idx) {
+ if (!(bus->hw_info.mac_bitmap & (1 << mac_idx)))
continue;
- if (!(bus->hw_info.mac_bitmap & BIT(i)))
- continue;
+ mac = bus->mac[mac_idx];
+ wiphy = priv_to_wiphy(mac);
for (band = 0; band < NUM_NL80211_BANDS; ++band) {
if (!wiphy->bands[band])
continue;
- if (qtnf_cmd_get_mac_chan_info(chan_mac,
- wiphy->bands[band])) {
- pr_err("MAC%u: can't get channel info\n",
- chan_mac->macid);
- qtnf_core_detach(bus);
-
- return;
- }
+ ret = qtnf_cmd_get_mac_chan_info(mac,
+ wiphy->bands[band]);
+ if (ret)
+ pr_err("failed to get chan info for mac %u band %u\n",
+ mac_idx, band);
}
}
}
@@ -844,10 +1026,8 @@ int qtnf_wiphy_register(struct qtnf_hw_info *hw_info, struct qtnf_wmac *mac)
}
iface_comb = kzalloc(sizeof(*iface_comb), GFP_KERNEL);
- if (!iface_comb) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!iface_comb)
+ return -ENOMEM;
ret = qtnf_wiphy_setup_if_comb(wiphy, iface_comb, &mac->macinfo);
if (ret)
@@ -869,6 +1049,7 @@ int qtnf_wiphy_register(struct qtnf_hw_info *hw_info, struct qtnf_wmac *mac)
wiphy->iface_combinations = iface_comb;
wiphy->n_iface_combinations = 1;
+ wiphy->max_num_csa_counters = 2;
/* Initialize cipher suits */
wiphy->cipher_suites = qtnf_cipher_suites;
@@ -876,7 +1057,8 @@ int qtnf_wiphy_register(struct qtnf_hw_info *hw_info, struct qtnf_wmac *mac)
wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
wiphy->flags |= WIPHY_FLAG_HAVE_AP_SME |
WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD |
- WIPHY_FLAG_AP_UAPSD;
+ WIPHY_FLAG_AP_UAPSD |
+ WIPHY_FLAG_HAS_CHANNEL_SWITCH;
wiphy->probe_resp_offload = NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS |
NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2;
@@ -889,21 +1071,26 @@ int qtnf_wiphy_register(struct qtnf_hw_info *hw_info, struct qtnf_wmac *mac)
ether_addr_copy(wiphy->perm_addr, mac->macaddr);
if (hw_info->hw_capab & QLINK_HW_SUPPORTS_REG_UPDATE) {
- pr_debug("device supports REG_UPDATE\n");
+ wiphy->regulatory_flags |= REGULATORY_STRICT_REG |
+ REGULATORY_CUSTOM_REG;
wiphy->reg_notifier = qtnf_cfg80211_reg_notifier;
- pr_debug("hint regulatory about EP region: %c%c\n",
- hw_info->alpha2_code[0],
- hw_info->alpha2_code[1]);
- regulatory_hint(wiphy, hw_info->alpha2_code);
+ wiphy_apply_custom_regulatory(wiphy, hw_info->rd);
} else {
- pr_debug("device doesn't support REG_UPDATE\n");
wiphy->regulatory_flags |= REGULATORY_WIPHY_SELF_MANAGED;
}
ret = wiphy_register(wiphy);
+ if (ret < 0)
+ goto out;
+
+ if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED)
+ ret = regulatory_set_wiphy_regd(wiphy, hw_info->rd);
+ else if (isalpha(hw_info->rd->alpha2[0]) &&
+ isalpha(hw_info->rd->alpha2[1]))
+ ret = regulatory_hint(wiphy, hw_info->rd->alpha2);
out:
- if (ret < 0) {
+ if (ret) {
kfree(iface_comb);
return ret;
}
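Two behavioral additions stand out in this cfg80211.c diff: qtnf_scan() now
arms a 15-second watchdog so firmware that never reports scan completion
cannot wedge cfg80211, and the regulatory notifier collapses into a single
firmware command plus a per-band channel refresh. The watchdog uses the
pre-4.15 timer API with explicit .data/.function fields; a minimal sketch of
that idiom, with illustrative names:

	static void my_scan_timeout(unsigned long data)
	{
		struct my_mac *mac = (struct my_mac *)data;

		/* report the scan as aborted so cfg80211 can move on */
		complete_scan(mac, true);
	}

	init_timer(&mac->scan_timeout);			/* once, at init */

	mac->scan_timeout.data = (unsigned long)mac;	/* per scan */
	mac->scan_timeout.function = my_scan_timeout;
	mod_timer(&mac->scan_timeout, jiffies + 15 * HZ);

	if (timer_pending(&mac->scan_timeout))		/* on completion */
		del_timer_sync(&mac->scan_timeout);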
diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.h b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.h
index 5bd3312..6a4af52 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.h
@@ -34,10 +34,14 @@ static inline void qtnf_scan_done(struct qtnf_wmac *mac, bool aborted)
.aborted = aborted,
};
+ mutex_lock(&mac->mac_lock);
+
if (mac->scan_req) {
cfg80211_scan_done(mac->scan_req, &info);
mac->scan_req = NULL;
}
+
+ mutex_unlock(&mac->mac_lock);
}
#endif /* _QTN_FMAC_CFG80211_H_ */
diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c
index b39dbc3..4206886 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/commands.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c
@@ -181,43 +181,11 @@ out:
return ret;
}
-int qtnf_cmd_send_regulatory_config(struct qtnf_wmac *mac, const char *alpha2)
-{
- struct sk_buff *cmd_skb;
- u16 res_code = QLINK_CMD_RESULT_OK;
- int ret;
-
- cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, QLINK_VIFID_RSVD,
- QLINK_CMD_REG_REGION,
- sizeof(struct qlink_cmd));
- if (unlikely(!cmd_skb))
- return -ENOMEM;
-
- qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_COUNTRY, alpha2,
- QTNF_MAX_ALPHA_LEN);
-
- ret = qtnf_cmd_send(mac->bus, cmd_skb, &res_code);
-
- if (unlikely(ret))
- goto out;
-
- if (unlikely(res_code != QLINK_CMD_RESULT_OK)) {
- pr_err("MAC%u: CMD failed: %u\n", mac->macid, res_code);
- ret = -EFAULT;
- goto out;
- }
-
- memcpy(mac->bus->hw_info.alpha2_code, alpha2,
- sizeof(mac->bus->hw_info.alpha2_code));
-out:
- return ret;
-}
-
int qtnf_cmd_send_config_ap(struct qtnf_vif *vif)
{
struct sk_buff *cmd_skb;
struct qtnf_bss_config *bss_cfg = &vif->bss_cfg;
- struct cfg80211_chan_def *chandef = &bss_cfg->chandef;
+ struct cfg80211_chan_def *chandef = &vif->mac->chandef;
struct qlink_tlv_channel *qchan;
struct qlink_auth_encr aen;
u16 res_code = QLINK_CMD_RESULT_OK;
@@ -848,25 +816,168 @@ out:
return ret;
}
+static u32 qtnf_cmd_resp_reg_rule_flags_parse(u32 qflags)
+{
+ u32 flags = 0;
+
+ if (qflags & QLINK_RRF_NO_OFDM)
+ flags |= NL80211_RRF_NO_OFDM;
+
+ if (qflags & QLINK_RRF_NO_CCK)
+ flags |= NL80211_RRF_NO_CCK;
+
+ if (qflags & QLINK_RRF_NO_INDOOR)
+ flags |= NL80211_RRF_NO_INDOOR;
+
+ if (qflags & QLINK_RRF_NO_OUTDOOR)
+ flags |= NL80211_RRF_NO_OUTDOOR;
+
+ if (qflags & QLINK_RRF_DFS)
+ flags |= NL80211_RRF_DFS;
+
+ if (qflags & QLINK_RRF_PTP_ONLY)
+ flags |= NL80211_RRF_PTP_ONLY;
+
+ if (qflags & QLINK_RRF_PTMP_ONLY)
+ flags |= NL80211_RRF_PTMP_ONLY;
+
+ if (qflags & QLINK_RRF_NO_IR)
+ flags |= NL80211_RRF_NO_IR;
+
+ if (qflags & QLINK_RRF_AUTO_BW)
+ flags |= NL80211_RRF_AUTO_BW;
+
+ if (qflags & QLINK_RRF_IR_CONCURRENT)
+ flags |= NL80211_RRF_IR_CONCURRENT;
+
+ if (qflags & QLINK_RRF_NO_HT40MINUS)
+ flags |= NL80211_RRF_NO_HT40MINUS;
+
+ if (qflags & QLINK_RRF_NO_HT40PLUS)
+ flags |= NL80211_RRF_NO_HT40PLUS;
+
+ if (qflags & QLINK_RRF_NO_80MHZ)
+ flags |= NL80211_RRF_NO_80MHZ;
+
+ if (qflags & QLINK_RRF_NO_160MHZ)
+ flags |= NL80211_RRF_NO_160MHZ;
+
+ return flags;
+}
+
static int
qtnf_cmd_resp_proc_hw_info(struct qtnf_bus *bus,
- const struct qlink_resp_get_hw_info *resp)
+ const struct qlink_resp_get_hw_info *resp,
+ size_t info_len)
{
struct qtnf_hw_info *hwinfo = &bus->hw_info;
+ const struct qlink_tlv_hdr *tlv;
+ const struct qlink_tlv_reg_rule *tlv_rule;
+ struct ieee80211_reg_rule *rule;
+ u16 tlv_type;
+ u16 tlv_value_len;
+ unsigned int rule_idx = 0;
+
+ if (WARN_ON(resp->n_reg_rules > NL80211_MAX_SUPP_REG_RULES))
+ return -E2BIG;
+
+ hwinfo->rd = kzalloc(sizeof(*hwinfo->rd)
+ + sizeof(struct ieee80211_reg_rule)
+ * resp->n_reg_rules, GFP_KERNEL);
+
+ if (!hwinfo->rd)
+ return -ENOMEM;
hwinfo->num_mac = resp->num_mac;
hwinfo->mac_bitmap = resp->mac_bitmap;
hwinfo->fw_ver = le32_to_cpu(resp->fw_ver);
hwinfo->ql_proto_ver = le16_to_cpu(resp->ql_proto_ver);
- memcpy(hwinfo->alpha2_code, resp->alpha2_code,
- sizeof(hwinfo->alpha2_code));
hwinfo->total_tx_chain = resp->total_tx_chain;
hwinfo->total_rx_chain = resp->total_rx_chain;
hwinfo->hw_capab = le32_to_cpu(resp->hw_capab);
+ hwinfo->rd->n_reg_rules = resp->n_reg_rules;
+ hwinfo->rd->alpha2[0] = resp->alpha2[0];
+ hwinfo->rd->alpha2[1] = resp->alpha2[1];
+
+ switch (resp->dfs_region) {
+ case QLINK_DFS_FCC:
+ hwinfo->rd->dfs_region = NL80211_DFS_FCC;
+ break;
+ case QLINK_DFS_ETSI:
+ hwinfo->rd->dfs_region = NL80211_DFS_ETSI;
+ break;
+ case QLINK_DFS_JP:
+ hwinfo->rd->dfs_region = NL80211_DFS_JP;
+ break;
+ case QLINK_DFS_UNSET:
+ default:
+ hwinfo->rd->dfs_region = NL80211_DFS_UNSET;
+ break;
+ }
+
+ tlv = (const struct qlink_tlv_hdr *)resp->info;
+
+ while (info_len >= sizeof(*tlv)) {
+ tlv_type = le16_to_cpu(tlv->type);
+ tlv_value_len = le16_to_cpu(tlv->len);
+
+ if (tlv_value_len + sizeof(*tlv) > info_len) {
+ pr_warn("malformed TLV 0x%.2X; LEN: %u\n",
+ tlv_type, tlv_value_len);
+ return -EINVAL;
+ }
+
+ switch (tlv_type) {
+ case QTN_TLV_ID_REG_RULE:
+ if (rule_idx >= resp->n_reg_rules) {
+ pr_warn("unexpected number of rules: %u\n",
+ resp->n_reg_rules);
+ return -EINVAL;
+ }
+
+ if (tlv_value_len != sizeof(*tlv_rule) - sizeof(*tlv)) {
+ pr_warn("malformed TLV 0x%.2X; LEN: %u\n",
+ tlv_type, tlv_value_len);
+ return -EINVAL;
+ }
+
+ tlv_rule = (const struct qlink_tlv_reg_rule *)tlv;
+ rule = &hwinfo->rd->reg_rules[rule_idx++];
+
+ rule->freq_range.start_freq_khz =
+ le32_to_cpu(tlv_rule->start_freq_khz);
+ rule->freq_range.end_freq_khz =
+ le32_to_cpu(tlv_rule->end_freq_khz);
+ rule->freq_range.max_bandwidth_khz =
+ le32_to_cpu(tlv_rule->max_bandwidth_khz);
+ rule->power_rule.max_antenna_gain =
+ le32_to_cpu(tlv_rule->max_antenna_gain);
+ rule->power_rule.max_eirp =
+ le32_to_cpu(tlv_rule->max_eirp);
+ rule->dfs_cac_ms =
+ le32_to_cpu(tlv_rule->dfs_cac_ms);
+ rule->flags = qtnf_cmd_resp_reg_rule_flags_parse(
+ le32_to_cpu(tlv_rule->flags));
+ break;
+ default:
+ break;
+ }
+
+ info_len -= tlv_value_len + sizeof(*tlv);
+ tlv = (struct qlink_tlv_hdr *)(tlv->val + tlv_value_len);
+ }
+
+ if (rule_idx != resp->n_reg_rules) {
+ pr_warn("unexpected number of rules: expected %u got %u\n",
+ resp->n_reg_rules, rule_idx);
+ kfree(hwinfo->rd);
+ hwinfo->rd = NULL;
+ return -EINVAL;
+ }
pr_info("fw_version=%d, MACs map %#x, alpha2=\"%c%c\", chains Tx=%u Rx=%u\n",
hwinfo->fw_ver, hwinfo->mac_bitmap,
- hwinfo->alpha2_code[0], hwinfo->alpha2_code[1],
+ hwinfo->rd->alpha2[0], hwinfo->rd->alpha2[1],
hwinfo->total_tx_chain, hwinfo->total_rx_chain);
return 0;
@@ -878,7 +989,7 @@ static int qtnf_parse_variable_mac_info(struct qtnf_wmac *mac,
struct ieee80211_iface_limit *limits = NULL;
const struct qlink_iface_limit *limit_record;
size_t record_count = 0, rec = 0;
- u16 tlv_type, tlv_value_len, mask;
+ u16 tlv_type, tlv_value_len;
struct qlink_iface_comb_num *comb;
size_t tlv_full_len;
const struct qlink_tlv_hdr *tlv;
@@ -931,10 +1042,12 @@ static int qtnf_parse_variable_mac_info(struct qtnf_wmac *mac,
limit_record = (void *)tlv->val;
limits[rec].max = le16_to_cpu(limit_record->max_num);
- mask = le16_to_cpu(limit_record->type_mask);
- limits[rec].types = qlink_iface_type_mask_to_nl(mask);
- /* only AP and STA modes are supported */
+ limits[rec].types = qlink_iface_type_to_nl_mask(
+ le16_to_cpu(limit_record->type));
+
+ /* supported modes: STA, AP (including AP_VLAN) */
limits[rec].types &= BIT(NL80211_IFTYPE_AP) |
+ BIT(NL80211_IFTYPE_AP_VLAN) |
BIT(NL80211_IFTYPE_STATION);
pr_debug("MAC%u: MAX: %u; TYPES: %.4X\n", mac->macid,
@@ -946,6 +1059,7 @@ static int qtnf_parse_variable_mac_info(struct qtnf_wmac *mac,
default:
break;
}
+
tlv_buf_size -= tlv_full_len;
tlv = (struct qlink_tlv_hdr *)(tlv->val + tlv_value_len);
}
@@ -1013,14 +1127,24 @@ qtnf_cmd_resp_fill_channels_info(struct ieee80211_supported_band *band,
unsigned int chidx = 0;
u32 qflags;
- kfree(band->channels);
- band->channels = NULL;
+ if (band->channels) {
+ if (band->n_channels == resp->num_chans) {
+ memset(band->channels, 0,
+ sizeof(*band->channels) * band->n_channels);
+ } else {
+ kfree(band->channels);
+ band->n_channels = 0;
+ band->channels = NULL;
+ }
+ }
band->n_channels = resp->num_chans;
if (band->n_channels == 0)
return 0;
- band->channels = kcalloc(band->n_channels, sizeof(*chan), GFP_KERNEL);
+ if (!band->channels)
+ band->channels = kcalloc(band->n_channels, sizeof(*chan),
+ GFP_KERNEL);
if (!band->channels) {
band->n_channels = 0;
return -ENOMEM;
@@ -1212,6 +1336,62 @@ static int qtnf_cmd_resp_proc_phy_params(struct qtnf_wmac *mac,
return 0;
}
+static int
+qtnf_cmd_resp_proc_chan_stat_info(struct qtnf_chan_stats *stats,
+ const u8 *payload, size_t payload_len)
+{
+ struct qlink_chan_stats *qlink_stats;
+ const struct qlink_tlv_hdr *tlv;
+ size_t tlv_full_len;
+ u16 tlv_value_len;
+ u16 tlv_type;
+
+ tlv = (struct qlink_tlv_hdr *)payload;
+ while (payload_len >= sizeof(struct qlink_tlv_hdr)) {
+ tlv_type = le16_to_cpu(tlv->type);
+ tlv_value_len = le16_to_cpu(tlv->len);
+ tlv_full_len = tlv_value_len + sizeof(struct qlink_tlv_hdr);
+ if (tlv_full_len > payload_len) {
+ pr_warn("malformed TLV 0x%.2X; LEN: %u\n",
+ tlv_type, tlv_value_len);
+ return -EINVAL;
+ }
+ switch (tlv_type) {
+ case QTN_TLV_ID_CHANNEL_STATS:
+ if (unlikely(tlv_value_len != sizeof(*qlink_stats))) {
+ pr_err("invalid CHANNEL_STATS entry size\n");
+ return -EINVAL;
+ }
+
+ qlink_stats = (void *)tlv->val;
+
+ stats->chan_num = le32_to_cpu(qlink_stats->chan_num);
+ stats->cca_tx = le32_to_cpu(qlink_stats->cca_tx);
+ stats->cca_rx = le32_to_cpu(qlink_stats->cca_rx);
+ stats->cca_busy = le32_to_cpu(qlink_stats->cca_busy);
+ stats->cca_try = le32_to_cpu(qlink_stats->cca_try);
+ stats->chan_noise = qlink_stats->chan_noise;
+
+ pr_debug("chan(%u) try(%u) busy(%u) noise(%d)\n",
+ stats->chan_num, stats->cca_try,
+ stats->cca_busy, stats->chan_noise);
+ break;
+ default:
+ pr_warn("Unknown TLV type: %#x\n",
+ le16_to_cpu(tlv->type));
+ }
+ payload_len -= tlv_full_len;
+ tlv = (struct qlink_tlv_hdr *)(tlv->val + tlv_value_len);
+ }
+
+ if (payload_len) {
+ pr_warn("malformed TLV buf; bytes left: %zu\n", payload_len);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int qtnf_cmd_get_mac_info(struct qtnf_wmac *mac)
{
struct sk_buff *cmd_skb, *resp_skb = NULL;
@@ -1256,6 +1436,7 @@ int qtnf_cmd_get_hw_info(struct qtnf_bus *bus)
const struct qlink_resp_get_hw_info *resp;
u16 res_code = QLINK_CMD_RESULT_OK;
int ret = 0;
+ size_t info_len;
cmd_skb = qtnf_cmd_alloc_new_cmdskb(QLINK_MACID_RSVD, QLINK_VIFID_RSVD,
QLINK_CMD_GET_HW_INFO,
@@ -1266,7 +1447,7 @@ int qtnf_cmd_get_hw_info(struct qtnf_bus *bus)
qtnf_bus_lock(bus);
ret = qtnf_cmd_send_with_reply(bus, cmd_skb, &resp_skb, &res_code,
- sizeof(*resp), NULL);
+ sizeof(*resp), &info_len);
if (unlikely(ret))
goto out;
@@ -1278,7 +1459,7 @@ int qtnf_cmd_get_hw_info(struct qtnf_bus *bus)
}
resp = (const struct qlink_resp_get_hw_info *)resp_skb->data;
- ret = qtnf_cmd_resp_proc_hw_info(bus, resp);
+ ret = qtnf_cmd_resp_proc_hw_info(bus, resp, info_len);
out:
qtnf_bus_unlock(bus);
@@ -1320,6 +1501,9 @@ int qtnf_cmd_get_mac_chan_info(struct qtnf_wmac *mac,
cmd = (struct qlink_cmd_chans_info_get *)cmd_skb->data;
cmd->band = qband;
+
+ qtnf_bus_lock(mac->bus);
+
ret = qtnf_cmd_send_with_reply(mac->bus, cmd_skb, &resp_skb, &res_code,
sizeof(*resp), &info_len);
@@ -1343,6 +1527,7 @@ int qtnf_cmd_get_mac_chan_info(struct qtnf_wmac *mac,
ret = qtnf_cmd_resp_fill_channels_info(band, resp, info_len);
out:
+ qtnf_bus_unlock(mac->bus);
consume_skb(resp_skb);
return ret;
@@ -1676,10 +1861,27 @@ int qtnf_cmd_send_change_sta(struct qtnf_vif *vif, const u8 *mac,
cmd = (struct qlink_cmd_change_sta *)cmd_skb->data;
ether_addr_copy(cmd->sta_addr, mac);
- cmd->sta_flags_mask = cpu_to_le32(qtnf_encode_sta_flags(
- params->sta_flags_mask));
- cmd->sta_flags_set = cpu_to_le32(qtnf_encode_sta_flags(
- params->sta_flags_set));
+
+ switch (vif->wdev.iftype) {
+ case NL80211_IFTYPE_AP:
+ cmd->if_type = cpu_to_le16(QLINK_IFTYPE_AP);
+ cmd->sta_flags_mask = cpu_to_le32(qtnf_encode_sta_flags(
+ params->sta_flags_mask));
+ cmd->sta_flags_set = cpu_to_le32(qtnf_encode_sta_flags(
+ params->sta_flags_set));
+ break;
+ case NL80211_IFTYPE_STATION:
+ cmd->if_type = cpu_to_le16(QLINK_IFTYPE_STATION);
+ cmd->sta_flags_mask = cpu_to_le32(qtnf_encode_sta_flags(
+ params->sta_flags_mask));
+ cmd->sta_flags_set = cpu_to_le32(qtnf_encode_sta_flags(
+ params->sta_flags_set));
+ break;
+ default:
+ pr_err("unsupported iftype %d\n", vif->wdev.iftype);
+ ret = -EINVAL;
+ goto out;
+ }
ret = qtnf_cmd_send(vif->mac->bus, cmd_skb, &res_code);
if (unlikely(ret))
@@ -1853,8 +2055,8 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif,
ether_addr_copy(cmd->bssid, bss_cfg->bssid);
- if (bss_cfg->chandef.chan)
- cmd->freq = cpu_to_le16(bss_cfg->chandef.chan->center_freq);
+ if (vif->mac->chandef.chan)
+ cmd->channel = cpu_to_le16(vif->mac->chandef.chan->hw_value);
cmd->bg_scan_period = cpu_to_le16(bss_cfg->bg_scan_period);
@@ -1976,3 +2178,183 @@ out:
qtnf_bus_unlock(vif->mac->bus);
return ret;
}
+
+int qtnf_cmd_reg_notify(struct qtnf_bus *bus, struct regulatory_request *req)
+{
+ struct sk_buff *cmd_skb;
+ int ret;
+ u16 res_code;
+ struct qlink_cmd_reg_notify *cmd;
+
+ cmd_skb = qtnf_cmd_alloc_new_cmdskb(QLINK_MACID_RSVD, QLINK_VIFID_RSVD,
+ QLINK_CMD_REG_NOTIFY,
+ sizeof(*cmd));
+ if (!cmd_skb)
+ return -ENOMEM;
+
+ cmd = (struct qlink_cmd_reg_notify *)cmd_skb->data;
+ cmd->alpha2[0] = req->alpha2[0];
+ cmd->alpha2[1] = req->alpha2[1];
+
+ switch (req->initiator) {
+ case NL80211_REGDOM_SET_BY_CORE:
+ cmd->initiator = QLINK_REGDOM_SET_BY_CORE;
+ break;
+ case NL80211_REGDOM_SET_BY_USER:
+ cmd->initiator = QLINK_REGDOM_SET_BY_USER;
+ break;
+ case NL80211_REGDOM_SET_BY_DRIVER:
+ cmd->initiator = QLINK_REGDOM_SET_BY_DRIVER;
+ break;
+ case NL80211_REGDOM_SET_BY_COUNTRY_IE:
+ cmd->initiator = QLINK_REGDOM_SET_BY_COUNTRY_IE;
+ break;
+ }
+
+ switch (req->user_reg_hint_type) {
+ case NL80211_USER_REG_HINT_USER:
+ cmd->user_reg_hint_type = QLINK_USER_REG_HINT_USER;
+ break;
+ case NL80211_USER_REG_HINT_CELL_BASE:
+ cmd->user_reg_hint_type = QLINK_USER_REG_HINT_CELL_BASE;
+ break;
+ case NL80211_USER_REG_HINT_INDOOR:
+ cmd->user_reg_hint_type = QLINK_USER_REG_HINT_INDOOR;
+ break;
+ }
+
+ qtnf_bus_lock(bus);
+
+ ret = qtnf_cmd_send(bus, cmd_skb, &res_code);
+ if (ret)
+ goto out;
+
+ switch (res_code) {
+ case QLINK_CMD_RESULT_ENOTSUPP:
+ pr_warn("reg update not supported\n");
+ ret = -EOPNOTSUPP;
+ break;
+ case QLINK_CMD_RESULT_EALREADY:
+ pr_info("regulatory domain is already set to %c%c",
+ req->alpha2[0], req->alpha2[1]);
+ ret = -EALREADY;
+ break;
+ case QLINK_CMD_RESULT_OK:
+ ret = 0;
+ break;
+ default:
+ ret = -EFAULT;
+ break;
+ }
+
+out:
+ qtnf_bus_unlock(bus);
+
+ return ret;
+}
+
+int qtnf_cmd_get_chan_stats(struct qtnf_wmac *mac, u16 channel,
+ struct qtnf_chan_stats *stats)
+{
+ struct sk_buff *cmd_skb, *resp_skb = NULL;
+ struct qlink_cmd_get_chan_stats *cmd;
+ struct qlink_resp_get_chan_stats *resp;
+ size_t var_data_len;
+ u16 res_code = QLINK_CMD_RESULT_OK;
+ int ret = 0;
+
+ cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, QLINK_VIFID_RSVD,
+ QLINK_CMD_CHAN_STATS,
+ sizeof(*cmd));
+ if (!cmd_skb)
+ return -ENOMEM;
+
+ qtnf_bus_lock(mac->bus);
+
+ cmd = (struct qlink_cmd_get_chan_stats *)cmd_skb->data;
+ cmd->channel = cpu_to_le16(channel);
+
+ ret = qtnf_cmd_send_with_reply(mac->bus, cmd_skb, &resp_skb, &res_code,
+ sizeof(*resp), &var_data_len);
+ if (unlikely(ret)) {
+ qtnf_bus_unlock(mac->bus);
+ return ret;
+ }
+
+ if (unlikely(res_code != QLINK_CMD_RESULT_OK)) {
+ switch (res_code) {
+ case QLINK_CMD_RESULT_ENOTFOUND:
+ ret = -ENOENT;
+ break;
+ default:
+ pr_err("cmd exec failed: 0x%.4X\n", res_code);
+ ret = -EFAULT;
+ break;
+ }
+ goto out;
+ }
+
+ resp = (struct qlink_resp_get_chan_stats *)resp_skb->data;
+ ret = qtnf_cmd_resp_proc_chan_stat_info(stats, resp->info,
+ var_data_len);
+
+out:
+ qtnf_bus_unlock(mac->bus);
+ consume_skb(resp_skb);
+ return ret;
+}
+
+int qtnf_cmd_send_chan_switch(struct qtnf_wmac *mac,
+ struct cfg80211_csa_settings *params)
+{
+ struct qlink_cmd_chan_switch *cmd;
+ struct sk_buff *cmd_skb;
+ u16 res_code = QLINK_CMD_RESULT_OK;
+ int ret;
+
+ cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, 0x0,
+ QLINK_CMD_CHAN_SWITCH,
+ sizeof(*cmd));
+
+ if (unlikely(!cmd_skb))
+ return -ENOMEM;
+
+ qtnf_bus_lock(mac->bus);
+
+ cmd = (struct qlink_cmd_chan_switch *)cmd_skb->data;
+ cmd->channel = cpu_to_le16(params->chandef.chan->hw_value);
+ cmd->radar_required = params->radar_required;
+ cmd->block_tx = params->block_tx;
+ cmd->beacon_count = params->count;
+
+ ret = qtnf_cmd_send(mac->bus, cmd_skb, &res_code);
+
+ if (unlikely(ret))
+ goto out;
+
+ switch (res_code) {
+ case QLINK_CMD_RESULT_OK:
+ memcpy(&mac->csa_chandef, &params->chandef,
+ sizeof(mac->csa_chandef));
+ mac->status |= QTNF_MAC_CSA_ACTIVE;
+ ret = 0;
+ break;
+ case QLINK_CMD_RESULT_ENOTFOUND:
+ ret = -ENOENT;
+ break;
+ case QLINK_CMD_RESULT_ENOTSUPP:
+ ret = -EOPNOTSUPP;
+ break;
+ case QLINK_CMD_RESULT_EALREADY:
+ ret = -EALREADY;
+ break;
+ case QLINK_CMD_RESULT_INVALID:
+ default:
+ ret = -EFAULT;
+ break;
+ }
+
+out:
+ qtnf_bus_unlock(mac->bus);
+ return ret;
+}
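A recurring shape in commands.c: each command path sends a qlink request,
then maps the firmware's 16-bit result code onto a Linux errno in an
open-coded switch (see the CHAN_STATS and CHAN_SWITCH handlers above). A
hypothetical helper capturing the common mapping, shown only to make the
convention explicit (it does not exist in the driver):

	static int qlink_res_to_errno(u16 res_code)
	{
		switch (res_code) {
		case QLINK_CMD_RESULT_OK:
			return 0;
		case QLINK_CMD_RESULT_ENOTFOUND:
			return -ENOENT;
		case QLINK_CMD_RESULT_ENOTSUPP:
			return -EOPNOTSUPP;
		case QLINK_CMD_RESULT_EALREADY:
			return -EALREADY;
		case QLINK_CMD_RESULT_INVALID:
		default:
			return -EFAULT;
		}
	}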
diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.h b/drivers/net/wireless/quantenna/qtnfmac/commands.h
index 6c51854..783b2036 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/commands.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/commands.h
@@ -70,5 +70,10 @@ int qtnf_cmd_send_disconnect(struct qtnf_vif *vif,
u16 reason_code);
int qtnf_cmd_send_updown_intf(struct qtnf_vif *vif,
bool up);
+int qtnf_cmd_reg_notify(struct qtnf_bus *bus, struct regulatory_request *req);
+int qtnf_cmd_get_chan_stats(struct qtnf_wmac *mac, u16 channel,
+ struct qtnf_chan_stats *stats);
+int qtnf_cmd_send_chan_switch(struct qtnf_wmac *mac,
+ struct cfg80211_csa_settings *params);
#endif /* QLINK_COMMANDS_H_ */
diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c
index f053532..5e60180 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/core.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/core.c
@@ -288,6 +288,8 @@ static struct qtnf_wmac *qtnf_core_mac_alloc(struct qtnf_bus *bus,
mac->iflist[i].mac = mac;
mac->iflist[i].vifid = i;
qtnf_sta_list_init(&mac->iflist[i].sta_list);
+ mutex_init(&mac->mac_lock);
+ init_timer(&mac->scan_timeout);
}
qtnf_mac_init_primary_intf(mac);
@@ -549,6 +551,9 @@ void qtnf_core_detach(struct qtnf_bus *bus)
destroy_workqueue(bus->workqueue);
}
+ kfree(bus->hw_info.rd);
+ bus->hw_info.rd = NULL;
+
qtnf_trans_free(bus);
}
EXPORT_SYMBOL_GPL(qtnf_core_detach);
diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h
index a616434..066fcd1 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/core.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/core.h
@@ -42,11 +42,11 @@
#define QTNF_MAX_SSID_LIST_LENGTH 2
#define QTNF_MAX_VSIE_LEN 255
-#define QTNF_MAX_ALPHA_LEN 2
#define QTNF_MAX_INTF 8
#define QTNF_MAX_EVENT_QUEUE_LEN 255
#define QTNF_DEFAULT_BG_SCAN_PERIOD 300
#define QTNF_MAX_BG_SCAN_PERIOD 0xffff
+#define QTNF_SCAN_TIMEOUT_SEC 15
#define QTNF_DEF_BSS_PRIORITY 0
#define QTNF_DEF_WDOG_TIMEOUT 5
@@ -68,7 +68,6 @@ struct qtnf_bss_config {
u16 auth_type;
bool privacy;
enum nl80211_mfp mfp;
- struct cfg80211_chan_def chandef;
struct cfg80211_crypto_settings crypto;
u16 bg_scan_period;
u32 connect_flags;
@@ -90,6 +89,10 @@ enum qtnf_sta_state {
QTNF_STA_CONNECTED
};
+enum qtnf_mac_status {
+ QTNF_MAC_CSA_ACTIVE = BIT(0)
+};
+
struct qtnf_vif {
struct wireless_dev wdev;
u8 vifid;
@@ -125,25 +128,39 @@ struct qtnf_mac_info {
size_t n_limits;
};
+struct qtnf_chan_stats {
+ u32 chan_num;
+ u32 cca_tx;
+ u32 cca_rx;
+ u32 cca_busy;
+ u32 cca_try;
+ s8 chan_noise;
+};
+
struct qtnf_wmac {
u8 macid;
u8 wiphy_registered;
u8 macaddr[ETH_ALEN];
+ u32 status;
struct qtnf_bus *bus;
struct qtnf_mac_info macinfo;
struct qtnf_vif iflist[QTNF_MAX_INTF];
struct cfg80211_scan_request *scan_req;
+ struct cfg80211_chan_def chandef;
+ struct cfg80211_chan_def csa_chandef;
+ struct mutex mac_lock; /* lock for wmac-specific ops */
+ struct timer_list scan_timeout;
};
struct qtnf_hw_info {
+ u16 ql_proto_ver;
u8 num_mac;
u8 mac_bitmap;
- u8 alpha2_code[QTNF_MAX_ALPHA_LEN];
u32 fw_ver;
- u16 ql_proto_ver;
+ u32 hw_capab;
+ struct ieee80211_regdomain *rd;
u8 total_tx_chain;
u8 total_rx_chain;
- u32 hw_capab;
};
struct qtnf_vif *qtnf_mac_get_free_vif(struct qtnf_wmac *mac);
diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c
index 9b61e9a..0fc2814 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/event.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/event.c
@@ -211,8 +211,8 @@ qtnf_event_handle_bss_leave(struct qtnf_vif *vif,
pr_debug("VIF%u.%u: disconnected\n", vif->mac->macid, vif->vifid);
- cfg80211_disconnected(vif->netdev, leave_info->reason, NULL, 0, 0,
- GFP_KERNEL);
+ cfg80211_disconnected(vif->netdev, le16_to_cpu(leave_info->reason),
+ NULL, 0, 0, GFP_KERNEL);
vif->sta_state = QTNF_STA_DISCONNECTED;
netif_carrier_off(vif->netdev);
@@ -345,11 +345,70 @@ qtnf_event_handle_scan_complete(struct qtnf_wmac *mac,
return -EINVAL;
}
+ if (timer_pending(&mac->scan_timeout))
+ del_timer_sync(&mac->scan_timeout);
qtnf_scan_done(mac, le32_to_cpu(status->flags) & QLINK_SCAN_ABORTED);
return 0;
}
+static int
+qtnf_event_handle_freq_change(struct qtnf_wmac *mac,
+ const struct qlink_event_freq_change *data,
+ u16 len)
+{
+ struct wiphy *wiphy = priv_to_wiphy(mac);
+ struct cfg80211_chan_def chandef;
+ struct ieee80211_channel *chan;
+ struct qtnf_vif *vif;
+ int freq;
+ int i;
+
+ if (len < sizeof(*data)) {
+ pr_err("payload is too short\n");
+ return -EINVAL;
+ }
+
+ freq = le32_to_cpu(data->freq);
+ chan = ieee80211_get_channel(wiphy, freq);
+ if (!chan) {
+ pr_err("channel at %d MHz not found\n", freq);
+ return -EINVAL;
+ }
+
+ pr_debug("MAC%d switch to new channel %u MHz\n", mac->macid, freq);
+
+ if (mac->status & QTNF_MAC_CSA_ACTIVE) {
+ mac->status &= ~QTNF_MAC_CSA_ACTIVE;
+ if (chan->hw_value != mac->csa_chandef.chan->hw_value)
+ pr_warn("unexpected switch to %u during CSA to %u\n",
+ chan->hw_value,
+ mac->csa_chandef.chan->hw_value);
+ }
+
+ /* FIXME: need to figure out proper nl80211_channel_type value */
+ cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20);
+ /* fall-back to minimal safe chandef description */
+ if (!cfg80211_chandef_valid(&chandef))
+ cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20);
+
+ memcpy(&mac->chandef, &chandef, sizeof(mac->chandef));
+
+ for (i = 0; i < QTNF_MAX_INTF; i++) {
+ vif = &mac->iflist[i];
+ if (vif->wdev.iftype == NL80211_IFTYPE_UNSPECIFIED)
+ continue;
+
+ if (vif->netdev) {
+ mutex_lock(&vif->wdev.mtx);
+ cfg80211_ch_switch_notify(vif->netdev, &chandef);
+ mutex_unlock(&vif->wdev.mtx);
+ }
+ }
+
+ return 0;
+}
+
static int qtnf_event_parse(struct qtnf_wmac *mac,
const struct sk_buff *event_skb)
{
@@ -400,6 +459,10 @@ static int qtnf_event_parse(struct qtnf_wmac *mac,
ret = qtnf_event_handle_bss_leave(vif, (const void *)event,
event_len);
break;
+ case QLINK_EVENT_FREQ_CHANGE:
+ ret = qtnf_event_handle_freq_change(mac, (const void *)event,
+ event_len);
+ break;
default:
pr_warn("unknown event type: %x\n", event_id);
break;
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
index 7fc4f0d..502e72b 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie.c
@@ -25,6 +25,8 @@
#include <linux/completion.h>
#include <linux/crc32.h>
#include <linux/spinlock.h>
+#include <linux/circ_buf.h>
+#include <linux/log2.h>
#include "qtn_hw_ids.h"
#include "pcie_bus_priv.h"
@@ -36,17 +38,13 @@ static bool use_msi = true;
module_param(use_msi, bool, 0644);
MODULE_PARM_DESC(use_msi, "set 0 to use legacy interrupt");
-static unsigned int tx_bd_size_param = 256;
+static unsigned int tx_bd_size_param = 32;
module_param(tx_bd_size_param, uint, 0644);
-MODULE_PARM_DESC(tx_bd_size_param, "Tx descriptors queue size");
+MODULE_PARM_DESC(tx_bd_size_param, "Tx descriptors queue size, power of two");
static unsigned int rx_bd_size_param = 256;
module_param(rx_bd_size_param, uint, 0644);
-MODULE_PARM_DESC(rx_bd_size_param, "Rx descriptors queue size");
-
-static unsigned int rx_bd_reserved_param = 16;
-module_param(rx_bd_reserved_param, uint, 0644);
-MODULE_PARM_DESC(rx_bd_reserved_param, "Reserved RX descriptors");
+MODULE_PARM_DESC(rx_bd_size_param, "Rx descriptors queue size, power of two");
static u8 flashboot = 1;
module_param(flashboot, byte, 0644);
@@ -186,8 +184,10 @@ static void __iomem *qtnf_map_bar(struct qtnf_pcie_bus_priv *priv, u8 index)
return IOMEM_ERR_PTR(ret);
busaddr = pci_resource_start(priv->pdev, index);
- vaddr = pcim_iomap_table(priv->pdev)[index];
len = pci_resource_len(priv->pdev, index);
+ vaddr = pcim_iomap_table(priv->pdev)[index];
+ if (!vaddr)
+ return IOMEM_ERR_PTR(-ENOMEM);
pr_debug("BAR%u vaddr=0x%p busaddr=%pad len=%u\n",
index, vaddr, &busaddr, (int)len);
@@ -250,19 +250,19 @@ static int qtnf_pcie_init_memory(struct qtnf_pcie_bus_priv *priv)
int ret = -ENOMEM;
priv->sysctl_bar = qtnf_map_bar(priv, QTN_SYSCTL_BAR);
- if (IS_ERR_OR_NULL(priv->sysctl_bar)) {
+ if (IS_ERR(priv->sysctl_bar)) {
pr_err("failed to map BAR%u\n", QTN_SYSCTL_BAR);
return ret;
}
priv->dmareg_bar = qtnf_map_bar(priv, QTN_DMA_BAR);
- if (IS_ERR_OR_NULL(priv->dmareg_bar)) {
+ if (IS_ERR(priv->dmareg_bar)) {
pr_err("failed to map BAR%u\n", QTN_DMA_BAR);
return ret;
}
priv->epmem_bar = qtnf_map_bar(priv, QTN_SHMEM_BAR);
- if (IS_ERR_OR_NULL(priv->epmem_bar)) {
+ if (IS_ERR(priv->epmem_bar)) {
pr_err("failed to map BAR%u\n", QTN_SHMEM_BAR);
return ret;
}
@@ -274,32 +274,6 @@ static int qtnf_pcie_init_memory(struct qtnf_pcie_bus_priv *priv)
return 0;
}
-static int
-qtnf_pcie_init_dma_mask(struct qtnf_pcie_bus_priv *priv, u64 dma_mask)
-{
- int ret;
-
- ret = dma_supported(&priv->pdev->dev, dma_mask);
- if (!ret) {
- pr_err("DMA mask %llu not supported\n", dma_mask);
- return ret;
- }
-
- ret = pci_set_dma_mask(priv->pdev, dma_mask);
- if (ret) {
- pr_err("failed to set DMA mask %llu\n", dma_mask);
- return ret;
- }
-
- ret = pci_set_consistent_dma_mask(priv->pdev, dma_mask);
- if (ret) {
- pr_err("failed to set consistent DMA mask %llu\n", dma_mask);
- return ret;
- }
-
- return ret;
-}
-
static void qtnf_tune_pcie_mps(struct qtnf_pcie_bus_priv *priv)
{
struct pci_dev *pdev = priv->pdev;
@@ -418,9 +392,8 @@ static int alloc_bd_table(struct qtnf_pcie_bus_priv *priv)
pr_debug("TX descriptor table: vaddr=0x%p paddr=%pad\n", vaddr, &paddr);
- priv->tx_bd_reclaim_start = 0;
- priv->tx_bd_index = 0;
- priv->tx_queue_len = 0;
+ priv->tx_bd_r_index = 0;
+ priv->tx_bd_w_index = 0;
/* rx bd */
@@ -430,43 +403,34 @@ static int alloc_bd_table(struct qtnf_pcie_bus_priv *priv)
priv->rx_bd_vbase = vaddr;
priv->rx_bd_pbase = paddr;
- writel(QTN_HOST_LO32(paddr),
- PCIE_HDP_TX_HOST_Q_BASE_L(priv->pcie_reg_base));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
writel(QTN_HOST_HI32(paddr),
PCIE_HDP_TX_HOST_Q_BASE_H(priv->pcie_reg_base));
+#endif
+ writel(QTN_HOST_LO32(paddr),
+ PCIE_HDP_TX_HOST_Q_BASE_L(priv->pcie_reg_base));
writel(priv->rx_bd_num | (sizeof(struct qtnf_rx_bd)) << 16,
PCIE_HDP_TX_HOST_Q_SZ_CTRL(priv->pcie_reg_base));
- priv->hw_txproc_wr_ptr = priv->rx_bd_num - rx_bd_reserved_param;
-
- writel(priv->hw_txproc_wr_ptr,
- PCIE_HDP_TX_HOST_Q_WR_PTR(priv->pcie_reg_base));
-
pr_debug("RX descriptor table: vaddr=0x%p paddr=%pad\n", vaddr, &paddr);
- priv->rx_bd_index = 0;
-
return 0;
}
-static int skb2rbd_attach(struct qtnf_pcie_bus_priv *priv, u16 rx_bd_index)
+static int skb2rbd_attach(struct qtnf_pcie_bus_priv *priv, u16 index)
{
struct qtnf_rx_bd *rxbd;
struct sk_buff *skb;
dma_addr_t paddr;
- skb = __dev_alloc_skb(SKB_BUF_SIZE + NET_IP_ALIGN,
- GFP_ATOMIC);
+ skb = __netdev_alloc_skb_ip_align(NULL, SKB_BUF_SIZE, GFP_ATOMIC);
if (!skb) {
- priv->rx_skb[rx_bd_index] = NULL;
+ priv->rx_skb[index] = NULL;
return -ENOMEM;
}
- priv->rx_skb[rx_bd_index] = skb;
-
- skb_reserve(skb, NET_IP_ALIGN);
-
- rxbd = &priv->rx_bd_vbase[rx_bd_index];
+ priv->rx_skb[index] = skb;
+ rxbd = &priv->rx_bd_vbase[index];
paddr = pci_map_single(priv->pdev, skb->data,
SKB_BUF_SIZE, PCI_DMA_FROMDEVICE);
@@ -475,17 +439,24 @@ static int skb2rbd_attach(struct qtnf_pcie_bus_priv *priv, u16 rx_bd_index)
return -ENOMEM;
}
- writel(QTN_HOST_LO32(paddr),
- PCIE_HDP_HHBM_BUF_PTR(priv->pcie_reg_base));
- writel(QTN_HOST_HI32(paddr),
- PCIE_HDP_HHBM_BUF_PTR_H(priv->pcie_reg_base));
-
/* keep rx skb paddrs in rx buffer descriptors for cleanup purposes */
rxbd->addr = cpu_to_le32(QTN_HOST_LO32(paddr));
rxbd->addr_h = cpu_to_le32(QTN_HOST_HI32(paddr));
-
rxbd->info = 0x0;
+ priv->rx_bd_w_index = index;
+
+ /* sync up all descriptor updates */
+ wmb();
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ writel(QTN_HOST_HI32(paddr),
+ PCIE_HDP_HHBM_BUF_PTR_H(priv->pcie_reg_base));
+#endif
+ writel(QTN_HOST_LO32(paddr),
+ PCIE_HDP_HHBM_BUF_PTR(priv->pcie_reg_base));
+
+ writel(index, PCIE_HDP_TX_HOST_Q_WR_PTR(priv->pcie_reg_base));
return 0;
}
@@ -516,7 +487,7 @@ static void free_xfer_buffers(void *data)
/* free rx buffers */
for (i = 0; i < priv->rx_bd_num; i++) {
- if (priv->rx_skb[i]) {
+ if (priv->rx_skb && priv->rx_skb[i]) {
rxbd = &priv->rx_bd_vbase[i];
paddr = QTN_HOST_ADDR(le32_to_cpu(rxbd->addr_h),
le32_to_cpu(rxbd->addr));
@@ -529,19 +500,72 @@ static void free_xfer_buffers(void *data)
/* free tx buffers */
for (i = 0; i < priv->tx_bd_num; i++) {
- if (priv->tx_skb[i]) {
+ if (priv->tx_skb && priv->tx_skb[i]) {
dev_kfree_skb_any(priv->tx_skb[i]);
priv->tx_skb[i] = NULL;
}
}
}
+static int qtnf_hhbm_init(struct qtnf_pcie_bus_priv *priv)
+{
+ u32 val;
+
+ val = readl(PCIE_HHBM_CONFIG(priv->pcie_reg_base));
+ val |= HHBM_CONFIG_SOFT_RESET;
+ writel(val, PCIE_HHBM_CONFIG(priv->pcie_reg_base));
+ usleep_range(50, 100);
+ val &= ~HHBM_CONFIG_SOFT_RESET;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ val |= HHBM_64BIT;
+#endif
+ writel(val, PCIE_HHBM_CONFIG(priv->pcie_reg_base));
+ writel(priv->rx_bd_num, PCIE_HHBM_Q_LIMIT_REG(priv->pcie_reg_base));
+
+ return 0;
+}
+
static int qtnf_pcie_init_xfer(struct qtnf_pcie_bus_priv *priv)
{
int ret;
+ u32 val;
priv->tx_bd_num = tx_bd_size_param;
priv->rx_bd_num = rx_bd_size_param;
+ priv->rx_bd_w_index = 0;
+ priv->rx_bd_r_index = 0;
+
+ if (!priv->tx_bd_num || !is_power_of_2(priv->tx_bd_num)) {
+ pr_err("tx_bd_size_param %u is not power of two\n",
+ priv->tx_bd_num);
+ return -EINVAL;
+ }
+
+ val = priv->tx_bd_num * sizeof(struct qtnf_tx_bd);
+ if (val > PCIE_HHBM_MAX_SIZE) {
+ pr_err("tx_bd_size_param %u is too large\n",
+ priv->tx_bd_num);
+ return -EINVAL;
+ }
+
+ if (!priv->rx_bd_num || !is_power_of_2(priv->rx_bd_num)) {
+ pr_err("rx_bd_size_param %u is not power of two\n",
+ priv->rx_bd_num);
+ return -EINVAL;
+ }
+
+ val = priv->rx_bd_num * sizeof(dma_addr_t);
+ if (val > PCIE_HHBM_MAX_SIZE) {
+ pr_err("rx_bd_size_param %u is too large\n",
+ priv->rx_bd_num);
+ return -EINVAL;
+ }
+
+ ret = qtnf_hhbm_init(priv);
+ if (ret) {
+ pr_err("failed to init h/w queues\n");
+ return ret;
+ }
ret = alloc_skb_array(priv);
if (ret) {
@@ -564,67 +588,72 @@ static int qtnf_pcie_init_xfer(struct qtnf_pcie_bus_priv *priv)
return ret;
}
-static int qtnf_pcie_data_tx_reclaim(struct qtnf_pcie_bus_priv *priv)
+static void qtnf_pcie_data_tx_reclaim(struct qtnf_pcie_bus_priv *priv)
{
struct qtnf_tx_bd *txbd;
struct sk_buff *skb;
+ unsigned long flags;
dma_addr_t paddr;
- int last_sent;
- int count;
+ u32 tx_done_index;
+ int count = 0;
int i;
- last_sent = readl(PCIE_HDP_RX0DMA_CNT(priv->pcie_reg_base))
- % priv->tx_bd_num;
- i = priv->tx_bd_reclaim_start;
- count = 0;
+ spin_lock_irqsave(&priv->tx_reclaim_lock, flags);
- while (i != last_sent) {
- skb = priv->tx_skb[i];
- if (!skb)
- break;
+ tx_done_index = readl(PCIE_HDP_RX0DMA_CNT(priv->pcie_reg_base))
+ & (priv->tx_bd_num - 1);
- txbd = &priv->tx_bd_vbase[i];
- paddr = QTN_HOST_ADDR(le32_to_cpu(txbd->addr_h),
- le32_to_cpu(txbd->addr));
- pci_unmap_single(priv->pdev, paddr, skb->len, PCI_DMA_TODEVICE);
+ i = priv->tx_bd_r_index;
- if (skb->dev) {
- skb->dev->stats.tx_packets++;
- skb->dev->stats.tx_bytes += skb->len;
+ while (CIRC_CNT(tx_done_index, i, priv->tx_bd_num)) {
+ skb = priv->tx_skb[i];
+ if (likely(skb)) {
+ txbd = &priv->tx_bd_vbase[i];
+ paddr = QTN_HOST_ADDR(le32_to_cpu(txbd->addr_h),
+ le32_to_cpu(txbd->addr));
+ pci_unmap_single(priv->pdev, paddr, skb->len,
+ PCI_DMA_TODEVICE);
+
+ if (skb->dev) {
+ skb->dev->stats.tx_packets++;
+ skb->dev->stats.tx_bytes += skb->len;
+
+ if (netif_queue_stopped(skb->dev))
+ netif_wake_queue(skb->dev);
+ }
- if (netif_queue_stopped(skb->dev))
- netif_wake_queue(skb->dev);
+ dev_kfree_skb_any(skb);
}
- dev_kfree_skb_any(skb);
priv->tx_skb[i] = NULL;
- priv->tx_queue_len--;
count++;
if (++i >= priv->tx_bd_num)
i = 0;
}
- priv->tx_bd_reclaim_start = i;
priv->tx_reclaim_done += count;
priv->tx_reclaim_req++;
+ priv->tx_bd_r_index = i;
- return count;
+ spin_unlock_irqrestore(&priv->tx_reclaim_lock, flags);
}
-static bool qtnf_tx_queue_ready(struct qtnf_pcie_bus_priv *priv)
+static int qtnf_tx_queue_ready(struct qtnf_pcie_bus_priv *priv)
{
- if (priv->tx_queue_len >= priv->tx_bd_num - 1) {
+ if (!CIRC_SPACE(priv->tx_bd_w_index, priv->tx_bd_r_index,
+ priv->tx_bd_num)) {
pr_err_ratelimited("reclaim full Tx queue\n");
qtnf_pcie_data_tx_reclaim(priv);
- if (priv->tx_queue_len >= priv->tx_bd_num - 1) {
+ if (!CIRC_SPACE(priv->tx_bd_w_index, priv->tx_bd_r_index,
+ priv->tx_bd_num)) {
priv->tx_full_count++;
- return false;
+ return 0;
}
}
- return true;
+ return 1;
}
static int qtnf_pcie_data_tx(struct qtnf_bus *bus, struct sk_buff *skb)
@@ -632,24 +661,18 @@ static int qtnf_pcie_data_tx(struct qtnf_bus *bus, struct sk_buff *skb)
struct qtnf_pcie_bus_priv *priv = (void *)get_bus_priv(bus);
dma_addr_t txbd_paddr, skb_paddr;
struct qtnf_tx_bd *txbd;
- unsigned long flags;
int len, i;
u32 info;
int ret = 0;
- spin_lock_irqsave(&priv->tx_lock, flags);
-
- priv->tx_done_count++;
-
if (!qtnf_tx_queue_ready(priv)) {
if (skb->dev)
netif_stop_queue(skb->dev);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
return NETDEV_TX_BUSY;
}
- i = priv->tx_bd_index;
+ i = priv->tx_bd_w_index;
priv->tx_skb[i] = skb;
len = skb->len;
@@ -673,16 +696,18 @@ static int qtnf_pcie_data_tx(struct qtnf_bus *bus, struct sk_buff *skb)
/* write new TX descriptor to PCIE_RX_FIFO on EP */
txbd_paddr = priv->tx_bd_pbase + i * sizeof(struct qtnf_tx_bd);
- writel(QTN_HOST_LO32(txbd_paddr),
- PCIE_HDP_HOST_WR_DESC0(priv->pcie_reg_base));
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
writel(QTN_HOST_HI32(txbd_paddr),
PCIE_HDP_HOST_WR_DESC0_H(priv->pcie_reg_base));
+#endif
+ writel(QTN_HOST_LO32(txbd_paddr),
+ PCIE_HDP_HOST_WR_DESC0(priv->pcie_reg_base));
if (++i >= priv->tx_bd_num)
i = 0;
- priv->tx_bd_index = i;
- priv->tx_queue_len++;
+ priv->tx_bd_w_index = i;
tx_done:
if (ret && skb) {
@@ -692,7 +717,8 @@ tx_done:
dev_kfree_skb_any(skb);
}
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ qtnf_pcie_data_tx_reclaim(priv);
+ priv->tx_done_count++;
return NETDEV_TX_OK;
}
@@ -719,14 +745,21 @@ static irqreturn_t qtnf_interrupt(int irq, void *data)
if (!(status & priv->pcie_irq_mask))
goto irq_done;
- if (status & PCIE_HDP_INT_RX_BITS) {
+ if (status & PCIE_HDP_INT_RX_BITS)
priv->pcie_irq_rx_count++;
+
+ if (status & PCIE_HDP_INT_TX_BITS)
+ priv->pcie_irq_tx_count++;
+
+ if (status & PCIE_HDP_INT_HHBM_UF)
+ priv->pcie_irq_uf_count++;
+
+ if (status & PCIE_HDP_INT_RX_BITS) {
qtnf_dis_rxdone_irq(priv);
napi_schedule(&bus->mux_napi);
}
if (status & PCIE_HDP_INT_TX_BITS) {
- priv->pcie_irq_tx_count++;
qtnf_dis_txdone_irq(priv);
tasklet_hi_schedule(&priv->reclaim_tq);
}
@@ -741,16 +774,19 @@ irq_done:
return IRQ_HANDLED;
}
-static inline void hw_txproc_wr_ptr_inc(struct qtnf_pcie_bus_priv *priv)
+static int qtnf_rx_data_ready(struct qtnf_pcie_bus_priv *priv)
{
- u32 index;
+ u16 index = priv->rx_bd_r_index;
+ struct qtnf_rx_bd *rxbd;
+ u32 descw;
- index = priv->hw_txproc_wr_ptr;
+ rxbd = &priv->rx_bd_vbase[index];
+ descw = le32_to_cpu(rxbd->info);
- if (++index >= priv->rx_bd_num)
- index = 0;
+ if (descw & QTN_TXDONE_MASK)
+ return 1;
- priv->hw_txproc_wr_ptr = index;
+ return 0;
}
static int qtnf_rx_poll(struct napi_struct *napi, int budget)
@@ -762,64 +798,96 @@ static int qtnf_rx_poll(struct napi_struct *napi, int budget)
int processed = 0;
struct qtnf_rx_bd *rxbd;
dma_addr_t skb_paddr;
+ int consume;
u32 descw;
- u16 index;
+ u32 psize;
+ u16 r_idx;
+ u16 w_idx;
int ret;
- index = priv->rx_bd_index;
- rxbd = &priv->rx_bd_vbase[index];
+ while (processed < budget) {
- descw = le32_to_cpu(rxbd->info);
- while ((descw & QTN_TXDONE_MASK) && (processed < budget)) {
- skb = priv->rx_skb[index];
+ if (!qtnf_rx_data_ready(priv))
+ goto rx_out;
- if (likely(skb)) {
- skb_put(skb, QTN_GET_LEN(descw));
+ r_idx = priv->rx_bd_r_index;
+ rxbd = &priv->rx_bd_vbase[r_idx];
+ descw = le32_to_cpu(rxbd->info);
+
+ skb = priv->rx_skb[r_idx];
+ psize = QTN_GET_LEN(descw);
+ consume = 1;
+
+ if (!(descw & QTN_TXDONE_MASK)) {
+ pr_warn("skip invalid rxbd[%d]\n", r_idx);
+ consume = 0;
+ }
+
+ if (!skb) {
+ pr_warn("skip missing rx_skb[%d]\n", r_idx);
+ consume = 0;
+ }
+ if (skb && (skb_tailroom(skb) < psize)) {
+ pr_err("skip packet with invalid length: %u > %u\n",
+ psize, skb_tailroom(skb));
+ consume = 0;
+ }
+
+ if (skb) {
skb_paddr = QTN_HOST_ADDR(le32_to_cpu(rxbd->addr_h),
le32_to_cpu(rxbd->addr));
pci_unmap_single(priv->pdev, skb_paddr, SKB_BUF_SIZE,
PCI_DMA_FROMDEVICE);
+ }
+ if (consume) {
+ skb_put(skb, psize);
ndev = qtnf_classify_skb(bus, skb);
if (likely(ndev)) {
ndev->stats.rx_packets++;
ndev->stats.rx_bytes += skb->len;
skb->protocol = eth_type_trans(skb, ndev);
- netif_receive_skb(skb);
+ napi_gro_receive(napi, skb);
} else {
pr_debug("drop untagged skb\n");
bus->mux_dev.stats.rx_dropped++;
dev_kfree_skb_any(skb);
}
-
- processed++;
} else {
- pr_err("missing rx_skb[%d]\n", index);
+ if (skb) {
+ bus->mux_dev.stats.rx_dropped++;
+ dev_kfree_skb_any(skb);
+ }
}
- /* attached rx buffer is passed upstream: map a new one */
- ret = skb2rbd_attach(priv, index);
- if (likely(!ret)) {
- if (++index >= priv->rx_bd_num)
- index = 0;
+ priv->rx_skb[r_idx] = NULL;
+ if (++r_idx >= priv->rx_bd_num)
+ r_idx = 0;
- priv->rx_bd_index = index;
- hw_txproc_wr_ptr_inc(priv);
+ priv->rx_bd_r_index = r_idx;
- rxbd = &priv->rx_bd_vbase[index];
- descw = le32_to_cpu(rxbd->info);
- } else {
- pr_err("failed to allocate new rx_skb[%d]\n", index);
- break;
+ /* replace the processed buffer with a new one */
+ w_idx = priv->rx_bd_w_index;
+ while (CIRC_SPACE(priv->rx_bd_w_index, priv->rx_bd_r_index,
+ priv->rx_bd_num) > 0) {
+ if (++w_idx >= priv->rx_bd_num)
+ w_idx = 0;
+
+ ret = skb2rbd_attach(priv, w_idx);
+ if (ret) {
+ pr_err("failed to allocate new rx_skb[%d]\n",
+ w_idx);
+ break;
+ }
}
- writel(priv->hw_txproc_wr_ptr,
- PCIE_HDP_TX_HOST_Q_WR_PTR(priv->pcie_reg_base));
+ processed++;
}
+rx_out:
if (processed < budget) {
napi_complete(napi);
qtnf_en_rxdone_irq(priv);
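
The rework above drops the single rx_bd_index in favour of separate read/write indices managed with the linux/circ_buf.h helpers: CIRC_CNT() counts entries the consumer may drain and CIRC_SPACE() counts slots the producer may refill, with one slot always left empty so full and empty rings stay distinguishable. A self-contained sketch of the arithmetic, assuming a power-of-two ring size as the driver uses:

    #include <stdio.h>

    /* User-space copies of the linux/circ_buf.h macros; size must be
     * a power of two. */
    #define CIRC_CNT(head, tail, size)   (((head) - (tail)) & ((size) - 1))
    #define CIRC_SPACE(head, tail, size) CIRC_CNT((tail), ((head) + 1), (size))

    int main(void)
    {
        unsigned int num = 8, w_index = 6, r_index = 2;

        printf("filled = %u\n", CIRC_CNT(w_index, r_index, num));   /* 4 */
        printf("space  = %u\n", CIRC_SPACE(w_index, r_index, num)); /* 3 */
        return 0;
    }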
@@ -1058,11 +1126,8 @@ static int qtnf_bringup_fw(struct qtnf_bus *bus)
static void qtnf_reclaim_tasklet_fn(unsigned long data)
{
struct qtnf_pcie_bus_priv *priv = (void *)data;
- unsigned long flags;
- spin_lock_irqsave(&priv->tx_lock, flags);
qtnf_pcie_data_tx_reclaim(priv);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
qtnf_en_txdone_irq(priv);
}
@@ -1090,10 +1155,22 @@ static int qtnf_dbg_irq_stats(struct seq_file *s, void *data)
{
struct qtnf_bus *bus = dev_get_drvdata(s->private);
struct qtnf_pcie_bus_priv *priv = get_bus_priv(bus);
+ u32 reg = readl(PCIE_HDP_INT_EN(priv->pcie_reg_base));
+ u32 status;
seq_printf(s, "pcie_irq_count(%u)\n", priv->pcie_irq_count);
seq_printf(s, "pcie_irq_tx_count(%u)\n", priv->pcie_irq_tx_count);
+ status = reg & PCIE_HDP_INT_TX_BITS;
+ seq_printf(s, "pcie_irq_tx_status(%s)\n",
+ (status == PCIE_HDP_INT_TX_BITS) ? "EN" : "DIS");
seq_printf(s, "pcie_irq_rx_count(%u)\n", priv->pcie_irq_rx_count);
+ status = reg & PCIE_HDP_INT_RX_BITS;
+ seq_printf(s, "pcie_irq_rx_status(%s)\n",
+ (status == PCIE_HDP_INT_RX_BITS) ? "EN" : "DIS");
+ seq_printf(s, "pcie_irq_uf_count(%u)\n", priv->pcie_irq_uf_count);
+ status = reg & PCIE_HDP_INT_HHBM_UF;
+ seq_printf(s, "pcie_irq_hhbm_uf_status(%s)\n",
+ (status == PCIE_HDP_INT_HHBM_UF) ? "EN" : "DIS");
return 0;
}
@@ -1107,10 +1184,24 @@ static int qtnf_dbg_hdp_stats(struct seq_file *s, void *data)
seq_printf(s, "tx_done_count(%u)\n", priv->tx_done_count);
seq_printf(s, "tx_reclaim_done(%u)\n", priv->tx_reclaim_done);
seq_printf(s, "tx_reclaim_req(%u)\n", priv->tx_reclaim_req);
- seq_printf(s, "tx_bd_reclaim_start(%u)\n", priv->tx_bd_reclaim_start);
- seq_printf(s, "tx_bd_index(%u)\n", priv->tx_bd_index);
- seq_printf(s, "rx_bd_index(%u)\n", priv->rx_bd_index);
- seq_printf(s, "tx_queue_len(%u)\n", priv->tx_queue_len);
+
+ seq_printf(s, "tx_bd_r_index(%u)\n", priv->tx_bd_r_index);
+ seq_printf(s, "tx_bd_p_index(%u)\n",
+ readl(PCIE_HDP_RX0DMA_CNT(priv->pcie_reg_base))
+ & (priv->tx_bd_num - 1));
+ seq_printf(s, "tx_bd_w_index(%u)\n", priv->tx_bd_w_index);
+ seq_printf(s, "tx queue len(%u)\n",
+ CIRC_CNT(priv->tx_bd_w_index, priv->tx_bd_r_index,
+ priv->tx_bd_num));
+
+ seq_printf(s, "rx_bd_r_index(%u)\n", priv->rx_bd_r_index);
+ seq_printf(s, "rx_bd_p_index(%u)\n",
+ readl(PCIE_HDP_TX0DMA_CNT(priv->pcie_reg_base))
+ & (priv->rx_bd_num - 1));
+ seq_printf(s, "rx_bd_w_index(%u)\n", priv->rx_bd_w_index);
+ seq_printf(s, "rx alloc queue len(%u)\n",
+ CIRC_SPACE(priv->rx_bd_w_index, priv->rx_bd_r_index,
+ priv->rx_bd_num));
return 0;
}
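
The p_index lines above reduce a free-running hardware DMA counter to a ring slot with readl(...) & (bd_num - 1), which only works when the ring size is a power of two. A small sketch with a made-up counter value:

    #include <stdio.h>

    int main(void)
    {
        unsigned int bd_num = 256;        /* must be a power of two */
        unsigned int hw_cnt = 0x0001a41b; /* pretend DMA counter readback */

        /* Mask, don't modulo: same result for power-of-two sizes, and it
         * matches what the debugfs code above does. */
        printf("p_index = %u\n", hw_cnt & (bd_num - 1));
        return 0;
    }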
@@ -1157,7 +1248,7 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
init_completion(&bus->request_firmware_complete);
mutex_init(&bus->bus_lock);
spin_lock_init(&pcie_priv->irq_lock);
- spin_lock_init(&pcie_priv->tx_lock);
+ spin_lock_init(&pcie_priv->tx_reclaim_lock);
/* init stats */
pcie_priv->tx_full_count = 0;
@@ -1165,6 +1256,7 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pcie_priv->pcie_irq_count = 0;
pcie_priv->pcie_irq_rx_count = 0;
pcie_priv->pcie_irq_tx_count = 0;
+ pcie_priv->pcie_irq_uf_count = 0;
pcie_priv->tx_reclaim_done = 0;
pcie_priv->tx_reclaim_req = 0;
@@ -1191,6 +1283,16 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pr_debug("successful init of PCI device %x\n", pdev->device);
}
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+#else
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+#endif
+ if (ret) {
+ pr_err("PCIE DMA coherent mask init failed\n");
+ goto err_base;
+ }
+
pcim_pin_device(pdev);
pci_set_master(pdev);
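
Here the DMA mask is chosen at build time from CONFIG_ARCH_DMA_ADDR_T_64BIT and applied with dma_set_mask_and_coherent(). Many PCI drivers instead probe at runtime, trying 64-bit and falling back to 32-bit; a sketch of that pattern with a stubbed mask setter standing in for the real DMA API:

    #include <stdio.h>

    /* Stand-in for dma_set_mask_and_coherent(): returns 0 on success.
     * The pretend platform here only accepts 32-bit masks. */
    static int set_mask(unsigned long long mask)
    {
        return mask > 0xffffffffull ? -1 : 0;
    }

    int main(void)
    {
        /* Prefer 64-bit addressing, fall back to 32-bit. */
        if (set_mask(~0ull) && set_mask(0xffffffffull)) {
            fprintf(stderr, "no usable DMA mask\n");
            return 1;
        }
        return 0;
    }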
@@ -1212,12 +1314,6 @@ static int qtnf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_base;
}
- ret = qtnf_pcie_init_dma_mask(pcie_priv, DMA_BIT_MASK(32));
- if (ret) {
- pr_err("PCIE DMA mask init failed\n");
- goto err_base;
- }
-
ret = devm_add_action(&pdev->dev, free_xfer_buffers, (void *)pcie_priv);
if (ret) {
pr_err("custom release callback init failed\n");
@@ -1336,7 +1432,7 @@ static SIMPLE_DEV_PM_OPS(qtnf_pcie_pm_ops, qtnf_pcie_suspend,
qtnf_pcie_resume);
#endif
-static struct pci_device_id qtnf_pcie_devid_table[] = {
+static const struct pci_device_id qtnf_pcie_devid_table[] = {
{
PCIE_VENDOR_ID_QUANTENNA, PCIE_DEVICE_ID_QTN_PEARL,
PCI_ANY_ID, PCI_ANY_ID, 0, 0,
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h
index 2a897db..e76a237 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_bus_priv.h
@@ -32,8 +32,8 @@ struct qtnf_pcie_bus_priv {
/* lock for irq configuration changes */
spinlock_t irq_lock;
- /* lock for tx operations */
- spinlock_t tx_lock;
+ /* lock for tx reclaim operations */
+ spinlock_t tx_reclaim_lock;
u8 msi_enabled;
int mps;
@@ -66,13 +66,11 @@ struct qtnf_pcie_bus_priv {
void *bd_table_vaddr;
u32 bd_table_len;
- u32 hw_txproc_wr_ptr;
+ u32 rx_bd_w_index;
+ u32 rx_bd_r_index;
- u16 tx_bd_reclaim_start;
- u16 tx_bd_index;
- u32 tx_queue_len;
-
- u16 rx_bd_index;
+ u32 tx_bd_w_index;
+ u32 tx_bd_r_index;
u32 pcie_irq_mask;
@@ -80,6 +78,7 @@ struct qtnf_pcie_bus_priv {
u32 pcie_irq_count;
u32 pcie_irq_rx_count;
u32 pcie_irq_tx_count;
+ u32 pcie_irq_uf_count;
u32 tx_full_count;
u32 tx_done_count;
u32 tx_reclaim_done;
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h
index e00d508..c5a4e46 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_ipc.h
@@ -50,22 +50,21 @@
#define PCIE_HDP_INT_RX_BITS (0 \
| PCIE_HDP_INT_EP_TXDMA \
| PCIE_HDP_INT_EP_TXEMPTY \
+ | PCIE_HDP_INT_HHBM_UF \
)
#define PCIE_HDP_INT_TX_BITS (0 \
| PCIE_HDP_INT_EP_RXDMA \
)
-#if BITS_PER_LONG == 64
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
#define QTN_HOST_HI32(a) ((u32)(((u64)a) >> 32))
#define QTN_HOST_LO32(a) ((u32)(((u64)a) & 0xffffffffUL))
#define QTN_HOST_ADDR(h, l) ((((u64)h) << 32) | ((u64)l))
-#elif BITS_PER_LONG == 32
+#else
#define QTN_HOST_HI32(a) 0
#define QTN_HOST_LO32(a) ((u32)(((u32)a) & 0xffffffffUL))
#define QTN_HOST_ADDR(h, l) ((u32)l)
-#else
-#error Unexpected BITS_PER_LONG value
#endif
#define QTN_SYSCTL_BAR 0
@@ -75,7 +74,7 @@
#define QTN_PCIE_BDA_VERSION 0x1002
#define PCIE_BDA_NAMELEN 32
-#define PCIE_HHBM_MAX_SIZE 512
+#define PCIE_HHBM_MAX_SIZE 2048
#define SKB_BUF_SIZE 2048
@@ -112,7 +111,7 @@ struct qtnf_pcie_bda {
__le32 bda_flashsz;
u8 bda_boardname[PCIE_BDA_NAMELEN];
__le32 bda_rc_msi_enabled;
- __le32 bda_hhbm_list[PCIE_HHBM_MAX_SIZE];
+ u8 bda_hhbm_list[PCIE_HHBM_MAX_SIZE];
__le32 bda_dsbw_start_index;
__le32 bda_dsbw_end_index;
__le32 bda_dsbw_total_bytes;
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h
index 78715b8..5b48b42 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/pearl/pcie_regs_pearl.h
@@ -109,6 +109,7 @@
#define HHBM_WR_REQ (BIT(0))
#define HHBM_RD_REQ (BIT(1))
#define HHBM_DONE (BIT(31))
+#define HHBM_64BIT (BIT(10))
/* offsets for dual PCIE */
#define PCIE_PORT_LINK_CTL(base) ((base) + 0x0710)
@@ -333,6 +334,7 @@
#define PCIE_HDP_INT_RX_LEN_ERR (BIT(2))
#define PCIE_HDP_INT_RX_HDR_LEN_ERR (BIT(3))
#define PCIE_HDP_INT_EP_TXDMA (BIT(12))
+#define PCIE_HDP_INT_HHBM_UF (BIT(13))
#define PCIE_HDP_INT_EP_TXEMPTY (BIT(15))
#define PCIE_HDP_INT_IPC (BIT(29))
diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h
index 6eafc15..a8242f6 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h
@@ -19,7 +19,7 @@
#include <linux/ieee80211.h>
-#define QLINK_PROTO_VER 3
+#define QLINK_PROTO_VER 5
#define QLINK_MACID_RSVD 0xFF
#define QLINK_VIFID_RSVD 0xFF
@@ -77,6 +77,7 @@ enum qlink_iface_type {
QLINK_IFTYPE_ADHOC = 3,
QLINK_IFTYPE_MONITOR = 4,
QLINK_IFTYPE_WDS = 5,
+ QLINK_IFTYPE_AP_VLAN = 6,
};
/**
@@ -85,12 +86,12 @@ enum qlink_iface_type {
* Data describing a single virtual interface.
*
* @if_type: Mode of interface operation, one of &enum qlink_iface_type
- * @flags: interface flagsmap.
+ * @vlanid: VLAN ID for AP_VLAN interface type.

* @mac_addr: MAC address of virtual interface.
*/
struct qlink_intf_info {
__le16 if_type;
- __le16 flags;
+ __le16 vlanid;
u8 mac_addr[ETH_ALEN];
u8 rsvd[2];
} __packed;
@@ -133,6 +134,9 @@ enum qlink_channel_width {
* number of operational channels and information on each of the channel.
* This command is generic to a specified MAC, interface index must be set
* to QLINK_VIFID_RSVD in command header.
+ * @QLINK_CMD_REG_NOTIFY: notify device about regulatory domain change. This
+ * command is supported only if device reports QLINK_HW_SUPPORTS_REG_UPDATE
+ * capability.
*/
enum qlink_cmd_type {
QLINK_CMD_FW_INIT = 0x0001,
@@ -148,8 +152,9 @@ enum qlink_cmd_type {
QLINK_CMD_DEL_INTF = 0x0016,
QLINK_CMD_CHANGE_INTF = 0x0017,
QLINK_CMD_UPDOWN_INTF = 0x0018,
- QLINK_CMD_REG_REGION = 0x0019,
+ QLINK_CMD_REG_NOTIFY = 0x0019,
QLINK_CMD_CHANS_INFO_GET = 0x001A,
+ QLINK_CMD_CHAN_SWITCH = 0x001B,
QLINK_CMD_CONFIG_AP = 0x0020,
QLINK_CMD_START_AP = 0x0021,
QLINK_CMD_STOP_AP = 0x0022,
@@ -161,6 +166,7 @@ enum qlink_cmd_type {
QLINK_CMD_CHANGE_STA = 0x0051,
QLINK_CMD_DEL_STA = 0x0052,
QLINK_CMD_SCAN = 0x0053,
+ QLINK_CMD_CHAN_STATS = 0x0054,
QLINK_CMD_CONNECT = 0x0060,
QLINK_CMD_DISCONNECT = 0x0061,
};
@@ -287,6 +293,7 @@ struct qlink_cmd_get_sta_info {
* @pairwise: whether to use pairwise key.
* @addr: MAC address of a STA key is being installed to.
* @cipher: cipher suite.
+ * @vlanid: VLAN ID for AP_VLAN interface type.
* @key_data: key data itself.
*/
struct qlink_cmd_add_key {
@@ -295,6 +302,7 @@ struct qlink_cmd_add_key {
u8 pairwise;
u8 addr[ETH_ALEN];
__le32 cipher;
+ __le16 vlanid;
u8 key_data[0];
} __packed;
@@ -341,12 +349,16 @@ struct qlink_cmd_set_def_mgmt_key {
*
* @sta_flags_mask: STA flags mask, bitmap of &enum qlink_sta_flags
* @sta_flags_set: STA flags values, bitmap of &enum qlink_sta_flags
+ * @if_type: Mode of interface operation, one of &enum qlink_iface_type.
+ * @vlanid: VLAN ID to assign to a specific STA.
* @sta_addr: address of the STA for which parameters are set.
*/
struct qlink_cmd_change_sta {
struct qlink_cmd chdr;
__le32 sta_flags_mask;
__le32 sta_flags_set;
+ __le16 if_type;
+ __le16 vlanid;
u8 sta_addr[ETH_ALEN];
} __packed;
@@ -380,7 +392,7 @@ enum qlink_sta_connect_flags {
struct qlink_cmd_connect {
struct qlink_cmd chdr;
__le32 flags;
- __le16 freq;
+ __le16 channel;
__le16 bg_scan_period;
u8 bssid[ETH_ALEN];
u8 payload[0];
@@ -430,6 +442,70 @@ struct qlink_cmd_chans_info_get {
u8 band;
} __packed;
+/**
+ * struct qlink_cmd_get_chan_stats - data for QLINK_CMD_CHAN_STATS command
+ *
+ * @channel: channel number according to 802.11 17.3.8.3.2 and Annex J
+ */
+struct qlink_cmd_get_chan_stats {
+ struct qlink_cmd chdr;
+ __le16 channel;
+} __packed;
+
+/**
+ * enum qlink_reg_initiator - Indicates the initiator of a reg domain request
+ *
+ * See &enum nl80211_reg_initiator for more info.
+ */
+enum qlink_reg_initiator {
+ QLINK_REGDOM_SET_BY_CORE,
+ QLINK_REGDOM_SET_BY_USER,
+ QLINK_REGDOM_SET_BY_DRIVER,
+ QLINK_REGDOM_SET_BY_COUNTRY_IE,
+};
+
+/**
+ * enum qlink_user_reg_hint_type - type of user regulatory hint
+ *
+ * See &enum nl80211_user_reg_hint_type for more info.
+ */
+enum qlink_user_reg_hint_type {
+ QLINK_USER_REG_HINT_USER = 0,
+ QLINK_USER_REG_HINT_CELL_BASE = 1,
+ QLINK_USER_REG_HINT_INDOOR = 2,
+};
+
+/**
+ * struct qlink_cmd_reg_notify - data for QLINK_CMD_REG_NOTIFY command
+ *
+ * @alpha2: the ISO / IEC 3166 alpha2 country code.
+ * @initiator: which entity sent the request, one of &enum qlink_reg_initiator.
+ * @user_reg_hint_type: type of hint for QLINK_REGDOM_SET_BY_USER request, one
+ * of &enum qlink_user_reg_hint_type.
+ */
+struct qlink_cmd_reg_notify {
+ struct qlink_cmd chdr;
+ u8 alpha2[2];
+ u8 initiator;
+ u8 user_reg_hint_type;
+} __packed;
+
+/**
+ * struct qlink_cmd_chan_switch - data for QLINK_CMD_CHAN_SWITCH command
+ *
+ * @channel: channel number according to 802.11 17.3.8.3.2 and Annex J
+ * @radar_required: whether radar detection is required on the new channel
+ * @block_tx: whether transmissions should be blocked while changing
+ * @beacon_count: number of beacons until switch
+ */
+struct qlink_cmd_chan_switch {
+ struct qlink_cmd chdr;
+ __le16 channel;
+ u8 radar_required;
+ u8 block_tx;
+ u8 beacon_count;
+} __packed;
+
/* QLINK Command Responses messages related definitions
*/
@@ -438,6 +514,7 @@ enum qlink_cmd_result {
QLINK_CMD_RESULT_INVALID,
QLINK_CMD_RESULT_ENOTSUPP,
QLINK_CMD_RESULT_ENOTFOUND,
+ QLINK_CMD_RESULT_EALREADY,
};
/**
@@ -497,6 +574,18 @@ struct qlink_resp_get_mac_info {
} __packed;
/**
+ * enum qlink_dfs_regions - regulatory DFS regions
+ *
+ * Corresponds to &enum nl80211_dfs_regions.
+ */
+enum qlink_dfs_regions {
+ QLINK_DFS_UNSET = 0,
+ QLINK_DFS_FCC = 1,
+ QLINK_DFS_ETSI = 2,
+ QLINK_DFS_JP = 3,
+};
+
+/**
* struct qlink_resp_get_hw_info - response for QLINK_CMD_GET_HW_INFO command
*
* Description of wireless hardware capabilities and features.
@@ -504,22 +593,29 @@ struct qlink_resp_get_mac_info {
* @fw_ver: wireless hardware firmware version.
* @hw_capab: Bitmap of capabilities supported by firmware.
* @ql_proto_ver: Version of QLINK protocol used by firmware.
- * @country_code: country code ID firmware is configured to.
* @num_mac: Number of separate physical radio devices provided by hardware.
* @mac_bitmap: Bitmap of MAC IDs that are active and can be used in firmware.
* @total_tx_chains: total number of transmit chains used by device.
* @total_rx_chains: total number of receive chains.
+ * @alpha2: country code ID firmware is configured to.
+ * @n_reg_rules: number of regulatory rule TLVs in the variable portion of
+ * the message.
+ * @dfs_region: regulatory DFS region, one of &enum qlink_dfs_regions.
+ * @info: variable-length HW info, can contain QTN_TLV_ID_REG_RULE.
*/
struct qlink_resp_get_hw_info {
struct qlink_resp rhdr;
__le32 fw_ver;
__le32 hw_capab;
__le16 ql_proto_ver;
- u8 alpha2_code[2];
u8 num_mac;
u8 mac_bitmap;
u8 total_tx_chain;
u8 total_rx_chain;
+ u8 alpha2[2];
+ u8 n_reg_rules;
+ u8 dfs_region;
+ u8 info[0];
} __packed;
/**
@@ -574,6 +670,16 @@ struct qlink_resp_phy_params {
u8 info[0];
} __packed;
+/**
+ * struct qlink_resp_get_chan_stats - response for QLINK_CMD_CHAN_STATS cmd
+ *
+ * @info: variable-length channel info.
+ */
+struct qlink_resp_get_chan_stats {
+ struct qlink_resp rhdr;
+ u8 info[0];
+} __packed;
+
/* QLINK Events messages related definitions
*/
@@ -585,6 +691,7 @@ enum qlink_event_type {
QLINK_EVENT_SCAN_COMPLETE = 0x0025,
QLINK_EVENT_BSS_JOIN = 0x0026,
QLINK_EVENT_BSS_LEAVE = 0x0027,
+ QLINK_EVENT_FREQ_CHANGE = 0x0028,
};
/**
@@ -651,7 +758,17 @@ struct qlink_event_bss_join {
*/
struct qlink_event_bss_leave {
struct qlink_event ehdr;
- u16 reason;
+ __le16 reason;
+} __packed;
+
+/**
+ * struct qlink_event_freq_change - data for QLINK_EVENT_FREQ_CHANGE event
+ *
+ * @freq: new operating frequency in MHz
+ */
+struct qlink_event_freq_change {
+ struct qlink_event ehdr;
+ __le32 freq;
} __packed;
enum qlink_rxmgmt_flags {
@@ -741,10 +858,12 @@ enum qlink_tlv_id {
QTN_TLV_ID_LRETRY_LIMIT = 0x0204,
QTN_TLV_ID_BCN_PERIOD = 0x0205,
QTN_TLV_ID_DTIM = 0x0206,
+ QTN_TLV_ID_REG_RULE = 0x0207,
QTN_TLV_ID_CHANNEL = 0x020F,
QTN_TLV_ID_COVERAGE_CLASS = 0x0213,
QTN_TLV_ID_IFACE_LIMIT = 0x0214,
QTN_TLV_ID_NUM_IFACE_COMB = 0x0215,
+ QTN_TLV_ID_CHANNEL_STATS = 0x0216,
QTN_TLV_ID_STA_BASIC_COUNTERS = 0x0300,
QTN_TLV_ID_STA_GENERIC_INFO = 0x0301,
QTN_TLV_ID_KEY = 0x0302,
@@ -761,7 +880,7 @@ struct qlink_tlv_hdr {
struct qlink_iface_limit {
__le16 max_num;
- __le16 type_mask;
+ __le16 type;
} __packed;
struct qlink_iface_comb_num {
@@ -844,12 +963,54 @@ struct qlink_tlv_cclass {
u8 cclass;
} __packed;
-enum qlink_dfs_state {
- QLINK_DFS_USABLE,
- QLINK_DFS_UNAVAILABLE,
- QLINK_DFS_AVAILABLE,
+/**
+ * enum qlink_reg_rule_flags - regulatory rule flags
+ *
+ * See description of &enum nl80211_reg_rule_flags
+ */
+enum qlink_reg_rule_flags {
+ QLINK_RRF_NO_OFDM = BIT(0),
+ QLINK_RRF_NO_CCK = BIT(1),
+ QLINK_RRF_NO_INDOOR = BIT(2),
+ QLINK_RRF_NO_OUTDOOR = BIT(3),
+ QLINK_RRF_DFS = BIT(4),
+ QLINK_RRF_PTP_ONLY = BIT(5),
+ QLINK_RRF_PTMP_ONLY = BIT(6),
+ QLINK_RRF_NO_IR = BIT(7),
+ QLINK_RRF_AUTO_BW = BIT(8),
+ QLINK_RRF_IR_CONCURRENT = BIT(9),
+ QLINK_RRF_NO_HT40MINUS = BIT(10),
+ QLINK_RRF_NO_HT40PLUS = BIT(11),
+ QLINK_RRF_NO_80MHZ = BIT(12),
+ QLINK_RRF_NO_160MHZ = BIT(13),
};
+/**
+ * struct qlink_tlv_reg_rule - data for QTN_TLV_ID_REG_RULE TLV
+ *
+ * Regulatory rule description.
+ *
+ * @start_freq_khz: start frequency of the range the rule is attributed to.
+ * @end_freq_khz: end frequency of the range the rule is attributed to.
+ * @max_bandwidth_khz: max bandwidth that channels in specified range can be
+ * configured to.
+ * @max_antenna_gain: max antenna gain that can be used in the specified
+ * frequency range, dBi.
+ * @max_eirp: maximum EIRP.
+ * @flags: regulatory rule flags in &enum qlink_reg_rule_flags.
+ * @dfs_cac_ms: DFS CAC period.
+ */
+struct qlink_tlv_reg_rule {
+ struct qlink_tlv_hdr hdr;
+ __le32 start_freq_khz;
+ __le32 end_freq_khz;
+ __le32 max_bandwidth_khz;
+ __le32 max_antenna_gain;
+ __le32 max_eirp;
+ __le32 flags;
+ __le32 dfs_cac_ms;
+} __packed;
+
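
QTN_TLV_ID_REG_RULE records like the struct above travel in the variable info[] tail of the HW-info response, each prefixed by the qlink_tlv_hdr declared earlier in this header. A user-space sketch of walking such a stream; it assumes a little-endian host and host-endian fields for brevity, whereas the real structs use __le16 fields and are __packed:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct tlv_hdr {
        uint16_t type;
        uint16_t len;   /* payload bytes, header excluded */
    };

    static void walk_tlvs(const uint8_t *buf, size_t size)
    {
        while (size >= sizeof(struct tlv_hdr)) {
            struct tlv_hdr hdr;

            memcpy(&hdr, buf, sizeof(hdr));
            if (sizeof(hdr) + hdr.len > size)
                break; /* truncated record */
            printf("TLV 0x%04x, %u byte payload\n", hdr.type, hdr.len);
            buf  += sizeof(hdr) + hdr.len;
            size -= sizeof(hdr) + hdr.len;
        }
    }

    int main(void)
    {
        /* One fake 0x0207 (REG_RULE-style) record with 4 payload bytes. */
        const uint8_t buf[] = { 0x07, 0x02, 0x04, 0x00, 1, 2, 3, 4 };

        walk_tlvs(buf, sizeof(buf));
        return 0;
    }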
enum qlink_channel_flags {
QLINK_CHAN_DISABLED = BIT(0),
QLINK_CHAN_NO_IR = BIT(1),
@@ -865,6 +1026,12 @@ enum qlink_channel_flags {
QLINK_CHAN_NO_10MHZ = BIT(12),
};
+enum qlink_dfs_state {
+ QLINK_DFS_USABLE,
+ QLINK_DFS_UNAVAILABLE,
+ QLINK_DFS_AVAILABLE,
+};
+
struct qlink_tlv_channel {
struct qlink_tlv_hdr hdr;
__le16 hw_value;
@@ -898,4 +1065,13 @@ struct qlink_auth_encr {
u8 control_port_no_encrypt;
} __packed;
+struct qlink_chan_stats {
+ __le32 chan_num;
+ __le32 cca_tx;
+ __le32 cca_rx;
+ __le32 cca_busy;
+ __le32 cca_try;
+ s8 chan_noise;
+} __packed;
+
#endif /* _QTN_QLINK_H_ */
diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c
index 49ae652..cf024c9 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c
@@ -17,24 +17,30 @@
#include "qlink_util.h"
-u16 qlink_iface_type_mask_to_nl(u16 qlink_mask)
+u16 qlink_iface_type_to_nl_mask(u16 qlink_type)
{
u16 result = 0;
- if (qlink_mask & QLINK_IFTYPE_AP)
+ switch (qlink_type) {
+ case QLINK_IFTYPE_AP:
result |= BIT(NL80211_IFTYPE_AP);
-
- if (qlink_mask & QLINK_IFTYPE_STATION)
+ break;
+ case QLINK_IFTYPE_STATION:
result |= BIT(NL80211_IFTYPE_STATION);
-
- if (qlink_mask & QLINK_IFTYPE_ADHOC)
+ break;
+ case QLINK_IFTYPE_ADHOC:
result |= BIT(NL80211_IFTYPE_ADHOC);
-
- if (qlink_mask & QLINK_IFTYPE_MONITOR)
+ break;
+ case QLINK_IFTYPE_MONITOR:
result |= BIT(NL80211_IFTYPE_MONITOR);
-
- if (qlink_mask & QLINK_IFTYPE_WDS)
+ break;
+ case QLINK_IFTYPE_WDS:
result |= BIT(NL80211_IFTYPE_WDS);
+ break;
+ case QLINK_IFTYPE_AP_VLAN:
+ result |= BIT(NL80211_IFTYPE_AP_VLAN);
+ break;
+ }
return result;
}
diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h
index 90d7d09..de06c1e 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h
@@ -22,14 +22,6 @@
#include "qlink.h"
-static inline void qtnf_cmd_skb_put_action(struct sk_buff *skb, u16 action)
-{
- __le16 *buf_ptr;
-
- buf_ptr = skb_put(skb, sizeof(action));
- *buf_ptr = cpu_to_le16(action);
-}
-
static inline void
qtnf_cmd_skb_put_buffer(struct sk_buff *skb, const u8 *buf_src, size_t len)
{
@@ -68,7 +60,7 @@ static inline void qtnf_cmd_skb_put_tlv_u16(struct sk_buff *skb,
memcpy(hdr->val, &tmp, sizeof(tmp));
}
-u16 qlink_iface_type_mask_to_nl(u16 qlink_mask);
+u16 qlink_iface_type_to_nl_mask(u16 qlink_type);
u8 qlink_chan_width_mask_to_nl(u16 qlink_mask);
#endif /* _QTN_FMAC_QLINK_UTIL_H_ */
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c
index 529e059..f4b48b7 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c
@@ -1911,7 +1911,7 @@ static const struct rt2x00_ops rt2500usb_ops = {
/*
* rt2500usb module information.
*/
-static struct usb_device_id rt2500usb_device_table[] = {
+static const struct usb_device_id rt2500usb_device_table[] = {
/* ASUS */
{ USB_DEVICE(0x0b05, 0x1706) },
{ USB_DEVICE(0x0b05, 0x1707) },
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
index 0b75def..d2c28944 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
@@ -3702,7 +3702,10 @@ static void rt2800_config_channel(struct rt2x00_dev *rt2x00dev,
if (rt2x00_rt(rt2x00dev, RT3572))
rt2800_rfcsr_write(rt2x00dev, 8, 0);
- tx_pin = rt2800_register_read(rt2x00dev, TX_PIN_CFG);
+ if (rt2x00_rt(rt2x00dev, RT6352))
+ tx_pin = rt2800_register_read(rt2x00dev, TX_PIN_CFG);
+ else
+ tx_pin = 0;
switch (rt2x00dev->default_ant.tx_chain_num) {
case 3:
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800mmio.c b/drivers/net/wireless/ralink/rt2x00/rt2800mmio.c
index ee5276e..1123e2b 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800mmio.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800mmio.c
@@ -136,10 +136,19 @@ void rt2800mmio_fill_rxdone(struct queue_entry *entry,
*/
rxdesc->flags |= RX_FLAG_MMIC_STRIPPED;
- if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS)
+ if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS) {
rxdesc->flags |= RX_FLAG_DECRYPTED;
- else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC)
+ } else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC) {
+ /*
+ * In order to check the Michael MIC, the frame must have
+ * been decrypted. mac80211 does not initiate MMIC
+ * countermeasures unless the decrypted flag is set along
+ * with the MMIC failure flag.
+ */
+ rxdesc->flags |= RX_FLAG_DECRYPTED;
+
rxdesc->flags |= RX_FLAG_MMIC_ERROR;
+ }
}
if (rt2x00_get_field32(word, RXD_W3_MY_BSS))
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c
index 685b8e0..24fc6d2 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c
@@ -697,11 +697,20 @@ static void rt2800usb_fill_rxdone(struct queue_entry *entry,
* stripped it from the frame. Signal this to mac80211.
*/
rxdesc->flags |= RX_FLAG_MMIC_STRIPPED;
-
- if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS)
+
+ if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS) {
+ rxdesc->flags |= RX_FLAG_DECRYPTED;
+ } else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC) {
+ /*
+ * In order to check the Michael MIC, the frame must have
+ * been decrypted. mac80211 does not initiate MMIC
+ * countermeasures unless the decrypted flag is set along
+ * with the MMIC failure flag.
+ */
rxdesc->flags |= RX_FLAG_DECRYPTED;
- else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC)
+
rxdesc->flags |= RX_FLAG_MMIC_ERROR;
+ }
}
if (rt2x00_get_field32(word, RXD_W0_MY_BSS))
@@ -915,7 +924,7 @@ static const struct rt2x00_ops rt2800usb_ops = {
/*
* rt2800usb module information.
*/
-static struct usb_device_id rt2800usb_device_table[] = {
+static const struct usb_device_id rt2800usb_device_table[] = {
/* Abocom */
{ USB_DEVICE(0x07b8, 0x2870) },
{ USB_DEVICE(0x07b8, 0x2770) },
diff --git a/drivers/net/wireless/ralink/rt2x00/rt73usb.c b/drivers/net/wireless/ralink/rt2x00/rt73usb.c
index fd91322..9a21282 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt73usb.c
@@ -2408,7 +2408,7 @@ static const struct rt2x00_ops rt73usb_ops = {
/*
* rt73usb module information.
*/
-static struct usb_device_id rt73usb_device_table[] = {
+static const struct usb_device_id rt73usb_device_table[] = {
/* AboCom */
{ USB_DEVICE(0x07b8, 0xb21b) },
{ USB_DEVICE(0x07b8, 0xb21c) },
diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
index 55198ac2..121b94f 100644
--- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
+++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c
@@ -43,7 +43,7 @@ MODULE_AUTHOR("Larry Finger <Larry.Finger@lwfinger.net>");
MODULE_DESCRIPTION("RTL8187/RTL8187B USB wireless driver");
MODULE_LICENSE("GPL");
-static struct usb_device_id rtl8187_table[] = {
+static const struct usb_device_id rtl8187_table[] = {
/* Asus */
{USB_DEVICE(0x0b05, 0x171d), .driver_info = DEVICE_RTL8187},
/* Belkin */
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
index 21e5ef0..7806a4d 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
@@ -6190,7 +6190,7 @@ static void rtl8xxxu_disconnect(struct usb_interface *interface)
ieee80211_free_hw(hw);
}
-static struct usb_device_id dev_table[] = {
+static const struct usb_device_id dev_table[] = {
{USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x8724, 0xff, 0xff, 0xff),
.driver_info = (unsigned long)&rtl8723au_fops},
{USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x1724, 0xff, 0xff, 0xff),
diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index e36ee59..ea18aa7 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -426,9 +426,8 @@ static void _rtl_init_mac80211(struct ieee80211_hw *hw)
hw->extra_tx_headroom = RTL_TX_HEADER_SIZE;
/* TODO: Correct this value for our hw */
- /* TODO: define these hard code value */
- hw->max_listen_interval = 10;
- hw->max_rate_tries = 4;
+ hw->max_listen_interval = MAX_LISTEN_INTERVAL;
+ hw->max_rate_tries = MAX_RATE_TRIES;
/* hw->max_rates = 1; */
hw->sta_data_size = sizeof(struct rtl_sta_info);
@@ -1166,9 +1165,9 @@ void rtl_get_tcb_desc(struct ieee80211_hw *hw,
}
}
- if (is_multicast_ether_addr(ieee80211_get_DA(hdr)))
+ if (is_multicast_ether_addr(hdr->addr1))
tcb_desc->multicast = 1;
- else if (is_broadcast_ether_addr(ieee80211_get_DA(hdr)))
+ else if (is_broadcast_ether_addr(hdr->addr1))
tcb_desc->broadcast = 1;
_rtl_txrate_selectmode(hw, sta, tcb_desc);
@@ -1408,6 +1407,11 @@ u8 rtl_is_special_data(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx,
return true;
} else if (ETH_P_PAE == ether_type) {
+ /* EAPOL is seen as part of the 4-way handshake */
+ rtlpriv->btcoexist.btc_info.in_4way = true;
+ rtlpriv->btcoexist.btc_info.in_4way_ts = jiffies;
+
RT_TRACE(rtlpriv, (COMP_SEND | COMP_RECV), DBG_DMESG,
"802.1X %s EAPOL pkt!!\n", (is_tx) ? "Tx" : "Rx");
@@ -1735,12 +1739,12 @@ void rtl_scan_list_expire(struct ieee80211_hw *hw)
continue;
list_del(&entry->list);
- kfree(entry);
rtlpriv->scan_list.num--;
RT_TRACE(rtlpriv, COMP_SCAN, DBG_LOUD,
"BSSID=%pM is expire in scan list (total=%d)\n",
entry->bssid, rtlpriv->scan_list.num);
+ kfree(entry);
}
spin_unlock_irqrestore(&rtlpriv->locks.scan_list_lock, flags);
@@ -1959,6 +1963,12 @@ label_lps_done:
if (rtlpriv->cfg->ops->get_btc_status())
rtlpriv->btcoexist.btc_ops->btc_periodical(rtlpriv);
+ if (rtlpriv->btcoexist.btc_info.in_4way) {
+ if (time_after(jiffies, rtlpriv->btcoexist.btc_info.in_4way_ts +
+ msecs_to_jiffies(IN_4WAY_TIMEOUT_TIME)))
+ rtlpriv->btcoexist.btc_info.in_4way = false;
+ }
+
rtlpriv->link_info.bcn_rx_inperiod = 0;
/* <6> scan list */
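
The in_4way expiry check above uses the kernel's time_after(), which stays correct across jiffies wraparound because the comparison is done on a signed difference. A user-space rendition:

    #include <stdio.h>

    /* Same trick as the kernel's time_after(): signed subtraction keeps
     * the comparison valid across counter wraparound. */
    #define time_after(a, b) ((long)((b) - (a)) < 0)

    int main(void)
    {
        unsigned long jiffies = 1000, in_4way_ts = 900, timeout = 50;

        if (time_after(jiffies, in_4way_ts + timeout))
            printf("4-way handshake window expired\n");
        return 0;
    }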
diff --git a/drivers/net/wireless/realtek/rtlwifi/base.h b/drivers/net/wireless/realtek/rtlwifi/base.h
index ab7d819..b56d1b7 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.h
+++ b/drivers/net/wireless/realtek/rtlwifi/base.h
@@ -65,6 +65,8 @@ enum ap_peer {
#define FRAME_OFFSET_ADDRESS3 16
#define FRAME_OFFSET_SEQUENCE 22
#define FRAME_OFFSET_ADDRESS4 24
+#define MAX_LISTEN_INTERVAL 10
+#define MAX_RATE_TRIES 4
#define SET_80211_HDR_FRAME_CONTROL(_hdr, _val) \
WRITEEF2BYTE(_hdr, _val)
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbt_precomp.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbt_precomp.h
index 2ac989a..02dff4c 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbt_precomp.h
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbt_precomp.h
@@ -43,22 +43,6 @@
#define RT_SDIO_INTERFACE 3
#define DEV_BUS_TYPE RT_PCI_INTERFACE
-/* IC type */
-#define RTL_HW_TYPE(adapter) (rtl_hal((struct rtl_priv *)adapter)->hw_type)
-
-#define IS_NEW_GENERATION_IC(adapter) \
- (RTL_HW_TYPE(adapter) >= HARDWARE_TYPE_RTL8192EE)
-#define IS_HARDWARE_TYPE_8812(adapter) \
- (RTL_HW_TYPE(adapter) == HARDWARE_TYPE_RTL8812AE)
-#define IS_HARDWARE_TYPE_8821(adapter) \
- (RTL_HW_TYPE(adapter) == HARDWARE_TYPE_RTL8821AE)
-#define IS_HARDWARE_TYPE_8723A(adapter) \
- (RTL_HW_TYPE(adapter) == HARDWARE_TYPE_RTL8723AE)
-#define IS_HARDWARE_TYPE_8723B(adapter) \
- (RTL_HW_TYPE(adapter) == HARDWARE_TYPE_RTL8723BE)
-#define IS_HARDWARE_TYPE_8192E(adapter) \
- (RTL_HW_TYPE(adapter) == HARDWARE_TYPE_RTL8192EE)
-
#include "halbtc8192e2ant.h"
#include "halbtc8723b1ant.h"
#include "halbtc8723b2ant.h"
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
index 03998d2..c044252 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
@@ -600,14 +600,8 @@ static void halbtc8723b1ant_coex_table_with_type(struct btc_coexist *btcoexist,
0xffffff, 0x3);
break;
case 5:
- if ((coex_sta->cck_ever_lock) && (coex_sta->scan_ap_num <= 5))
- halbtc8723b1ant_coex_table(btcoexist, force_exec,
- 0x5a5a5a5a, 0x5aaa5a5a,
- 0xffffff, 0x3);
- else
- halbtc8723b1ant_coex_table(btcoexist, force_exec,
- 0x5a5a5a5a, 0x5aaa5a5a,
- 0xffffff, 0x3);
+ halbtc8723b1ant_coex_table(btcoexist, force_exec, 0x5a5a5a5a,
+ 0x5aaa5a5a, 0xffffff, 0x3);
break;
case 6:
halbtc8723b1ant_coex_table(btcoexist, force_exec, 0x55555555,
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
index e6024b0..c1eacd8 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c
@@ -327,7 +327,22 @@ static void halbtc_aggregation_check(struct btc_coexist *btcoexist)
static u32 halbtc_get_bt_patch_version(struct btc_coexist *btcoexist)
{
- return 0;
+ struct rtl_priv *rtlpriv = btcoexist->adapter;
+ u8 cmd_buffer[4] = {0};
+ u8 oper_ver = 0;
+ u8 req_num = 0x0E;
+
+ if (btcoexist->bt_info.bt_real_fw_ver)
+ goto label_done;
+
+ cmd_buffer[0] |= (oper_ver & 0x0f); /* Set OperVer */
+ cmd_buffer[0] |= ((req_num << 4) & 0xf0); /* Set ReqNum */
+ cmd_buffer[1] = 0; /* BT_OP_GET_BT_VERSION = 0 */
+ rtlpriv->cfg->ops->fill_h2c_cmd(rtlpriv->mac80211.hw, 0x67, 4,
+ &cmd_buffer[0]);
+
+label_done:
+ return btcoexist->bt_info.bt_real_fw_ver;
}
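
halbtc_get_bt_patch_version() now fires an H2C (host-to-card) query and returns whatever version is already cached; the firmware answers asynchronously over C2H, and rtl_btc_btmpinfo_notify() below matches the reply on the same req_num. A sketch of the byte-0 nibble packing:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t cmd[4] = { 0 };
        uint8_t oper_ver = 0, req_num = 0x0e;

        /* Low nibble: OperVer. High nibble: ReqNum (echoed back as the
         * seq field in the C2H reply). Byte 1: BT_OP_GET_BT_VERSION. */
        cmd[0] = (oper_ver & 0x0f) | ((req_num << 4) & 0xf0);
        cmd[1] = 0;

        printf("h2c[0] = 0x%02x\n", cmd[0]);
        return 0;
    }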
u32 halbtc_get_wifi_link_status(struct btc_coexist *btcoexist)
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c
index 4366c98..7d296a4 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.c
@@ -41,6 +41,7 @@ static struct rtl_btc_ops rtl_btc_operation = {
.btc_periodical = rtl_btc_periodical,
.btc_halt_notify = rtl_btc_halt_notify,
.btc_btinfo_notify = rtl_btc_btinfo_notify,
+ .btc_btmpinfo_notify = rtl_btc_btmpinfo_notify,
.btc_is_limited_dig = rtl_btc_is_limited_dig,
.btc_is_disable_edca_turbo = rtl_btc_is_disable_edca_turbo,
.btc_is_bt_disabled = rtl_btc_is_bt_disabled,
@@ -165,6 +166,33 @@ void rtl_btc_btinfo_notify(struct rtl_priv *rtlpriv, u8 *tmp_buf, u8 length)
exhalbtc_bt_info_notify(&gl_bt_coexist, tmp_buf, length);
}
+void rtl_btc_btmpinfo_notify(struct rtl_priv *rtlpriv, u8 *tmp_buf, u8 length)
+{
+ u8 extid, seq, len;
+ u16 bt_real_fw_ver;
+ u8 bt_fw_ver;
+
+ if ((length < 4) || (!tmp_buf))
+ return;
+
+ extid = tmp_buf[0];
+ /* not a response from BT FW, exit */
+ if (extid != 1) /* C2H_TRIG_BY_BT_FW = 1 */
+ return;
+
+ len = tmp_buf[1] >> 4;
+ seq = tmp_buf[2] >> 4;
+
+ /* BT Firmware version response */
+ if (seq == 0x0E) {
+ bt_real_fw_ver = tmp_buf[3] | (tmp_buf[4] << 8);
+ bt_fw_ver = tmp_buf[5];
+
+ gl_bt_coexist.bt_info.bt_real_fw_ver = bt_real_fw_ver;
+ gl_bt_coexist.bt_info.bt_fw_ver = bt_fw_ver;
+ }
+}
+
bool rtl_btc_is_limited_dig(struct rtl_priv *rtlpriv)
{
return gl_bt_coexist.bt_info.limited_dig;
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.h b/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.h
index 6fe521c..ac1253c 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.h
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/rtl_btc.h
@@ -39,6 +39,7 @@ void rtl_btc_mediastatus_notify(struct rtl_priv *rtlpriv,
void rtl_btc_periodical(struct rtl_priv *rtlpriv);
void rtl_btc_halt_notify(void);
void rtl_btc_btinfo_notify(struct rtl_priv *rtlpriv, u8 *tmpbuf, u8 length);
+void rtl_btc_btmpinfo_notify(struct rtl_priv *rtlpriv, u8 *tmp_buf, u8 length);
bool rtl_btc_is_limited_dig(struct rtl_priv *rtlpriv);
bool rtl_btc_is_disable_edca_turbo(struct rtl_priv *rtlpriv);
bool rtl_btc_is_bt_disabled(struct rtl_priv *rtlpriv);
diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index b0ad061..c53cbf3 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -1505,6 +1505,8 @@ static int rtl_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
u8 mac_addr[ETH_ALEN];
u8 bcast_addr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+ rtlpriv->btcoexist.btc_info.in_4way = false;
+
if (rtlpriv->cfg->mod_params->sw_crypto || rtlpriv->sec.use_sw_sec) {
RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING,
"not open hw encryption\n");
diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
index 032b631..08dc891 100644
--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
+++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
@@ -2257,7 +2257,7 @@ int rtl_pci_probe(struct pci_dev *pdev,
/* find adapter */
if (!_rtl_pci_find_adapter(pdev, hw)) {
err = -ENODEV;
- goto fail3;
+ goto fail2;
}
/* Init IO handler */
@@ -2318,10 +2318,10 @@ fail3:
pci_set_drvdata(pdev, NULL);
rtl_deinit_core(hw);
+fail2:
if (rtlpriv->io.pci_mem_start != 0)
pci_iounmap(pdev, (void __iomem *)rtlpriv->io.pci_mem_start);
-fail2:
pci_release_regions(pdev);
complete(&rtlpriv->firmware_loading_complete);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rc.c b/drivers/net/wireless/realtek/rtlwifi/rc.c
index 951d257..02811ed 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rc.c
@@ -283,7 +283,7 @@ static void rtl_rate_free_sta(void *rtlpriv,
kfree(rate_priv);
}
-static struct rate_control_ops rtl_rate_ops = {
+static const struct rate_control_ops rtl_rate_ops = {
.name = "rtl_rc",
.alloc = rtl_rate_alloc,
.free = rtl_rate_free,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
index 774e720..57e5d5c 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
@@ -175,6 +175,8 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw)
rtl_fw_cb);
if (err) {
pr_info("Failed to request firmware!\n");
+ vfree(rtlpriv->rtlhal.pfirmware);
+ rtlpriv->rtlhal.pfirmware = NULL;
return 1;
}
@@ -376,7 +378,7 @@ static const struct rtl_hal_cfg rtl88ee_hal_cfg = {
.maps[RTL_RC_HT_RATEMCS15] = DESC92C_RATEMCS15,
};
-static struct pci_device_id rtl88ee_pci_ids[] = {
+static const struct pci_device_id rtl88ee_pci_ids[] = {
{RTL_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8179, rtl88ee_hal_cfg)},
{},
};
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
index bcbb0c6..38f85bf 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
@@ -176,6 +176,8 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw)
rtl_fw_cb);
if (err) {
pr_err("Failed to request firmware!\n");
+ vfree(rtlpriv->rtlhal.pfirmware);
+ rtlpriv->rtlhal.pfirmware = NULL;
return 1;
}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
index f95a645..530e80f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
@@ -777,10 +777,6 @@ static void _rtl92cu_init_queue_priority(struct ieee80211_hw *hw,
queue_sel);
}
-static void _rtl92cu_init_usb_aggregation(struct ieee80211_hw *hw)
-{
-}
-
static void _rtl92cu_init_wmac_setting(struct ieee80211_hw *hw)
{
u16 value16;
@@ -870,7 +866,6 @@ static int _rtl92cu_init_mac(struct ieee80211_hw *hw)
rtl92c_init_edca(hw);
rtl92c_init_rate_fallback(hw);
rtl92c_init_retry_function(hw);
- _rtl92cu_init_usb_aggregation(hw);
rtlpriv->cfg->ops->set_bw_mode(hw, NL80211_CHAN_HT20);
rtl92c_set_min_space(hw, IS_92C_SERIAL(rtlhal->version));
_rtl92cu_init_beacon_parameters(hw);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c
index 1b124ea..5657b1e 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/mac.c
@@ -352,11 +352,10 @@ u32 rtl92c_get_txdma_status(struct ieee80211_hw *hw)
void rtl92c_enable_interrupt(struct ieee80211_hw *hw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
- struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
struct rtl_usb *rtlusb = rtl_usbdev(rtl_usbpriv(hw));
- if (IS_HARDWARE_TYPE_8192CE(rtlhal)) {
+ if (IS_HARDWARE_TYPE_8192CE(rtlpriv)) {
rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] &
0xFFFFFFFF);
rtl_write_dword(rtlpriv, REG_HIMRE, rtlpci->irq_mask[1] &
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
index 96c923b..43e021b 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
@@ -85,6 +85,10 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw)
err = request_firmware_nowait(THIS_MODULE, 1,
fw_name, rtlpriv->io.dev,
GFP_KERNEL, hw, rtl_fw_cb);
+ if (err) {
+ vfree(rtlpriv->rtlhal.pfirmware);
+ rtlpriv->rtlhal.pfirmware = NULL;
+ }
return err;
}
@@ -173,7 +177,7 @@ static struct rtl_hal_usbint_cfg rtl92cu_interface_cfg = {
.rx_urb_num = RTL92C_NUM_RX_URBS,
.rx_max_size = RTL92C_SIZE_MAX_RX_BUFFER,
.usb_rx_hdl = rtl8192cu_rx_hdl,
- .usb_rx_segregate_hdl = NULL, /* rtl8192c_rx_segregate_hdl; */
+ .usb_rx_segregate_hdl = NULL,
/* tx */
.usb_tx_cleanup = rtl8192c_tx_cleanup,
.usb_tx_post_hdl = rtl8192c_tx_post_hdl,
@@ -275,7 +279,7 @@ static struct rtl_hal_cfg rtl92cu_hal_cfg = {
#define USB_VENDER_ID_REALTEK 0x0bda
/* 2010-10-19 DID_USB_V3.4 */
-static struct usb_device_id rtl8192c_usb_ids[] = {
+static const struct usb_device_id rtl8192c_usb_ids[] = {
/*=== Realtek demoboard ===*/
/* Default ID */
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
index de6c342..ac4a82d 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c
@@ -436,13 +436,6 @@ void rtl8192cu_rx_hdl(struct ieee80211_hw *hw, struct sk_buff * skb)
_rtl_rx_process(hw, skb);
}
-void rtl8192c_rx_segregate_hdl(
- struct ieee80211_hw *hw,
- struct sk_buff *skb,
- struct sk_buff_head *skb_list)
-{
-}
-
/*----------------------------------------------------------------------
*
* Tx handler
@@ -675,8 +668,3 @@ void rtl92cu_tx_fill_cmddesc(struct ieee80211_hw *hw,
RT_PRINT_DATA(rtlpriv, COMP_CMD, DBG_LOUD, "H2C Tx Cmd Content",
pdesc, RTL_TX_DESC_SIZE);
}
-
-bool rtl92cu_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb)
-{
- return true;
-}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.h
index 487eec8..15a66c5 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.h
@@ -385,8 +385,6 @@ bool rtl92cu_rx_query_desc(struct ieee80211_hw *hw,
struct ieee80211_rx_status *rx_status,
u8 *p_desc, struct sk_buff *skb);
void rtl8192cu_rx_hdl(struct ieee80211_hw *hw, struct sk_buff * skb);
-void rtl8192c_rx_segregate_hdl(struct ieee80211_hw *, struct sk_buff *,
- struct sk_buff_head *);
void rtl8192c_tx_cleanup(struct ieee80211_hw *hw, struct sk_buff *skb);
int rtl8192c_tx_post_hdl(struct ieee80211_hw *hw, struct urb *urb,
struct sk_buff *skb);
@@ -404,6 +402,5 @@ void rtl92cu_fill_fake_txdesc(struct ieee80211_hw *hw, u8 * pDesc,
void rtl92cu_tx_fill_cmddesc(struct ieee80211_hw *hw,
u8 *pdesc, bool b_firstseg,
bool b_lastseg, struct sk_buff *skb);
-bool rtl92cu_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb);
#endif
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
index 16132c6..a6549f5 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
@@ -183,6 +183,8 @@ static int rtl92d_init_sw_vars(struct ieee80211_hw *hw)
rtl_fw_cb);
if (err) {
pr_err("Failed to request firmware!\n");
+ vfree(rtlpriv->rtlhal.pfirmware);
+ rtlpriv->rtlhal.pfirmware = NULL;
return 1;
}
@@ -347,7 +349,7 @@ static const struct rtl_hal_cfg rtl92de_hal_cfg = {
.maps[RTL_RC_HT_RATEMCS15] = DESC_RATEMCS15,
};
-static struct pci_device_id rtl92de_pci_ids[] = {
+static const struct pci_device_id rtl92de_pci_ids[] = {
{RTL_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8193, rtl92de_hal_cfg)},
{RTL_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x002B, rtl92de_hal_cfg)},
{},
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c
index f5d4df9..7eae27f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c
@@ -887,6 +887,7 @@ void rtl92ee_c2h_content_parsing(struct ieee80211_hw *hw, u8 c2h_cmd_id,
u8 c2h_cmd_len, u8 *tmp_buf)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
+ struct rtl_btc_ops *btc_ops = rtlpriv->btcoexist.btc_ops;
switch (c2h_cmd_id) {
case C2H_8192E_DBG:
@@ -905,12 +906,16 @@ void rtl92ee_c2h_content_parsing(struct ieee80211_hw *hw, u8 c2h_cmd_id,
case C2H_8192E_BT_INFO:
RT_TRACE(rtlpriv, COMP_FW, DBG_TRACE,
"[C2H], C2H_8723BE_BT_INFO!!\n");
- rtlpriv->btcoexist.btc_ops->btc_btinfo_notify(rtlpriv, tmp_buf,
- c2h_cmd_len);
+ if (rtlpriv->cfg->ops->get_btc_status())
+ btc_ops->btc_btinfo_notify(rtlpriv, tmp_buf,
+ c2h_cmd_len);
break;
case C2H_8192E_BT_MP:
RT_TRACE(rtlpriv, COMP_FW, DBG_TRACE,
"[C2H], C2H_8723BE_BT_MP!!\n");
+ if (rtlpriv->cfg->ops->get_btc_status())
+ btc_ops->btc_btmpinfo_notify(rtlpriv, tmp_buf,
+ c2h_cmd_len);
break;
case C2H_8192E_RA_RPT:
_rtl92ee_c2h_ra_report_handler(hw, tmp_buf, c2h_cmd_len);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c
index d84ac7a..ef9394b 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c
@@ -2133,7 +2133,12 @@ static void _rtl92ee_read_adapter_info(struct ieee80211_hw *hw)
if ((*(u8 *)&hwinfo[EEPROM_RF_BOARD_OPTION_92E]) == 0xFF)
rtlefuse->board_type = 0;
+ if (rtlpriv->btcoexist.btc_info.btcoexist == 1)
+ rtlefuse->board_type |= BIT(2); /* ODM_BOARD_BT */
+
rtlhal->board_type = rtlefuse->board_type;
+ RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD,
+ "board_type = 0x%x\n", rtlefuse->board_type);
/*parse xtal*/
rtlefuse->crystalcap = hwinfo[EEPROM_XTAL_92E];
if (hwinfo[EEPROM_XTAL_92E] == 0xFF)
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
index eaa503b..a349008 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
@@ -177,6 +177,8 @@ int rtl92ee_init_sw_vars(struct ieee80211_hw *hw)
rtl_fw_cb);
if (err) {
pr_err("Failed to request firmware!\n");
+ vfree(rtlpriv->rtlhal.pfirmware);
+ rtlpriv->rtlhal.pfirmware = NULL;
return 1;
}
@@ -354,7 +356,7 @@ static const struct rtl_hal_cfg rtl92ee_hal_cfg = {
.maps[RTL_RC_HT_RATEMCS15] = DESC92C_RATEMCS15,
};
-static struct pci_device_id rtl92ee_pci_ids[] = {
+static const struct pci_device_id rtl92ee_pci_ids[] = {
{RTL_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x818B, rtl92ee_hal_cfg)},
{},
};
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c
index 55f238a..c58393e 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c
@@ -478,7 +478,6 @@ u16 rtl92ee_rx_desc_buff_remained_cnt(struct ieee80211_hw *hw, u8 queue_index)
struct rtl_priv *rtlpriv = rtl_priv(hw);
u16 read_point = 0, write_point = 0, remind_cnt = 0;
u32 tmp_4byte = 0;
- static u16 last_read_point;
static bool start_rx;
tmp_4byte = rtl_read_dword(rtlpriv, REG_RXQ_TXBD_IDX);
@@ -506,7 +505,6 @@ u16 rtl92ee_rx_desc_buff_remained_cnt(struct ieee80211_hw *hw, u8 queue_index)
rtlpci->rx_ring[queue_index].next_rx_rp = write_point;
- last_read_point = read_point;
return remind_cnt;
}
@@ -917,7 +915,6 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
struct rtl_priv *rtlpriv = rtl_priv(hw);
u16 cur_tx_rp = 0;
u16 cur_tx_wp = 0;
- static u16 last_txw_point;
static bool over_run;
u32 tmp = 0;
u8 q_idx = *val;
@@ -951,9 +948,6 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
rtl_write_word(rtlpriv,
get_desc_addr_fr_q_idx(q_idx),
ring->cur_tx_wp);
-
- if (q_idx == 1)
- last_txw_point = cur_tx_wp;
}
if (ring->avl_desc < (max_tx_desc - 15)) {
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c
index 2006b09..d7945b9 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c
@@ -216,6 +216,8 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw)
rtl92se_fw_cb);
if (err) {
pr_err("Failed to request firmware!\n");
+ vfree(rtlpriv->rtlhal.pfirmware);
+ rtlpriv->rtlhal.pfirmware = NULL;
return 1;
}
@@ -396,7 +398,7 @@ static const struct rtl_hal_cfg rtl92se_hal_cfg = {
.maps[RTL_RC_HT_RATEMCS15] = DESC_RATEMCS15,
};
-static struct pci_device_id rtl92se_pci_ids[] = {
+static const struct pci_device_id rtl92se_pci_ids[] = {
{RTL_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8192, rtl92se_hal_cfg)},
{RTL_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8171, rtl92se_hal_cfg)},
{RTL_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8172, rtl92se_hal_cfg)},
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
index 7bf9f25..97b8bd2 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
@@ -184,6 +184,8 @@ int rtl8723e_init_sw_vars(struct ieee80211_hw *hw)
rtl_fw_cb);
if (err) {
pr_err("Failed to request firmware!\n");
+ vfree(rtlpriv->rtlhal.pfirmware);
+ rtlpriv->rtlhal.pfirmware = NULL;
return 1;
}
return 0;
@@ -367,7 +369,7 @@ static const struct rtl_hal_cfg rtl8723e_hal_cfg = {
.maps[RTL_RC_HT_RATEMCS15] = DESC92C_RATEMCS15,
};
-static struct pci_device_id rtl8723e_pci_ids[] = {
+static const struct pci_device_id rtl8723e_pci_ids[] = {
{RTL_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8723, rtl8723e_hal_cfg)},
{},
};
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c
index 131c0d1..15c117e 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c
@@ -883,12 +883,8 @@ static void rtl8723be_dm_txpower_tracking_callback_thermalmeter(
if ((rtldm->power_index_offset[RF90_PATH_A] != 0) &&
(rtldm->txpower_track_control)) {
rtldm->done_txpower = true;
- if (thermalvalue > rtlefuse->eeprom_thermalmeter)
- rtl8723be_dm_tx_power_track_set_power(hw, BBSWING, 0,
- index_for_channel);
- else
- rtl8723be_dm_tx_power_track_set_power(hw, BBSWING, 0,
- index_for_channel);
+ rtl8723be_dm_tx_power_track_set_power(hw, BBSWING, 0,
+ index_for_channel);
rtldm->swing_idx_cck_base = rtldm->swing_idx_cck;
rtldm->swing_idx_ofdm_base[RF90_PATH_A] =
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c
index dd6f95c..4b963fd 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c
@@ -709,6 +709,7 @@ void rtl8723be_c2h_content_parsing(struct ieee80211_hw *hw,
u8 c2h_cmd_len, u8 *tmp_buf)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
+ struct rtl_btc_ops *btc_ops = rtlpriv->btcoexist.btc_ops;
switch (c2h_cmd_id) {
case C2H_8723B_DBG:
@@ -723,12 +724,16 @@ void rtl8723be_c2h_content_parsing(struct ieee80211_hw *hw,
case C2H_8723B_BT_INFO:
RT_TRACE(rtlpriv, COMP_FW, DBG_TRACE,
"[C2H], C2H_8723BE_BT_INFO!!\n");
- rtlpriv->btcoexist.btc_ops->btc_btinfo_notify(rtlpriv, tmp_buf,
- c2h_cmd_len);
+ if (rtlpriv->cfg->ops->get_btc_status())
+ btc_ops->btc_btinfo_notify(rtlpriv, tmp_buf,
+ c2h_cmd_len);
break;
case C2H_8723B_BT_MP:
RT_TRACE(rtlpriv, COMP_FW, DBG_TRACE,
"[C2H], C2H_8723BE_BT_MP!!\n");
+ if (rtlpriv->cfg->ops->get_btc_status())
+ btc_ops->btc_btmpinfo_notify(rtlpriv, tmp_buf,
+ c2h_cmd_len);
break;
default:
RT_TRACE(rtlpriv, COMP_FW, DBG_TRACE,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
index cd5dc6d..4d47b97 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
@@ -2111,6 +2111,13 @@ static void _rtl8723be_read_adapter_info(struct ieee80211_hw *hw,
rtlefuse->autoload_failflag,
hwinfo);
+ if (rtlpriv->btcoexist.btc_info.btcoexist == 1)
+ rtlefuse->board_type |= BIT(2); /* ODM_BOARD_BT */
+
+ rtlhal->board_type = rtlefuse->board_type;
+ RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD,
+ "board_type = 0x%x\n", rtlefuse->board_type);
+
rtlhal->package_type = _rtl8723be_read_package_type(hw);
/* set channel plan from efuse */
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c
index 9752175..9606641 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c
@@ -152,33 +152,86 @@ bool rtl8723be_phy_rf_config(struct ieee80211_hw *hw)
return rtl8723be_phy_rf6052_config(hw);
}
-static bool _rtl8723be_check_condition(struct ieee80211_hw *hw,
- const u32 condition)
+static bool _rtl8723be_check_positive(struct ieee80211_hw *hw,
+ const u32 condition1,
+ const u32 condition2)
{
- struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
- struct rtl_efuse *rtlefuse = rtl_efuse(rtl_priv(hw));
- u32 _board = rtlefuse->board_type; /*need efuse define*/
- u32 _interface = rtlhal->interface;
- u32 _platform = 0x08;/*SupportPlatform */
- u32 cond = condition;
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
+ struct rtl_hal *rtlhal = rtl_hal(rtlpriv);
+ u32 cut_ver = ((rtlhal->version & CHIP_VER_RTL_MASK)
+ >> CHIP_VER_RTL_SHIFT);
+ u32 intf = (rtlhal->interface == INTF_USB ? BIT(1) : BIT(0));
+
+ u8 board_type = ((rtlhal->board_type & BIT(4)) >> 4) << 0 | /* _GLNA */
+ ((rtlhal->board_type & BIT(3)) >> 3) << 1 | /* _GPA */
+ ((rtlhal->board_type & BIT(7)) >> 7) << 2 | /* _ALNA */
+ ((rtlhal->board_type & BIT(6)) >> 6) << 3 | /* _APA */
+ ((rtlhal->board_type & BIT(2)) >> 2) << 4; /* _BT */
+
+ u32 cond1 = condition1, cond2 = condition2;
+ u32 driver1 = cut_ver << 24 | /* CUT ver */
+ 0 << 20 | /* interface 2/2 */
+ 0x04 << 16 | /* platform */
+ rtlhal->package_type << 12 |
+ intf << 8 | /* interface 1/2 */
+ board_type;
+
+ u32 driver2 = rtlhal->type_glna << 0 |
+ rtlhal->type_gpa << 8 |
+ rtlhal->type_alna << 16 |
+ rtlhal->type_apa << 24;
- if (condition == 0xCDCDCDCD)
- return true;
+ RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE,
+ "===> [8812A] CheckPositive (cond1, cond2) = (0x%X 0x%X)\n",
+ cond1, cond2);
+ RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE,
+ "===> [8812A] CheckPositive (driver1, driver2) = (0x%X 0x%X)\n",
+ driver1, driver2);
- cond = condition & 0xFF;
- if ((_board & cond) == 0 && cond != 0x1F)
- return false;
+ RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE,
+ " (Platform, Interface) = (0x%X, 0x%X)\n", 0x04, intf);
+ RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE,
+ " (Board, Package) = (0x%X, 0x%X)\n",
+ rtlhal->board_type, rtlhal->package_type);
- cond = condition & 0xFF00;
- cond = cond >> 8;
- if ((_interface & cond) == 0 && cond != 0x07)
- return false;
+ /*============== Value Defined Check ===============*/
+ /*QFN Type [15:12] and Cut Version [27:24] need to do value check*/
- cond = condition & 0xFF0000;
- cond = cond >> 16;
- if ((_platform & cond) == 0 && cond != 0x0F)
+ if (((cond1 & 0x0000F000) != 0) && ((cond1 & 0x0000F000) !=
+ (driver1 & 0x0000F000)))
return false;
- return true;
+ if (((cond1 & 0x0F000000) != 0) && ((cond1 & 0x0F000000) !=
+ (driver1 & 0x0F000000)))
+ return false;
+
+ /*=============== Bit Defined Check ================*/
+ /* We don't care [31:28] */
+
+ cond1 &= 0x00FF0FFF;
+ driver1 &= 0x00FF0FFF;
+
+ if ((cond1 & driver1) == cond1) {
+ u32 mask = 0;
+
+ if ((cond1 & 0x0F) == 0) /* BoardType is DONTCARE*/
+ return true;
+
+ if ((cond1 & BIT(0)) != 0) /*GLNA*/
+ mask |= 0x000000FF;
+ if ((cond1 & BIT(1)) != 0) /*GPA*/
+ mask |= 0x0000FF00;
+ if ((cond1 & BIT(2)) != 0) /*ALNA*/
+ mask |= 0x00FF0000;
+ if ((cond1 & BIT(3)) != 0) /*APA*/
+ mask |= 0xFF000000;
+
+ /* BoardType of each RF path is matched*/
+ if ((cond2 & mask) == (driver2 & mask))
+ return true;
+ else
+ return false;
+ }
+ return false;
}
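
The positive-condition check packs driver state into two 32-bit words and matches them field by field against the table's condition words. A sketch composing driver1 with the layout used above ([27:24] cut version, [19:16] platform, [15:12] package type, [11:8] interface, [7:0] board-type bits); the sample values are invented:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t cut_ver = 2, platform = 0x04, package = 1, intf = 1;
        uint32_t board   = 0x10; /* BIT(4): BT on board, per the diff */

        uint32_t driver1 = cut_ver << 24 |
                           platform << 16 |
                           package << 12 |
                           intf << 8 |
                           board;

        printf("driver1 = 0x%08x\n", driver1); /* 0x02041110 */
        return 0;
    }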
static void _rtl8723be_config_rf_reg(struct ieee80211_hw *hw, u32 addr,
@@ -464,6 +517,16 @@ static bool _rtl8723be_phy_bb8723b_config_parafile(struct ieee80211_hw *hw)
struct rtl_efuse *rtlefuse = rtl_efuse(rtl_priv(hw));
bool rtstatus;
+ /* switch ant to BT */
+ if (rtlpriv->rtlhal.interface == INTF_USB) {
+ rtl_write_dword(rtlpriv, 0x948, 0x0);
+ } else {
+ if (rtlpriv->btcoexist.btc_info.single_ant_path == 0)
+ rtl_write_dword(rtlpriv, 0x948, 0x280);
+ else
+ rtl_write_dword(rtlpriv, 0x948, 0x0);
+ }
+
rtstatus = _rtl8723be_phy_config_bb_with_headerfile(hw,
BASEBAND_CONFIG_PHY_REG);
if (!rtstatus) {
@@ -493,142 +556,84 @@ static bool _rtl8723be_phy_bb8723b_config_parafile(struct ieee80211_hw *hw)
return true;
}
+static bool rtl8723be_phy_config_with_headerfile(struct ieee80211_hw *hw,
+ u32 *array_table,
+ u16 arraylen,
+ void (*set_reg)(struct ieee80211_hw *hw, u32 regaddr, u32 data))
+{
+ #define COND_ELSE 2
+ #define COND_ENDIF 3
+
+ int i = 0;
+ u8 cond;
+ bool matched = true, skipped = false;
+
+ while ((i + 1) < arraylen) {
+ u32 v1 = array_table[i];
+ u32 v2 = array_table[i + 1];
+
+ if (v1 & (BIT(31) | BIT(30))) { /* positive & negative condition */
+ if (v1 & BIT(31)) { /* positive condition */
+ cond = (u8)((v1 & (BIT(29) | BIT(28))) >> 28);
+ if (cond == COND_ENDIF) { /* endif */
+ matched = true;
+ skipped = false;
+ } else if (cond == COND_ELSE) { /* else */
+ matched = skipped ? false : true;
+ } else { /* if, else if */
+ if (skipped) {
+ matched = false;
+ } else {
+ if (_rtl8723be_check_positive(
+ hw, v1, v2)) {
+ matched = true;
+ skipped = true;
+ } else {
+ matched = false;
+ skipped = false;
+ }
+ }
+ }
+ } else if (v1 & BIT(30)) { /* negative condition */
+ /* do nothing */
+ }
+ } else {
+ if (matched)
+ set_reg(hw, v1, v2);
+ }
+ i = i + 2;
+ }
+
+ return true;
+}
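
The tables this parser walks (see table.c later in this patch) interleave (address, value) pairs with condition words: a word with BIT(31) set is a condition, and bits [29:28] select if/else-if (0/1), else (2) or endif (3); pairs between a failed condition and the next else/endif are skipped. An illustrative slice, using the condition words that actually appear in RTL8723BE_RADIOA_1TARRAY below but otherwise made-up values:

    static const uint32_t demo_table[] = {
            0x800,      0x80040000,  /* unconditional write             */
            0x80002000, 0x00000000,  /* if (cond1, cond2)               */
            0x051,      0x0006F10E,  /*   applied when the cond matches */
            0x90003000, 0x00000000,  /* else if (cond1, cond2)          */
            0x051,      0x0006F10E,
            0xA0000000, 0x00000000,  /* else                            */
            0x051,      0x0006B04E,  /*   default value                 */
            0xB0000000, 0x00000000,  /* endif                           */
    };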
+
static bool _rtl8723be_phy_config_mac_with_headerfile(struct ieee80211_hw *hw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
- u32 i;
- u32 arraylength;
- u32 *ptrarray;
RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, "Read rtl8723beMACPHY_Array\n");
- arraylength = RTL8723BEMAC_1T_ARRAYLEN;
- ptrarray = RTL8723BEMAC_1T_ARRAY;
- RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD,
- "Img:RTL8723bEMAC_1T_ARRAY LEN %d\n", arraylength);
- for (i = 0; i < arraylength; i = i + 2)
- rtl_write_byte(rtlpriv, ptrarray[i], (u8)ptrarray[i + 1]);
- return true;
+
+ return rtl8723be_phy_config_with_headerfile(hw,
+ RTL8723BEMAC_1T_ARRAY, RTL8723BEMAC_1T_ARRAYLEN,
+ rtl_write_byte_with_val32);
}
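
rtl_write_byte_with_val32 exists only to give the byte-wide MAC write the dword-typed signature the generic parser expects; its assumed shape is a thin wrapper along these lines (a sketch, not a quote of the rtlwifi core):

    static void rtl_write_byte_with_val32(struct ieee80211_hw *hw,
                                          u32 regaddr, u32 data)
    {
            struct rtl_priv *rtlpriv = rtl_priv(hw);

            /* Truncate the table value and reuse the byte-wide write. */
            rtl_write_byte(rtlpriv, regaddr, (u8)data);
    }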
static bool _rtl8723be_phy_config_bb_with_headerfile(struct ieee80211_hw *hw,
u8 configtype)
{
- #define READ_NEXT_PAIR(v1, v2, i) \
- do { \
- i += 2; \
- v1 = array_table[i];\
- v2 = array_table[i+1]; \
- } while (0)
-
- int i;
- u32 *array_table;
- u16 arraylen;
- struct rtl_priv *rtlpriv = rtl_priv(hw);
- u32 v1 = 0, v2 = 0;
-
- if (configtype == BASEBAND_CONFIG_PHY_REG) {
- arraylen = RTL8723BEPHY_REG_1TARRAYLEN;
- array_table = RTL8723BEPHY_REG_1TARRAY;
-
- for (i = 0; i < arraylen; i = i + 2) {
- v1 = array_table[i];
- v2 = array_table[i+1];
- if (v1 < 0xcdcdcdcd) {
- _rtl8723be_config_bb_reg(hw, v1, v2);
- } else {/*This line is the start line of branch.*/
- /* to protect READ_NEXT_PAIR not overrun */
- if (i >= arraylen - 2)
- break;
-
- if (!_rtl8723be_check_condition(hw,
- array_table[i])) {
- /*Discard the following
- *(offset, data) pairs
- */
- READ_NEXT_PAIR(v1, v2, i);
- while (v2 != 0xDEAD &&
- v2 != 0xCDEF &&
- v2 != 0xCDCD &&
- i < arraylen - 2) {
- READ_NEXT_PAIR(v1, v2, i);
- }
- i -= 2; /* prevent from for-loop += 2*/
- /*Configure matched pairs and
- *skip to end of if-else.
- */
- } else {
- READ_NEXT_PAIR(v1, v2, i);
- while (v2 != 0xDEAD &&
- v2 != 0xCDEF &&
- v2 != 0xCDCD &&
- i < arraylen - 2) {
- _rtl8723be_config_bb_reg(hw,
- v1, v2);
- READ_NEXT_PAIR(v1, v2, i);
- }
- while (v2 != 0xDEAD && i < arraylen - 2)
- READ_NEXT_PAIR(v1, v2, i);
- }
- }
- }
- } else if (configtype == BASEBAND_CONFIG_AGC_TAB) {
- arraylen = RTL8723BEAGCTAB_1TARRAYLEN;
- array_table = RTL8723BEAGCTAB_1TARRAY;
-
- for (i = 0; i < arraylen; i = i + 2) {
- v1 = array_table[i];
- v2 = array_table[i+1];
- if (v1 < 0xCDCDCDCD) {
- rtl_set_bbreg(hw, array_table[i],
- MASKDWORD,
- array_table[i + 1]);
- udelay(1);
- continue;
- } else {/*This line is the start line of branch.*/
- /* to protect READ_NEXT_PAIR not overrun */
- if (i >= arraylen - 2)
- break;
-
- if (!_rtl8723be_check_condition(hw,
- array_table[i])) {
- /*Discard the following
- *(offset, data) pairs
- */
- READ_NEXT_PAIR(v1, v2, i);
- while (v2 != 0xDEAD &&
- v2 != 0xCDEF &&
- v2 != 0xCDCD &&
- i < arraylen - 2) {
- READ_NEXT_PAIR(v1, v2, i);
- }
- i -= 2; /* prevent from for-loop += 2*/
- /*Configure matched pairs and
- *skip to end of if-else.
- */
- } else {
- READ_NEXT_PAIR(v1, v2, i);
- while (v2 != 0xDEAD &&
- v2 != 0xCDEF &&
- v2 != 0xCDCD &&
- i < arraylen - 2) {
- rtl_set_bbreg(hw, array_table[i],
- MASKDWORD,
- array_table[i + 1]);
- udelay(1);
- READ_NEXT_PAIR(v1, v2, i);
- }
+ if (configtype == BASEBAND_CONFIG_PHY_REG)
+ return rtl8723be_phy_config_with_headerfile(hw,
+ RTL8723BEPHY_REG_1TARRAY,
+ RTL8723BEPHY_REG_1TARRAYLEN,
+ _rtl8723be_config_bb_reg);
+ else if (configtype == BASEBAND_CONFIG_AGC_TAB)
+ return rtl8723be_phy_config_with_headerfile(hw,
+ RTL8723BEAGCTAB_1TARRAY,
+ RTL8723BEAGCTAB_1TARRAYLEN,
+ rtl_set_bbreg_with_dwmask);
- while (v2 != 0xDEAD && i < arraylen - 2)
- READ_NEXT_PAIR(v1, v2, i);
- }
- }
- RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE,
- "The agctab_array_table[0] is %x Rtl818EEPHY_REGArray[1] is %x\n",
- array_table[i], array_table[i + 1]);
- }
- }
- return true;
+ return false;
}
static u8 _rtl8723be_get_rate_section_index(u32 regaddr)
@@ -761,73 +766,17 @@ static bool _rtl8723be_phy_config_bb_with_pgheaderfile(struct ieee80211_hw *hw,
bool rtl8723be_phy_config_rf_with_headerfile(struct ieee80211_hw *hw,
enum radio_path rfpath)
{
- #define READ_NEXT_RF_PAIR(v1, v2, i) \
- do { \
- i += 2; \
- v1 = radioa_array_table[i]; \
- v2 = radioa_array_table[i+1]; \
- } while (0)
-
- int i;
- bool rtstatus = true;
- u32 *radioa_array_table;
- u16 radioa_arraylen;
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
- u32 v1 = 0, v2 = 0;
+ bool ret = true;
- radioa_arraylen = RTL8723BE_RADIOA_1TARRAYLEN;
- radioa_array_table = RTL8723BE_RADIOA_1TARRAY;
- RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD,
- "Radio_A:RTL8723BE_RADIOA_1TARRAY %d\n", radioa_arraylen);
RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, "Radio No %x\n", rfpath);
- rtstatus = true;
switch (rfpath) {
case RF90_PATH_A:
- for (i = 0; i < radioa_arraylen; i = i + 2) {
- v1 = radioa_array_table[i];
- v2 = radioa_array_table[i+1];
- if (v1 < 0xcdcdcdcd) {
- _rtl8723be_config_rf_radio_a(hw, v1, v2);
- } else {/*This line is the start line of branch.*/
- /* to protect READ_NEXT_PAIR not overrun */
- if (i >= radioa_arraylen - 2)
- break;
-
- if (!_rtl8723be_check_condition(hw,
- radioa_array_table[i])) {
- /*Discard the following
- *(offset, data) pairs
- */
- READ_NEXT_RF_PAIR(v1, v2, i);
- while (v2 != 0xDEAD &&
- v2 != 0xCDEF &&
- v2 != 0xCDCD &&
- i < radioa_arraylen - 2) {
- READ_NEXT_RF_PAIR(v1, v2, i);
- }
- i -= 2; /* prevent from for-loop += 2*/
- } else {
- /*Configure matched pairs
- *and skip to end of if-else.
- */
- READ_NEXT_RF_PAIR(v1, v2, i);
- while (v2 != 0xDEAD &&
- v2 != 0xCDEF &&
- v2 != 0xCDCD &&
- i < radioa_arraylen - 2) {
- _rtl8723be_config_rf_radio_a(hw,
- v1, v2);
- READ_NEXT_RF_PAIR(v1, v2, i);
- }
-
- while (v2 != 0xDEAD &&
- i < radioa_arraylen - 2) {
- READ_NEXT_RF_PAIR(v1, v2, i);
- }
- }
- }
- }
+ ret = rtl8723be_phy_config_with_headerfile(hw,
+ RTL8723BE_RADIOA_1TARRAY,
+ RTL8723BE_RADIOA_1TARRAYLEN,
+ _rtl8723be_config_rf_radio_a);
if (rtlhal->oem_id == RT_CID_819X_HP)
_rtl8723be_config_rf_radio_a(hw, 0x52, 0x7E4BD);
@@ -840,7 +789,7 @@ bool rtl8723be_phy_config_rf_with_headerfile(struct ieee80211_hw *hw,
"switch case %#x not processed\n", rfpath);
break;
}
- return true;
+ return ret;
}
void rtl8723be_phy_get_hw_reg_originalvalue(struct ieee80211_hw *hw)
@@ -1350,7 +1299,7 @@ void rtl8723be_phy_sw_chnl_callback(struct ieee80211_hw *hw)
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
struct rtl_phy *rtlphy = &rtlpriv->phy;
- u32 delay;
+ u32 delay = 0;
RT_TRACE(rtlpriv, COMP_SCAN, DBG_TRACE,
"switch to channel%d\n", rtlphy->current_channel);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
index f9d10f1..2b16a14 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
@@ -187,16 +187,10 @@ int rtl8723be_init_sw_vars(struct ieee80211_hw *hw)
rtlpriv->io.dev, GFP_KERNEL, hw,
rtl_fw_cb);
if (err) {
- /* Failed to get firmware. Check if old version available */
- fw_name = "rtlwifi/rtl8723befw.bin";
- pr_info("Using firmware %s\n", fw_name);
- err = request_firmware_nowait(THIS_MODULE, 1, fw_name,
- rtlpriv->io.dev, GFP_KERNEL, hw,
- rtl_fw_cb);
- if (err) {
- pr_err("Failed to request firmware!\n");
- return 1;
- }
+ pr_err("Failed to request firmware!\n");
+ vfree(rtlpriv->rtlhal.pfirmware);
+ rtlpriv->rtlhal.pfirmware = NULL;
+ return 1;
}
return 0;
}
@@ -287,6 +281,7 @@ static const struct rtl_hal_cfg rtl8723be_hal_cfg = {
.bar_id = 2,
.write_readback = true,
.name = "rtl8723be_pci",
+ .alt_fw_name = "rtlwifi/rtl8723befw.bin",
.ops = &rtl8723be_hal_ops,
.mod_params = &rtl8723be_mod_params,
.maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL,
@@ -380,7 +375,7 @@ static const struct rtl_hal_cfg rtl8723be_hal_cfg = {
.maps[RTL_RC_HT_RATEMCS15] = DESC92C_RATEMCS15,
};
-static struct pci_device_id rtl8723be_pci_ids[] = {
+static const struct pci_device_id rtl8723be_pci_ids[] = {
{RTL_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xB723, rtl8723be_hal_cfg)},
{},
};
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/table.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/table.c
index a180761..381c16b 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/table.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/table.c
@@ -26,6 +26,7 @@
*****************************************************************************/
#include "table.h"
+
u32 RTL8723BEPHY_REG_1TARRAY[] = {
0x800, 0x80040000,
0x804, 0x00000003,
@@ -36,7 +37,7 @@ u32 RTL8723BEPHY_REG_1TARRAY[] = {
0x818, 0x02200385,
0x81C, 0x00000000,
0x820, 0x01000100,
- 0x824, 0x00390204,
+ 0x824, 0x00190204,
0x828, 0x00000000,
0x82C, 0x00000000,
0x830, 0x00000000,
@@ -73,9 +74,8 @@ u32 RTL8723BEPHY_REG_1TARRAY[] = {
0x90C, 0x81121111,
0x910, 0x00000002,
0x914, 0x00000201,
- 0x948, 0x00000280,
0xA00, 0x00D047C8,
- 0xA04, 0x80FF000C,
+ 0xA04, 0x80FF800C,
0xA08, 0x8C838300,
0xA0C, 0x2E7F120F,
0xA10, 0x9500BB78,
@@ -114,7 +114,7 @@ u32 RTL8723BEPHY_REG_1TARRAY[] = {
0xC4C, 0x007F037F,
0xC50, 0x69553420,
0xC54, 0x43BC0094,
- 0xC58, 0x00023169,
+ 0xC58, 0x00013147,
0xC5C, 0x00250492,
0xC60, 0x00000000,
0xC64, 0x7112848B,
@@ -125,7 +125,7 @@ u32 RTL8723BEPHY_REG_1TARRAY[] = {
0xC78, 0x0000001F,
0xC7C, 0x00B91612,
0xC80, 0x390000E4,
- 0xC84, 0x20F60000,
+ 0xC84, 0x21F60000,
0xC88, 0x40000100,
0xC8C, 0x20200000,
0xC90, 0x00020E1A,
@@ -224,15 +224,21 @@ u32 RTL8723BEPHY_REG_1TARRAY[] = {
};
+u32 RTL8723BEPHY_REG_1TARRAYLEN =
+ sizeof(RTL8723BEPHY_REG_1TARRAY) / sizeof(u32);
+
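
Deriving the length with sizeof (instead of the hand-counted #defines removed from table.h below) keeps it correct whenever entries are added or dropped; the kernel's ARRAY_SIZE() macro from <linux/kernel.h> spells the same computation and is also immune to element-type changes:

    #include <linux/kernel.h>   /* ARRAY_SIZE() */

    u32 RTL8723BEPHY_REG_1TARRAYLEN = ARRAY_SIZE(RTL8723BEPHY_REG_1TARRAY);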
u32 RTL8723BEPHY_REG_ARRAY_PG[] = {
- 0, 0, 0, 0x00000e08, 0x0000ff00, 0x00004000,
- 0, 0, 0, 0x0000086c, 0xffffff00, 0x34363800,
- 0, 0, 0, 0x00000e00, 0xffffffff, 0x42444646,
- 0, 0, 0, 0x00000e04, 0xffffffff, 0x30343840,
+ 0, 0, 0, 0x00000e08, 0x0000ff00, 0x00003800,
+ 0, 0, 0, 0x0000086c, 0xffffff00, 0x32343600,
+ 0, 0, 0, 0x00000e00, 0xffffffff, 0x40424444,
+ 0, 0, 0, 0x00000e04, 0xffffffff, 0x28323638,
0, 0, 0, 0x00000e10, 0xffffffff, 0x38404244,
0, 0, 0, 0x00000e14, 0xffffffff, 0x26303436
};
+u32 RTL8723BEPHY_REG_ARRAY_PGLEN =
+ sizeof(RTL8723BEPHY_REG_ARRAY_PG) / sizeof(u32);
+
u32 RTL8723BE_RADIOA_1TARRAY[] = {
0x000, 0x00010000,
0x0B0, 0x000DFFE0,
@@ -257,15 +263,37 @@ u32 RTL8723BE_RADIOA_1TARRAY[] = {
0x01E, 0x00000000,
0x0DF, 0x00000780,
0x050, 0x00067435,
+ 0x80002000, 0x00000000, 0x40000000, 0x00000000,
+ 0x051, 0x0006F10E,
+ 0x052, 0x000007D3,
+ 0x90003000, 0x00000000, 0x40000000, 0x00000000,
+ 0x051, 0x0006F10E,
+ 0x052, 0x000007D3,
+ 0x90004000, 0x00000000, 0x40000000, 0x00000000,
+ 0x051, 0x0006F10E,
+ 0x052, 0x000007D3,
+ 0xA0000000, 0x00000000,
0x051, 0x0006B04E,
0x052, 0x000007D2,
+ 0xB0000000, 0x00000000,
0x053, 0x00000000,
0x054, 0x00050400,
0x055, 0x0004026E,
0x0DD, 0x0000004C,
0x070, 0x00067435,
+ 0x80002000, 0x00000000, 0x40000000, 0x00000000,
+ 0x071, 0x0006F10E,
+ 0x072, 0x000007D3,
+ 0x90003000, 0x00000000, 0x40000000, 0x00000000,
+ 0x071, 0x0006F10E,
+ 0x072, 0x000007D3,
+ 0x90004000, 0x00000000, 0x40000000, 0x00000000,
+ 0x071, 0x0006F10E,
+ 0x072, 0x000007D3,
+ 0xA0000000, 0x00000000,
0x071, 0x0006B04E,
0x072, 0x000007D2,
+ 0xB0000000, 0x00000000,
0x073, 0x00000000,
0x074, 0x00050400,
0x075, 0x0004026E,
@@ -308,6 +336,7 @@ u32 RTL8723BE_RADIOA_1TARRAY[] = {
0x044, 0x00000051,
0x0EF, 0x00000000,
0x0ED, 0x00000000,
+ 0x07F, 0x00020080,
0x0EF, 0x00002000,
0x03B, 0x000380EF,
0x03B, 0x000302FE,
@@ -336,14 +365,24 @@ u32 RTL8723BE_RADIOA_1TARRAY[] = {
0x0A3, 0x00008000,
0x0A4, 0x00048D80,
0x0A5, 0x00068000,
- 0x000, 0x00033D80,
+ 0x0ED, 0x00000002,
+ 0x0EF, 0x00000002,
+ 0x056, 0x00000032,
+ 0x076, 0x00000032,
+ 0x001, 0x00000780,
};
+u32 RTL8723BE_RADIOA_1TARRAYLEN =
+ sizeof(RTL8723BE_RADIOA_1TARRAY) / sizeof(u32);
+
u32 RTL8723BEMAC_1T_ARRAY[] = {
0x02F, 0x00000030,
0x035, 0x00000000,
+ 0x039, 0x00000008,
+ 0x064, 0x00000000,
0x067, 0x00000020,
+ 0x421, 0x0000000F,
0x428, 0x0000000A,
0x429, 0x00000010,
0x430, 0x00000000,
@@ -439,9 +478,13 @@ u32 RTL8723BEMAC_1T_ARRAY[] = {
0x709, 0x00000043,
0x70A, 0x00000065,
0x70B, 0x00000087,
+ 0x765, 0x00000018,
+ 0x76E, 0x00000004,
};
+u32 RTL8723BEMAC_1T_ARRAYLEN = sizeof(RTL8723BEMAC_1T_ARRAY) / sizeof(u32);
+
u32 RTL8723BEAGCTAB_1TARRAY[] = {
0xC78, 0xFD000001,
0xC78, 0xFC010001,
@@ -466,21 +509,21 @@ u32 RTL8723BEAGCTAB_1TARRAY[] = {
0xC78, 0xE9140001,
0xC78, 0xE8150001,
0xC78, 0xE7160001,
- 0xC78, 0xAA170001,
- 0xC78, 0xA9180001,
- 0xC78, 0xA8190001,
- 0xC78, 0xA71A0001,
- 0xC78, 0xA61B0001,
- 0xC78, 0xA51C0001,
- 0xC78, 0xA41D0001,
- 0xC78, 0xA31E0001,
- 0xC78, 0x671F0001,
- 0xC78, 0x66200001,
- 0xC78, 0x65210001,
- 0xC78, 0x64220001,
- 0xC78, 0x63230001,
- 0xC78, 0x62240001,
- 0xC78, 0x61250001,
+ 0xC78, 0xE6170001,
+ 0xC78, 0xE5180001,
+ 0xC78, 0xE4190001,
+ 0xC78, 0xE31A0001,
+ 0xC78, 0xA51B0001,
+ 0xC78, 0xA41C0001,
+ 0xC78, 0xA31D0001,
+ 0xC78, 0x671E0001,
+ 0xC78, 0x661F0001,
+ 0xC78, 0x65200001,
+ 0xC78, 0x64210001,
+ 0xC78, 0x63220001,
+ 0xC78, 0x4A230001,
+ 0xC78, 0x49240001,
+ 0xC78, 0x48250001,
0xC78, 0x47260001,
0xC78, 0x46270001,
0xC78, 0x45280001,
@@ -491,22 +534,22 @@ u32 RTL8723BEAGCTAB_1TARRAY[] = {
0xC78, 0x282D0001,
0xC78, 0x272E0001,
0xC78, 0x262F0001,
- 0xC78, 0x25300001,
- 0xC78, 0x24310001,
- 0xC78, 0x09320001,
- 0xC78, 0x08330001,
- 0xC78, 0x07340001,
- 0xC78, 0x06350001,
- 0xC78, 0x05360001,
- 0xC78, 0x04370001,
- 0xC78, 0x03380001,
- 0xC78, 0x02390001,
+ 0xC78, 0x0A300001,
+ 0xC78, 0x09310001,
+ 0xC78, 0x08320001,
+ 0xC78, 0x07330001,
+ 0xC78, 0x06340001,
+ 0xC78, 0x05350001,
+ 0xC78, 0x04360001,
+ 0xC78, 0x03370001,
+ 0xC78, 0x02380001,
+ 0xC78, 0x01390001,
0xC78, 0x013A0001,
- 0xC78, 0x003B0001,
- 0xC78, 0x003C0001,
- 0xC78, 0x003D0001,
- 0xC78, 0x003E0001,
- 0xC78, 0x003F0001,
+ 0xC78, 0x013B0001,
+ 0xC78, 0x013C0001,
+ 0xC78, 0x013D0001,
+ 0xC78, 0x013E0001,
+ 0xC78, 0x013F0001,
0xC78, 0xFC400001,
0xC78, 0xFB410001,
0xC78, 0xFA420001,
@@ -531,47 +574,50 @@ u32 RTL8723BEAGCTAB_1TARRAY[] = {
0xC78, 0xE7550001,
0xC78, 0xE6560001,
0xC78, 0xE5570001,
- 0xC78, 0xAA580001,
- 0xC78, 0xA9590001,
- 0xC78, 0xA85A0001,
- 0xC78, 0xA75B0001,
- 0xC78, 0xA65C0001,
- 0xC78, 0xA55D0001,
- 0xC78, 0xA45E0001,
- 0xC78, 0x675F0001,
- 0xC78, 0x66600001,
- 0xC78, 0x65610001,
- 0xC78, 0x64620001,
- 0xC78, 0x63630001,
- 0xC78, 0x62640001,
- 0xC78, 0x61650001,
+ 0xC78, 0xE4580001,
+ 0xC78, 0xE3590001,
+ 0xC78, 0xA65A0001,
+ 0xC78, 0xA55B0001,
+ 0xC78, 0xA45C0001,
+ 0xC78, 0xA35D0001,
+ 0xC78, 0x675E0001,
+ 0xC78, 0x665F0001,
+ 0xC78, 0x65600001,
+ 0xC78, 0x64610001,
+ 0xC78, 0x63620001,
+ 0xC78, 0x62630001,
+ 0xC78, 0x61640001,
+ 0xC78, 0x48650001,
0xC78, 0x47660001,
0xC78, 0x46670001,
0xC78, 0x45680001,
0xC78, 0x44690001,
0xC78, 0x436A0001,
0xC78, 0x426B0001,
- 0xC78, 0x296C0001,
- 0xC78, 0x286D0001,
- 0xC78, 0x276E0001,
- 0xC78, 0x266F0001,
- 0xC78, 0x25700001,
- 0xC78, 0x24710001,
- 0xC78, 0x09720001,
- 0xC78, 0x08730001,
- 0xC78, 0x07740001,
- 0xC78, 0x06750001,
- 0xC78, 0x05760001,
- 0xC78, 0x04770001,
- 0xC78, 0x03780001,
- 0xC78, 0x02790001,
+ 0xC78, 0x286C0001,
+ 0xC78, 0x276D0001,
+ 0xC78, 0x266E0001,
+ 0xC78, 0x256F0001,
+ 0xC78, 0x24700001,
+ 0xC78, 0x09710001,
+ 0xC78, 0x08720001,
+ 0xC78, 0x07730001,
+ 0xC78, 0x06740001,
+ 0xC78, 0x05750001,
+ 0xC78, 0x04760001,
+ 0xC78, 0x03770001,
+ 0xC78, 0x02780001,
+ 0xC78, 0x01790001,
0xC78, 0x017A0001,
- 0xC78, 0x007B0001,
- 0xC78, 0x007C0001,
- 0xC78, 0x007D0001,
- 0xC78, 0x007E0001,
- 0xC78, 0x007F0001,
+ 0xC78, 0x017B0001,
+ 0xC78, 0x017C0001,
+ 0xC78, 0x017D0001,
+ 0xC78, 0x017E0001,
+ 0xC78, 0x017F0001,
0xC50, 0x69553422,
0xC50, 0x69553420,
+ 0x824, 0x00390204,
};
+
+u32 RTL8723BEAGCTAB_1TARRAYLEN = sizeof(RTL8723BEAGCTAB_1TARRAY) / sizeof(u32);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/table.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/table.h
index dc17001..1deaffe 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/table.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/table.h
@@ -29,15 +29,15 @@
#define __RTL8723BE_TABLE__H_
#include <linux/types.h>
-#define RTL8723BEPHY_REG_1TARRAYLEN 388
+extern u32 RTL8723BEPHY_REG_1TARRAYLEN;
extern u32 RTL8723BEPHY_REG_1TARRAY[];
-#define RTL8723BEPHY_REG_ARRAY_PGLEN 36
+extern u32 RTL8723BEPHY_REG_ARRAY_PGLEN;
extern u32 RTL8723BEPHY_REG_ARRAY_PG[];
-#define RTL8723BE_RADIOA_1TARRAYLEN 206
+extern u32 RTL8723BE_RADIOA_1TARRAYLEN;
extern u32 RTL8723BE_RADIOA_1TARRAY[];
-#define RTL8723BEMAC_1T_ARRAYLEN 196
+extern u32 RTL8723BEMAC_1T_ARRAYLEN;
extern u32 RTL8723BEMAC_1T_ARRAY[];
-#define RTL8723BEAGCTAB_1TARRAYLEN 260
+extern u32 RTL8723BEAGCTAB_1TARRAYLEN;
extern u32 RTL8723BEAGCTAB_1TARRAY[];
#endif
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c
index 03259aa..f2b2c54 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c
@@ -98,7 +98,7 @@ static int _rtl8821ae_fw_free_to_go(struct ieee80211_hw *hw)
if (counter >= FW_8821AE_POLLING_TIMEOUT_COUNT) {
RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
- "chksum report faill ! REG_MCUFWDL:0x%08x .\n",
+ "chksum report fail! REG_MCUFWDL:0x%08x .\n",
value32);
goto exit;
}
@@ -1923,6 +1923,7 @@ void rtl8821ae_c2h_content_parsing(struct ieee80211_hw *hw,
u8 *tmp_buf)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
+ struct rtl_btc_ops *btc_ops = rtlpriv->btcoexist.btc_ops;
switch (c2h_cmd_id) {
case C2H_8812_DBG:
@@ -1938,9 +1939,15 @@ void rtl8821ae_c2h_content_parsing(struct ieee80211_hw *hw,
RT_TRACE(rtlpriv, COMP_FW, DBG_LOUD,
"[C2H], C2H_8812_BT_INFO!!\n");
if (rtlpriv->cfg->ops->get_btc_status())
- rtlpriv->btcoexist.btc_ops->btc_btinfo_notify(rtlpriv,
- tmp_buf,
- c2h_cmd_len);
+ btc_ops->btc_btinfo_notify(rtlpriv, tmp_buf,
+ c2h_cmd_len);
+ break;
+ case C2H_8812_BT_MP:
+ RT_TRACE(rtlpriv, COMP_FW, DBG_TRACE,
+ "[C2H], C2H_8812_BT_MP!!\n");
+ if (rtlpriv->cfg->ops->get_btc_status())
+ btc_ops->btc_btmpinfo_notify(rtlpriv, tmp_buf,
+ c2h_cmd_len);
break;
default:
break;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
index 2bc6bac..4f73012 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
@@ -779,7 +779,7 @@ void rtl8821ae_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val)
_rtl8821ae_resume_tx_beacon(hw);
break; }
case HW_VAR_NAV_UPPER: {
- u32 us_nav_upper = ((u32)*val);
+ u32 us_nav_upper = *(u32 *)val;
if (us_nav_upper > HAL_92C_NAV_UPPER_UNIT * 0xFF) {
RT_TRACE(rtlpriv, COMP_INIT , DBG_WARNING,
@@ -2966,6 +2966,44 @@ static void _rtl8812ae_read_pa_type(struct ieee80211_hw *hw, u8 *hwinfo,
}
}
+static void _rtl8812ae_read_amplifier_type(struct ieee80211_hw *hw, u8 *hwinfo,
+ bool autoload_fail)
+{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
+ struct rtl_hal *rtlhal = rtl_hal(rtlpriv);
+
+ u8 ext_type_pa_2g_a = (hwinfo[0xBD] & BIT(2)) >> 2; /* 0xBD[2] */
+ u8 ext_type_pa_2g_b = (hwinfo[0xBD] & BIT(6)) >> 6; /* 0xBD[6] */
+ u8 ext_type_pa_5g_a = (hwinfo[0xBF] & BIT(2)) >> 2; /* 0xBF[2] */
+ u8 ext_type_pa_5g_b = (hwinfo[0xBF] & BIT(6)) >> 6; /* 0xBF[6] */
+ /* 0xBD[1:0] */
+ u8 ext_type_lna_2g_a = (hwinfo[0xBD] & (BIT(1) | BIT(0))) >> 0;
+ /* 0xBD[5:4] */
+ u8 ext_type_lna_2g_b = (hwinfo[0xBD] & (BIT(5) | BIT(4))) >> 4;
+ /* 0xBF[1:0] */
+ u8 ext_type_lna_5g_a = (hwinfo[0xBF] & (BIT(1) | BIT(0))) >> 0;
+ /* 0xBF[5:4] */
+ u8 ext_type_lna_5g_b = (hwinfo[0xBF] & (BIT(5) | BIT(4))) >> 4;
+
+ _rtl8812ae_read_pa_type(hw, hwinfo, autoload_fail);
+
+ /* [2.4G] Path A and B are both extPA */
+ if ((rtlhal->pa_type_2g & (BIT(5) | BIT(4))) == (BIT(5) | BIT(4)))
+ rtlhal->type_gpa = ext_type_pa_2g_b << 2 | ext_type_pa_2g_a;
+
+ /* [5G] Path A and B are both extPA */
+ if ((rtlhal->pa_type_5g & (BIT(1) | BIT(0))) == (BIT(1) | BIT(0)))
+ rtlhal->type_apa = ext_type_pa_5g_b << 2 | ext_type_pa_5g_a;
+
+ /* [2.4G] Path A and B are both extLNA */
+ if ((rtlhal->lna_type_2g & (BIT(7) | BIT(3))) == (BIT(7) | BIT(3)))
+ rtlhal->type_glna = ext_type_lna_2g_b << 2 | ext_type_lna_2g_a;
+
+ /* [5G] Path A and B are both extLNA */
+ if ((rtlhal->lna_type_5g & (BIT(7) | BIT(3))) == (BIT(7) | BIT(3)))
+ rtlhal->type_alna = ext_type_lna_5g_b << 2 | ext_type_lna_5g_a;
+}
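
The four type_* values packed here are what the 8812 positive-condition check earlier in this patch folds into driver2 (GLNA at bits [7:0], GPA [15:8], ALNA [23:16], APA [31:24]). A worked example with a made-up EEPROM byte (BIT(n) is 1 << n):

    u8 b = 0x46;                 /* hypothetical hwinfo[0xBD] = 0100 0110b */
    u8 ext_type_pa_2g_a  = (b & BIT(2)) >> 2;             /* bit 2    -> 1 */
    u8 ext_type_pa_2g_b  = (b & BIT(6)) >> 6;             /* bit 6    -> 1 */
    u8 ext_type_lna_2g_a = (b & (BIT(1) | BIT(0))) >> 0;  /* bits 1:0 -> 2 */
    u8 ext_type_lna_2g_b = (b & (BIT(5) | BIT(4))) >> 4;  /* bits 5:4 -> 0 */
    /* both 2.4G paths on external PA -> pack the per-path types */
    u8 type_gpa = ext_type_pa_2g_b << 2 | ext_type_pa_2g_a;       /* = 0x5 */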
+
static void _rtl8821ae_read_pa_type(struct ieee80211_hw *hw, u8 *hwinfo,
bool autoload_fail)
{
@@ -3114,7 +3152,8 @@ static void _rtl8821ae_read_adapter_info(struct ieee80211_hw *hw, bool b_pseudo_
hwinfo);
if (rtlhal->hw_type == HARDWARE_TYPE_RTL8812AE) {
- _rtl8812ae_read_pa_type(hw, hwinfo, rtlefuse->autoload_failflag);
+ _rtl8812ae_read_amplifier_type(hw, hwinfo,
+ rtlefuse->autoload_failflag);
_rtl8812ae_read_bt_coexist_info_from_hwpg(hw,
rtlefuse->autoload_failflag, hwinfo);
} else {
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c
index aa3ccc7..176deb2 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c
@@ -3773,10 +3773,11 @@ static void _rtl8821ae_iqk_tx(struct ieee80211_hw *hw, enum radio_path path)
u32 tx_fail, rx_fail, delay_count, iqk_ready, cal_retry, cal = 0, temp_reg65;
int tx_x = 0, tx_y = 0, rx_x = 0, rx_y = 0, tx_average = 0, rx_average = 0;
int tx_x0[cal_num], tx_y0[cal_num], tx_x0_rxk[cal_num],
- tx_y0_rxk[cal_num], rx_x0[cal_num], rx_y0[cal_num];
+ tx_y0_rxk[cal_num], rx_x0[cal_num], rx_y0[cal_num],
+ tx_dt[cal_num], rx_dt[cal_num];
bool tx0iqkok = false, rx0iqkok = false;
bool vdf_enable = false;
- int i, k, vdf_y[3], vdf_x[3], tx_dt[3], rx_dt[3],
+ int i, k, vdf_y[3], vdf_x[3],
ii, dx = 0, dy = 0, tx_finish = 0, rx_finish = 0;
RT_TRACE(rtlpriv, COMP_IQK, DBG_LOUD,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
index d71d277..0894ef48 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
@@ -196,6 +196,8 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw)
rtlpriv->rtlhal.wowlan_firmware = vzalloc(0x8000);
if (!rtlpriv->rtlhal.wowlan_firmware) {
pr_err("Can't alloc buffer for wowlan fw.\n");
+ vfree(rtlpriv->rtlhal.pfirmware);
+ rtlpriv->rtlhal.pfirmware = NULL;
return 1;
}
@@ -214,16 +216,10 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw)
rtlpriv->io.dev, GFP_KERNEL, hw,
rtl_fw_cb);
if (err) {
- /* Failed to get firmware. Check if old version available */
- fw_name = "rtlwifi/rtl8821aefw.bin";
- pr_info("Using firmware %s\n", fw_name);
- err = request_firmware_nowait(THIS_MODULE, 1, fw_name,
- rtlpriv->io.dev, GFP_KERNEL, hw,
- rtl_fw_cb);
- if (err) {
- pr_err("Failed to request normal firmware!\n");
- return 1;
- }
+ pr_err("Failed to request normal firmware!\n");
+ vfree(rtlpriv->rtlhal.wowlan_firmware);
+ vfree(rtlpriv->rtlhal.pfirmware);
+ return 1;
}
/*load wowlan firmware*/
pr_info("Using firmware %s\n", wowlan_fw_name);
@@ -233,6 +229,8 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw)
rtl_wowlan_fw_cb);
if (err) {
pr_err("Failed to request wowlan firmware!\n");
+ vfree(rtlpriv->rtlhal.wowlan_firmware);
+ vfree(rtlpriv->rtlhal.pfirmware);
return 1;
}
return 0;
@@ -325,6 +323,7 @@ static const struct rtl_hal_cfg rtl8821ae_hal_cfg = {
.bar_id = 2,
.write_readback = true,
.name = "rtl8821ae_pci",
+ .alt_fw_name = "rtlwifi/rtl8821aefw.bin",
.ops = &rtl8821ae_hal_ops,
.mod_params = &rtl8821ae_mod_params,
.maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL,
@@ -424,7 +423,7 @@ static const struct rtl_hal_cfg rtl8821ae_hal_cfg = {
.maps[RTL_RC_VHT_RATE_2SS_MCS9] = DESC_RATEVHT2SS_MCS9,
};
-static struct pci_device_id rtl8821ae_pci_ids[] = {
+static const struct pci_device_id rtl8821ae_pci_ids[] = {
{RTL_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8812, rtl8821ae_hal_cfg)},
{RTL_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8821, rtl8821ae_hal_cfg)},
{},
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index 70723e6..1ab1024 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -314,35 +314,29 @@ enum hardware_type {
HARDWARE_TYPE_RTL8192EE,
HARDWARE_TYPE_RTL8821AE,
HARDWARE_TYPE_RTL8812AE,
+ HARDWARE_TYPE_RTL8822BE,
/* keep it last */
HARDWARE_TYPE_NUM
};
-#define IS_HARDWARE_TYPE_8192SU(rtlhal) \
- (rtlhal->hw_type == HARDWARE_TYPE_RTL8192SU)
-#define IS_HARDWARE_TYPE_8192SE(rtlhal) \
- (rtlhal->hw_type == HARDWARE_TYPE_RTL8192SE)
-#define IS_HARDWARE_TYPE_8192CE(rtlhal) \
- (rtlhal->hw_type == HARDWARE_TYPE_RTL8192CE)
-#define IS_HARDWARE_TYPE_8192CU(rtlhal) \
- (rtlhal->hw_type == HARDWARE_TYPE_RTL8192CU)
-#define IS_HARDWARE_TYPE_8192DE(rtlhal) \
- (rtlhal->hw_type == HARDWARE_TYPE_RTL8192DE)
-#define IS_HARDWARE_TYPE_8192DU(rtlhal) \
- (rtlhal->hw_type == HARDWARE_TYPE_RTL8192DU)
-#define IS_HARDWARE_TYPE_8723E(rtlhal) \
- (rtlhal->hw_type == HARDWARE_TYPE_RTL8723E)
-#define IS_HARDWARE_TYPE_8723U(rtlhal) \
- (rtlhal->hw_type == HARDWARE_TYPE_RTL8723U)
-#define IS_HARDWARE_TYPE_8192S(rtlhal) \
-(IS_HARDWARE_TYPE_8192SE(rtlhal) || IS_HARDWARE_TYPE_8192SU(rtlhal))
-#define IS_HARDWARE_TYPE_8192C(rtlhal) \
-(IS_HARDWARE_TYPE_8192CE(rtlhal) || IS_HARDWARE_TYPE_8192CU(rtlhal))
-#define IS_HARDWARE_TYPE_8192D(rtlhal) \
-(IS_HARDWARE_TYPE_8192DE(rtlhal) || IS_HARDWARE_TYPE_8192DU(rtlhal))
-#define IS_HARDWARE_TYPE_8723(rtlhal) \
-(IS_HARDWARE_TYPE_8723E(rtlhal) || IS_HARDWARE_TYPE_8723U(rtlhal))
+#define RTL_HW_TYPE(rtlpriv) (rtl_hal((struct rtl_priv *)rtlpriv)->hw_type)
+#define IS_NEW_GENERATION_IC(rtlpriv) \
+ (RTL_HW_TYPE(rtlpriv) >= HARDWARE_TYPE_RTL8192EE)
+#define IS_HARDWARE_TYPE_8192CE(rtlpriv) \
+ (RTL_HW_TYPE(rtlpriv) == HARDWARE_TYPE_RTL8192CE)
+#define IS_HARDWARE_TYPE_8812(rtlpriv) \
+ (RTL_HW_TYPE(rtlpriv) == HARDWARE_TYPE_RTL8812AE)
+#define IS_HARDWARE_TYPE_8821(rtlpriv) \
+ (RTL_HW_TYPE(rtlpriv) == HARDWARE_TYPE_RTL8821AE)
+#define IS_HARDWARE_TYPE_8723A(rtlpriv) \
+ (RTL_HW_TYPE(rtlpriv) == HARDWARE_TYPE_RTL8723AE)
+#define IS_HARDWARE_TYPE_8723B(rtlpriv) \
+ (RTL_HW_TYPE(rtlpriv) == HARDWARE_TYPE_RTL8723BE)
+#define IS_HARDWARE_TYPE_8192E(rtlpriv) \
+ (RTL_HW_TYPE(rtlpriv) == HARDWARE_TYPE_RTL8192EE)
+#define IS_HARDWARE_TYPE_8822B(rtlpriv) \
+ (RTL_HW_TYPE(rtlpriv) == HARDWARE_TYPE_RTL8822BE)
#define RX_HAL_IS_CCK_RATE(rxmcs) \
((rxmcs) == DESC_RATE1M || \
@@ -592,7 +586,7 @@ enum rtl_hal_state {
_HAL_STATE_START = 1,
};
-enum rtl_desc92_rate {
+enum rtl_desc_rate {
DESC_RATE1M = 0x00,
DESC_RATE2M = 0x01,
DESC_RATE5_5M = 0x02,
@@ -2477,6 +2471,8 @@ struct rtl_global_var {
spinlock_t glb_list_lock;
};
+#define IN_4WAY_TIMEOUT_TIME (30 * MSEC_PER_SEC) /* 30 seconds */
+
struct rtl_btc_info {
u8 bt_type;
u8 btcoexist;
@@ -2485,6 +2481,7 @@ struct rtl_btc_info {
u8 ap_num;
bool in_4way;
+ unsigned long in_4way_ts;
};
struct bt_coexist_info {
@@ -2558,6 +2555,8 @@ struct rtl_btc_ops {
void (*btc_halt_notify) (void);
void (*btc_btinfo_notify) (struct rtl_priv *rtlpriv,
u8 *tmp_buf, u8 length);
+ void (*btc_btmpinfo_notify)(struct rtl_priv *rtlpriv,
+ u8 *tmp_buf, u8 length);
bool (*btc_is_limited_dig) (struct rtl_priv *rtlpriv);
bool (*btc_is_disable_edca_turbo) (struct rtl_priv *rtlpriv);
bool (*btc_is_bt_disabled) (struct rtl_priv *rtlpriv);
diff --git a/drivers/net/wireless/rsi/Makefile b/drivers/net/wireless/rsi/Makefile
index a475c81..ebb8996 100644
--- a/drivers/net/wireless/rsi/Makefile
+++ b/drivers/net/wireless/rsi/Makefile
@@ -3,6 +3,7 @@ rsi_91x-y += rsi_91x_core.o
rsi_91x-y += rsi_91x_mac80211.o
rsi_91x-y += rsi_91x_mgmt.o
rsi_91x-y += rsi_91x_hal.o
+rsi_91x-y += rsi_91x_ps.o
rsi_91x-$(CONFIG_RSI_DEBUGFS) += rsi_91x_debugfs.o
rsi_usb-y += rsi_91x_usb.o rsi_91x_usb_ops.o
diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c
index 68f04a7..2b0516d 100644
--- a/drivers/net/wireless/rsi/rsi_91x_core.c
+++ b/drivers/net/wireless/rsi/rsi_91x_core.c
@@ -16,6 +16,7 @@
#include "rsi_mgmt.h"
#include "rsi_common.h"
+#include "rsi_hal.h"
/**
* rsi_determine_min_weight_queue() - This function determines the queue with
@@ -136,6 +137,10 @@ static u8 rsi_core_determine_hal_queue(struct rsi_common *common)
u8 q_num = INVALID_QUEUE;
u8 ii = 0;
+ if (skb_queue_len(&common->tx_queue[MGMT_BEACON_Q])) {
+ q_num = MGMT_BEACON_Q;
+ return q_num;
+ }
if (skb_queue_len(&common->tx_queue[MGMT_SOFT_Q])) {
if (!common->mgmt_q_block)
q_num = MGMT_SOFT_Q;
@@ -268,11 +273,11 @@ void rsi_core_qos_processor(struct rsi_common *common)
break;
}
- mutex_lock(&common->tx_rxlock);
+ mutex_lock(&common->tx_lock);
status = adapter->check_hw_queue_status(adapter, q_num);
if ((status <= 0)) {
- mutex_unlock(&common->tx_rxlock);
+ mutex_unlock(&common->tx_lock);
break;
}
@@ -287,30 +292,48 @@ void rsi_core_qos_processor(struct rsi_common *common)
skb = rsi_core_dequeue_pkt(common, q_num);
if (skb == NULL) {
rsi_dbg(ERR_ZONE, "skb null\n");
- mutex_unlock(&common->tx_rxlock);
+ mutex_unlock(&common->tx_lock);
break;
}
- if (q_num == MGMT_SOFT_Q)
+ if (q_num == MGMT_SOFT_Q) {
status = rsi_send_mgmt_pkt(common, skb);
- else
+ } else if (q_num == MGMT_BEACON_Q) {
+ status = rsi_send_pkt_to_bus(common, skb);
+ dev_kfree_skb(skb);
+ } else {
status = rsi_send_data_pkt(common, skb);
+ }
if (status) {
- mutex_unlock(&common->tx_rxlock);
+ mutex_unlock(&common->tx_lock);
break;
}
common->tx_stats.total_tx_pkt_send[q_num]++;
tstamp_2 = jiffies;
- mutex_unlock(&common->tx_rxlock);
+ mutex_unlock(&common->tx_lock);
if (time_after(tstamp_2, tstamp_1 + (300 * HZ) / 1000))
schedule();
}
}
+struct rsi_sta *rsi_find_sta(struct rsi_common *common, u8 *mac_addr)
+{
+ int i;
+
+ for (i = 0; i < common->max_stations; i++) {
+ if (!common->stations[i].sta)
+ continue;
+ if (!(memcmp(common->stations[i].sta->addr,
+ mac_addr, ETH_ALEN)))
+ return &common->stations[i];
+ }
+ return NULL;
+}
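
rsi_find_sta is a linear scan of the per-AP station table keyed by MAC address; the callers added in this patch all follow the same pattern (this fragment is from rsi_core_xmit below):

    rsta = rsi_find_sta(common, wh->addr1);
    if (!rsta)
            goto xmit_fail;             /* unknown peer: drop the frame */
    tx_params->sta_id = rsta->sta_id;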
+
/**
* rsi_core_xmit() - This function transmits the packets received from mac80211
* @common: Pointer to the driver private structure.
@@ -323,42 +346,63 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb)
struct rsi_hw *adapter = common->priv;
struct ieee80211_tx_info *info;
struct skb_info *tx_params;
- struct ieee80211_hdr *tmp_hdr = NULL;
+ struct ieee80211_hdr *wh;
+ struct ieee80211_vif *vif = adapter->vifs[0];
u8 q_num, tid = 0;
+ struct rsi_sta *rsta = NULL;
if ((!skb) || (!skb->len)) {
rsi_dbg(ERR_ZONE, "%s: Null skb/zero Length packet\n",
__func__);
goto xmit_fail;
}
- info = IEEE80211_SKB_CB(skb);
- tx_params = (struct skb_info *)info->driver_data;
- tmp_hdr = (struct ieee80211_hdr *)&skb->data[0];
-
if (common->fsm_state != FSM_MAC_INIT_DONE) {
rsi_dbg(ERR_ZONE, "%s: FSM state not open\n", __func__);
goto xmit_fail;
}
- if ((ieee80211_is_mgmt(tmp_hdr->frame_control)) ||
- (ieee80211_is_ctl(tmp_hdr->frame_control)) ||
- (ieee80211_is_qos_nullfunc(tmp_hdr->frame_control))) {
+ info = IEEE80211_SKB_CB(skb);
+ tx_params = (struct skb_info *)info->driver_data;
+ wh = (struct ieee80211_hdr *)&skb->data[0];
+ tx_params->sta_id = 0;
+
+ if ((ieee80211_is_mgmt(wh->frame_control)) ||
+ (ieee80211_is_ctl(wh->frame_control)) ||
+ (ieee80211_is_qos_nullfunc(wh->frame_control))) {
q_num = MGMT_SOFT_Q;
skb->priority = q_num;
} else {
- if (ieee80211_is_data_qos(tmp_hdr->frame_control)) {
+ if (ieee80211_is_data_qos(wh->frame_control)) {
tid = (skb->data[24] & IEEE80211_QOS_TID);
skb->priority = TID_TO_WME_AC(tid);
} else {
tid = IEEE80211_NONQOS_TID;
skb->priority = BE_Q;
}
+
q_num = skb->priority;
tx_params->tid = tid;
- tx_params->sta_id = 0;
+
+ if ((vif->type == NL80211_IFTYPE_AP) &&
+ (!is_broadcast_ether_addr(wh->addr1)) &&
+ (!is_multicast_ether_addr(wh->addr1))) {
+ rsta = rsi_find_sta(common, wh->addr1);
+ if (!rsta)
+ goto xmit_fail;
+ tx_params->sta_id = rsta->sta_id;
+ }
+
+ if (rsta) {
+ /* Start aggregation if not done for this tid */
+ if (!rsta->start_tx_aggr[tid]) {
+ rsta->start_tx_aggr[tid] = true;
+ ieee80211_start_tx_ba_session(rsta->sta,
+ tid, 0);
+ }
+ }
}
- if ((q_num != MGMT_SOFT_Q) &&
+ if ((q_num < MGMT_SOFT_Q) &&
((skb_queue_len(&common->tx_queue[q_num]) + 1) >=
DATA_QUEUE_WATER_MARK)) {
rsi_dbg(ERR_ZONE, "%s: sw queue full\n", __func__);
diff --git a/drivers/net/wireless/rsi/rsi_91x_debugfs.c b/drivers/net/wireless/rsi/rsi_91x_debugfs.c
index 4c0a493..e98eb55 100644
--- a/drivers/net/wireless/rsi/rsi_91x_debugfs.c
+++ b/drivers/net/wireless/rsi/rsi_91x_debugfs.c
@@ -130,6 +130,7 @@ static int rsi_stats_read(struct seq_file *seq, void *data)
"FSM_COMMON_DEV_PARAMS_SENT",
"FSM_BOOT_PARAMS_SENT",
"FSM_EEPROM_READ_MAC_ADDR",
+ "FSM_EEPROM_READ_RF_TYPE",
"FSM_RESET_MAC_SENT",
"FSM_RADIO_CAPS_SENT",
"FSM_BB_RF_PROG_SENT",
@@ -138,6 +139,8 @@ static int rsi_stats_read(struct seq_file *seq, void *data)
seq_puts(seq, "==> RSI STA DRIVER STATUS <==\n");
seq_puts(seq, "DRIVER_FSM_STATE: ");
+ BUILD_BUG_ON(ARRAY_SIZE(fsm_state) != NUM_FSM_STATES);
+
if (common->fsm_state <= FSM_MAC_INIT_DONE)
seq_printf(seq, "%s", fsm_state[common->fsm_state]);
diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
index c230359..070dfd6 100644
--- a/drivers/net/wireless/rsi/rsi_91x_hal.c
+++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
@@ -18,6 +18,7 @@
#include "rsi_mgmt.h"
#include "rsi_hal.h"
#include "rsi_sdio.h"
+#include "rsi_common.h"
/* FLASH Firmware */
static struct ta_metadata metadata_flash_content[] = {
@@ -25,99 +26,268 @@ static struct ta_metadata metadata_flash_content[] = {
{"rsi/rs9113_wlan_qspi.rps", 0x00010000},
};
-/**
- * rsi_send_data_pkt() - This function sends the recieved data packet from
- * driver to device.
- * @common: Pointer to the driver private structure.
- * @skb: Pointer to the socket buffer structure.
- *
- * Return: status: 0 on success, -1 on failure.
- */
-int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
+int rsi_send_pkt_to_bus(struct rsi_common *common, struct sk_buff *skb)
+{
+ struct rsi_hw *adapter = common->priv;
+ int status;
+
+ status = adapter->host_intf_ops->write_pkt(common->priv,
+ skb->data, skb->len);
+ return status;
+}
+
+static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb)
{
struct rsi_hw *adapter = common->priv;
- struct ieee80211_hdr *tmp_hdr;
+ struct ieee80211_hdr *wh = NULL;
+ struct ieee80211_tx_info *info;
+ struct ieee80211_conf *conf = &adapter->hw->conf;
+ struct ieee80211_vif *vif = adapter->vifs[0];
+ struct rsi_mgmt_desc *mgmt_desc;
+ struct skb_info *tx_params;
+ struct ieee80211_bss_conf *bss = NULL;
+ struct xtended_desc *xtend_desc = NULL;
+ u8 header_size;
+ u32 dword_align_bytes = 0;
+
+ if (skb->len > MAX_MGMT_PKT_SIZE) {
+ rsi_dbg(INFO_ZONE, "%s: Dropping mgmt pkt > 512\n", __func__);
+ return -EINVAL;
+ }
+
+ info = IEEE80211_SKB_CB(skb);
+ tx_params = (struct skb_info *)info->driver_data;
+
+ /* Update header size */
+ header_size = FRAME_DESC_SZ + sizeof(struct xtended_desc);
+ if (header_size > skb_headroom(skb)) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to add extended descriptor\n",
+ __func__);
+ return -ENOSPC;
+ }
+ skb_push(skb, header_size);
+ dword_align_bytes = ((unsigned long)skb->data & 0x3f);
+ if (dword_align_bytes > skb_headroom(skb)) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to add dword align\n", __func__);
+ return -ENOSPC;
+ }
+ skb_push(skb, dword_align_bytes);
+ header_size += dword_align_bytes;
+
+ tx_params->internal_hdr_size = header_size;
+ memset(&skb->data[0], 0, header_size);
+ bss = &info->control.vif->bss_conf;
+ wh = (struct ieee80211_hdr *)&skb->data[header_size];
+
+ mgmt_desc = (struct rsi_mgmt_desc *)skb->data;
+ xtend_desc = (struct xtended_desc *)&skb->data[FRAME_DESC_SZ];
+
+ rsi_set_len_qno(&mgmt_desc->len_qno, (skb->len - FRAME_DESC_SZ),
+ RSI_WIFI_MGMT_Q);
+ mgmt_desc->frame_type = TX_DOT11_MGMT;
+ mgmt_desc->header_len = MIN_802_11_HDR_LEN;
+ mgmt_desc->xtend_desc_size = header_size - FRAME_DESC_SZ;
+ mgmt_desc->frame_info |= cpu_to_le16(RATE_INFO_ENABLE);
+ if (is_broadcast_ether_addr(wh->addr1))
+ mgmt_desc->frame_info |= cpu_to_le16(RSI_BROADCAST_PKT);
+
+ mgmt_desc->seq_ctrl =
+ cpu_to_le16(IEEE80211_SEQ_TO_SN(le16_to_cpu(wh->seq_ctrl)));
+ if (common->band == NL80211_BAND_2GHZ)
+ mgmt_desc->rate_info = RSI_RATE_1;
+ else
+ mgmt_desc->rate_info = RSI_RATE_6;
+
+ if (conf_is_ht40(conf))
+ mgmt_desc->bbp_info = cpu_to_le16(FULL40M_ENABLE);
+
+ if (ieee80211_is_probe_req(wh->frame_control)) {
+ if (!bss->assoc) {
+ rsi_dbg(INFO_ZONE,
+ "%s: blocking mgmt queue\n", __func__);
+ mgmt_desc->misc_flags = RSI_DESC_REQUIRE_CFM_TO_HOST;
+ xtend_desc->confirm_frame_type = PROBEREQ_CONFIRM;
+ common->mgmt_q_block = true;
+ rsi_dbg(INFO_ZONE, "Mgmt queue blocked\n");
+ }
+ }
+
+ if (ieee80211_is_probe_resp(wh->frame_control)) {
+ mgmt_desc->misc_flags |= (RSI_ADD_DELTA_TSF_VAP_ID |
+ RSI_FETCH_RETRY_CNT_FRM_HST);
+#define PROBE_RESP_RETRY_CNT 3
+ xtend_desc->retry_cnt = PROBE_RESP_RETRY_CNT;
+ }
+
+ if ((vif->type == NL80211_IFTYPE_AP) &&
+ (ieee80211_is_action(wh->frame_control))) {
+ struct rsi_sta *rsta = rsi_find_sta(common, wh->addr1);
+
+ if (rsta)
+ mgmt_desc->sta_id = tx_params->sta_id;
+ else
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* This function prepares descriptor for given data packet */
+static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb)
+{
+ struct rsi_hw *adapter = common->priv;
+ struct ieee80211_vif *vif;
+ struct ieee80211_hdr *wh = NULL;
struct ieee80211_tx_info *info;
struct skb_info *tx_params;
struct ieee80211_bss_conf *bss;
- int status;
+ struct rsi_data_desc *data_desc;
+ struct xtended_desc *xtend_desc;
u8 ieee80211_size = MIN_802_11_HDR_LEN;
- u8 extnd_size;
- __le16 *frame_desc;
+ u8 header_size;
+ u8 vap_id = 0;
+ u8 dword_align_bytes;
u16 seq_num;
info = IEEE80211_SKB_CB(skb);
bss = &info->control.vif->bss_conf;
tx_params = (struct skb_info *)info->driver_data;
- if (!bss->assoc) {
- status = -EINVAL;
- goto err;
+ header_size = FRAME_DESC_SZ + sizeof(struct xtended_desc);
+ if (header_size > skb_headroom(skb)) {
+ rsi_dbg(ERR_ZONE, "%s: Unable to send pkt\n", __func__);
+ return -ENOSPC;
}
+ skb_push(skb, header_size);
+ dword_align_bytes = ((unsigned long)skb->data & 0x3f);
+ if (dword_align_bytes > skb_headroom(skb)) {
+ rsi_dbg(ERR_ZONE, "%s: Not enough headroom\n", __func__);
+ return -ENOSPC;
+ }
+ skb_push(skb, dword_align_bytes);
+ header_size += dword_align_bytes;
- tmp_hdr = (struct ieee80211_hdr *)&skb->data[0];
- seq_num = (le16_to_cpu(tmp_hdr->seq_ctrl) >> 4);
+ tx_params->internal_hdr_size = header_size;
+ data_desc = (struct rsi_data_desc *)skb->data;
+ memset(data_desc, 0, header_size);
- extnd_size = ((uintptr_t)skb->data & 0x3);
+ xtend_desc = (struct xtended_desc *)&skb->data[FRAME_DESC_SZ];
+ wh = (struct ieee80211_hdr *)&skb->data[header_size];
+ seq_num = IEEE80211_SEQ_TO_SN(le16_to_cpu(wh->seq_ctrl));
+ vif = adapter->vifs[0];
- if ((FRAME_DESC_SZ + extnd_size) > skb_headroom(skb)) {
- rsi_dbg(ERR_ZONE, "%s: Unable to send pkt\n", __func__);
- status = -ENOSPC;
- goto err;
- }
+ data_desc->xtend_desc_size = header_size - FRAME_DESC_SZ;
- skb_push(skb, (FRAME_DESC_SZ + extnd_size));
- frame_desc = (__le16 *)&skb->data[0];
- memset((u8 *)frame_desc, 0, FRAME_DESC_SZ);
-
- if (ieee80211_is_data_qos(tmp_hdr->frame_control)) {
+ if (ieee80211_is_data_qos(wh->frame_control)) {
ieee80211_size += 2;
- frame_desc[6] |= cpu_to_le16(BIT(12));
+ data_desc->mac_flags |= cpu_to_le16(RSI_QOS_ENABLE);
}
+ if ((vif->type == NL80211_IFTYPE_STATION) &&
+ (adapter->ps_state == PS_ENABLED))
+ wh->frame_control |= cpu_to_le16(RSI_SET_PS_ENABLE);
+
if ((!(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) &&
(common->secinfo.security_enable)) {
if (rsi_is_cipher_wep(common))
ieee80211_size += 4;
else
ieee80211_size += 8;
- frame_desc[6] |= cpu_to_le16(BIT(15));
+ data_desc->mac_flags |= cpu_to_le16(RSI_ENCRYPT_PKT);
}
+ rsi_set_len_qno(&data_desc->len_qno, (skb->len - FRAME_DESC_SZ),
+ RSI_WIFI_DATA_Q);
+ data_desc->header_len = ieee80211_size;
- frame_desc[0] = cpu_to_le16((skb->len - FRAME_DESC_SZ) |
- (RSI_WIFI_DATA_Q << 12));
- frame_desc[2] = cpu_to_le16((extnd_size) | (ieee80211_size) << 8);
-
- if (common->min_rate != 0xffff) {
+ if (common->min_rate != RSI_RATE_AUTO) {
/* Send fixed rate */
- frame_desc[3] = cpu_to_le16(RATE_INFO_ENABLE);
- frame_desc[4] = cpu_to_le16(common->min_rate);
+ data_desc->frame_info = cpu_to_le16(RATE_INFO_ENABLE);
+ data_desc->rate_info = cpu_to_le16(common->min_rate);
if (conf_is_ht40(&common->priv->hw->conf))
- frame_desc[5] = cpu_to_le16(FULL40M_ENABLE);
+ data_desc->bbp_info = cpu_to_le16(FULL40M_ENABLE);
- if (common->vif_info[0].sgi) {
- if (common->min_rate & 0x100) /* Only MCS rates */
- frame_desc[4] |=
- cpu_to_le16(ENABLE_SHORTGI_RATE);
+ if ((common->vif_info[0].sgi) && (common->min_rate & 0x100)) {
+ /* Only MCS rates */
+ data_desc->rate_info |=
+ cpu_to_le16(ENABLE_SHORTGI_RATE);
}
+ }
+ if (skb->protocol == cpu_to_be16(ETH_P_PAE)) {
+ rsi_dbg(INFO_ZONE, "*** Tx EAPOL ***\n");
+
+ data_desc->frame_info = cpu_to_le16(RATE_INFO_ENABLE);
+ if (common->band == NL80211_BAND_5GHZ)
+ data_desc->rate_info = cpu_to_le16(RSI_RATE_6);
+ else
+ data_desc->rate_info = cpu_to_le16(RSI_RATE_1);
+ data_desc->mac_flags |= cpu_to_le16(RSI_REKEY_PURPOSE);
+ data_desc->misc_flags |= RSI_FETCH_RETRY_CNT_FRM_HST;
+#define EAPOL_RETRY_CNT 15
+ xtend_desc->retry_cnt = EAPOL_RETRY_CNT;
+ }
+
+ data_desc->mac_flags |= cpu_to_le16(seq_num & 0xfff); /* keep flags set above */
+ data_desc->qid_tid = ((skb->priority & 0xf) |
+ ((tx_params->tid & 0xf) << 4));
+ data_desc->sta_id = tx_params->sta_id;
+
+ if ((is_broadcast_ether_addr(wh->addr1)) ||
+ (is_multicast_ether_addr(wh->addr1))) {
+ data_desc->frame_info = cpu_to_le16(RATE_INFO_ENABLE);
+ data_desc->frame_info |= cpu_to_le16(RSI_BROADCAST_PKT);
+ data_desc->sta_id = vap_id;
+
+ if (vif->type == NL80211_IFTYPE_AP) {
+ if (common->band == NL80211_BAND_5GHZ)
+ data_desc->rate_info = cpu_to_le16(RSI_RATE_6);
+ else
+ data_desc->rate_info = cpu_to_le16(RSI_RATE_1);
+ }
}
+ if ((vif->type == NL80211_IFTYPE_AP) &&
+ (ieee80211_has_moredata(wh->frame_control)))
+ data_desc->frame_info |= cpu_to_le16(MORE_DATA_PRESENT);
+
+ return 0;
+}
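
Both prepare functions share the same prepend-and-align skeleton: push FRAME_DESC_SZ plus the extended descriptor, then push however many extra bytes make skb->data 64-byte aligned (the 0x3f mask), and record the total in tx_params->internal_hdr_size so that rsi_indicate_tx_status (see the mac80211.c hunk below) can skb_pull() it back off before the skb is returned to mac80211. Reduced to a sketch with this driver's names:

    header_size = FRAME_DESC_SZ + sizeof(struct xtended_desc);
    if (header_size > skb_headroom(skb))
            return -ENOSPC;
    skb_push(skb, header_size);

    dword_align_bytes = (unsigned long)skb->data & 0x3f;
    if (dword_align_bytes > skb_headroom(skb))
            return -ENOSPC;
    skb_push(skb, dword_align_bytes);
    header_size += dword_align_bytes;

    tx_params->internal_hdr_size = header_size;  /* pulled off on tx status */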
+
+/* This function sends received data packet from driver to device */
+int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
+{
+ struct rsi_hw *adapter = common->priv;
+ struct ieee80211_vif *vif = adapter->vifs[0];
+ struct ieee80211_tx_info *info;
+ struct ieee80211_bss_conf *bss;
+ int status = -EINVAL;
+
+ if (!skb)
+ return 0;
+ if (common->iface_down)
+ goto err;
- frame_desc[6] |= cpu_to_le16(seq_num & 0xfff);
- frame_desc[7] = cpu_to_le16(((tx_params->tid & 0xf) << 4) |
- (skb->priority & 0xf) |
- (tx_params->sta_id << 8));
+ info = IEEE80211_SKB_CB(skb);
+ if (!info->control.vif)
+ goto err;
+ bss = &info->control.vif->bss_conf;
+
+ if ((vif->type == NL80211_IFTYPE_STATION) && (!bss->assoc))
+ goto err;
+
+ status = rsi_prepare_data_desc(common, skb);
+ if (status)
+ goto err;
status = adapter->host_intf_ops->write_pkt(common->priv, skb->data,
skb->len);
if (status)
- rsi_dbg(ERR_ZONE, "%s: Failed to write pkt\n",
- __func__);
+ rsi_dbg(ERR_ZONE, "%s: Failed to write pkt\n", __func__);
err:
++common->tx_stats.total_tx_pkt_freed[skb->priority];
- rsi_indicate_tx_status(common->priv, skb, status);
+ rsi_indicate_tx_status(adapter, skb, status);
return status;
}
@@ -133,22 +303,17 @@ int rsi_send_mgmt_pkt(struct rsi_common *common,
struct sk_buff *skb)
{
struct rsi_hw *adapter = common->priv;
- struct ieee80211_hdr *wh;
struct ieee80211_tx_info *info;
- struct ieee80211_bss_conf *bss;
- struct ieee80211_hw *hw = adapter->hw;
- struct ieee80211_conf *conf = &hw->conf;
struct skb_info *tx_params;
int status = -E2BIG;
- __le16 *msg;
u8 extnd_size;
- u8 vap_id = 0;
info = IEEE80211_SKB_CB(skb);
tx_params = (struct skb_info *)info->driver_data;
extnd_size = ((uintptr_t)skb->data & 0x3);
if (tx_params->flags & INTERNAL_MGMT_PKT) {
+ skb->data[1] |= BIT(7); /* Immediate wakeup bit */
if ((extnd_size) > skb_headroom(skb)) {
rsi_dbg(ERR_ZONE, "%s: Unable to send pkt\n", __func__);
dev_kfree_skb(skb);
@@ -167,58 +332,73 @@ int rsi_send_mgmt_pkt(struct rsi_common *common,
return status;
}
- bss = &info->control.vif->bss_conf;
- wh = (struct ieee80211_hdr *)&skb->data[0];
-
if (FRAME_DESC_SZ > skb_headroom(skb))
goto err;
- skb_push(skb, FRAME_DESC_SZ);
- memset(skb->data, 0, FRAME_DESC_SZ);
- msg = (__le16 *)skb->data;
+ rsi_prepare_mgmt_desc(common, skb);
+ status = adapter->host_intf_ops->write_pkt(common->priv,
+ (u8 *)skb->data, skb->len);
+ if (status)
+ rsi_dbg(ERR_ZONE, "%s: Failed to write the packet\n", __func__);
- if (skb->len > MAX_MGMT_PKT_SIZE) {
- rsi_dbg(INFO_ZONE, "%s: Dropping mgmt pkt > 512\n", __func__);
- goto err;
+err:
+ rsi_indicate_tx_status(common->priv, skb, status);
+ return status;
+}
+
+int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb)
+{
+ struct rsi_hw *adapter = (struct rsi_hw *)common->priv;
+ struct rsi_data_desc *bcn_frm;
+ struct ieee80211_hw *hw = common->priv->hw;
+ struct ieee80211_conf *conf = &hw->conf;
+ struct sk_buff *mac_bcn;
+ u8 vap_id = 0;
+ u16 tim_offset;
+
+ mac_bcn = ieee80211_beacon_get_tim(adapter->hw,
+ adapter->vifs[adapter->sc_nvifs - 1],
+ &tim_offset, NULL);
+ if (!mac_bcn) {
+ rsi_dbg(ERR_ZONE, "Failed to get beacon from mac80211\n");
+ return -EINVAL;
}
- msg[0] = cpu_to_le16((skb->len - FRAME_DESC_SZ) |
- (RSI_WIFI_MGMT_Q << 12));
- msg[1] = cpu_to_le16(TX_DOT11_MGMT);
- msg[2] = cpu_to_le16(MIN_802_11_HDR_LEN << 8);
- msg[3] = cpu_to_le16(RATE_INFO_ENABLE);
- msg[6] = cpu_to_le16(le16_to_cpu(wh->seq_ctrl) >> 4);
+ common->beacon_cnt++;
+ bcn_frm = (struct rsi_data_desc *)skb->data;
+ rsi_set_len_qno(&bcn_frm->len_qno, mac_bcn->len, RSI_WIFI_DATA_Q);
+ bcn_frm->header_len = MIN_802_11_HDR_LEN;
+ bcn_frm->frame_info = cpu_to_le16(RSI_DATA_DESC_MAC_BBP_INFO |
+ RSI_DATA_DESC_NO_ACK_IND |
+ RSI_DATA_DESC_BEACON_FRAME |
+ RSI_DATA_DESC_INSERT_TSF |
+ RSI_DATA_DESC_INSERT_SEQ_NO |
+ RATE_INFO_ENABLE);
+ bcn_frm->rate_info = cpu_to_le16(vap_id << 14);
+ bcn_frm->qid_tid = BEACON_HW_Q;
- if (wh->addr1[0] & BIT(0))
- msg[3] |= cpu_to_le16(RSI_BROADCAST_PKT);
+ if (conf_is_ht40_plus(conf)) {
+ bcn_frm->bbp_info = cpu_to_le16(LOWER_20_ENABLE);
+ bcn_frm->bbp_info |= cpu_to_le16(LOWER_20_ENABLE >> 12);
+ } else if (conf_is_ht40_minus(conf)) {
+ bcn_frm->bbp_info = cpu_to_le16(UPPER_20_ENABLE);
+ bcn_frm->bbp_info |= cpu_to_le16(UPPER_20_ENABLE >> 12);
+ }
if (common->band == NL80211_BAND_2GHZ)
- msg[4] = cpu_to_le16(RSI_11B_MODE);
+ bcn_frm->bbp_info |= cpu_to_le16(RSI_RATE_1);
else
- msg[4] = cpu_to_le16((RSI_RATE_6 & 0x0f) | RSI_11G_MODE);
+ bcn_frm->bbp_info |= cpu_to_le16(RSI_RATE_6);
- if (conf_is_ht40(conf)) {
- msg[4] = cpu_to_le16(0xB | RSI_11G_MODE);
- msg[5] = cpu_to_le16(0x6);
- }
+ if (mac_bcn->data[tim_offset + 2] == 0)
+ bcn_frm->frame_info |= cpu_to_le16(RSI_DATA_DESC_DTIM_BEACON);
- /* Indicate to firmware to give cfm */
- if ((skb->data[16] == IEEE80211_STYPE_PROBE_REQ) && (!bss->assoc)) {
- msg[1] |= cpu_to_le16(BIT(10));
- msg[7] = cpu_to_le16(PROBEREQ_CONFIRM);
- common->mgmt_q_block = true;
- }
+ memcpy(&skb->data[FRAME_DESC_SZ], mac_bcn->data, mac_bcn->len);
+ skb_put(skb, mac_bcn->len + FRAME_DESC_SZ);
- msg[7] |= cpu_to_le16(vap_id << 8);
+ dev_kfree_skb(mac_bcn);
- status = adapter->host_intf_ops->write_pkt(common->priv, (u8 *)msg,
- skb->len);
- if (status)
- rsi_dbg(ERR_ZONE, "%s: Failed to write the packet\n", __func__);
-
-err:
- rsi_indicate_tx_status(common->priv, skb, status);
- return status;
+ return 0;
}
static void bl_cmd_timeout(unsigned long priv)
diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
index 021e5ac..fa12c05 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
@@ -18,6 +18,7 @@
#include "rsi_debugfs.h"
#include "rsi_mgmt.h"
#include "rsi_common.h"
+#include "rsi_ps.h"
static const struct ieee80211_channel rsi_2ghz_channels[] = {
{ .band = NL80211_BAND_2GHZ, .center_freq = 2412,
@@ -121,6 +122,23 @@ const u16 rsi_mcsrates[8] = {
RSI_RATE_MCS4, RSI_RATE_MCS5, RSI_RATE_MCS6, RSI_RATE_MCS7
};
+static const u32 rsi_max_ap_stas[16] = {
+ 32, /* 1 - Wi-Fi alone */
+ 0, /* 2 */
+ 0, /* 3 */
+ 0, /* 4 - BT EDR alone */
+ 4, /* 5 - STA + BT EDR */
+ 32, /* 6 - AP + BT EDR */
+ 0, /* 7 */
+ 0, /* 8 - BT LE alone */
+ 4, /* 9 - STA + BT LE */
+ 0, /* 10 */
+ 0, /* 11 */
+ 0, /* 12 */
+ 1, /* 13 - STA + BT Dual */
+ 4, /* 14 - AP + BT Dual */
+};
+
/**
* rsi_is_cipher_wep() - This function determines if the cipher is WEP or not.
* @common: Pointer to the driver private structure.
@@ -229,12 +247,20 @@ void rsi_indicate_tx_status(struct rsi_hw *adapter,
int status)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct skb_info *tx_params;
- memset(info->driver_data, 0, IEEE80211_TX_INFO_DRIVER_DATA_SIZE);
+ if (!adapter->hw) {
+ rsi_dbg(ERR_ZONE, "##### No MAC #####\n");
+ return;
+ }
if (!status)
info->flags |= IEEE80211_TX_STAT_ACK;
+ tx_params = (struct skb_info *)info->driver_data;
+ skb_pull(skb, tx_params->internal_hdr_size);
+ memset(info->driver_data, 0, IEEE80211_TX_INFO_DRIVER_DATA_SIZE);
+
ieee80211_tx_status_irqsafe(adapter->hw, skb);
}
@@ -271,11 +297,12 @@ static int rsi_mac80211_start(struct ieee80211_hw *hw)
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
+ rsi_dbg(ERR_ZONE, "===> Interface UP <===\n");
mutex_lock(&common->mutex);
common->iface_down = false;
- mutex_unlock(&common->mutex);
-
+ wiphy_rfkill_start_polling(hw->wiphy);
rsi_send_rx_filter_frame(common, 0);
+ mutex_unlock(&common->mutex);
return 0;
}
@@ -291,8 +318,14 @@ static void rsi_mac80211_stop(struct ieee80211_hw *hw)
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
+ rsi_dbg(ERR_ZONE, "===> Interface DOWN <===\n");
mutex_lock(&common->mutex);
common->iface_down = true;
+ wiphy_rfkill_stop_polling(hw->wiphy);
+
+ /* Block all rx frames */
+ rsi_send_rx_filter_frame(common, 0xffff);
+
mutex_unlock(&common->mutex);
}
@@ -309,24 +342,51 @@ static int rsi_mac80211_add_interface(struct ieee80211_hw *hw,
{
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
+ enum opmode intf_mode;
int ret = -EOPNOTSUPP;
+ vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD;
mutex_lock(&common->mutex);
+
+ if (adapter->sc_nvifs > 1) {
+ mutex_unlock(&common->mutex);
+ return -EOPNOTSUPP;
+ }
+
switch (vif->type) {
case NL80211_IFTYPE_STATION:
- if (!adapter->sc_nvifs) {
- ++adapter->sc_nvifs;
- adapter->vifs[0] = vif;
- ret = rsi_set_vap_capabilities(common,
- STA_OPMODE,
- VAP_ADD);
- }
+ rsi_dbg(INFO_ZONE, "Station Mode");
+ intf_mode = STA_OPMODE;
+ break;
+ case NL80211_IFTYPE_AP:
+ rsi_dbg(INFO_ZONE, "AP Mode");
+ intf_mode = AP_OPMODE;
break;
default:
rsi_dbg(ERR_ZONE,
"%s: Interface type %d not supported\n", __func__,
vif->type);
+ goto out;
+ }
+
+ adapter->vifs[adapter->sc_nvifs++] = vif;
+ ret = rsi_set_vap_capabilities(common, intf_mode, common->mac_addr,
+ 0, VAP_ADD);
+ if (ret) {
+ rsi_dbg(ERR_ZONE, "Failed to set VAP capabilities\n");
+ goto out;
+ }
+
+ if (vif->type == NL80211_IFTYPE_AP) {
+ int i;
+
+ rsi_send_rx_filter_frame(common, DISALLOW_BEACONS);
+ common->min_rate = RSI_RATE_AUTO;
+ for (i = 0; i < common->max_stations; i++)
+ common->stations[i].sta = NULL;
}
+
+out:
mutex_unlock(&common->mutex);
return ret;
@@ -345,13 +405,32 @@ static void rsi_mac80211_remove_interface(struct ieee80211_hw *hw,
{
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
+ enum opmode opmode;
+
+ rsi_dbg(INFO_ZONE, "Remove Interface Called\n");
mutex_lock(&common->mutex);
- if (vif->type == NL80211_IFTYPE_STATION) {
- adapter->sc_nvifs--;
- rsi_set_vap_capabilities(common, STA_OPMODE, VAP_DELETE);
+
+ if (adapter->sc_nvifs <= 0) {
+ mutex_unlock(&common->mutex);
+ return;
}
+ switch (vif->type) {
+ case NL80211_IFTYPE_STATION:
+ opmode = STA_OPMODE;
+ break;
+ case NL80211_IFTYPE_AP:
+ opmode = AP_OPMODE;
+ break;
+ default:
+ mutex_unlock(&common->mutex);
+ return;
+ }
+ rsi_set_vap_capabilities(common, opmode, vif->addr,
+ 0, VAP_DELETE);
+ adapter->sc_nvifs--;
+
if (!memcmp(adapter->vifs[0], vif, sizeof(struct ieee80211_vif)))
adapter->vifs[0] = NULL;
mutex_unlock(&common->mutex);
@@ -452,6 +531,8 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw,
{
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
+ struct ieee80211_vif *vif = adapter->vifs[0];
+ struct ieee80211_conf *conf = &hw->conf;
int status = -EOPNOTSUPP;
mutex_lock(&common->mutex);
@@ -465,6 +546,28 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw,
status = rsi_config_power(hw);
}
+ /* Power save parameters */
+ if ((changed & IEEE80211_CONF_CHANGE_PS) &&
+ (vif->type == NL80211_IFTYPE_STATION)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&adapter->ps_lock, flags);
+ if (conf->flags & IEEE80211_CONF_PS)
+ rsi_enable_ps(adapter);
+ else
+ rsi_disable_ps(adapter);
+ spin_unlock_irqrestore(&adapter->ps_lock, flags);
+ }
+
+ /* RTS threshold */
+ if (changed & WIPHY_PARAM_RTS_THRESHOLD) {
+ rsi_dbg(INFO_ZONE, "RTS threshold\n");
+ if ((common->rts_threshold) <= IEEE80211_MAX_RTS_THRESHOLD) {
+ rsi_dbg(INFO_ZONE,
+ "%s: Sending vap updates....\n", __func__);
+ status = rsi_send_vap_dynamic_update(common);
+ }
+ }
mutex_unlock(&common->mutex);
return status;
@@ -507,6 +610,8 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw,
{
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
+ struct ieee80211_bss_conf *bss = &vif->bss_conf;
+ struct ieee80211_conf *conf = &hw->conf;
u16 rx_filter_word = 0;
mutex_lock(&common->mutex);
@@ -521,10 +626,24 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw,
rsi_send_rx_filter_frame(common, rx_filter_word);
}
rsi_inform_bss_status(common,
+ STA_OPMODE,
bss_conf->assoc,
bss_conf->bssid,
bss_conf->qos,
- bss_conf->aid);
+ bss_conf->aid,
+ NULL, 0);
+ adapter->ps_info.dtim_interval_duration = bss->dtim_period;
+ adapter->ps_info.listen_interval = conf->listen_interval;
+
+ /* If U-APSD is updated, send ps parameters to firmware */
+ if (bss->assoc) {
+ if (common->uapsd_bitmap) {
+ rsi_dbg(INFO_ZONE, "Configuring UAPSD\n");
+ rsi_conf_uapsd(adapter);
+ }
+ } else {
+ common->uapsd_bitmap = 0;
+ }
}
if (changed & BSS_CHANGED_CQM) {
@@ -535,6 +654,18 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw,
common->cqm_info.rssi_thold,
common->cqm_info.rssi_hyst);
}
+
+ if ((changed & BSS_CHANGED_BEACON_ENABLED) &&
+ (vif->type == NL80211_IFTYPE_AP)) {
+ if (bss->enable_beacon) {
+ rsi_dbg(INFO_ZONE, "===> BEACON ENABLED <===\n");
+ common->beacon_enabled = 1;
+ } else {
+ rsi_dbg(INFO_ZONE, "===> BEACON DISABLED <===\n");
+ common->beacon_enabled = 0;
+ }
+ }
+
mutex_unlock(&common->mutex);
}
@@ -606,6 +737,12 @@ static int rsi_mac80211_conf_tx(struct ieee80211_hw *hw,
memcpy(&common->edca_params[idx],
params,
sizeof(struct ieee80211_tx_queue_params));
+
+ if (params->uapsd)
+ common->uapsd_bitmap |= idx;
+ else
+ common->uapsd_bitmap &= (~idx);
+
mutex_unlock(&common->mutex);
return 0;
@@ -617,15 +754,18 @@ static int rsi_mac80211_conf_tx(struct ieee80211_hw *hw,
* @vif: Pointer to the ieee80211_vif structure.
* @key: Pointer to the ieee80211_key_conf structure.
*
- * Return: status: 0 on success, -1 on failure.
+ * Return: status: 0 on success, negative error codes on failure.
*/
static int rsi_hal_key_config(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
- struct ieee80211_key_conf *key)
+ struct ieee80211_key_conf *key,
+ struct ieee80211_sta *sta)
{
struct rsi_hw *adapter = hw->priv;
+ struct rsi_sta *rsta = NULL;
int status;
u8 key_type;
+ s16 sta_id = 0;
if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE)
key_type = RSI_PAIRWISE_KEY;
@@ -635,23 +775,35 @@ static int rsi_hal_key_config(struct ieee80211_hw *hw,
rsi_dbg(ERR_ZONE, "%s: Cipher 0x%x key_type: %d key_len: %d\n",
__func__, key->cipher, key_type, key->keylen);
- if ((key->cipher == WLAN_CIPHER_SUITE_WEP104) ||
- (key->cipher == WLAN_CIPHER_SUITE_WEP40)) {
- status = rsi_hal_load_key(adapter->priv,
- key->key,
- key->keylen,
- RSI_PAIRWISE_KEY,
- key->keyidx,
- key->cipher);
- if (status)
- return status;
+ if (vif->type == NL80211_IFTYPE_AP) {
+ if (sta) {
+ rsta = rsi_find_sta(adapter->priv, sta->addr);
+ if (rsta)
+ sta_id = rsta->sta_id;
+ }
+ adapter->priv->key = key;
+ } else {
+ if ((key->cipher == WLAN_CIPHER_SUITE_WEP104) ||
+ (key->cipher == WLAN_CIPHER_SUITE_WEP40)) {
+ status = rsi_hal_load_key(adapter->priv,
+ key->key,
+ key->keylen,
+ RSI_PAIRWISE_KEY,
+ key->keyidx,
+ key->cipher,
+ sta_id);
+ if (status)
+ return status;
+ }
}
+
return rsi_hal_load_key(adapter->priv,
key->key,
key->keylen,
key_type,
key->keyidx,
- key->cipher);
+ key->cipher,
+ sta_id);
}
/**
@@ -679,7 +831,7 @@ static int rsi_mac80211_set_key(struct ieee80211_hw *hw,
switch (cmd) {
case SET_KEY:
secinfo->security_enable = true;
- status = rsi_hal_key_config(hw, vif, key);
+ status = rsi_hal_key_config(hw, vif, key, sta);
if (status) {
mutex_unlock(&common->mutex);
return status;
@@ -697,10 +849,11 @@ static int rsi_mac80211_set_key(struct ieee80211_hw *hw,
break;
case DISABLE_KEY:
- secinfo->security_enable = false;
+ if (vif->type == NL80211_IFTYPE_STATION)
+ secinfo->security_enable = false;
rsi_dbg(ERR_ZONE, "%s: RSI del key\n", __func__);
memset(key, 0, sizeof(struct ieee80211_key_conf));
- status = rsi_hal_key_config(hw, vif, key);
+ status = rsi_hal_key_config(hw, vif, key, sta);
break;
default:
@@ -729,9 +882,11 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw,
int status = -EOPNOTSUPP;
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
- u16 seq_no = 0;
+ struct rsi_sta *rsta = NULL;
+ u16 seq_no = 0, seq_start = 0;
u8 ii = 0;
struct ieee80211_sta *sta = params->sta;
+ u8 sta_id = 0;
enum ieee80211_ampdu_mlme_action action = params->action;
u16 tid = params->tid;
u16 *ssn = &params->ssn;
@@ -743,17 +898,32 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw,
}
mutex_lock(&common->mutex);
- rsi_dbg(INFO_ZONE, "%s: AMPDU action %d called\n", __func__, action);
+
if (ssn != NULL)
seq_no = *ssn;
+ if (vif->type == NL80211_IFTYPE_AP) {
+ rsta = rsi_find_sta(common, sta->addr);
+ if (!rsta) {
+ rsi_dbg(ERR_ZONE, "No station mapped\n");
+ status = 0;
+ goto unlock;
+ }
+ sta_id = rsta->sta_id;
+ }
+
+ rsi_dbg(INFO_ZONE,
+ "%s: AMPDU action tid=%d ssn=0x%x, buf_size=%d sta_id=%d\n",
+ __func__, tid, seq_no, buf_size, sta_id);
+
switch (action) {
case IEEE80211_AMPDU_RX_START:
status = rsi_send_aggregation_params_frame(common,
tid,
seq_no,
buf_size,
- STA_RX_ADDBA_DONE);
+ STA_RX_ADDBA_DONE,
+ sta_id);
break;
case IEEE80211_AMPDU_RX_STOP:
@@ -761,11 +931,15 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw,
tid,
0,
buf_size,
- STA_RX_DELBA);
+ STA_RX_DELBA,
+ sta_id);
break;
case IEEE80211_AMPDU_TX_START:
- common->vif_info[ii].seq_start = seq_no;
+ if (vif->type == NL80211_IFTYPE_STATION)
+ common->vif_info[ii].seq_start = seq_no;
+ else if (vif->type == NL80211_IFTYPE_AP)
+ rsta->seq_start[tid] = seq_no;
ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
status = 0;
break;
@@ -777,18 +951,23 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw,
tid,
seq_no,
buf_size,
- STA_TX_DELBA);
+ STA_TX_DELBA,
+ sta_id);
if (!status)
ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
break;
case IEEE80211_AMPDU_TX_OPERATIONAL:
+ if (vif->type == NL80211_IFTYPE_STATION)
+ seq_start = common->vif_info[ii].seq_start;
+ else if (vif->type == NL80211_IFTYPE_AP)
+ seq_start = rsta->seq_start[tid];
status = rsi_send_aggregation_params_frame(common,
tid,
- common->vif_info[ii]
- .seq_start,
+ seq_start,
buf_size,
- STA_TX_ADDBA_DONE);
+ STA_TX_ADDBA_DONE,
+ sta_id);
break;
default:
@@ -796,6 +975,7 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw,
break;
}
+unlock:
mutex_unlock(&common->mutex);
return status;
}
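
The TX aggregation cases above keep the ADDBA starting sequence number in a single per-interface field for STA mode (vif_info[0].seq_start) and in a per-station, per-TID table for AP mode (rsta->seq_start[tid]). A compile-ready sketch of the per-peer table the AP path relies on; struct peer_aggr is an illustrative stand-in for struct rsi_sta:

#include <stdint.h>
#include <stdio.h>

#define NUM_TIDS 16

/* Illustrative stand-in for the per-peer state in struct rsi_sta. */
struct peer_aggr {
	uint16_t seq_start[NUM_TIDS];
	int start_tx_aggr[NUM_TIDS];
};

/* IEEE80211_AMPDU_TX_START: remember the SSN until the session
 * becomes operational. */
static void tx_ba_start(struct peer_aggr *p, unsigned int tid, uint16_t ssn)
{
	p->seq_start[tid] = ssn;
	p->start_tx_aggr[tid] = 1;
}

/* IEEE80211_AMPDU_TX_OPERATIONAL: replay the remembered SSN into the
 * aggregation-params frame. */
static uint16_t tx_ba_operational(const struct peer_aggr *p, unsigned int tid)
{
	return p->seq_start[tid];
}

int main(void)
{
	struct peer_aggr peer = { { 0 }, { 0 } };

	tx_ba_start(&peer, 5, 0x123);
	printf("tid 5 ssn: 0x%x\n", tx_ba_operational(&peer, 5));
	return 0;
}
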
@@ -1014,7 +1194,7 @@ static void rsi_set_min_rate(struct ieee80211_hw *hw,
* @vif: Pointer to the ieee80211_vif structure.
* @sta: Pointer to the ieee80211_sta structure.
*
- * Return: 0 on success, -1 on failure.
+ * Return: 0 on success, negative error codes on failure.
*/
static int rsi_mac80211_sta_add(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
@@ -1022,22 +1202,101 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw,
{
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
+ bool sta_exist = false;
+ struct rsi_sta *rsta;
+ int status = 0;
+
+ rsi_dbg(INFO_ZONE, "Station Add: %pM\n", sta->addr);
mutex_lock(&common->mutex);
- rsi_set_min_rate(hw, sta, common);
+ if (vif->type == NL80211_IFTYPE_AP) {
+ u8 cnt;
+ int sta_idx = -1;
+ int free_index = -1;
+
+ /* Check if max stations reached */
+ if (common->num_stations >= common->max_stations) {
+ rsi_dbg(ERR_ZONE, "Reject: Max Stations exists\n");
+ status = -EOPNOTSUPP;
+ goto unlock;
+ }
+ for (cnt = 0; cnt < common->max_stations; cnt++) {
+ rsta = &common->stations[cnt];
- if ((sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_20) ||
- (sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40)) {
- common->vif_info[0].sgi = true;
+ if (!rsta->sta) {
+ if (free_index < 0)
+ free_index = cnt;
+ continue;
+ }
+ if (!memcmp(rsta->sta->addr, sta->addr, ETH_ALEN)) {
+ rsi_dbg(INFO_ZONE, "Station exists\n");
+ sta_idx = cnt;
+ sta_exist = true;
+ break;
+ }
+ }
+ if (!sta_exist) {
+ if (free_index >= 0)
+ sta_idx = free_index;
+ }
+ if (sta_idx < 0) {
+			rsi_dbg(ERR_ZONE,
+				"%s: No free station index available\n",
+				__func__);
+ status = -EINVAL;
+ goto unlock;
+ }
+ rsta = &common->stations[sta_idx];
+ rsta->sta = sta;
+ rsta->sta_id = sta_idx;
+ for (cnt = 0; cnt < IEEE80211_NUM_TIDS; cnt++)
+ rsta->start_tx_aggr[cnt] = false;
+ for (cnt = 0; cnt < IEEE80211_NUM_TIDS; cnt++)
+ rsta->seq_start[cnt] = 0;
+ if (!sta_exist) {
+ rsi_dbg(INFO_ZONE, "New Station\n");
+
+ /* Send peer notify to device */
+ rsi_dbg(INFO_ZONE, "Indicate bss status to device\n");
+ rsi_inform_bss_status(common, AP_OPMODE, 1, sta->addr,
+ sta->wme, sta->aid, sta, sta_idx);
+
+ if (common->key) {
+ struct ieee80211_key_conf *key = common->key;
+
+ if ((key->cipher == WLAN_CIPHER_SUITE_WEP104) ||
+ (key->cipher == WLAN_CIPHER_SUITE_WEP40))
+ rsi_hal_load_key(adapter->priv,
+ key->key,
+ key->keylen,
+ RSI_PAIRWISE_KEY,
+ key->keyidx,
+ key->cipher,
+ sta_idx);
+ }
+
+ common->num_stations++;
+ }
}
- if (sta->ht_cap.ht_supported)
- ieee80211_start_tx_ba_session(sta, 0, 0);
+ if (vif->type == NL80211_IFTYPE_STATION) {
+ rsi_set_min_rate(hw, sta, common);
+ if (sta->ht_cap.ht_supported) {
+ common->vif_info[0].is_ht = true;
+ common->bitrate_mask[NL80211_BAND_2GHZ] =
+ sta->supp_rates[NL80211_BAND_2GHZ];
+ if ((sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_20) ||
+ (sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40))
+ common->vif_info[0].sgi = true;
+ ieee80211_start_tx_ba_session(sta, 0, 0);
+ }
+ }
+unlock:
mutex_unlock(&common->mutex);
- return 0;
+ return status;
}
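
The AP branch of sta_add above is a find-or-allocate walk over a fixed table: remember the first free slot, stop early on an address match, and fail only when neither exists. The same logic as a standalone sketch (MAX_STATIONS is illustrative; the driver sizes the table per operating mode):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define ETH_ALEN 6
#define MAX_STATIONS 4	/* illustrative */

struct slot {
	int used;
	uint8_t addr[ETH_ALEN];
};

/* Return the slot index for addr, claiming the first free slot for a
 * previously unknown address, or -1 when the table is full. */
static int find_or_add_sta(struct slot *tbl, const uint8_t *addr)
{
	int i, free_index = -1;

	for (i = 0; i < MAX_STATIONS; i++) {
		if (!tbl[i].used) {
			if (free_index < 0)
				free_index = i;
			continue;
		}
		if (!memcmp(tbl[i].addr, addr, ETH_ALEN))
			return i;	/* station already known */
	}
	if (free_index < 0)
		return -1;	/* table full */
	tbl[free_index].used = 1;
	memcpy(tbl[free_index].addr, addr, ETH_ALEN);
	return free_index;
}

int main(void)
{
	struct slot tbl[MAX_STATIONS] = { { 0 } };
	uint8_t a[ETH_ALEN] = { 0x00, 0x23, 0xa7, 0x01, 0x02, 0x03 };

	printf("first add -> slot %d\n", find_or_add_sta(tbl, a));
	printf("re-add    -> slot %d\n", find_or_add_sta(tbl, a));
	return 0;
}
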
/**
@@ -1047,7 +1306,7 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw,
* @vif: Pointer to the ieee80211_vif structure.
* @sta: Pointer to the ieee80211_sta structure.
*
- * Return: 0 on success, -1 on failure.
+ * Return: 0 on success, negative error codes on failure.
*/
static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
@@ -1055,21 +1314,55 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw,
{
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
+ struct ieee80211_bss_conf *bss = &vif->bss_conf;
+ struct rsi_sta *rsta;
+
+ rsi_dbg(INFO_ZONE, "Station Remove: %pM\n", sta->addr);
mutex_lock(&common->mutex);
- /* Resetting all the fields to default values */
- common->bitrate_mask[NL80211_BAND_2GHZ] = 0;
- common->bitrate_mask[NL80211_BAND_5GHZ] = 0;
- common->min_rate = 0xffff;
- common->vif_info[0].is_ht = false;
- common->vif_info[0].sgi = false;
- common->vif_info[0].seq_start = 0;
- common->secinfo.ptk_cipher = 0;
- common->secinfo.gtk_cipher = 0;
+ if (vif->type == NL80211_IFTYPE_AP) {
+ u8 sta_idx, cnt;
+
+ /* Send peer notify to device */
+ rsi_dbg(INFO_ZONE, "Indicate bss status to device\n");
+ for (sta_idx = 0; sta_idx < common->max_stations; sta_idx++) {
+ rsta = &common->stations[sta_idx];
+
+ if (!rsta->sta)
+ continue;
+ if (!memcmp(rsta->sta->addr, sta->addr, ETH_ALEN)) {
+ rsi_inform_bss_status(common, AP_OPMODE, 0,
+ sta->addr, sta->wme,
+ sta->aid, sta, sta_idx);
+ rsta->sta = NULL;
+ rsta->sta_id = -1;
+ for (cnt = 0; cnt < IEEE80211_NUM_TIDS; cnt++)
+ rsta->start_tx_aggr[cnt] = false;
+ if (common->num_stations > 0)
+ common->num_stations--;
+ break;
+ }
+ }
+ if (sta_idx >= common->max_stations)
+ rsi_dbg(ERR_ZONE, "%s: No station found\n", __func__);
+ }
- rsi_send_rx_filter_frame(common, 0);
-
+ if (vif->type == NL80211_IFTYPE_STATION) {
+ /* Resetting all the fields to default values */
+ memcpy((u8 *)bss->bssid, (u8 *)sta->addr, ETH_ALEN);
+ bss->qos = sta->wme;
+ common->bitrate_mask[NL80211_BAND_2GHZ] = 0;
+ common->bitrate_mask[NL80211_BAND_5GHZ] = 0;
+ common->min_rate = 0xffff;
+ common->vif_info[0].is_ht = false;
+ common->vif_info[0].sgi = false;
+ common->vif_info[0].seq_start = 0;
+ common->secinfo.ptk_cipher = 0;
+ common->secinfo.gtk_cipher = 0;
+ if (!common->iface_down)
+ rsi_send_rx_filter_frame(common, 0);
+ }
mutex_unlock(&common->mutex);
return 0;
@@ -1133,7 +1426,7 @@ fail_set_antenna:
* @tx_ant: Bitmap for tx antenna
* @rx_ant: Bitmap for rx antenna
*
- * Return: 0 on success, -1 on failure.
+ * Return: 0 on success, negative error codes on failure.
*/
static int rsi_mac80211_get_antenna(struct ieee80211_hw *hw,
u32 *tx_ant, u32 *rx_ant)
@@ -1151,6 +1444,21 @@ static int rsi_mac80211_get_antenna(struct ieee80211_hw *hw,
return 0;
}
+static int rsi_map_region_code(enum nl80211_dfs_regions region_code)
+{
+ switch (region_code) {
+ case NL80211_DFS_FCC:
+ return RSI_REGION_FCC;
+ case NL80211_DFS_ETSI:
+ return RSI_REGION_ETSI;
+ case NL80211_DFS_JP:
+ return RSI_REGION_TELEC;
+ case NL80211_DFS_UNSET:
+ return RSI_REGION_WORLD;
+ }
+ return RSI_REGION_WORLD;
+}
+
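
The value returned by rsi_map_region_code() is later packed, four bits wide, alongside the RF type when a channel is programmed (see chan_cfg->region_rftype in the rsi_set_channel() hunk below). A standalone sketch of that nibble packing; the RSI_RF_TYPE value here is a placeholder for the driver's real definition:

#include <stdint.h>
#include <stdio.h>

#define RSI_RF_TYPE 1	/* placeholder; real value comes from the driver headers */

/* RF type in the high nibble, DFS region code in the low nibble,
 * mirroring chan_cfg->region_rftype in this patch. */
static uint8_t pack_region_rftype(uint8_t rf_type, uint8_t region)
{
	return (uint8_t)(((rf_type & 0xf) << 4) | (region & 0xf));
}

int main(void)
{
	printf("region_rftype = 0x%02x\n", pack_region_rftype(RSI_RF_TYPE, 2));
	return 0;
}
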
static void rsi_reg_notify(struct wiphy *wiphy,
struct regulatory_request *request)
{
@@ -1158,26 +1466,49 @@ static void rsi_reg_notify(struct wiphy *wiphy,
struct ieee80211_channel *ch;
struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy);
struct rsi_hw * adapter = hw->priv;
+ struct rsi_common *common = adapter->priv;
int i;
-
- sband = wiphy->bands[NL80211_BAND_5GHZ];
- for (i = 0; i < sband->n_channels; i++) {
- ch = &sband->channels[i];
- if (ch->flags & IEEE80211_CHAN_DISABLED)
- continue;
+ mutex_lock(&common->mutex);
+
+ rsi_dbg(INFO_ZONE, "country = %s dfs_region = %d\n",
+ request->alpha2, request->dfs_region);
+
+ if (common->num_supp_bands > 1) {
+ sband = wiphy->bands[NL80211_BAND_5GHZ];
- if (ch->flags & IEEE80211_CHAN_RADAR)
- ch->flags |= IEEE80211_CHAN_NO_IR;
+ for (i = 0; i < sband->n_channels; i++) {
+ ch = &sband->channels[i];
+ if (ch->flags & IEEE80211_CHAN_DISABLED)
+ continue;
+
+ if (ch->flags & IEEE80211_CHAN_RADAR)
+ ch->flags |= IEEE80211_CHAN_NO_IR;
+ }
}
+ adapter->dfs_region = rsi_map_region_code(request->dfs_region);
+ rsi_dbg(INFO_ZONE, "RSI region code = %d\n", adapter->dfs_region);
- rsi_dbg(INFO_ZONE,
- "country = %s dfs_region = %d\n",
- request->alpha2, request->dfs_region);
- adapter->dfs_region = request->dfs_region;
+ adapter->country[0] = request->alpha2[0];
+ adapter->country[1] = request->alpha2[1];
+
+ mutex_unlock(&common->mutex);
+}
+
+static void rsi_mac80211_rfkill_poll(struct ieee80211_hw *hw)
+{
+ struct rsi_hw *adapter = hw->priv;
+ struct rsi_common *common = adapter->priv;
+
+ mutex_lock(&common->mutex);
+ if (common->fsm_state != FSM_MAC_INIT_DONE)
+ wiphy_rfkill_set_hw_state(hw->wiphy, true);
+ else
+ wiphy_rfkill_set_hw_state(hw->wiphy, false);
+ mutex_unlock(&common->mutex);
}
-static struct ieee80211_ops mac80211_ops = {
+static const struct ieee80211_ops mac80211_ops = {
.tx = rsi_mac80211_tx,
.start = rsi_mac80211_start,
.stop = rsi_mac80211_stop,
@@ -1195,13 +1526,14 @@ static struct ieee80211_ops mac80211_ops = {
.sta_remove = rsi_mac80211_sta_remove,
.set_antenna = rsi_mac80211_set_antenna,
.get_antenna = rsi_mac80211_get_antenna,
+ .rfkill_poll = rsi_mac80211_rfkill_poll,
};
/**
* rsi_mac80211_attach() - This function is used to initialize Mac80211 stack.
* @common: Pointer to the driver private structure.
*
- * Return: 0 on success, -1 on failure.
+ * Return: 0 on success, negative error codes on failure.
*/
int rsi_mac80211_attach(struct rsi_common *common)
{
@@ -1229,12 +1561,16 @@ int rsi_mac80211_attach(struct rsi_common *common)
ieee80211_hw_set(hw, SIGNAL_DBM);
ieee80211_hw_set(hw, HAS_RATE_CONTROL);
ieee80211_hw_set(hw, AMPDU_AGGREGATION);
+ ieee80211_hw_set(hw, SUPPORTS_PS);
+ ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS);
hw->queues = MAX_HW_QUEUES;
hw->extra_tx_headroom = RSI_NEEDED_HEADROOM;
hw->max_rates = 1;
hw->max_rate_tries = MAX_RETRIES;
+ hw->uapsd_queues = RSI_IEEE80211_UAPSD_QUEUES;
+ hw->uapsd_max_sp_len = IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL;
hw->max_tx_aggregation_subframes = 6;
rsi_register_rates_channels(adapter, NL80211_BAND_2GHZ);
@@ -1244,7 +1580,8 @@ int rsi_mac80211_attach(struct rsi_common *common)
SET_IEEE80211_PERM_ADDR(hw, common->mac_addr);
ether_addr_copy(hw->wiphy->addr_mask, addr_mask);
- wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION);
+ wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) |
+ BIT(NL80211_IFTYPE_AP);
wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
wiphy->retry_short = RETRY_SHORT;
wiphy->retry_long = RETRY_LONG;
@@ -1259,6 +1596,14 @@ int rsi_mac80211_attach(struct rsi_common *common)
wiphy->bands[NL80211_BAND_5GHZ] =
&adapter->sbands[NL80211_BAND_5GHZ];
+ /* AP Parameters */
+ wiphy->max_ap_assoc_sta = rsi_max_ap_stas[common->oper_mode - 1];
+ common->max_stations = wiphy->max_ap_assoc_sta;
+ rsi_dbg(ERR_ZONE, "Max Stations Allowed = %d\n", common->max_stations);
+ hw->sta_data_size = sizeof(struct rsi_sta);
+ wiphy->flags = WIPHY_FLAG_REPORTS_OBSS;
+ wiphy->flags |= WIPHY_FLAG_AP_UAPSD;
+ wiphy->features |= NL80211_FEATURE_INACTIVITY_TIMER;
wiphy->reg_notifier = rsi_reg_notify;
wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST);
diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c
index f1cde0c..3e1e808 100644
--- a/drivers/net/wireless/rsi/rsi_91x_main.c
+++ b/drivers/net/wireless/rsi/rsi_91x_main.c
@@ -220,7 +220,8 @@ struct rsi_hw *rsi_91x_init(void)
rsi_init_event(&common->tx_thread.event);
mutex_init(&common->mutex);
- mutex_init(&common->tx_rxlock);
+ mutex_init(&common->tx_lock);
+ mutex_init(&common->rx_lock);
if (rsi_create_kthread(common,
&common->tx_thread,
@@ -230,6 +231,8 @@ struct rsi_hw *rsi_91x_init(void)
goto err;
}
+ rsi_default_ps_params(adapter);
+ spin_lock_init(&adapter->ps_lock);
common->init_done = true;
return adapter;
diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
index d4d365b..f7b550f 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
@@ -17,6 +17,8 @@
#include <linux/etherdevice.h>
#include "rsi_mgmt.h"
#include "rsi_common.h"
+#include "rsi_ps.h"
+#include "rsi_hal.h"
static struct bootup_params boot_params_20 = {
.magic_number = cpu_to_le16(0x5aa5),
@@ -230,6 +232,8 @@ static void rsi_set_default_parameters(struct rsi_common *common)
common->rf_power_val = 0; /* Default 1.9V */
common->wlan_rf_power_mode = 0;
common->obm_ant_sel_val = 2;
+ common->beacon_interval = RSI_BEACON_INTERVAL;
+ common->dtim_cnt = RSI_DTIM_COUNT;
}
/**
@@ -266,11 +270,15 @@ static int rsi_send_internal_mgmt_frame(struct rsi_common *common,
struct sk_buff *skb)
{
struct skb_info *tx_params;
+ struct rsi_cmd_desc *desc;
if (skb == NULL) {
rsi_dbg(ERR_ZONE, "%s: Unable to allocate skb\n", __func__);
return -ENOMEM;
}
+ desc = (struct rsi_cmd_desc *)skb->data;
+ desc->desc_dword0.len_qno |= cpu_to_le16(DESC_IMMEDIATE_WAKEUP);
+ skb->priority = MGMT_SOFT_Q;
tx_params = (struct skb_info *)&IEEE80211_SKB_CB(skb)->driver_data;
tx_params->flags |= INTERNAL_MGMT_PKT;
skb_queue_tail(&common->tx_queue[MGMT_SOFT_Q], skb);
@@ -298,10 +306,11 @@ static int rsi_load_radio_caps(struct rsi_common *common)
0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0};
struct sk_buff *skb;
+ u16 frame_len = sizeof(struct rsi_radio_caps);
rsi_dbg(INFO_ZONE, "%s: Sending rate symbol req frame\n", __func__);
- skb = dev_alloc_skb(sizeof(struct rsi_radio_caps));
+ skb = dev_alloc_skb(frame_len);
if (!skb) {
rsi_dbg(ERR_ZONE, "%s: Failed in allocation of skb\n",
@@ -309,37 +318,40 @@ static int rsi_load_radio_caps(struct rsi_common *common)
return -ENOMEM;
}
- memset(skb->data, 0, sizeof(struct rsi_radio_caps));
+ memset(skb->data, 0, frame_len);
radio_caps = (struct rsi_radio_caps *)skb->data;
- radio_caps->desc_word[1] = cpu_to_le16(RADIO_CAPABILITIES);
- radio_caps->desc_word[4] = cpu_to_le16(RSI_RF_TYPE << 8);
+ radio_caps->desc_dword0.frame_type = RADIO_CAPABILITIES;
+ radio_caps->channel_num = common->channel;
+ radio_caps->rf_model = RSI_RF_TYPE;
if (common->channel_width == BW_40MHZ) {
- radio_caps->desc_word[7] |= cpu_to_le16(RSI_LMAC_CLOCK_80MHZ);
- radio_caps->desc_word[7] |= cpu_to_le16(RSI_ENABLE_40MHZ);
+ radio_caps->radio_cfg_info = RSI_LMAC_CLOCK_80MHZ;
+ radio_caps->radio_cfg_info |= RSI_ENABLE_40MHZ;
if (common->fsm_state == FSM_MAC_INIT_DONE) {
struct ieee80211_hw *hw = adapter->hw;
struct ieee80211_conf *conf = &hw->conf;
+
if (conf_is_ht40_plus(conf)) {
- radio_caps->desc_word[5] =
- cpu_to_le16(LOWER_20_ENABLE);
- radio_caps->desc_word[5] |=
- cpu_to_le16(LOWER_20_ENABLE >> 12);
+ radio_caps->radio_cfg_info =
+ RSI_CMDDESC_LOWER_20_ENABLE;
+ radio_caps->radio_info =
+ RSI_CMDDESC_LOWER_20_ENABLE;
} else if (conf_is_ht40_minus(conf)) {
- radio_caps->desc_word[5] =
- cpu_to_le16(UPPER_20_ENABLE);
- radio_caps->desc_word[5] |=
- cpu_to_le16(UPPER_20_ENABLE >> 12);
+ radio_caps->radio_cfg_info =
+ RSI_CMDDESC_UPPER_20_ENABLE;
+ radio_caps->radio_info =
+ RSI_CMDDESC_UPPER_20_ENABLE;
} else {
- radio_caps->desc_word[5] =
- cpu_to_le16(BW_40MHZ << 12);
- radio_caps->desc_word[5] |=
- cpu_to_le16(FULL40M_ENABLE);
+ radio_caps->radio_cfg_info =
+ RSI_CMDDESC_40MHZ;
+ radio_caps->radio_info =
+ RSI_CMDDESC_FULL_40_ENABLE;
}
}
}
+ radio_caps->radio_info |= radio_id;
radio_caps->sifs_tx_11n = cpu_to_le16(SIFS_TX_11N_VALUE);
radio_caps->sifs_tx_11b = cpu_to_le16(SIFS_TX_11B_VALUE);
@@ -348,8 +360,6 @@ static int rsi_load_radio_caps(struct rsi_common *common)
radio_caps->cck_ack_tout = cpu_to_le16(CCK_ACK_TOUT_VALUE);
radio_caps->preamble_type = cpu_to_le16(LONG_PREAMBLE);
- radio_caps->desc_word[7] |= cpu_to_le16(radio_id << 8);
-
for (ii = 0; ii < MAX_HW_QUEUES; ii++) {
radio_caps->qos_params[ii].cont_win_min_q = cpu_to_le16(3);
radio_caps->qos_params[ii].cont_win_max_q = cpu_to_le16(0x3f);
@@ -357,7 +367,7 @@ static int rsi_load_radio_caps(struct rsi_common *common)
radio_caps->qos_params[ii].txop_q = 0;
}
- for (ii = 0; ii < MAX_HW_QUEUES - 4; ii++) {
+ for (ii = 0; ii < NUM_EDCA_QUEUES; ii++) {
radio_caps->qos_params[ii].cont_win_min_q =
cpu_to_le16(common->edca_params[ii].cw_min);
radio_caps->qos_params[ii].cont_win_max_q =
@@ -368,17 +378,19 @@ static int rsi_load_radio_caps(struct rsi_common *common)
cpu_to_le16(common->edca_params[ii].txop);
}
+ radio_caps->qos_params[BROADCAST_HW_Q].txop_q = cpu_to_le16(0xffff);
+ radio_caps->qos_params[MGMT_HW_Q].txop_q = 0;
+ radio_caps->qos_params[BEACON_HW_Q].txop_q = cpu_to_le16(0xffff);
+
memcpy(&common->rate_pwr[0], &gc[0], 40);
for (ii = 0; ii < 20; ii++)
radio_caps->gcpd_per_rate[inx++] =
cpu_to_le16(common->rate_pwr[ii] & 0x00FF);
- radio_caps->desc_word[0] = cpu_to_le16((sizeof(struct rsi_radio_caps) -
- FRAME_DESC_SZ) |
- (RSI_WIFI_MGMT_Q << 12));
-
+ rsi_set_len_qno(&radio_caps->desc_dword0.len_qno,
+ (frame_len - FRAME_DESC_SZ), RSI_WIFI_MGMT_Q);
- skb_put(skb, (sizeof(struct rsi_radio_caps)));
+ skb_put(skb, frame_len);
return rsi_send_internal_mgmt_frame(common, skb);
}
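
Throughout these hunks the open-coded cpu_to_le16(len | (RSI_WIFI_MGMT_Q << 12)) descriptor setup is replaced by rsi_set_len_qno(). Judging from the removed code, the first descriptor word carries a 12-bit payload length and a 4-bit queue number in a little-endian u16; a standalone sketch of that packing (the helper's real body is not part of this diff, so this is an inference):

#include <stdint.h>
#include <stdio.h>

#define RSI_WIFI_MGMT_Q 0x4	/* illustrative queue number */

/* Little-endian store, standing in for cpu_to_le16() on any host. */
static void put_le16(uint8_t *p, uint16_t v)
{
	p[0] = (uint8_t)(v & 0xff);
	p[1] = (uint8_t)(v >> 8);
}

/* Pack a 12-bit length and a 4-bit queue number into the first
 * descriptor word, as the removed (len | (qno << 12)) code did. */
static void set_len_qno(uint8_t *desc, uint16_t len, uint8_t qno)
{
	put_le16(desc, (uint16_t)((len & 0x0fff) | ((qno & 0xf) << 12)));
}

int main(void)
{
	uint8_t desc[2];

	set_len_qno(desc, 320, RSI_WIFI_MGMT_Q);
	printf("desc_dword0.len_qno = %02x %02x\n", desc[0], desc[1]);
	return 0;
}
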
@@ -394,8 +406,7 @@ static int rsi_load_radio_caps(struct rsi_common *common)
*/
static int rsi_mgmt_pkt_to_core(struct rsi_common *common,
u8 *msg,
- s32 msg_len,
- u8 type)
+ s32 msg_len)
{
struct rsi_hw *adapter = common->priv;
struct ieee80211_tx_info *info;
@@ -403,37 +414,30 @@ static int rsi_mgmt_pkt_to_core(struct rsi_common *common,
u8 pad_bytes = msg[4];
struct sk_buff *skb;
- if (type == RX_DOT11_MGMT) {
- if (!adapter->sc_nvifs)
- return -ENOLINK;
+ if (!adapter->sc_nvifs)
+ return -ENOLINK;
- msg_len -= pad_bytes;
- if (msg_len <= 0) {
- rsi_dbg(MGMT_RX_ZONE,
- "%s: Invalid rx msg of len = %d\n",
- __func__, msg_len);
- return -EINVAL;
- }
+ msg_len -= pad_bytes;
+ if (msg_len <= 0) {
+ rsi_dbg(MGMT_RX_ZONE,
+ "%s: Invalid rx msg of len = %d\n",
+ __func__, msg_len);
+ return -EINVAL;
+ }
- skb = dev_alloc_skb(msg_len);
- if (!skb) {
- rsi_dbg(ERR_ZONE, "%s: Failed to allocate skb\n",
- __func__);
- return -ENOMEM;
- }
+ skb = dev_alloc_skb(msg_len);
+ if (!skb)
+ return -ENOMEM;
- skb_put_data(skb,
- (u8 *)(msg + FRAME_DESC_SZ + pad_bytes),
- msg_len);
+ skb_put_data(skb,
+ (u8 *)(msg + FRAME_DESC_SZ + pad_bytes),
+ msg_len);
- info = IEEE80211_SKB_CB(skb);
- rx_params = (struct skb_info *)info->driver_data;
- rx_params->rssi = rsi_get_rssi(msg);
- rx_params->channel = rsi_get_channel(msg);
- rsi_indicate_pkt_to_os(common, skb);
- } else {
- rsi_dbg(MGMT_TX_ZONE, "%s: Internal Packet\n", __func__);
- }
+ info = IEEE80211_SKB_CB(skb);
+ rx_params = (struct skb_info *)info->driver_data;
+ rx_params->rssi = rsi_get_rssi(msg);
+ rx_params->channel = rsi_get_channel(msg);
+ rsi_indicate_pkt_to_os(common, skb);
return 0;
}
@@ -451,20 +455,23 @@ static int rsi_mgmt_pkt_to_core(struct rsi_common *common,
* Return: status: 0 on success, corresponding negative error code on failure.
*/
static int rsi_hal_send_sta_notify_frame(struct rsi_common *common,
- u8 opmode,
+ enum opmode opmode,
u8 notify_event,
const unsigned char *bssid,
u8 qos_enable,
- u16 aid)
+ u16 aid,
+ u16 sta_id)
{
+ struct ieee80211_vif *vif = common->priv->vifs[0];
struct sk_buff *skb = NULL;
struct rsi_peer_notify *peer_notify;
u16 vap_id = 0;
int status;
+ u16 frame_len = sizeof(struct rsi_peer_notify);
rsi_dbg(MGMT_TX_ZONE, "%s: Sending sta notify frame\n", __func__);
- skb = dev_alloc_skb(sizeof(struct rsi_peer_notify));
+ skb = dev_alloc_skb(frame_len);
if (!skb) {
rsi_dbg(ERR_ZONE, "%s: Failed in allocation of skb\n",
@@ -472,10 +479,13 @@ static int rsi_hal_send_sta_notify_frame(struct rsi_common *common,
return -ENOMEM;
}
- memset(skb->data, 0, sizeof(struct rsi_peer_notify));
+ memset(skb->data, 0, frame_len);
peer_notify = (struct rsi_peer_notify *)skb->data;
- peer_notify->command = cpu_to_le16(opmode << 1);
+ if (opmode == STA_OPMODE)
+ peer_notify->command = cpu_to_le16(PEER_TYPE_AP << 1);
+ else if (opmode == AP_OPMODE)
+ peer_notify->command = cpu_to_le16(PEER_TYPE_STA << 1);
switch (notify_event) {
case STA_CONNECTED:
@@ -490,20 +500,22 @@ static int rsi_hal_send_sta_notify_frame(struct rsi_common *common,
peer_notify->command |= cpu_to_le16((aid & 0xfff) << 4);
ether_addr_copy(peer_notify->mac_addr, bssid);
-
+ peer_notify->mpdu_density = cpu_to_le16(RSI_MPDU_DENSITY);
peer_notify->sta_flags = cpu_to_le32((qos_enable) ? 1 : 0);
- peer_notify->desc_word[0] =
- cpu_to_le16((sizeof(struct rsi_peer_notify) - FRAME_DESC_SZ) |
- (RSI_WIFI_MGMT_Q << 12));
- peer_notify->desc_word[1] = cpu_to_le16(PEER_NOTIFY);
- peer_notify->desc_word[7] |= cpu_to_le16(vap_id << 8);
+ rsi_set_len_qno(&peer_notify->desc.desc_dword0.len_qno,
+ (frame_len - FRAME_DESC_SZ),
+ RSI_WIFI_MGMT_Q);
+ peer_notify->desc.desc_dword0.frame_type = PEER_NOTIFY;
+ peer_notify->desc.desc_dword3.qid_tid = sta_id;
+ peer_notify->desc.desc_dword3.sta_id = vap_id;
- skb_put(skb, sizeof(struct rsi_peer_notify));
+ skb_put(skb, frame_len);
status = rsi_send_internal_mgmt_frame(common, skb);
- if (!status && qos_enable) {
+ if ((vif->type == NL80211_IFTYPE_STATION) &&
+ (!status && qos_enable)) {
rsi_set_contention_vals(common);
status = rsi_load_radio_caps(common);
}
@@ -525,13 +537,14 @@ int rsi_send_aggregation_params_frame(struct rsi_common *common,
u16 tid,
u16 ssn,
u8 buf_size,
- u8 event)
+ u8 event,
+ u8 sta_id)
{
struct sk_buff *skb = NULL;
- struct rsi_mac_frame *mgmt_frame;
- u8 peer_id = 0;
+ struct rsi_aggr_params *aggr_params;
+ u16 frame_len = sizeof(struct rsi_aggr_params);
- skb = dev_alloc_skb(FRAME_DESC_SZ);
+ skb = dev_alloc_skb(frame_len);
if (!skb) {
rsi_dbg(ERR_ZONE, "%s: Failed in allocation of skb\n",
@@ -539,37 +552,29 @@ int rsi_send_aggregation_params_frame(struct rsi_common *common,
return -ENOMEM;
}
- memset(skb->data, 0, FRAME_DESC_SZ);
- mgmt_frame = (struct rsi_mac_frame *)skb->data;
+ memset(skb->data, 0, frame_len);
+ aggr_params = (struct rsi_aggr_params *)skb->data;
rsi_dbg(MGMT_TX_ZONE, "%s: Sending AMPDU indication frame\n", __func__);
- mgmt_frame->desc_word[0] = cpu_to_le16(RSI_WIFI_MGMT_Q << 12);
- mgmt_frame->desc_word[1] = cpu_to_le16(AMPDU_IND);
+ rsi_set_len_qno(&aggr_params->desc_dword0.len_qno, 0, RSI_WIFI_MGMT_Q);
+ aggr_params->desc_dword0.frame_type = AMPDU_IND;
+ aggr_params->aggr_params = tid & RSI_AGGR_PARAMS_TID_MASK;
+ aggr_params->peer_id = sta_id;
if (event == STA_TX_ADDBA_DONE) {
- mgmt_frame->desc_word[4] = cpu_to_le16(ssn);
- mgmt_frame->desc_word[5] = cpu_to_le16(buf_size);
- mgmt_frame->desc_word[7] =
- cpu_to_le16((tid | (START_AMPDU_AGGR << 4) | (peer_id << 8)));
+ aggr_params->seq_start = cpu_to_le16(ssn);
+ aggr_params->baw_size = cpu_to_le16(buf_size);
+ aggr_params->aggr_params |= RSI_AGGR_PARAMS_START;
} else if (event == STA_RX_ADDBA_DONE) {
- mgmt_frame->desc_word[4] = cpu_to_le16(ssn);
- mgmt_frame->desc_word[7] = cpu_to_le16(tid |
- (START_AMPDU_AGGR << 4) |
- (RX_BA_INDICATION << 5) |
- (peer_id << 8));
- } else if (event == STA_TX_DELBA) {
- mgmt_frame->desc_word[7] = cpu_to_le16(tid |
- (STOP_AMPDU_AGGR << 4) |
- (peer_id << 8));
+ aggr_params->seq_start = cpu_to_le16(ssn);
+ aggr_params->aggr_params |= (RSI_AGGR_PARAMS_START |
+ RSI_AGGR_PARAMS_RX_AGGR);
} else if (event == STA_RX_DELBA) {
- mgmt_frame->desc_word[7] = cpu_to_le16(tid |
- (STOP_AMPDU_AGGR << 4) |
- (RX_BA_INDICATION << 5) |
- (peer_id << 8));
+ aggr_params->aggr_params |= RSI_AGGR_PARAMS_RX_AGGR;
}
- skb_put(skb, FRAME_DESC_SZ);
+ skb_put(skb, frame_len);
return rsi_send_internal_mgmt_frame(common, skb);
}
@@ -584,34 +589,36 @@ int rsi_send_aggregation_params_frame(struct rsi_common *common,
static int rsi_program_bb_rf(struct rsi_common *common)
{
struct sk_buff *skb;
- struct rsi_mac_frame *mgmt_frame;
+ struct rsi_bb_rf_prog *bb_rf_prog;
+ u16 frame_len = sizeof(struct rsi_bb_rf_prog);
rsi_dbg(MGMT_TX_ZONE, "%s: Sending program BB/RF frame\n", __func__);
- skb = dev_alloc_skb(FRAME_DESC_SZ);
+ skb = dev_alloc_skb(frame_len);
if (!skb) {
rsi_dbg(ERR_ZONE, "%s: Failed in allocation of skb\n",
__func__);
return -ENOMEM;
}
- memset(skb->data, 0, FRAME_DESC_SZ);
- mgmt_frame = (struct rsi_mac_frame *)skb->data;
+ memset(skb->data, 0, frame_len);
+ bb_rf_prog = (struct rsi_bb_rf_prog *)skb->data;
- mgmt_frame->desc_word[0] = cpu_to_le16(RSI_WIFI_MGMT_Q << 12);
- mgmt_frame->desc_word[1] = cpu_to_le16(BBP_PROG_IN_TA);
- mgmt_frame->desc_word[4] = cpu_to_le16(common->endpoint);
+ rsi_set_len_qno(&bb_rf_prog->desc_dword0.len_qno, 0, RSI_WIFI_MGMT_Q);
+ bb_rf_prog->desc_dword0.frame_type = BBP_PROG_IN_TA;
+ bb_rf_prog->endpoint = common->endpoint;
+ bb_rf_prog->rf_power_mode = common->wlan_rf_power_mode;
if (common->rf_reset) {
- mgmt_frame->desc_word[7] = cpu_to_le16(RF_RESET_ENABLE);
+ bb_rf_prog->flags = cpu_to_le16(RF_RESET_ENABLE);
rsi_dbg(MGMT_TX_ZONE, "%s: ===> RF RESET REQUEST SENT <===\n",
__func__);
common->rf_reset = 0;
}
common->bb_rf_prog_count = 1;
- mgmt_frame->desc_word[7] |= cpu_to_le16(PUT_BBP_RESET |
- BBP_REG_WRITE | (RSI_RF_TYPE << 4));
- skb_put(skb, FRAME_DESC_SZ);
+ bb_rf_prog->flags |= cpu_to_le16(PUT_BBP_RESET | BBP_REG_WRITE |
+ (RSI_RF_TYPE << 4));
+ skb_put(skb, frame_len);
return rsi_send_internal_mgmt_frame(common, skb);
}
@@ -625,6 +632,8 @@ static int rsi_program_bb_rf(struct rsi_common *common)
*/
int rsi_set_vap_capabilities(struct rsi_common *common,
enum opmode mode,
+ u8 *mac_addr,
+ u8 vap_id,
u8 vap_status)
{
struct sk_buff *skb = NULL;
@@ -632,59 +641,60 @@ int rsi_set_vap_capabilities(struct rsi_common *common,
struct rsi_hw *adapter = common->priv;
struct ieee80211_hw *hw = adapter->hw;
struct ieee80211_conf *conf = &hw->conf;
- u16 vap_id = 0;
+ u16 frame_len = sizeof(struct rsi_vap_caps);
rsi_dbg(MGMT_TX_ZONE, "%s: Sending VAP capabilities frame\n", __func__);
- skb = dev_alloc_skb(sizeof(struct rsi_vap_caps));
+ skb = dev_alloc_skb(frame_len);
if (!skb) {
rsi_dbg(ERR_ZONE, "%s: Failed in allocation of skb\n",
__func__);
return -ENOMEM;
}
- memset(skb->data, 0, sizeof(struct rsi_vap_caps));
+ memset(skb->data, 0, frame_len);
vap_caps = (struct rsi_vap_caps *)skb->data;
- vap_caps->desc_word[0] = cpu_to_le16((sizeof(struct rsi_vap_caps) -
- FRAME_DESC_SZ) |
- (RSI_WIFI_MGMT_Q << 12));
- vap_caps->desc_word[1] = cpu_to_le16(VAP_CAPABILITIES);
- vap_caps->desc_word[2] = cpu_to_le16(vap_status << 8);
- vap_caps->desc_word[4] = cpu_to_le16(mode |
- (common->channel_width << 8));
- vap_caps->desc_word[7] = cpu_to_le16((vap_id << 8) |
- (common->mac_id << 4) |
- common->radio_id);
-
- memcpy(vap_caps->mac_addr, common->mac_addr, IEEE80211_ADDR_LEN);
+ rsi_set_len_qno(&vap_caps->desc_dword0.len_qno,
+ (frame_len - FRAME_DESC_SZ), RSI_WIFI_MGMT_Q);
+ vap_caps->desc_dword0.frame_type = VAP_CAPABILITIES;
+ vap_caps->status = vap_status;
+ vap_caps->vif_type = mode;
+ vap_caps->channel_bw = common->channel_width;
+ vap_caps->vap_id = vap_id;
+ vap_caps->radioid_macid = ((common->mac_id & 0xf) << 4) |
+ (common->radio_id & 0xf);
+
+ memcpy(vap_caps->mac_addr, mac_addr, IEEE80211_ADDR_LEN);
vap_caps->keep_alive_period = cpu_to_le16(90);
vap_caps->frag_threshold = cpu_to_le16(IEEE80211_MAX_FRAG_THRESHOLD);
vap_caps->rts_threshold = cpu_to_le16(common->rts_threshold);
- vap_caps->default_mgmt_rate = cpu_to_le32(RSI_RATE_6);
if (common->band == NL80211_BAND_5GHZ) {
- vap_caps->default_ctrl_rate = cpu_to_le32(RSI_RATE_6);
- if (conf_is_ht40(&common->priv->hw->conf)) {
- vap_caps->default_ctrl_rate |=
- cpu_to_le32(FULL40M_ENABLE << 16);
- }
+ vap_caps->default_ctrl_rate = cpu_to_le16(RSI_RATE_6);
+ vap_caps->default_mgmt_rate = cpu_to_le32(RSI_RATE_6);
} else {
- vap_caps->default_ctrl_rate = cpu_to_le32(RSI_RATE_1);
+ vap_caps->default_ctrl_rate = cpu_to_le16(RSI_RATE_1);
+ vap_caps->default_mgmt_rate = cpu_to_le32(RSI_RATE_1);
+ }
+ if (conf_is_ht40(conf)) {
if (conf_is_ht40_minus(conf))
- vap_caps->default_ctrl_rate |=
- cpu_to_le32(UPPER_20_ENABLE << 16);
+ vap_caps->ctrl_rate_flags =
+ cpu_to_le16(UPPER_20_ENABLE);
else if (conf_is_ht40_plus(conf))
- vap_caps->default_ctrl_rate |=
- cpu_to_le32(LOWER_20_ENABLE << 16);
+ vap_caps->ctrl_rate_flags =
+ cpu_to_le16(LOWER_20_ENABLE);
+ else
+ vap_caps->ctrl_rate_flags =
+ cpu_to_le16(FULL40M_ENABLE);
}
vap_caps->default_data_rate = 0;
- vap_caps->beacon_interval = cpu_to_le16(200);
- vap_caps->dtim_period = cpu_to_le16(4);
+ vap_caps->beacon_interval = cpu_to_le16(common->beacon_interval);
+ vap_caps->dtim_period = cpu_to_le16(common->dtim_cnt);
- skb_put(skb, sizeof(*vap_caps));
+ skb_put(skb, frame_len);
return rsi_send_internal_mgmt_frame(common, skb);
}
@@ -705,58 +715,66 @@ int rsi_hal_load_key(struct rsi_common *common,
u16 key_len,
u8 key_type,
u8 key_id,
- u32 cipher)
+ u32 cipher,
+ s16 sta_id)
{
+ struct ieee80211_vif *vif = common->priv->vifs[0];
struct sk_buff *skb = NULL;
struct rsi_set_key *set_key;
u16 key_descriptor = 0;
+ u16 frame_len = sizeof(struct rsi_set_key);
rsi_dbg(MGMT_TX_ZONE, "%s: Sending load key frame\n", __func__);
- skb = dev_alloc_skb(sizeof(struct rsi_set_key));
+ skb = dev_alloc_skb(frame_len);
if (!skb) {
rsi_dbg(ERR_ZONE, "%s: Failed in allocation of skb\n",
__func__);
return -ENOMEM;
}
- memset(skb->data, 0, sizeof(struct rsi_set_key));
+ memset(skb->data, 0, frame_len);
set_key = (struct rsi_set_key *)skb->data;
+ if (key_type == RSI_GROUP_KEY) {
+ key_descriptor = RSI_KEY_TYPE_BROADCAST;
+ if (vif->type == NL80211_IFTYPE_AP)
+ key_descriptor |= RSI_KEY_MODE_AP;
+ }
if ((cipher == WLAN_CIPHER_SUITE_WEP40) ||
(cipher == WLAN_CIPHER_SUITE_WEP104)) {
- key_len += 1;
- key_descriptor |= BIT(2);
+ key_id = 0;
+ key_descriptor |= RSI_WEP_KEY;
if (key_len >= 13)
- key_descriptor |= BIT(3);
+ key_descriptor |= RSI_WEP_KEY_104;
} else if (cipher != KEY_TYPE_CLEAR) {
- key_descriptor |= BIT(4);
- if (key_type == RSI_PAIRWISE_KEY)
- key_id = 0;
+ key_descriptor |= RSI_CIPHER_WPA;
if (cipher == WLAN_CIPHER_SUITE_TKIP)
- key_descriptor |= BIT(5);
+ key_descriptor |= RSI_CIPHER_TKIP;
}
- key_descriptor |= (key_type | BIT(13) | (key_id << 14));
-
- set_key->desc_word[0] = cpu_to_le16((sizeof(struct rsi_set_key) -
- FRAME_DESC_SZ) |
- (RSI_WIFI_MGMT_Q << 12));
- set_key->desc_word[1] = cpu_to_le16(SET_KEY_REQ);
- set_key->desc_word[4] = cpu_to_le16(key_descriptor);
-
- if ((cipher == WLAN_CIPHER_SUITE_WEP40) ||
- (cipher == WLAN_CIPHER_SUITE_WEP104)) {
- memcpy(&set_key->key[key_id][1],
- data,
- key_len * 2);
+ key_descriptor |= RSI_PROTECT_DATA_FRAMES;
+ key_descriptor |= ((key_id << RSI_KEY_ID_OFFSET) & RSI_KEY_ID_MASK);
+
+ rsi_set_len_qno(&set_key->desc_dword0.len_qno,
+ (frame_len - FRAME_DESC_SZ), RSI_WIFI_MGMT_Q);
+ set_key->desc_dword0.frame_type = SET_KEY_REQ;
+ set_key->key_desc = cpu_to_le16(key_descriptor);
+ set_key->sta_id = sta_id;
+
+ if (data) {
+ if ((cipher == WLAN_CIPHER_SUITE_WEP40) ||
+ (cipher == WLAN_CIPHER_SUITE_WEP104)) {
+ memcpy(&set_key->key[key_id][1], data, key_len * 2);
+ } else {
+ memcpy(&set_key->key[0][0], data, key_len);
+ }
+ memcpy(set_key->tx_mic_key, &data[16], 8);
+ memcpy(set_key->rx_mic_key, &data[24], 8);
} else {
- memcpy(&set_key->key[0][0], data, key_len);
+ memset(&set_key[FRAME_DESC_SZ], 0, frame_len - FRAME_DESC_SZ);
}
- memcpy(set_key->tx_mic_key, &data[16], 8);
- memcpy(set_key->rx_mic_key, &data[24], 8);
-
- skb_put(skb, sizeof(struct rsi_set_key));
+ skb_put(skb, frame_len);
return rsi_send_internal_mgmt_frame(common, skb);
}
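
The key descriptor assembled in rsi_hal_load_key() is a bitfield: broadcast and AP-mode bits for group keys, WEP flags chosen by key length, WPA/TKIP cipher bits otherwise, then a protect-data-frames flag and the shifted key id. A standalone sketch of that assembly; every bit position below is an illustrative placeholder for the RSI_* definitions in rsi_mgmt.h:

#include <stdint.h>
#include <stdio.h>

/* Illustrative bit layout only; the real values are the RSI_*
 * definitions in rsi_mgmt.h. */
#define KEY_TYPE_BROADCAST	(1u << 1)
#define KEY_MODE_AP		(1u << 2)
#define WEP_KEY			(1u << 3)
#define WEP_KEY_104		(1u << 4)
#define CIPHER_WPA		(1u << 5)
#define CIPHER_TKIP		(1u << 6)
#define PROTECT_DATA_FRAMES	(1u << 13)
#define KEY_ID_OFFSET		14
#define KEY_ID_MASK		(3u << KEY_ID_OFFSET)

static uint16_t build_key_desc(int group, int ap_mode, int wep,
			       unsigned int key_len, int tkip,
			       unsigned int key_id)
{
	uint16_t desc = 0;

	if (group) {
		desc |= KEY_TYPE_BROADCAST;
		if (ap_mode)
			desc |= KEY_MODE_AP;
	}
	if (wep) {
		desc |= WEP_KEY;
		if (key_len >= 13)
			desc |= WEP_KEY_104;
	} else {
		desc |= CIPHER_WPA;
		if (tkip)
			desc |= CIPHER_TKIP;
	}
	desc |= PROTECT_DATA_FRAMES;
	desc |= (uint16_t)((key_id << KEY_ID_OFFSET) & KEY_ID_MASK);
	return desc;
}

int main(void)
{
	/* group TKIP key, id 1, on an AP interface */
	printf("key_desc = 0x%04x\n", build_key_desc(1, 1, 0, 32, 1, 1));
	return 0;
}
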
@@ -970,12 +988,13 @@ int rsi_set_channel(struct rsi_common *common,
struct ieee80211_channel *channel)
{
struct sk_buff *skb = NULL;
- struct rsi_mac_frame *mgmt_frame;
+ struct rsi_chan_config *chan_cfg;
+ u16 frame_len = sizeof(struct rsi_chan_config);
rsi_dbg(MGMT_TX_ZONE,
"%s: Sending scan req frame\n", __func__);
- skb = dev_alloc_skb(FRAME_DESC_SZ);
+ skb = dev_alloc_skb(frame_len);
if (!skb) {
rsi_dbg(ERR_ZONE, "%s: Failed in allocation of skb\n",
__func__);
@@ -986,37 +1005,33 @@ int rsi_set_channel(struct rsi_common *common,
dev_kfree_skb(skb);
return 0;
}
- memset(skb->data, 0, FRAME_DESC_SZ);
- mgmt_frame = (struct rsi_mac_frame *)skb->data;
-
- mgmt_frame->desc_word[0] = cpu_to_le16(RSI_WIFI_MGMT_Q << 12);
- mgmt_frame->desc_word[1] = cpu_to_le16(SCAN_REQUEST);
- mgmt_frame->desc_word[4] = cpu_to_le16(channel->hw_value);
-
- mgmt_frame->desc_word[4] |=
- cpu_to_le16(((char)(channel->max_antenna_gain)) << 8);
- mgmt_frame->desc_word[5] =
- cpu_to_le16((char)(channel->max_antenna_gain));
-
- mgmt_frame->desc_word[7] = cpu_to_le16(PUT_BBP_RESET |
- BBP_REG_WRITE |
- (RSI_RF_TYPE << 4));
-
- if (!(channel->flags & IEEE80211_CHAN_NO_IR) &&
- !(channel->flags & IEEE80211_CHAN_RADAR)) {
+ memset(skb->data, 0, frame_len);
+ chan_cfg = (struct rsi_chan_config *)skb->data;
+
+ rsi_set_len_qno(&chan_cfg->desc_dword0.len_qno, 0, RSI_WIFI_MGMT_Q);
+ chan_cfg->desc_dword0.frame_type = SCAN_REQUEST;
+ chan_cfg->channel_number = channel->hw_value;
+ chan_cfg->antenna_gain_offset_2g = channel->max_antenna_gain;
+ chan_cfg->antenna_gain_offset_5g = channel->max_antenna_gain;
+ chan_cfg->region_rftype = (RSI_RF_TYPE & 0xf) << 4;
+
+ if ((channel->flags & IEEE80211_CHAN_NO_IR) ||
+ (channel->flags & IEEE80211_CHAN_RADAR)) {
+ chan_cfg->antenna_gain_offset_2g |= RSI_CHAN_RADAR;
+ } else {
if (common->tx_power < channel->max_power)
- mgmt_frame->desc_word[6] = cpu_to_le16(common->tx_power);
+ chan_cfg->tx_power = cpu_to_le16(common->tx_power);
else
- mgmt_frame->desc_word[6] = cpu_to_le16(channel->max_power);
+ chan_cfg->tx_power = cpu_to_le16(channel->max_power);
}
- mgmt_frame->desc_word[7] = cpu_to_le16(common->priv->dfs_region);
+ chan_cfg->region_rftype |= (common->priv->dfs_region & 0xf);
if (common->channel_width == BW_40MHZ)
- mgmt_frame->desc_word[5] |= cpu_to_le16(0x1 << 8);
+ chan_cfg->channel_width = 0x1;
common->channel = channel->hw_value;
- skb_put(skb, FRAME_DESC_SZ);
+ skb_put(skb, frame_len);
return rsi_send_internal_mgmt_frame(common, skb);
}
@@ -1058,6 +1073,37 @@ int rsi_send_radio_params_update(struct rsi_common *common)
return rsi_send_internal_mgmt_frame(common, skb);
}
+/* This function programs the RTS threshold and keep-alive period. */
+int rsi_send_vap_dynamic_update(struct rsi_common *common)
+{
+ struct sk_buff *skb;
+ struct rsi_dynamic_s *dynamic_frame;
+
+ rsi_dbg(MGMT_TX_ZONE,
+ "%s: Sending vap update indication frame\n", __func__);
+
+ skb = dev_alloc_skb(sizeof(struct rsi_dynamic_s));
+ if (!skb)
+ return -ENOMEM;
+
+ memset(skb->data, 0, sizeof(struct rsi_dynamic_s));
+ dynamic_frame = (struct rsi_dynamic_s *)skb->data;
+ rsi_set_len_qno(&dynamic_frame->desc_dword0.len_qno,
+ sizeof(dynamic_frame->frame_body), RSI_WIFI_MGMT_Q);
+
+ dynamic_frame->desc_dword0.frame_type = VAP_DYNAMIC_UPDATE;
+ dynamic_frame->desc_dword2.pkt_info =
+ cpu_to_le32(common->rts_threshold);
+ /* Beacon miss threshold */
+ dynamic_frame->frame_body.keep_alive_period =
+ cpu_to_le16(RSI_DEF_KEEPALIVE);
+ dynamic_frame->desc_dword3.sta_id = 0; /* vap id */
+
+ skb_put(skb, sizeof(struct rsi_dynamic_s));
+
+ return rsi_send_internal_mgmt_frame(common, skb);
+}
+
/**
* rsi_compare() - This function is used to compare two integers
* @a: pointer to the first integer
@@ -1112,8 +1158,11 @@ static bool rsi_map_rates(u16 rate, int *offset)
*
* Return: 0 on success, corresponding error code on failure.
*/
-static int rsi_send_auto_rate_request(struct rsi_common *common)
+static int rsi_send_auto_rate_request(struct rsi_common *common,
+ struct ieee80211_sta *sta,
+ u16 sta_id)
{
+ struct ieee80211_vif *vif = common->priv->vifs[0];
struct sk_buff *skb;
struct rsi_auto_rate *auto_rate;
int ii = 0, jj = 0, kk = 0;
@@ -1121,11 +1170,15 @@ static int rsi_send_auto_rate_request(struct rsi_common *common)
u8 band = hw->conf.chandef.chan->band;
u8 num_supported_rates = 0;
u8 rate_table_offset, rate_offset = 0;
- u32 rate_bitmap = common->bitrate_mask[band];
-
+ u32 rate_bitmap;
u16 *selected_rates, min_rate;
+ bool is_ht = false, is_sgi = false;
+ u16 frame_len = sizeof(struct rsi_auto_rate);
+
+ rsi_dbg(MGMT_TX_ZONE,
+ "%s: Sending auto rate request frame\n", __func__);
- skb = dev_alloc_skb(sizeof(struct rsi_auto_rate));
+ skb = dev_alloc_skb(frame_len);
if (!skb) {
rsi_dbg(ERR_ZONE, "%s: Failed in allocation of skb\n",
__func__);
@@ -1140,8 +1193,6 @@ static int rsi_send_auto_rate_request(struct rsi_common *common)
return -ENOMEM;
}
- memset(skb->data, 0, sizeof(struct rsi_auto_rate));
-
auto_rate = (struct rsi_auto_rate *)skb->data;
auto_rate->aarf_rssi = cpu_to_le16(((u16)3 << 6) | (u16)(18 & 0x3f));
@@ -1150,16 +1201,35 @@ static int rsi_send_auto_rate_request(struct rsi_common *common)
auto_rate->initial_boundary = cpu_to_le16(3);
auto_rate->max_threshold_limt = cpu_to_le16(27);
- auto_rate->desc_word[1] = cpu_to_le16(AUTO_RATE_IND);
+ auto_rate->desc.desc_dword0.frame_type = AUTO_RATE_IND;
if (common->channel_width == BW_40MHZ)
- auto_rate->desc_word[7] |= cpu_to_le16(1);
+ auto_rate->desc.desc_dword3.qid_tid = BW_40MHZ;
+ auto_rate->desc.desc_dword3.sta_id = sta_id;
+
+ if (vif->type == NL80211_IFTYPE_STATION) {
+ rate_bitmap = common->bitrate_mask[band];
+ is_ht = common->vif_info[0].is_ht;
+ is_sgi = common->vif_info[0].sgi;
+ } else {
+ rate_bitmap = sta->supp_rates[band];
+ is_ht = sta->ht_cap.ht_supported;
+ if ((sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_20) ||
+ (sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40))
+ is_sgi = true;
+ }
if (band == NL80211_BAND_2GHZ) {
- min_rate = RSI_RATE_1;
+ if ((rate_bitmap == 0) && (is_ht))
+ min_rate = RSI_RATE_MCS0;
+ else
+ min_rate = RSI_RATE_1;
rate_table_offset = 0;
} else {
- min_rate = RSI_RATE_6;
+ if ((rate_bitmap == 0) && (is_ht))
+ min_rate = RSI_RATE_MCS0;
+ else
+ min_rate = RSI_RATE_6;
rate_table_offset = 4;
}
@@ -1173,7 +1243,7 @@ static int rsi_send_auto_rate_request(struct rsi_common *common)
}
num_supported_rates = jj;
- if (common->vif_info[0].is_ht) {
+ if (is_ht) {
for (ii = 0; ii < ARRAY_SIZE(mcs); ii++)
selected_rates[jj++] = mcs[ii];
num_supported_rates += ARRAY_SIZE(mcs);
@@ -1194,13 +1264,15 @@ static int rsi_send_auto_rate_request(struct rsi_common *common)
}
/* loading HT rates in the bottom half of the auto rate table */
- if (common->vif_info[0].is_ht) {
+ if (is_ht) {
for (ii = rate_offset, kk = ARRAY_SIZE(rsi_mcsrates) - 1;
ii < rate_offset + 2 * ARRAY_SIZE(rsi_mcsrates); ii++) {
- if (common->vif_info[0].sgi ||
- conf_is_ht40(&common->priv->hw->conf))
+ if (is_sgi || conf_is_ht40(&common->priv->hw->conf))
auto_rate->supported_rates[ii++] =
cpu_to_le16(rsi_mcsrates[kk] | BIT(9));
+ else
+ auto_rate->supported_rates[ii++] =
+ cpu_to_le16(rsi_mcsrates[kk]);
auto_rate->supported_rates[ii] =
cpu_to_le16(rsi_mcsrates[kk--]);
}
@@ -1216,15 +1288,12 @@ static int rsi_send_auto_rate_request(struct rsi_common *common)
auto_rate->num_supported_rates = cpu_to_le16(num_supported_rates * 2);
auto_rate->moderate_rate_inx = cpu_to_le16(num_supported_rates / 2);
- auto_rate->desc_word[7] |= cpu_to_le16(0 << 8);
num_supported_rates *= 2;
- auto_rate->desc_word[0] = cpu_to_le16((sizeof(*auto_rate) -
- FRAME_DESC_SZ) |
- (RSI_WIFI_MGMT_Q << 12));
+ rsi_set_len_qno(&auto_rate->desc.desc_dword0.len_qno,
+ (frame_len - FRAME_DESC_SZ), RSI_WIFI_MGMT_Q);
- skb_put(skb,
- sizeof(struct rsi_auto_rate));
+ skb_put(skb, frame_len);
kfree(selected_rates);
return rsi_send_internal_mgmt_frame(common, skb);
@@ -1243,27 +1312,40 @@ static int rsi_send_auto_rate_request(struct rsi_common *common)
* Return: None.
*/
void rsi_inform_bss_status(struct rsi_common *common,
+ enum opmode opmode,
u8 status,
- const unsigned char *bssid,
+ const u8 *addr,
u8 qos_enable,
- u16 aid)
+ u16 aid,
+ struct ieee80211_sta *sta,
+ u16 sta_id)
{
if (status) {
+ if (opmode == STA_OPMODE)
+ common->hw_data_qs_blocked = true;
rsi_hal_send_sta_notify_frame(common,
- RSI_IFTYPE_STATION,
+ opmode,
STA_CONNECTED,
- bssid,
+ addr,
qos_enable,
- aid);
+ aid, sta_id);
if (common->min_rate == 0xffff)
- rsi_send_auto_rate_request(common);
+ rsi_send_auto_rate_request(common, sta, sta_id);
+ if (opmode == STA_OPMODE) {
+ if (!rsi_send_block_unblock_frame(common, false))
+ common->hw_data_qs_blocked = false;
+ }
} else {
+ if (opmode == STA_OPMODE)
+ common->hw_data_qs_blocked = true;
rsi_hal_send_sta_notify_frame(common,
- RSI_IFTYPE_STATION,
+ opmode,
STA_DISCONNECTED,
- bssid,
+ addr,
qos_enable,
- aid);
+ aid, sta_id);
+ if (opmode == STA_OPMODE)
+ rsi_send_block_unblock_frame(common, true);
}
}
@@ -1276,7 +1358,8 @@ void rsi_inform_bss_status(struct rsi_common *common,
*/
static int rsi_eeprom_read(struct rsi_common *common)
{
- struct rsi_mac_frame *mgmt_frame;
+ struct rsi_eeprom_read_frame *mgmt_frame;
+ struct rsi_hw *adapter = common->priv;
struct sk_buff *skb;
rsi_dbg(MGMT_TX_ZONE, "%s: Sending EEPROM read req frame\n", __func__);
@@ -1289,18 +1372,21 @@ static int rsi_eeprom_read(struct rsi_common *common)
}
memset(skb->data, 0, FRAME_DESC_SZ);
- mgmt_frame = (struct rsi_mac_frame *)skb->data;
+ mgmt_frame = (struct rsi_eeprom_read_frame *)skb->data;
/* FrameType */
- mgmt_frame->desc_word[1] = cpu_to_le16(EEPROM_READ_TYPE);
- mgmt_frame->desc_word[0] = cpu_to_le16(RSI_WIFI_MGMT_Q << 12);
+ rsi_set_len_qno(&mgmt_frame->len_qno, 0, RSI_WIFI_MGMT_Q);
+ mgmt_frame->pkt_type = EEPROM_READ;
+
/* Number of bytes to read */
- mgmt_frame->desc_word[3] = cpu_to_le16(ETH_ALEN +
- WLAN_MAC_MAGIC_WORD_LEN +
- WLAN_HOST_MODE_LEN +
- WLAN_FW_VERSION_LEN);
+ mgmt_frame->pkt_info =
+ cpu_to_le32((adapter->eeprom.length << RSI_EEPROM_LEN_OFFSET) &
+ RSI_EEPROM_LEN_MASK);
+ mgmt_frame->pkt_info |= cpu_to_le32((3 << RSI_EEPROM_HDR_SIZE_OFFSET) &
+ RSI_EEPROM_HDR_SIZE_MASK);
+
/* Address to read */
- mgmt_frame->desc_word[4] = cpu_to_le16(WLAN_MAC_EEPROM_ADDR);
+ mgmt_frame->eeprom_offset = cpu_to_le32(adapter->eeprom.offset);
skb_put(skb, FRAME_DESC_SZ);
@@ -1317,7 +1403,7 @@ static int rsi_eeprom_read(struct rsi_common *common)
*/
int rsi_send_block_unblock_frame(struct rsi_common *common, bool block_event)
{
- struct rsi_mac_frame *mgmt_frame;
+ struct rsi_block_unblock_data *mgmt_frame;
struct sk_buff *skb;
rsi_dbg(MGMT_TX_ZONE, "%s: Sending block/unblock frame\n", __func__);
@@ -1330,23 +1416,25 @@ int rsi_send_block_unblock_frame(struct rsi_common *common, bool block_event)
}
memset(skb->data, 0, FRAME_DESC_SZ);
- mgmt_frame = (struct rsi_mac_frame *)skb->data;
+ mgmt_frame = (struct rsi_block_unblock_data *)skb->data;
- mgmt_frame->desc_word[0] = cpu_to_le16(RSI_WIFI_MGMT_Q << 12);
- mgmt_frame->desc_word[1] = cpu_to_le16(BLOCK_HW_QUEUE);
+ rsi_set_len_qno(&mgmt_frame->desc_dword0.len_qno, 0, RSI_WIFI_MGMT_Q);
+ mgmt_frame->desc_dword0.frame_type = BLOCK_HW_QUEUE;
+ mgmt_frame->host_quiet_info = QUIET_INFO_VALID;
if (block_event) {
rsi_dbg(INFO_ZONE, "blocking the data qs\n");
- mgmt_frame->desc_word[4] = cpu_to_le16(0xf);
+ mgmt_frame->block_q_bitmap = cpu_to_le16(0xf);
+ mgmt_frame->block_q_bitmap |= cpu_to_le16(0xf << 4);
} else {
rsi_dbg(INFO_ZONE, "unblocking the data qs\n");
- mgmt_frame->desc_word[5] = cpu_to_le16(0xf);
+ mgmt_frame->unblock_q_bitmap = cpu_to_le16(0xf);
+ mgmt_frame->unblock_q_bitmap |= cpu_to_le16(0xf << 4);
}
skb_put(skb, FRAME_DESC_SZ);
return rsi_send_internal_mgmt_frame(common, skb);
-
}
/**
@@ -1383,6 +1471,61 @@ int rsi_send_rx_filter_frame(struct rsi_common *common, u16 rx_filter_word)
return rsi_send_internal_mgmt_frame(common, skb);
}
+int rsi_send_ps_request(struct rsi_hw *adapter, bool enable)
+{
+ struct rsi_common *common = adapter->priv;
+ struct ieee80211_bss_conf *bss = &adapter->vifs[0]->bss_conf;
+ struct rsi_request_ps *ps;
+ struct rsi_ps_info *ps_info;
+ struct sk_buff *skb;
+ int frame_len = sizeof(*ps);
+
+ skb = dev_alloc_skb(frame_len);
+ if (!skb)
+ return -ENOMEM;
+ memset(skb->data, 0, frame_len);
+
+ ps = (struct rsi_request_ps *)skb->data;
+ ps_info = &adapter->ps_info;
+
+ rsi_set_len_qno(&ps->desc.desc_dword0.len_qno,
+ (frame_len - FRAME_DESC_SZ), RSI_WIFI_MGMT_Q);
+ ps->desc.desc_dword0.frame_type = WAKEUP_SLEEP_REQUEST;
+ if (enable) {
+ ps->ps_sleep.enable = RSI_PS_ENABLE;
+ ps->desc.desc_dword3.token = cpu_to_le16(RSI_SLEEP_REQUEST);
+ } else {
+ ps->ps_sleep.enable = RSI_PS_DISABLE;
+ ps->desc.desc_dword0.len_qno |= cpu_to_le16(RSI_PS_DISABLE_IND);
+ ps->desc.desc_dword3.token = cpu_to_le16(RSI_WAKEUP_REQUEST);
+ }
+
+ ps->ps_uapsd_acs = common->uapsd_bitmap;
+
+ ps->ps_sleep.sleep_type = ps_info->sleep_type;
+ ps->ps_sleep.num_bcns_per_lis_int =
+ cpu_to_le16(ps_info->num_bcns_per_lis_int);
+ ps->ps_sleep.sleep_duration =
+ cpu_to_le32(ps_info->deep_sleep_wakeup_period);
+
+ if (bss->assoc)
+ ps->ps_sleep.connected_sleep = RSI_CONNECTED_SLEEP;
+ else
+ ps->ps_sleep.connected_sleep = RSI_DEEP_SLEEP;
+
+ ps->ps_listen_interval = cpu_to_le32(ps_info->listen_interval);
+ ps->ps_dtim_interval_duration =
+ cpu_to_le32(ps_info->dtim_interval_duration);
+
+ if (ps_info->listen_interval > ps_info->dtim_interval_duration)
+ ps->ps_listen_interval = cpu_to_le32(RSI_PS_DISABLE);
+
+ ps->ps_num_dtim_intervals = cpu_to_le16(ps_info->num_dtims_per_sleep);
+ skb_put(skb, frame_len);
+
+ return rsi_send_internal_mgmt_frame(common, skb);
+}
+
/**
 * rsi_set_antenna() - This function sends antenna configuration request
* to device
@@ -1394,7 +1537,7 @@ int rsi_send_rx_filter_frame(struct rsi_common *common, u16 rx_filter_word)
*/
int rsi_set_antenna(struct rsi_common *common, u8 antenna)
{
- struct rsi_mac_frame *cmd_frame;
+ struct rsi_ant_sel_frame *ant_sel_frame;
struct sk_buff *skb;
skb = dev_alloc_skb(FRAME_DESC_SZ);
@@ -1405,17 +1548,43 @@ int rsi_set_antenna(struct rsi_common *common, u8 antenna)
}
memset(skb->data, 0, FRAME_DESC_SZ);
- cmd_frame = (struct rsi_mac_frame *)skb->data;
-
- cmd_frame->desc_word[1] = cpu_to_le16(ANT_SEL_FRAME);
- cmd_frame->desc_word[3] = cpu_to_le16(antenna & 0x00ff);
- cmd_frame->desc_word[0] = cpu_to_le16(RSI_WIFI_MGMT_Q << 12);
+ ant_sel_frame = (struct rsi_ant_sel_frame *)skb->data;
+ ant_sel_frame->desc_dword0.frame_type = ANT_SEL_FRAME;
+ ant_sel_frame->sub_frame_type = ANTENNA_SEL_TYPE;
+ ant_sel_frame->ant_value = cpu_to_le16(antenna & ANTENNA_MASK_VALUE);
+ rsi_set_len_qno(&ant_sel_frame->desc_dword0.len_qno,
+ 0, RSI_WIFI_MGMT_Q);
skb_put(skb, FRAME_DESC_SZ);
return rsi_send_internal_mgmt_frame(common, skb);
}
+static int rsi_send_beacon(struct rsi_common *common)
+{
+ struct sk_buff *skb = NULL;
+ u8 dword_align_bytes = 0;
+
+ skb = dev_alloc_skb(MAX_MGMT_PKT_SIZE);
+ if (!skb)
+ return -ENOMEM;
+
+ memset(skb->data, 0, MAX_MGMT_PKT_SIZE);
+
+ dword_align_bytes = ((unsigned long)skb->data & 0x3f);
+ if (dword_align_bytes)
+ skb_pull(skb, (64 - dword_align_bytes));
+ if (rsi_prepare_beacon(common, skb)) {
+ rsi_dbg(ERR_ZONE, "Failed to prepare beacon\n");
+ return -EINVAL;
+ }
+ skb_queue_tail(&common->tx_queue[MGMT_BEACON_Q], skb);
+ rsi_set_event(&common->tx_thread.event);
+ rsi_dbg(DATA_TX_ZONE, "%s: Added to beacon queue\n", __func__);
+
+ return 0;
+}
+
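
rsi_send_beacon() above pulls the skb forward so the beacon payload starts on a 64-byte boundary (the & 0x3f test, despite the variable's "dword" name). The alignment arithmetic as a standalone sketch:

#include <stdint.h>
#include <stdio.h>

/* Advance p to the next 64-byte boundary, mirroring the
 * skb_pull()-based alignment in rsi_send_beacon(). */
static uint8_t *align64(uint8_t *p)
{
	unsigned long rem = (unsigned long)p & 0x3f;

	return rem ? p + (64 - rem) : p;
}

int main(void)
{
	uint8_t buf[128];
	uint8_t *aligned = align64(buf + 1);

	printf("%p -> %p\n", (void *)(buf + 1), (void *)aligned);
	return 0;
}
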
/**
* rsi_handle_ta_confirm_type() - This function handles the confirm frames.
* @common: Pointer to the driver private structure.
@@ -1426,19 +1595,25 @@ int rsi_set_antenna(struct rsi_common *common, u8 antenna)
static int rsi_handle_ta_confirm_type(struct rsi_common *common,
u8 *msg)
{
+ struct rsi_hw *adapter = common->priv;
u8 sub_type = (msg[15] & 0xff);
+ u16 msg_len = ((u16 *)msg)[0] & 0xfff;
+ u8 offset;
switch (sub_type) {
case BOOTUP_PARAMS_REQUEST:
rsi_dbg(FSM_ZONE, "%s: Boot up params confirm received\n",
__func__);
if (common->fsm_state == FSM_BOOT_PARAMS_SENT) {
+ adapter->eeprom.length = (IEEE80211_ADDR_LEN +
+ WLAN_MAC_MAGIC_WORD_LEN +
+ WLAN_HOST_MODE_LEN);
+ adapter->eeprom.offset = WLAN_MAC_EEPROM_ADDR;
if (rsi_eeprom_read(common)) {
common->fsm_state = FSM_CARD_NOT_READY;
goto out;
- } else {
- common->fsm_state = FSM_EEPROM_READ_MAC_ADDR;
}
+ common->fsm_state = FSM_EEPROM_READ_MAC_ADDR;
} else {
rsi_dbg(INFO_ZONE,
"%s: Received bootup params cfm in %d state\n",
@@ -1447,30 +1622,52 @@ static int rsi_handle_ta_confirm_type(struct rsi_common *common,
}
break;
- case EEPROM_READ_TYPE:
+ case EEPROM_READ:
+ rsi_dbg(FSM_ZONE, "EEPROM READ confirm received\n");
+ if (msg_len <= 0) {
+ rsi_dbg(FSM_ZONE,
+ "%s: [EEPROM_READ] Invalid len %d\n",
+ __func__, msg_len);
+ goto out;
+ }
+ if (msg[16] != MAGIC_WORD) {
+ rsi_dbg(FSM_ZONE,
+ "%s: [EEPROM_READ] Invalid token\n", __func__);
+ common->fsm_state = FSM_CARD_NOT_READY;
+ goto out;
+ }
if (common->fsm_state == FSM_EEPROM_READ_MAC_ADDR) {
- if (msg[16] == MAGIC_WORD) {
- u8 offset = (FRAME_DESC_SZ + WLAN_HOST_MODE_LEN
- + WLAN_MAC_MAGIC_WORD_LEN);
- memcpy(common->mac_addr,
- &msg[offset],
- ETH_ALEN);
- memcpy(&common->fw_ver,
- &msg[offset + ETH_ALEN],
- sizeof(struct version_info));
-
- } else {
+ offset = (FRAME_DESC_SZ + WLAN_HOST_MODE_LEN +
+ WLAN_MAC_MAGIC_WORD_LEN);
+ memcpy(common->mac_addr, &msg[offset], ETH_ALEN);
+ adapter->eeprom.length =
+ ((WLAN_MAC_MAGIC_WORD_LEN + 3) & (~3));
+ adapter->eeprom.offset = WLAN_EEPROM_RFTYPE_ADDR;
+ if (rsi_eeprom_read(common)) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed reading RF band\n",
+ __func__);
common->fsm_state = FSM_CARD_NOT_READY;
- break;
+ goto out;
+ }
+ common->fsm_state = FSM_EEPROM_READ_RF_TYPE;
+ } else if (common->fsm_state == FSM_EEPROM_READ_RF_TYPE) {
+ if ((msg[17] & 0x3) == 0x3) {
+ rsi_dbg(INIT_ZONE, "Dual band supported\n");
+ common->band = NL80211_BAND_5GHZ;
+ common->num_supp_bands = 2;
+ } else if ((msg[17] & 0x3) == 0x1) {
+ rsi_dbg(INIT_ZONE,
+ "Only 2.4Ghz band supported\n");
+ common->band = NL80211_BAND_2GHZ;
+ common->num_supp_bands = 1;
}
if (rsi_send_reset_mac(common))
goto out;
- else
- common->fsm_state = FSM_RESET_MAC_SENT;
+ common->fsm_state = FSM_RESET_MAC_SENT;
} else {
- rsi_dbg(ERR_ZONE,
- "%s: Received eeprom mac addr in %d state\n",
- __func__, common->fsm_state);
+ rsi_dbg(ERR_ZONE, "%s: Invalid EEPROM read type\n",
+ __func__);
return 0;
}
break;
@@ -1527,7 +1724,9 @@ static int rsi_handle_ta_confirm_type(struct rsi_common *common,
return 0;
}
break;
-
+ case WAKEUP_SLEEP_REQUEST:
+ rsi_dbg(INFO_ZONE, "Wakeup/Sleep confirmation.\n");
+ return rsi_handle_ps_confirm(adapter, msg);
default:
rsi_dbg(INFO_ZONE, "%s: Invalid TA confirm pkt received\n",
__func__);
@@ -1590,20 +1789,34 @@ int rsi_mgmt_pkt_recv(struct rsi_common *common, u8 *msg)
rsi_dbg(FSM_ZONE, "%s: Msg Len: %d, Msg Type: %4x\n",
__func__, msg_len, msg_type);
- if (msg_type == TA_CONFIRM_TYPE) {
+ switch (msg_type) {
+ case TA_CONFIRM_TYPE:
return rsi_handle_ta_confirm_type(common, msg);
- } else if (msg_type == CARD_READY_IND) {
+ case CARD_READY_IND:
rsi_dbg(FSM_ZONE, "%s: Card ready indication received\n",
__func__);
return rsi_handle_card_ready(common, msg);
- } else if (msg_type == TX_STATUS_IND) {
+ case TX_STATUS_IND:
if (msg[15] == PROBEREQ_CONFIRM) {
common->mgmt_q_block = false;
rsi_dbg(FSM_ZONE, "%s: Probe confirm received\n",
__func__);
}
- } else {
- return rsi_mgmt_pkt_to_core(common, msg, msg_len, msg_type);
+ break;
+ case BEACON_EVENT_IND:
+ rsi_dbg(INFO_ZONE, "Beacon event\n");
+ if (common->fsm_state != FSM_MAC_INIT_DONE)
+ return -1;
+ if (common->iface_down)
+ return -1;
+ if (!common->beacon_enabled)
+ return -1;
+ rsi_send_beacon(common);
+ break;
+ case RX_DOT11_MGMT:
+ return rsi_mgmt_pkt_to_core(common, msg, msg_len);
+ default:
+ rsi_dbg(INFO_ZONE, "Received packet type: 0x%x\n", msg_type);
}
return 0;
}
diff --git a/drivers/net/wireless/rsi/rsi_91x_ps.c b/drivers/net/wireless/rsi/rsi_91x_ps.c
new file mode 100644
index 0000000..48c79f0
--- /dev/null
+++ b/drivers/net/wireless/rsi/rsi_91x_ps.c
@@ -0,0 +1,146 @@
+/**
+ * Copyright (c) 2014 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/if.h>
+#include <linux/version.h>
+#include "rsi_debugfs.h"
+#include "rsi_mgmt.h"
+#include "rsi_common.h"
+#include "rsi_ps.h"
+
+char *str_psstate(enum ps_state state)
+{
+ switch (state) {
+ case PS_NONE:
+ return "PS_NONE";
+ case PS_DISABLE_REQ_SENT:
+ return "PS_DISABLE_REQ_SENT";
+ case PS_ENABLE_REQ_SENT:
+ return "PS_ENABLE_REQ_SENT";
+ case PS_ENABLED:
+ return "PS_ENABLED";
+ default:
+ return "INVALID_STATE";
+ }
+ return "INVALID_STATE";
+}
+
+static inline void rsi_modify_ps_state(struct rsi_hw *adapter,
+ enum ps_state nstate)
+{
+ rsi_dbg(INFO_ZONE, "PS state changed %s => %s\n",
+ str_psstate(adapter->ps_state),
+ str_psstate(nstate));
+
+ adapter->ps_state = nstate;
+}
+
+void rsi_default_ps_params(struct rsi_hw *adapter)
+{
+ struct rsi_ps_info *ps_info = &adapter->ps_info;
+
+ ps_info->enabled = true;
+ ps_info->sleep_type = RSI_SLEEP_TYPE_LP;
+ ps_info->tx_threshold = 0;
+ ps_info->rx_threshold = 0;
+ ps_info->tx_hysterisis = 0;
+ ps_info->rx_hysterisis = 0;
+ ps_info->monitor_interval = 0;
+ ps_info->listen_interval = RSI_DEF_LISTEN_INTERVAL;
+ ps_info->num_bcns_per_lis_int = 0;
+ ps_info->dtim_interval_duration = 0;
+ ps_info->num_dtims_per_sleep = 0;
+ ps_info->deep_sleep_wakeup_period = RSI_DEF_DS_WAKEUP_PERIOD;
+}
+
+void rsi_enable_ps(struct rsi_hw *adapter)
+{
+ if (adapter->ps_state != PS_NONE) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Cannot accept enable PS in %s state\n",
+ __func__, str_psstate(adapter->ps_state));
+ return;
+ }
+
+ if (rsi_send_ps_request(adapter, true)) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to send PS request to device\n",
+ __func__);
+ return;
+ }
+
+ rsi_modify_ps_state(adapter, PS_ENABLE_REQ_SENT);
+}
+
+void rsi_disable_ps(struct rsi_hw *adapter)
+{
+ if (adapter->ps_state != PS_ENABLED) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Cannot accept disable PS in %s state\n",
+ __func__, str_psstate(adapter->ps_state));
+ return;
+ }
+
+ if (rsi_send_ps_request(adapter, false)) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to send PS request to device\n",
+ __func__);
+ return;
+ }
+
+ rsi_modify_ps_state(adapter, PS_DISABLE_REQ_SENT);
+}
+
+void rsi_conf_uapsd(struct rsi_hw *adapter)
+{
+ int ret;
+
+ if (adapter->ps_state != PS_ENABLED)
+ return;
+
+ ret = rsi_send_ps_request(adapter, false);
+ if (!ret)
+ ret = rsi_send_ps_request(adapter, true);
+ if (ret)
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to send PS request to device\n",
+ __func__);
+}
+
+int rsi_handle_ps_confirm(struct rsi_hw *adapter, u8 *msg)
+{
+ u16 cfm_type = get_unaligned_le16(msg + PS_CONFIRM_INDEX);
+
+ switch (cfm_type) {
+ case RSI_SLEEP_REQUEST:
+ if (adapter->ps_state == PS_ENABLE_REQ_SENT)
+ rsi_modify_ps_state(adapter, PS_ENABLED);
+ break;
+ case RSI_WAKEUP_REQUEST:
+ if (adapter->ps_state == PS_DISABLE_REQ_SENT)
+ rsi_modify_ps_state(adapter, PS_NONE);
+ break;
+ default:
+ rsi_dbg(ERR_ZONE,
+ "Invalid PS confirm type %x in state %s\n",
+ cfm_type, str_psstate(adapter->ps_state));
+ return -1;
+ }
+
+ return 0;
+}
+
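The power save handshake above is asynchronous: rsi_enable_ps() and
rsi_disable_ps() only move the adapter into an intermediate *_REQ_SENT
state, and rsi_handle_ps_confirm() settles it once the firmware answers.
Condensed as a pure transition function (an illustrative sketch, not part
of the patch; RSI_SLEEP_REQUEST/RSI_WAKEUP_REQUEST are the confirm types
added to rsi_mgmt.h later in this patch):

/* Sketch: PS state transitions driven by firmware confirms. */
static enum ps_state ps_next_state(enum ps_state cur, u16 cfm_type)
{
	switch (cur) {
	case PS_ENABLE_REQ_SENT:
		/* firmware acknowledged the sleep request */
		return cfm_type == RSI_SLEEP_REQUEST ? PS_ENABLED : cur;
	case PS_DISABLE_REQ_SENT:
		/* firmware acknowledged the wakeup request */
		return cfm_type == RSI_WAKEUP_REQUEST ? PS_NONE : cur;
	default:
		/* confirms are ignored unless a request is pending */
		return cur;
	}
}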
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index e5ea99b..8d3a483 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -138,12 +138,15 @@ static int rsi_issue_sdiocommand(struct sdio_func *func,
static void rsi_handle_interrupt(struct sdio_func *function)
{
struct rsi_hw *adapter = sdio_get_drvdata(function);
+ struct rsi_91x_sdiodev *dev =
+ (struct rsi_91x_sdiodev *)adapter->rsi_dev;
if (adapter->priv->fsm_state == FSM_FW_NOT_LOADED)
return;
- sdio_release_host(function);
+
+ dev->sdio_irq_task = current;
rsi_interrupt_handler(adapter);
- sdio_claim_host(function);
+ dev->sdio_irq_task = NULL;
}
/**
@@ -219,26 +222,18 @@ static void rsi_reset_card(struct sdio_func *pfunction)
if (err)
rsi_dbg(ERR_ZONE, "%s: CMD0 failed : %d\n", __func__, err);
- if (!host->ocr_avail) {
- /* Issue CMD5, arg = 0 */
- err = rsi_issue_sdiocommand(pfunction,
- SD_IO_SEND_OP_COND,
- 0,
- (MMC_RSP_R4 | MMC_CMD_BCR),
- &resp);
- if (err)
- rsi_dbg(ERR_ZONE, "%s: CMD5 failed : %d\n",
- __func__, err);
- host->ocr_avail = resp;
- }
+ /* Issue CMD5, arg = 0 */
+ err = rsi_issue_sdiocommand(pfunction, SD_IO_SEND_OP_COND, 0,
+ (MMC_RSP_R4 | MMC_CMD_BCR), &resp);
+ if (err)
+ rsi_dbg(ERR_ZONE, "%s: CMD5 failed : %d\n", __func__, err);
+ card->ocr = resp;
/* Issue CMD5, arg = ocr. Wait till card is ready */
for (i = 0; i < 100; i++) {
- err = rsi_issue_sdiocommand(pfunction,
- SD_IO_SEND_OP_COND,
- host->ocr_avail,
- (MMC_RSP_R4 | MMC_CMD_BCR),
- &resp);
+ err = rsi_issue_sdiocommand(pfunction, SD_IO_SEND_OP_COND,
+ card->ocr,
+ (MMC_RSP_R4 | MMC_CMD_BCR), &resp);
if (err) {
rsi_dbg(ERR_ZONE, "%s: CMD5 failed : %d\n",
__func__, err);
@@ -415,14 +410,16 @@ int rsi_sdio_read_register(struct rsi_hw *adapter,
u8 fun_num = 0;
int status;
- sdio_claim_host(dev->pfunction);
+ if (likely(dev->sdio_irq_task != current))
+ sdio_claim_host(dev->pfunction);
if (fun_num == 0)
*data = sdio_f0_readb(dev->pfunction, addr, &status);
else
*data = sdio_readb(dev->pfunction, addr, &status);
- sdio_release_host(dev->pfunction);
+ if (likely(dev->sdio_irq_task != current))
+ sdio_release_host(dev->pfunction);
return status;
}
@@ -446,14 +443,16 @@ int rsi_sdio_write_register(struct rsi_hw *adapter,
(struct rsi_91x_sdiodev *)adapter->rsi_dev;
int status = 0;
- sdio_claim_host(dev->pfunction);
+ if (likely(dev->sdio_irq_task != current))
+ sdio_claim_host(dev->pfunction);
if (function == 0)
sdio_f0_writeb(dev->pfunction, *data, addr, &status);
else
sdio_writeb(dev->pfunction, *data, addr, &status);
- sdio_release_host(dev->pfunction);
+ if (likely(dev->sdio_irq_task != current))
+ sdio_release_host(dev->pfunction);
return status;
}
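The conditional claim/release pattern above (and in the multi-byte
accessors in the following hunks) exists because these helpers are now
also called from rsi_handle_interrupt(), where the SDIO core already
holds the host; re-claiming from the same task would deadlock. The
repeated check could be factored into helpers along these lines (a
sketch, not part of the patch):

/* Sketch: claim the host only outside the marked IRQ context. */
static void rsi_sdio_lock(struct rsi_91x_sdiodev *dev)
{
	if (likely(dev->sdio_irq_task != current))
		sdio_claim_host(dev->pfunction);
}

static void rsi_sdio_unlock(struct rsi_91x_sdiodev *dev)
{
	if (likely(dev->sdio_irq_task != current))
		sdio_release_host(dev->pfunction);
}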
@@ -498,11 +497,13 @@ static int rsi_sdio_read_register_multiple(struct rsi_hw *adapter,
(struct rsi_91x_sdiodev *)adapter->rsi_dev;
u32 status;
- sdio_claim_host(dev->pfunction);
+ if (likely(dev->sdio_irq_task != current))
+ sdio_claim_host(dev->pfunction);
status = sdio_readsb(dev->pfunction, data, addr, count);
- sdio_release_host(dev->pfunction);
+ if (likely(dev->sdio_irq_task != current))
+ sdio_release_host(dev->pfunction);
if (status != 0)
rsi_dbg(ERR_ZONE, "%s: Synch Cmd53 read failed\n", __func__);
@@ -540,11 +541,13 @@ int rsi_sdio_write_register_multiple(struct rsi_hw *adapter,
dev->write_fail++;
}
- sdio_claim_host(dev->pfunction);
+ if (likely(dev->sdio_irq_task != current))
+ sdio_claim_host(dev->pfunction);
status = sdio_writesb(dev->pfunction, addr, data, count);
- sdio_release_host(dev->pfunction);
+ if (likely(dev->sdio_irq_task != current))
+ sdio_release_host(dev->pfunction);
if (status) {
rsi_dbg(ERR_ZONE, "%s: Synch Cmd53 write failed %d\n",
@@ -581,7 +584,6 @@ static int rsi_sdio_load_data_master_write(struct rsi_hw *adapter,
}
for (offset = 0, i = 0; i < num_blocks; i++, offset += block_size) {
- memset(temp_buf, 0, block_size);
memcpy(temp_buf, ta_firmware + offset, block_size);
lsb_address = (u16)base_address;
status = rsi_sdio_write_register_multiple
@@ -857,7 +859,7 @@ static int rsi_init_sdio_interface(struct rsi_hw *adapter,
sdio_release_host(pfunction);
adapter->determine_event_timeout = rsi_sdio_determine_event_timeout;
- adapter->check_hw_queue_status = rsi_sdio_read_buffer_status_register;
+ adapter->check_hw_queue_status = rsi_sdio_check_buffer_status;
#ifdef CONFIG_RSI_DEBUGFS
adapter->num_debugfs_entries = MAX_DEBUGFS_ENTRIES;
@@ -941,6 +943,84 @@ fail:
return 1;
}
+static void ulp_read_write(struct rsi_hw *adapter, u16 addr, u32 data,
+ u16 len_in_bits)
+{
+ rsi_sdio_master_reg_write(adapter, RSI_GSPI_DATA_REG1,
+ ((addr << 6) | ((data >> 16) & 0xffff)), 2);
+ rsi_sdio_master_reg_write(adapter, RSI_GSPI_DATA_REG0,
+ (data & 0xffff), 2);
+ rsi_sdio_master_reg_write(adapter, RSI_GSPI_CTRL_REG0,
+ RSI_GSPI_CTRL_REG0_VALUE, 2);
+ rsi_sdio_master_reg_write(adapter, RSI_GSPI_CTRL_REG1,
+ ((len_in_bits - 1) | RSI_GSPI_TRIG), 2);
+ msleep(20);
+}
+
+/* This function resets and re-initializes the chip. */
+static void rsi_reset_chip(struct rsi_hw *adapter)
+{
+ __le32 data;
+ u8 sdio_interrupt_status = 0;
+ u8 request = 1;
+ int ret;
+
+ rsi_dbg(INFO_ZONE, "Writing disable to wakeup register\n");
+ ret = rsi_sdio_write_register(adapter, 0, SDIO_WAKEUP_REG, &request);
+ if (ret < 0) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to write SDIO wakeup register\n", __func__);
+ return;
+ }
+ msleep(20);
+ ret = rsi_sdio_read_register(adapter, RSI_FN1_INT_REGISTER,
+ &sdio_interrupt_status);
+ if (ret < 0) {
+ rsi_dbg(ERR_ZONE, "%s: Failed to Read Intr Status Register\n",
+ __func__);
+ return;
+ }
+ rsi_dbg(INFO_ZONE, "%s: Intr Status Register value = %d\n",
+ __func__, sdio_interrupt_status);
+
+ /* Put Thread-Arch processor on hold */
+ if (rsi_sdio_master_access_msword(adapter, TA_BASE_ADDR)) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Unable to set ms word to common reg\n",
+ __func__);
+ return;
+ }
+
+ data = TA_HOLD_THREAD_VALUE;
+ if (rsi_sdio_write_register_multiple(adapter, TA_HOLD_THREAD_REG |
+ RSI_SD_REQUEST_MASTER,
+ (u8 *)&data, 4)) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Unable to hold Thread-Arch processor threads\n",
+ __func__);
+ return;
+ }
+
+	/* This msleep ensures that the Thread-Arch processor goes on hold
+	 * and that any pending DMA transfers to the RF SPI in the device
+	 * finish.
+	 */
+ msleep(100);
+
+ ulp_read_write(adapter, RSI_ULP_RESET_REG, RSI_ULP_WRITE_0, 32);
+ ulp_read_write(adapter, RSI_WATCH_DOG_TIMER_1, RSI_ULP_WRITE_2, 32);
+ ulp_read_write(adapter, RSI_WATCH_DOG_TIMER_2, RSI_ULP_WRITE_0, 32);
+ ulp_read_write(adapter, RSI_WATCH_DOG_DELAY_TIMER_1, RSI_ULP_WRITE_50,
+ 32);
+ ulp_read_write(adapter, RSI_WATCH_DOG_DELAY_TIMER_2, RSI_ULP_WRITE_0,
+ 32);
+ ulp_read_write(adapter, RSI_WATCH_DOG_TIMER_ENABLE,
+ RSI_ULP_TIMER_ENABLE, 32);
+	/* This msleep is sufficient for the ULP read/write
+	 * operations of the chip reset to complete.
+	 */
+ msleep(500);
+}
+
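rsi_reset_chip() resets the device indirectly: it halts the Thread-Arch
processor, then programs the ULP watchdog over GSPI so that the timer
expiry performs the reset. The register program reduces to a fixed
sequence (sketch, using the constants this patch adds to rsi_hal.h):

/* Sketch: the ULP watchdog program issued by rsi_reset_chip();
 * each entry is written with ulp_read_write(adapter, addr, data, 32).
 */
static const struct { u16 addr; u32 data; } rsi_wdt_reset_prog[] = {
	{ RSI_ULP_RESET_REG,           RSI_ULP_WRITE_0 },
	{ RSI_WATCH_DOG_TIMER_1,       RSI_ULP_WRITE_2 },
	{ RSI_WATCH_DOG_TIMER_2,       RSI_ULP_WRITE_0 },
	{ RSI_WATCH_DOG_DELAY_TIMER_1, RSI_ULP_WRITE_50 },
	{ RSI_WATCH_DOG_DELAY_TIMER_2, RSI_ULP_WRITE_0 },
	{ RSI_WATCH_DOG_TIMER_ENABLE,  RSI_ULP_TIMER_ENABLE },
};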
/**
* rsi_disconnect() - This function performs the reverse of the probe function.
* @pfunction: Pointer to the sdio_func structure.
@@ -956,17 +1036,26 @@ static void rsi_disconnect(struct sdio_func *pfunction)
return;
dev = (struct rsi_91x_sdiodev *)adapter->rsi_dev;
+ sdio_claim_host(pfunction);
+ sdio_release_irq(pfunction);
+ sdio_release_host(pfunction);
+ mdelay(10);
- dev->write_fail = 2;
rsi_mac80211_detach(adapter);
+ mdelay(10);
+
+ /* Reset Chip */
+ rsi_reset_chip(adapter);
- sdio_claim_host(pfunction);
- sdio_release_irq(pfunction);
- sdio_disable_func(pfunction);
- rsi_91x_deinit(adapter);
/* Resetting to take care of the case, where-in driver is re-loaded */
+ sdio_claim_host(pfunction);
rsi_reset_card(pfunction);
+ sdio_disable_func(pfunction);
sdio_release_host(pfunction);
+ dev->write_fail = 2;
+ rsi_91x_deinit(adapter);
+ rsi_dbg(ERR_ZONE, "##### RSI SDIO device disconnected #####\n");
+
}
#ifdef CONFIG_PM
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
index df2a63b..8e2a95c 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c
@@ -69,20 +69,37 @@ int rsi_sdio_master_access_msword(struct rsi_hw *adapter, u16 ms_word)
static int rsi_process_pkt(struct rsi_common *common)
{
struct rsi_hw *adapter = common->priv;
+ struct rsi_91x_sdiodev *dev =
+ (struct rsi_91x_sdiodev *)adapter->rsi_dev;
u8 num_blks = 0;
u32 rcv_pkt_len = 0;
int status = 0;
+ u8 value = 0;
- status = rsi_sdio_read_register(adapter,
- SDIO_RX_NUM_BLOCKS_REG,
- &num_blks);
+ num_blks = ((adapter->interrupt_status & 1) |
+ ((adapter->interrupt_status >> RECV_NUM_BLOCKS) << 1));
- if (status) {
- rsi_dbg(ERR_ZONE,
- "%s: Failed to read pkt length from the card:\n",
- __func__);
- return status;
+ if (!num_blks) {
+ status = rsi_sdio_read_register(adapter,
+ SDIO_RX_NUM_BLOCKS_REG,
+ &value);
+ if (status) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to read pkt length from the card:\n",
+ __func__);
+ return status;
+ }
+ num_blks = value & 0x1f;
+ }
+
+ if (dev->write_fail == 2)
+ rsi_sdio_ack_intr(common->priv, (1 << MSDU_PKT_PENDING));
+
+ if (unlikely(!num_blks)) {
+ dev->write_fail = 2;
+ return -1;
}
+
rcv_pkt_len = (num_blks * 256);
common->rx_data_pkt = kmalloc(rcv_pkt_len, GFP_KERNEL);
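The firmware now encodes the pending block count directly in the
interrupt status word, making the SDIO_RX_NUM_BLOCKS_REG read a fallback
only: bit 0 carries the count's least significant bit and the bits from
RECV_NUM_BLOCKS upwards carry the rest. A worked decode (sketch):

/* Sketch: block count packed into the interrupt status.
 * Example: isr_status = 0x31 -> bit0 = 1, bits[7:4] = 3 -> 7 blocks.
 */
static u8 rsi_decode_num_blks(u32 isr_status)
{
	return (isr_status & 1) | ((isr_status >> RECV_NUM_BLOCKS) << 1);
}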
@@ -213,7 +230,7 @@ void rsi_interrupt_handler(struct rsi_hw *adapter)
dev->rx_info.sdio_int_counter++;
do {
- mutex_lock(&common->tx_rxlock);
+ mutex_lock(&common->rx_lock);
status = rsi_sdio_read_register(common->priv,
RSI_FN1_INT_REGISTER,
&isr_status);
@@ -221,14 +238,15 @@ void rsi_interrupt_handler(struct rsi_hw *adapter)
rsi_dbg(ERR_ZONE,
"%s: Failed to Read Intr Status Register\n",
__func__);
- mutex_unlock(&common->tx_rxlock);
+ mutex_unlock(&common->rx_lock);
return;
}
+ adapter->interrupt_status = isr_status;
if (isr_status == 0) {
rsi_set_event(&common->tx_thread.event);
dev->rx_info.sdio_intr_status_zero++;
- mutex_unlock(&common->tx_rxlock);
+ mutex_unlock(&common->rx_lock);
return;
}
@@ -241,10 +259,12 @@ void rsi_interrupt_handler(struct rsi_hw *adapter)
switch (isr_type) {
case BUFFER_AVAILABLE:
- dev->rx_info.watch_bufferfull_count = 0;
- dev->rx_info.buffer_full = false;
- dev->rx_info.semi_buffer_full = false;
- dev->rx_info.mgmt_buffer_full = false;
+ status = rsi_sdio_check_buffer_status(adapter,
+ 0);
+ if (status < 0)
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to check buffer status\n",
+ __func__);
rsi_sdio_ack_intr(common->priv,
(1 << PKT_BUFF_AVAILABLE));
rsi_set_event(&common->tx_thread.event);
@@ -252,7 +272,7 @@ void rsi_interrupt_handler(struct rsi_hw *adapter)
rsi_dbg(ISR_ZONE,
"%s: ==> BUFFER_AVAILABLE <==\n",
__func__);
- dev->rx_info.buf_available_counter++;
+ dev->buff_status_updated = true;
break;
case FIRMWARE_ASSERT_IND:
@@ -286,7 +306,7 @@ void rsi_interrupt_handler(struct rsi_hw *adapter)
rsi_dbg(ERR_ZONE,
"%s: Failed to read pkt\n",
__func__);
- mutex_unlock(&common->tx_rxlock);
+ mutex_unlock(&common->rx_lock);
return;
}
break;
@@ -301,28 +321,28 @@ void rsi_interrupt_handler(struct rsi_hw *adapter)
}
isr_status ^= BIT(isr_type - 1);
} while (isr_status);
- mutex_unlock(&common->tx_rxlock);
+ mutex_unlock(&common->rx_lock);
} while (1);
}
-/**
- * rsi_sdio_read_buffer_status_register() - This function is used to the read
- * buffer status register and set
- * relevant fields in
- * rsi_91x_sdiodev struct.
- * @adapter: Pointer to the driver hw structure.
- * @q_num: The Q number whose status is to be found.
- *
- * Return: status: -1 on failure or else queue full/stop is indicated.
+/* This function is used to read buffer status register and
+ * set relevant fields in rsi_91x_sdiodev struct.
*/
-int rsi_sdio_read_buffer_status_register(struct rsi_hw *adapter, u8 q_num)
+int rsi_sdio_check_buffer_status(struct rsi_hw *adapter, u8 q_num)
{
struct rsi_common *common = adapter->priv;
struct rsi_91x_sdiodev *dev =
(struct rsi_91x_sdiodev *)adapter->rsi_dev;
u8 buf_status = 0;
int status = 0;
+ static int counter = 4;
+
+ if (!dev->buff_status_updated && counter) {
+ counter--;
+ goto out;
+ }
+ dev->buff_status_updated = false;
status = rsi_sdio_read_register(common->priv,
RSI_DEVICE_BUFFER_STATUS_REGISTER,
&buf_status);
@@ -357,10 +377,16 @@ int rsi_sdio_read_buffer_status_register(struct rsi_hw *adapter, u8 q_num)
dev->rx_info.semi_buffer_full = false;
}
+ if (dev->rx_info.mgmt_buffer_full || dev->rx_info.buf_full_counter)
+ counter = 1;
+ else
+ counter = 4;
+
+out:
if ((q_num == MGMT_SOFT_Q) && (dev->rx_info.mgmt_buffer_full))
return QUEUE_FULL;
- if (dev->rx_info.buffer_full)
+ if ((q_num < MGMT_SOFT_Q) && (dev->rx_info.buffer_full))
return QUEUE_FULL;
return QUEUE_NOT_FULL;
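The static counter turns buffer-status checking into a rate limit: only
every fifth call actually reads RSI_DEVICE_BUFFER_STATUS_REGISTER (every
second call once a full condition was seen), unless a BUFFER_AVAILABLE
interrupt set buff_status_updated and forces a fresh read. Note that the
counter is function-static, so it is shared across adapters; with a
single RSI SDIO device per host that is harmless. The control flow,
condensed (sketch; buffers_full stands in for the mgmt/data full flags):

/* Sketch of the throttle in rsi_sdio_check_buffer_status(). */
if (!dev->buff_status_updated && counter) {
	counter--;
	goto out;			/* reuse the cached rx_info flags */
}
dev->buff_status_updated = false;
/* ... read RSI_DEVICE_BUFFER_STATUS_REGISTER, update rx_info ... */
counter = buffers_full ? 1 : 4;		/* poll harder under pressure */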
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index bcd7f45..81df09d 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -29,19 +29,24 @@
* Return: status: 0 on success, a negative error code on failure.
*/
static int rsi_usb_card_write(struct rsi_hw *adapter,
- void *buf,
+ u8 *buf,
u16 len,
u8 endpoint)
{
struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
int status;
- s32 transfer;
+ u8 *seg = dev->tx_buffer;
+ int transfer;
+ int ep = dev->bulkout_endpoint_addr[endpoint - 1];
+ memset(seg, 0, len + RSI_USB_TX_HEAD_ROOM);
+ memcpy(seg + RSI_USB_TX_HEAD_ROOM, buf, len);
+ len += RSI_USB_TX_HEAD_ROOM;
+ transfer = len;
status = usb_bulk_msg(dev->usbdev,
- usb_sndbulkpipe(dev->usbdev,
- dev->bulkout_endpoint_addr[endpoint - 1]),
- buf,
- len,
+ usb_sndbulkpipe(dev->usbdev, ep),
+ (void *)seg,
+ (int)len,
&transfer,
HZ * 5);
@@ -68,23 +73,19 @@ static int rsi_write_multiple(struct rsi_hw *adapter,
u8 *data,
u32 count)
{
- struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
- u8 *seg = dev->tx_buffer;
+	struct rsi_91x_usbdev *dev;
-	if (dev->write_fail)
-		return 0;
+	if (!adapter)
+		return -ENODEV;
+
+	dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
- if (endpoint == MGMT_EP) {
- memset(seg, 0, RSI_USB_TX_HEAD_ROOM);
- memcpy(seg + RSI_USB_TX_HEAD_ROOM, data, count);
- } else {
- seg = ((u8 *)data - RSI_USB_TX_HEAD_ROOM);
- }
+ if (endpoint == 0)
+ return -EINVAL;
- return rsi_usb_card_write(adapter,
- seg,
- count + RSI_USB_TX_HEAD_ROOM,
- endpoint);
+ if (dev->write_fail)
+ return -ENETDOWN;
+
+ return rsi_usb_card_write(adapter, data, count, endpoint);
}
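rsi_usb_card_write() now always stages the frame into dev->tx_buffer
behind RSI_USB_TX_HEAD_ROOM bytes of zeroed headroom, so
rsi_write_multiple() no longer special-cases management frames. The
resulting bulk-out buffer (sketch):

/*
 * Sketch of the buffer handed to usb_bulk_msg():
 *
 *   dev->tx_buffer
 *   +------------------------+---------------------------+
 *   | RSI_USB_TX_HEAD_ROOM   | frame payload (len bytes) |
 *   | zeroed headroom        | copied from 'buf'         |
 *   +------------------------+---------------------------+
 *   transfer length = len + RSI_USB_TX_HEAD_ROOM
 */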
/**
@@ -161,10 +162,13 @@ static int rsi_usb_reg_read(struct usb_device *usbdev,
u8 *buf;
int status = -ENOMEM;
- buf = kmalloc(0x04, GFP_KERNEL);
+ buf = kmalloc(RSI_USB_CTRL_BUF_SIZE, GFP_KERNEL);
if (!buf)
return status;
+	if (len > RSI_USB_CTRL_BUF_SIZE) {
+		kfree(buf);	/* do not leak the transfer buffer */
+		return -EINVAL;
+	}
+
status = usb_control_msg(usbdev,
usb_rcvctrlpipe(usbdev, 0),
USB_VENDOR_REGISTER_READ,
@@ -203,10 +207,13 @@ static int rsi_usb_reg_write(struct usb_device *usbdev,
u8 *usb_reg_buf;
int status = -ENOMEM;
- usb_reg_buf = kmalloc(0x04, GFP_KERNEL);
+ usb_reg_buf = kmalloc(RSI_USB_CTRL_BUF_SIZE, GFP_KERNEL);
if (!usb_reg_buf)
return status;
+	if (len > RSI_USB_CTRL_BUF_SIZE) {
+		kfree(usb_reg_buf);	/* do not leak the transfer buffer */
+		return -EINVAL;
+	}
+
usb_reg_buf[0] = (value & 0x00ff);
usb_reg_buf[1] = (value & 0xff00) >> 8;
usb_reg_buf[2] = 0x0;
@@ -380,10 +387,11 @@ static int rsi_usb_host_intf_write_pkt(struct rsi_hw *adapter,
u8 *pkt,
u32 len)
{
- u32 queueno = ((pkt[1] >> 4) & 0xf);
+ u32 queueno = ((pkt[1] >> 4) & 0x7);
u8 endpoint;
- endpoint = ((queueno == RSI_WIFI_MGMT_Q) ? MGMT_EP : DATA_EP);
+ endpoint = ((queueno == RSI_WIFI_MGMT_Q || queueno == RSI_WIFI_DATA_Q ||
+ queueno == RSI_COEX_Q) ? WLAN_EP : BT_EP);
return rsi_write_multiple(adapter,
endpoint,
@@ -396,8 +404,15 @@ static int rsi_usb_master_reg_read(struct rsi_hw *adapter, u32 reg,
{
struct usb_device *usbdev =
((struct rsi_91x_usbdev *)adapter->rsi_dev)->usbdev;
+ u16 temp;
+ int ret;
+
+ ret = rsi_usb_reg_read(usbdev, reg, &temp, len);
+ if (ret < 0)
+ return ret;
+ *value = temp;
- return rsi_usb_reg_read(usbdev, reg, (u16 *)value, len);
+ return 0;
}
static int rsi_usb_master_reg_write(struct rsi_hw *adapter,
@@ -424,7 +439,6 @@ static int rsi_usb_load_data_master_write(struct rsi_hw *adapter,
rsi_dbg(INFO_ZONE, "num_blocks: %d\n", num_blocks);
for (cur_indx = 0, i = 0; i < num_blocks; i++, cur_indx += block_size) {
- memset(temp_buf, 0, block_size);
memcpy(temp_buf, ta_firmware + cur_indx, block_size);
status = rsi_usb_write_register_multiple(adapter, base_address,
(u8 *)(temp_buf),
@@ -558,6 +572,77 @@ fail_tx:
return status;
}
+static int usb_ulp_read_write(struct rsi_hw *adapter, u16 addr, u32 data,
+ u16 len_in_bits)
+{
+ int ret;
+
+ ret = rsi_usb_master_reg_write
+ (adapter, RSI_GSPI_DATA_REG1,
+ ((addr << 6) | ((data >> 16) & 0xffff)), 2);
+ if (ret < 0)
+ return ret;
+
+ ret = rsi_usb_master_reg_write(adapter, RSI_GSPI_DATA_REG0,
+ (data & 0xffff), 2);
+ if (ret < 0)
+ return ret;
+
+ /* Initializing GSPI for ULP read/writes */
+ rsi_usb_master_reg_write(adapter, RSI_GSPI_CTRL_REG0,
+ RSI_GSPI_CTRL_REG0_VALUE, 2);
+
+ ret = rsi_usb_master_reg_write(adapter, RSI_GSPI_CTRL_REG1,
+ ((len_in_bits - 1) | RSI_GSPI_TRIG), 2);
+ if (ret < 0)
+ return ret;
+
+ msleep(20);
+
+ return 0;
+}
+
+static int rsi_reset_card(struct rsi_hw *adapter)
+{
+ int ret;
+
+ rsi_dbg(INFO_ZONE, "Resetting Card...\n");
+ rsi_usb_master_reg_write(adapter, RSI_TA_HOLD_REG, 0xE, 4);
+
+	/* This msleep ensures that the Thread-Arch processor goes on hold
+	 * and that any pending DMA transfers to the RF in the device finish.
+	 */
+ msleep(100);
+
+ ret = usb_ulp_read_write(adapter, RSI_WATCH_DOG_TIMER_1,
+ RSI_ULP_WRITE_2, 32);
+ if (ret < 0)
+ goto fail;
+ ret = usb_ulp_read_write(adapter, RSI_WATCH_DOG_TIMER_2,
+ RSI_ULP_WRITE_0, 32);
+ if (ret < 0)
+ goto fail;
+ ret = usb_ulp_read_write(adapter, RSI_WATCH_DOG_DELAY_TIMER_1,
+ RSI_ULP_WRITE_50, 32);
+ if (ret < 0)
+ goto fail;
+ ret = usb_ulp_read_write(adapter, RSI_WATCH_DOG_DELAY_TIMER_2,
+ RSI_ULP_WRITE_0, 32);
+ if (ret < 0)
+ goto fail;
+ ret = usb_ulp_read_write(adapter, RSI_WATCH_DOG_TIMER_ENABLE,
+ RSI_ULP_TIMER_ENABLE, 32);
+ if (ret < 0)
+ goto fail;
+
+ rsi_dbg(INFO_ZONE, "Reset card done\n");
+ return ret;
+
+fail:
+ rsi_dbg(ERR_ZONE, "Reset card failed\n");
+ return ret;
+}
+
/**
* rsi_probe() - This function is called by kernel when the driver provided
* Vendor and device IDs are matched. All the initialization
@@ -641,6 +726,7 @@ static void rsi_disconnect(struct usb_interface *pfunction)
return;
rsi_mac80211_detach(adapter);
+ rsi_reset_card(adapter);
rsi_deinit_usb_interface(adapter);
rsi_91x_deinit(adapter);
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
index d3e0a07..465692b 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c
@@ -37,14 +37,14 @@ void rsi_usb_rx_thread(struct rsi_common *common)
if (atomic_read(&dev->rx_thread.thread_done))
goto out;
- mutex_lock(&common->tx_rxlock);
+ mutex_lock(&common->rx_lock);
status = rsi_read_pkt(common, 0);
if (status) {
rsi_dbg(ERR_ZONE, "%s: Failed To read data", __func__);
- mutex_unlock(&common->tx_rxlock);
+ mutex_unlock(&common->rx_lock);
return;
}
- mutex_unlock(&common->tx_rxlock);
+ mutex_unlock(&common->rx_lock);
rsi_reset_event(&dev->rx_thread.event);
if (adapter->rx_urb_submit(adapter)) {
rsi_dbg(ERR_ZONE,
diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h
index 4434969..e579d69 100644
--- a/drivers/net/wireless/rsi/rsi_common.h
+++ b/drivers/net/wireless/rsi/rsi_common.h
@@ -83,4 +83,5 @@ u16 rsi_get_connected_channel(struct rsi_hw *adapter);
struct rsi_hw *rsi_91x_init(void);
void rsi_91x_deinit(struct rsi_hw *adapter);
int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len);
+struct rsi_sta *rsi_find_sta(struct rsi_common *common, u8 *mac_addr);
#endif
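rsi_find_sta() is only declared here; one plausible shape, assuming the
stations[] array and max_stations field this patch adds to struct
rsi_common in rsi_main.h (a sketch under that assumption, not the
patch's actual implementation):

/* Sketch: look up a connected station by MAC address. */
struct rsi_sta *rsi_find_sta(struct rsi_common *common, u8 *mac_addr)
{
	int i;

	for (i = 0; i < common->max_stations; i++) {
		if (!common->stations[i].sta)
			continue;
		if (ether_addr_equal(common->stations[i].sta->addr,
				     mac_addr))
			return &common->stations[i];
	}
	return NULL;
}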
diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h
index 902dc54..7c14505 100644
--- a/drivers/net/wireless/rsi/rsi_hal.h
+++ b/drivers/net/wireless/rsi/rsi_hal.h
@@ -52,6 +52,39 @@
#define FW_LOADING_SUCCESSFUL 'S'
#define LOADING_INITIATED '1'
+#define RSI_ULP_RESET_REG 0x161
+#define RSI_WATCH_DOG_TIMER_1 0x16c
+#define RSI_WATCH_DOG_TIMER_2 0x16d
+#define RSI_WATCH_DOG_DELAY_TIMER_1 0x16e
+#define RSI_WATCH_DOG_DELAY_TIMER_2 0x16f
+#define RSI_WATCH_DOG_TIMER_ENABLE 0x170
+
+#define RSI_ULP_WRITE_0 00
+#define RSI_ULP_WRITE_2 02
+#define RSI_ULP_WRITE_50 50
+
+#define RSI_RESTART_WDT BIT(11)
+#define RSI_BYPASS_ULP_ON_WDT BIT(1)
+
+#define RSI_ULP_TIMER_ENABLE ((0xaa000) | RSI_RESTART_WDT | \
+ RSI_BYPASS_ULP_ON_WDT)
+#define RSI_RF_SPI_PROG_REG_BASE_ADDR 0x40080000
+
+#define RSI_GSPI_CTRL_REG0 (RSI_RF_SPI_PROG_REG_BASE_ADDR)
+#define RSI_GSPI_CTRL_REG1 (RSI_RF_SPI_PROG_REG_BASE_ADDR + 0x2)
+#define RSI_GSPI_DATA_REG0 (RSI_RF_SPI_PROG_REG_BASE_ADDR + 0x4)
+#define RSI_GSPI_DATA_REG1 (RSI_RF_SPI_PROG_REG_BASE_ADDR + 0x6)
+#define RSI_GSPI_DATA_REG2 (RSI_RF_SPI_PROG_REG_BASE_ADDR + 0x8)
+
+#define RSI_GSPI_CTRL_REG0_VALUE 0x340
+
+#define RSI_GSPI_DMA_MODE BIT(13)
+
+#define RSI_GSPI_2_ULP BIT(12)
+#define RSI_GSPI_TRIG BIT(7)
+#define RSI_GSPI_READ BIT(6)
+#define RSI_GSPI_RF_SPI_ACTIVE BIT(8)
+
/* Boot loader commands */
#define SEND_RPS_FILE '2'
@@ -66,6 +99,8 @@
#define RSI_DEV_OPMODE_WIFI_ALONE 1
#define RSI_DEV_COEX_MODE_WIFI_ALONE 1
+#define BBP_INFO_40MHZ 0x6
+
struct bl_header {
__le32 flags;
__le32 image_no;
@@ -79,6 +114,37 @@ struct ta_metadata {
unsigned int address;
};
+struct rsi_mgmt_desc {
+ __le16 len_qno;
+ u8 frame_type;
+ u8 misc_flags;
+ u8 xtend_desc_size;
+ u8 header_len;
+ __le16 frame_info;
+ u8 rate_info;
+ u8 reserved1;
+ __le16 bbp_info;
+ __le16 seq_ctrl;
+ u8 reserved2;
+ u8 sta_id;
+} __packed;
+
+struct rsi_data_desc {
+ __le16 len_qno;
+ u8 cfm_frame_type;
+ u8 misc_flags;
+ u8 xtend_desc_size;
+ u8 header_len;
+ __le16 frame_info;
+ __le16 rate_info;
+ __le16 bbp_info;
+ __le16 mac_flags;
+ u8 qid_tid;
+ u8 sta_id;
+} __packed;
+
int rsi_hal_device_init(struct rsi_hw *adapter);
+int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb);
+int rsi_send_pkt_to_bus(struct rsi_common *common, struct sk_buff *skb);
#endif
diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h
index f398525..2c18dde 100644
--- a/drivers/net/wireless/rsi/rsi_main.h
+++ b/drivers/net/wireless/rsi/rsi_main.h
@@ -21,6 +21,17 @@
#include <linux/skbuff.h>
#include <net/mac80211.h>
+struct rsi_sta {
+ struct ieee80211_sta *sta;
+ s16 sta_id;
+ u16 seq_start[IEEE80211_NUM_TIDS];
+ bool start_tx_aggr[IEEE80211_NUM_TIDS];
+};
+
+struct rsi_hw;
+
+#include "rsi_ps.h"
+
#define ERR_ZONE BIT(0) /* For Error Msgs */
#define INFO_ZONE BIT(1) /* For General Status Msgs */
#define INIT_ZONE BIT(2) /* For Driver Init Seq Msgs */
@@ -37,10 +48,13 @@ enum RSI_FSM_STATES {
FSM_COMMON_DEV_PARAMS_SENT,
FSM_BOOT_PARAMS_SENT,
FSM_EEPROM_READ_MAC_ADDR,
+ FSM_EEPROM_READ_RF_TYPE,
FSM_RESET_MAC_SENT,
FSM_RADIO_CAPS_SENT,
FSM_BB_RF_PROG_SENT,
- FSM_MAC_INIT_DONE
+ FSM_MAC_INIT_DONE,
+
+ NUM_FSM_STATES
};
extern u32 rsi_zone_enabled;
@@ -51,18 +65,24 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...);
#define IEEE80211_ADDR_LEN 6
#define FRAME_DESC_SZ 16
#define MIN_802_11_HDR_LEN 24
+#define RSI_DEF_KEEPALIVE 90
#define DATA_QUEUE_WATER_MARK 400
#define MIN_DATA_QUEUE_WATER_MARK 300
#define MULTICAST_WATER_MARK 200
#define MAC_80211_HDR_FRAME_CONTROL 0
#define WME_NUM_AC 4
-#define NUM_SOFT_QUEUES 5
-#define MAX_HW_QUEUES 8
+#define NUM_SOFT_QUEUES 6
+#define MAX_HW_QUEUES 12
#define INVALID_QUEUE 0xff
#define MAX_CONTINUOUS_VO_PKTS 8
#define MAX_CONTINUOUS_VI_PKTS 4
+/* Hardware queue info */
+#define BROADCAST_HW_Q 9
+#define MGMT_HW_Q 10
+#define BEACON_HW_Q 11
+
/* Queue information */
#define RSI_COEX_Q 0x0
#define RSI_WIFI_MGMT_Q 0x4
@@ -70,6 +90,7 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...);
#define IEEE80211_MGMT_FRAME 0x00
#define IEEE80211_CTL_FRAME 0x04
+#define RSI_MAX_ASSOC_STAS 32
#define IEEE80211_QOS_TID 0x0f
#define IEEE80211_NONQOS_TID 16
@@ -102,6 +123,7 @@ struct skb_info {
u16 channel;
s8 tid;
s8 sta_id;
+ u8 internal_hdr_size;
};
enum edca_queue {
@@ -109,7 +131,8 @@ enum edca_queue {
BE_Q,
VI_Q,
VO_Q,
- MGMT_SOFT_Q
+ MGMT_SOFT_Q,
+ MGMT_BEACON_Q
};
struct security_info {
@@ -126,8 +149,8 @@ struct wmm_qinfo {
};
struct transmit_q_stats {
- u32 total_tx_pkt_send[NUM_EDCA_QUEUES + 1];
- u32 total_tx_pkt_freed[NUM_EDCA_QUEUES + 1];
+ u32 total_tx_pkt_send[NUM_EDCA_QUEUES + 2];
+ u32 total_tx_pkt_freed[NUM_EDCA_QUEUES + 2];
};
struct vif_priv {
@@ -155,7 +178,18 @@ struct cqm_info {
u32 rssi_hyst;
};
-struct rsi_hw;
+struct xtended_desc {
+ u8 confirm_frame_type;
+ u8 retry_cnt;
+ u16 reserved;
+};
+
+enum rsi_dfs_regions {
+ RSI_REGION_FCC = 0,
+ RSI_REGION_ETSI,
+ RSI_REGION_TELEC,
+ RSI_REGION_WORLD
+};
struct rsi_common {
struct rsi_hw *priv;
@@ -166,15 +200,18 @@ struct rsi_common {
struct version_info fw_ver;
struct rsi_thread tx_thread;
- struct sk_buff_head tx_queue[NUM_EDCA_QUEUES + 1];
+ struct sk_buff_head tx_queue[NUM_EDCA_QUEUES + 2];
/* Mutex declaration */
struct mutex mutex;
- /* Mutex used between tx/rx threads */
- struct mutex tx_rxlock;
+ /* Mutex used for tx thread */
+ struct mutex tx_lock;
+ /* Mutex used for rx thread */
+ struct mutex rx_lock;
u8 endpoint;
/* Channel/band related */
u8 band;
+ u8 num_supp_bands;
u8 channel_width;
u16 rts_threshold;
@@ -216,11 +253,23 @@ struct rsi_common {
u16 oper_mode;
u8 lp_ps_handshake_mode;
u8 ulp_ps_handshake_mode;
+ u8 uapsd_bitmap;
u8 rf_power_val;
u8 wlan_rf_power_mode;
u8 obm_ant_sel_val;
int tx_power;
u8 ant_in_use;
+
+ u16 beacon_interval;
+ u8 dtim_cnt;
+
+ /* AP mode parameters */
+ u8 beacon_enabled;
+ u16 beacon_cnt;
+ struct rsi_sta stations[RSI_MAX_ASSOC_STAS + 1];
+ int num_stations;
+ int max_stations;
+ struct ieee80211_key_conf *key;
};
enum host_intf {
@@ -228,6 +277,19 @@ enum host_intf {
RSI_HOST_INTF_USB
};
+struct eepromrw_info {
+ u32 offset;
+ u32 length;
+ u8 write;
+ u16 eeprom_erase;
+ u8 data[480];
+};
+
+struct eeprom_read {
+ u16 length;
+ u16 off_set;
+};
+
struct rsi_hw {
struct rsi_common *priv;
u8 device_model;
@@ -241,6 +303,9 @@ struct rsi_hw {
enum host_intf rsi_host_intf;
u16 block_size;
+ enum ps_state ps_state;
+ struct rsi_ps_info ps_info;
+ spinlock_t ps_lock; /*To protect power save config*/
u32 usb_buffer_status_reg;
#ifdef CONFIG_RSI_DEBUGFS
struct rsi_debugfs *dfsentry;
@@ -250,7 +315,10 @@ struct rsi_hw {
struct timer_list bl_cmd_timer;
bool blcmd_timer_expired;
u32 flash_capacity;
+ struct eepromrw_info eeprom;
+ u32 interrupt_status;
u8 dfs_region;
+ char country[2];
void *rsi_dev;
struct rsi_host_intf_ops *host_intf_ops;
int (*check_hw_queue_status)(struct rsi_hw *adapter, u8 q_num);
diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h
index dcb6db7..c6e1fa6 100644
--- a/drivers/net/wireless/rsi/rsi_mgmt.h
+++ b/drivers/net/wireless/rsi/rsi_mgmt.h
@@ -43,11 +43,13 @@
#define WLAN_HOST_MODE_LEN 0x04
#define WLAN_FW_VERSION_LEN 0x08
#define MAGIC_WORD 0x5A
+#define WLAN_EEPROM_RFTYPE_ADDR 424
/* Receive Frame Types */
#define TA_CONFIRM_TYPE 0x01
#define RX_DOT11_MGMT 0x02
#define TX_STATUS_IND 0x04
+#define BEACON_EVENT_IND 0x08
#define PROBEREQ_CONFIRM 2
#define CARD_READY_IND 0x00
@@ -61,8 +63,20 @@
#define BBP_REG_WRITE 0
#define RF_RESET_ENABLE BIT(3)
#define RATE_INFO_ENABLE BIT(0)
+#define MORE_DATA_PRESENT BIT(1)
#define RSI_BROADCAST_PKT BIT(9)
-
+#define RSI_DESC_REQUIRE_CFM_TO_HOST BIT(2)
+#define RSI_ADD_DELTA_TSF_VAP_ID BIT(3)
+#define RSI_FETCH_RETRY_CNT_FRM_HST BIT(4)
+#define RSI_QOS_ENABLE BIT(12)
+#define RSI_REKEY_PURPOSE BIT(13)
+#define RSI_ENCRYPT_PKT BIT(15)
+#define RSI_SET_PS_ENABLE BIT(12)
+
+#define RSI_CMDDESC_40MHZ BIT(4)
+#define RSI_CMDDESC_UPPER_20_ENABLE BIT(5)
+#define RSI_CMDDESC_LOWER_20_ENABLE BIT(6)
+#define RSI_CMDDESC_FULL_40_ENABLE (BIT(5) | BIT(6))
#define UPPER_20_ENABLE (0x2 << 12)
#define LOWER_20_ENABLE (0x4 << 12)
#define FULL40M_ENABLE 0x6
@@ -120,6 +134,7 @@
#define RSI_RATE_MCS6 0x106
#define RSI_RATE_MCS7 0x107
#define RSI_RATE_MCS7_SG 0x307
+#define RSI_RATE_AUTO 0xffff
#define BW_20MHZ 0
#define BW_40MHZ 1
@@ -143,6 +158,8 @@
#define ANTENNA_SEL_INT 0x02 /* RF_OUT_2 / Integrated */
#define ANTENNA_SEL_UFL 0x03 /* RF_OUT_1 / U.FL */
+#define ANTENNA_MASK_VALUE 0x00ff
+#define ANTENNA_SEL_TYPE 1
/* Rx filter word definitions */
#define PROMISCOUS_MODE BIT(0)
@@ -153,9 +170,38 @@
#define ALLOW_CONN_PEER_MGMT_WHILE_BUF_FULL BIT(5)
#define DISALLOW_BROADCAST_DATA BIT(6)
+#define RSI_MPDU_DENSITY 0x8
+#define RSI_CHAN_RADAR BIT(7)
+#define RSI_BEACON_INTERVAL 200
+#define RSI_DTIM_COUNT 2
+
+#define RSI_PS_DISABLE_IND BIT(15)
+#define RSI_PS_ENABLE 1
+#define RSI_PS_DISABLE 0
+#define RSI_DEEP_SLEEP 1
+#define RSI_CONNECTED_SLEEP 2
+#define RSI_SLEEP_REQUEST 1
+#define RSI_WAKEUP_REQUEST 2
+
+#define RSI_IEEE80211_UAPSD_QUEUES \
+ (IEEE80211_WMM_IE_STA_QOSINFO_AC_VO | \
+ IEEE80211_WMM_IE_STA_QOSINFO_AC_VI | \
+ IEEE80211_WMM_IE_STA_QOSINFO_AC_BE | \
+ IEEE80211_WMM_IE_STA_QOSINFO_AC_BK)
+
+#define RSI_DATA_DESC_MAC_BBP_INFO BIT(0)
+#define RSI_DATA_DESC_NO_ACK_IND BIT(9)
+#define RSI_DATA_DESC_QOS_EN BIT(12)
+#define RSI_DATA_DESC_NORMAL_FRAME 0x00
+#define RSI_DATA_DESC_DTIM_BEACON_GATED_FRAME BIT(10)
+#define RSI_DATA_DESC_BEACON_FRAME BIT(11)
+#define RSI_DATA_DESC_DTIM_BEACON (BIT(10) | BIT(11))
+#define RSI_DATA_DESC_INSERT_TSF BIT(15)
+#define RSI_DATA_DESC_INSERT_SEQ_NO BIT(2)
+
enum opmode {
- STA_OPMODE = 1,
- AP_OPMODE = 2
+ AP_OPMODE = 0,
+ STA_OPMODE,
};
enum vap_status {
@@ -164,6 +210,10 @@ enum vap_status {
VAP_UPDATE = 3
};
+enum peer_type {
+ PEER_TYPE_AP,
+ PEER_TYPE_STA,
+};
extern struct ieee80211_rate rsi_rates[12];
extern const u16 rsi_mcsrates[8];
@@ -192,7 +242,7 @@ enum cmd_frame_type {
AUTO_RATE_IND,
BOOTUP_PARAMS_REQUEST,
VAP_CAPABILITIES,
- EEPROM_READ_TYPE ,
+ EEPROM_READ,
EEPROM_WRITE,
GPIO_PIN_CONFIG ,
SET_RX_FILTER,
@@ -205,6 +255,7 @@ enum cmd_frame_type {
CW_MODE_REQ,
PER_CMD_PKT,
ANT_SEL_FRAME = 0x20,
+ VAP_DYNAMIC_UPDATE = 0x27,
COMMON_DEV_CONFIG = 0x28,
RADIO_PARAMS_UPDATE = 0x29
};
@@ -213,13 +264,52 @@ struct rsi_mac_frame {
__le16 desc_word[8];
} __packed;
+#define PWR_SAVE_WAKEUP_IND BIT(0)
+#define TCP_CHECK_SUM_OFFLOAD BIT(1)
+#define CONFIRM_REQUIRED_TO_HOST BIT(2)
+#define ADD_DELTA_TSF BIT(3)
+#define FETCH_RETRY_CNT_FROM_HOST_DESC BIT(4)
+#define EOSP_INDICATION BIT(5)
+#define REQUIRE_TSF_SYNC_CONFIRM BIT(6)
+#define ENCAP_MGMT_PKT BIT(7)
+#define DESC_IMMEDIATE_WAKEUP BIT(15)
+
+struct rsi_cmd_desc_dword0 {
+ __le16 len_qno;
+ u8 frame_type;
+ u8 misc_flags;
+};
+
+struct rsi_cmd_desc_dword1 {
+ u8 xtend_desc_size;
+ u8 reserved1;
+ __le16 reserved2;
+};
+
+struct rsi_cmd_desc_dword2 {
+ __le32 pkt_info; /* Packet specific data */
+};
+
+struct rsi_cmd_desc_dword3 {
+ __le16 token;
+ u8 qid_tid;
+ u8 sta_id;
+};
+
+struct rsi_cmd_desc {
+ struct rsi_cmd_desc_dword0 desc_dword0;
+ struct rsi_cmd_desc_dword1 desc_dword1;
+ struct rsi_cmd_desc_dword2 desc_dword2;
+ struct rsi_cmd_desc_dword3 desc_dword3;
+};
+
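The four dword structs are sized to tile the fixed 16-byte frame
descriptor, so command frames can overlay typed fields instead of
indexing desc_word[8]. That invariant can be made explicit (sketch):

/* Sketch: each dword struct is 4 bytes, the whole descriptor 16. */
static inline void rsi_cmd_desc_size_check(void)
{
	BUILD_BUG_ON(sizeof(struct rsi_cmd_desc_dword0) != 4);
	BUILD_BUG_ON(sizeof(struct rsi_cmd_desc) != FRAME_DESC_SZ);
}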
struct rsi_boot_params {
__le16 desc_word[8];
struct bootup_params bootup_params;
} __packed;
struct rsi_peer_notify {
- __le16 desc_word[8];
+ struct rsi_cmd_desc desc;
u8 mac_addr[6];
__le16 command;
__le16 mpdu_density;
@@ -227,31 +317,116 @@ struct rsi_peer_notify {
__le32 sta_flags;
} __packed;
+/* Aggregation params flags */
+#define RSI_AGGR_PARAMS_TID_MASK 0xf
+#define RSI_AGGR_PARAMS_START BIT(4)
+#define RSI_AGGR_PARAMS_RX_AGGR BIT(5)
+struct rsi_aggr_params {
+ struct rsi_cmd_desc_dword0 desc_dword0;
+ struct rsi_cmd_desc_dword0 desc_dword1;
+ __le16 seq_start;
+ __le16 baw_size;
+ __le16 token;
+ u8 aggr_params;
+ u8 peer_id;
+} __packed;
+
+struct rsi_bb_rf_prog {
+ struct rsi_cmd_desc_dword0 desc_dword0;
+ __le16 reserved1;
+ u8 rf_power_mode;
+ u8 reserved2;
+ u8 endpoint;
+ u8 reserved3;
+ __le16 reserved4;
+ __le16 reserved5;
+ __le16 flags;
+} __packed;
+
+struct rsi_chan_config {
+ struct rsi_cmd_desc_dword0 desc_dword0;
+ struct rsi_cmd_desc_dword1 desc_dword1;
+ u8 channel_number;
+ u8 antenna_gain_offset_2g;
+ u8 antenna_gain_offset_5g;
+ u8 channel_width;
+ __le16 tx_power;
+ u8 region_rftype;
+ u8 flags;
+} __packed;
+
struct rsi_vap_caps {
- __le16 desc_word[8];
+ struct rsi_cmd_desc_dword0 desc_dword0;
+ u8 reserved1;
+ u8 status;
+ __le16 reserved2;
+ u8 vif_type;
+ u8 channel_bw;
+ __le16 antenna_info;
+ u8 radioid_macid;
+ u8 vap_id;
+ __le16 reserved3;
u8 mac_addr[6];
__le16 keep_alive_period;
u8 bssid[6];
- __le16 reserved;
+ __le16 reserved4;
__le32 flags;
__le16 frag_threshold;
__le16 rts_threshold;
__le32 default_mgmt_rate;
- __le32 default_ctrl_rate;
+ __le16 default_ctrl_rate;
+ __le16 ctrl_rate_flags;
__le32 default_data_rate;
__le16 beacon_interval;
__le16 dtim_period;
+ __le16 beacon_miss_threshold;
} __packed;
+struct rsi_ant_sel_frame {
+ struct rsi_cmd_desc_dword0 desc_dword0;
+ u8 reserved;
+ u8 sub_frame_type;
+ __le16 ant_value;
+ __le32 reserved1;
+ __le32 reserved2;
+} __packed;
+
+struct rsi_dynamic_s {
+ struct rsi_cmd_desc_dword0 desc_dword0;
+ struct rsi_cmd_desc_dword1 desc_dword1;
+ struct rsi_cmd_desc_dword2 desc_dword2;
+ struct rsi_cmd_desc_dword3 desc_dword3;
+ struct framebody {
+ __le16 data_rate;
+ __le16 mgmt_rate;
+ __le16 keep_alive_period;
+ } frame_body;
+} __packed;
+
+/* Key descriptor flags */
+#define RSI_KEY_TYPE_BROADCAST BIT(1)
+#define RSI_WEP_KEY BIT(2)
+#define RSI_WEP_KEY_104 BIT(3)
+#define RSI_CIPHER_WPA BIT(4)
+#define RSI_CIPHER_TKIP BIT(5)
+#define RSI_KEY_MODE_AP BIT(7)
+#define RSI_PROTECT_DATA_FRAMES BIT(13)
+#define RSI_KEY_ID_MASK 0xC0
+#define RSI_KEY_ID_OFFSET 14
struct rsi_set_key {
- __le16 desc_word[8];
+ struct rsi_cmd_desc_dword0 desc_dword0;
+ struct rsi_cmd_desc_dword1 desc_dword1;
+ __le16 key_desc;
+ __le32 bpn;
+ u8 sta_id;
+ u8 vap_id;
u8 key[4][32];
u8 tx_mic_key[8];
u8 rx_mic_key[8];
} __packed;
struct rsi_auto_rate {
- __le16 desc_word[8];
+ struct rsi_cmd_desc desc;
__le16 failure_limit;
__le16 initial_boundary;
__le16 max_threshold_limt;
@@ -262,6 +437,19 @@ struct rsi_auto_rate {
__le16 supported_rates[40];
} __packed;
+#define QUIET_INFO_VALID BIT(0)
+#define QUIET_ENABLE BIT(1)
+struct rsi_block_unblock_data {
+ struct rsi_cmd_desc_dword0 desc_dword0;
+ u8 xtend_desc_size;
+ u8 host_quiet_info;
+ __le16 reserved;
+ __le16 block_q_bitmap;
+ __le16 unblock_q_bitmap;
+ __le16 token;
+ __le16 flush_q_bitmap;
+} __packed;
+
struct qos_params {
__le16 cont_win_min_q;
__le16 cont_win_max_q;
@@ -270,7 +458,14 @@ struct qos_params {
} __packed;
struct rsi_radio_caps {
- __le16 desc_word[8];
+ struct rsi_cmd_desc_dword0 desc_dword0;
+ struct rsi_cmd_desc_dword0 desc_dword1;
+ u8 channel_num;
+ u8 rf_model;
+ __le16 ppe_ack_rate;
+ __le16 mode_11j;
+ u8 radio_cfg_info;
+ u8 radio_info;
struct qos_params qos_params[MAX_HW_QUEUES];
u8 num_11n_rates;
u8 num_11ac_rates;
@@ -353,6 +548,34 @@ struct rsi_config_vals {
u8 reserved2[16];
} __packed;
+/* Packet info flags */
+#define RSI_EEPROM_HDR_SIZE_OFFSET 8
+#define RSI_EEPROM_HDR_SIZE_MASK 0x300
+#define RSI_EEPROM_LEN_OFFSET 20
+#define RSI_EEPROM_LEN_MASK 0xFFF00000
+
+struct rsi_eeprom_read_frame {
+ __le16 len_qno;
+ u8 pkt_type;
+ u8 misc_flags;
+ __le32 pkt_info;
+ __le32 eeprom_offset;
+ __le16 delay_ms;
+ __le16 reserved3;
+} __packed;
+
+struct rsi_request_ps {
+ struct rsi_cmd_desc desc;
+ struct ps_sleep_params ps_sleep;
+ u8 ps_mimic_support;
+ u8 ps_uapsd_acs;
+ u8 ps_uapsd_wakeup_period;
+ u8 reserved;
+ __le32 ps_listen_interval;
+ __le32 ps_dtim_interval_duration;
+ __le16 ps_num_dtim_intervals;
+} __packed;
+
static inline u32 rsi_get_queueno(u8 *addr, u16 offset)
{
return (le16_to_cpu(*(__le16 *)&addr[offset]) & 0x7000) >> 12;
@@ -385,16 +608,19 @@ static inline void rsi_set_len_qno(__le16 *addr, u16 len, u8 qno)
int rsi_mgmt_pkt_recv(struct rsi_common *common, u8 *msg);
int rsi_set_vap_capabilities(struct rsi_common *common, enum opmode mode,
- u8 vap_status);
+ u8 *mac_addr, u8 vap_id, u8 vap_status);
int rsi_send_aggregation_params_frame(struct rsi_common *common, u16 tid,
- u16 ssn, u8 buf_size, u8 event);
+ u16 ssn, u8 buf_size, u8 event,
+ u8 sta_id);
int rsi_hal_load_key(struct rsi_common *common, u8 *data, u16 key_len,
- u8 key_type, u8 key_id, u32 cipher);
+ u8 key_type, u8 key_id, u32 cipher, s16 sta_id);
int rsi_set_channel(struct rsi_common *common,
struct ieee80211_channel *channel);
+int rsi_send_vap_dynamic_update(struct rsi_common *common);
int rsi_send_block_unblock_frame(struct rsi_common *common, bool event);
-void rsi_inform_bss_status(struct rsi_common *common, u8 status,
- const u8 *bssid, u8 qos_enable, u16 aid);
+void rsi_inform_bss_status(struct rsi_common *common, enum opmode opmode,
+ u8 status, const u8 *addr, u8 qos_enable, u16 aid,
+ struct ieee80211_sta *sta, u16 sta_id);
void rsi_indicate_pkt_to_os(struct rsi_common *common, struct sk_buff *skb);
int rsi_mac80211_attach(struct rsi_common *common);
void rsi_indicate_tx_status(struct rsi_hw *common, struct sk_buff *skb,
diff --git a/drivers/net/wireless/rsi/rsi_ps.h b/drivers/net/wireless/rsi/rsi_ps.h
new file mode 100644
index 0000000..d847587
--- /dev/null
+++ b/drivers/net/wireless/rsi/rsi_ps.h
@@ -0,0 +1,64 @@
+/**
+ * Copyright (c) 2017 Redpine Signals Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __RSI_PS_H__
+#define __RSI_PS_H__
+
+#define PS_CONFIRM_INDEX 12
+#define RSI_DEF_DS_WAKEUP_PERIOD 200
+#define RSI_DEF_LISTEN_INTERVAL 200
+#define RSI_SLEEP_TYPE_LP 1
+
+enum ps_state {
+ PS_NONE = 0,
+ PS_ENABLE_REQ_SENT = 1,
+ PS_DISABLE_REQ_SENT = 2,
+ PS_ENABLED = 3
+};
+
+struct ps_sleep_params {
+ u8 enable;
+ u8 sleep_type;
+ u8 connected_sleep;
+ u8 reserved1;
+ __le16 num_bcns_per_lis_int;
+ __le16 wakeup_type;
+ __le32 sleep_duration;
+} __packed;
+
+struct rsi_ps_info {
+ u8 enabled;
+ u8 sleep_type;
+ u8 tx_threshold;
+ u8 rx_threshold;
+ u8 tx_hysterisis;
+ u8 rx_hysterisis;
+ u16 monitor_interval;
+ u32 listen_interval;
+ u16 num_bcns_per_lis_int;
+ u32 dtim_interval_duration;
+ u16 num_dtims_per_sleep;
+ u32 deep_sleep_wakeup_period;
+} __packed;
+
+char *str_psstate(enum ps_state state);
+void rsi_enable_ps(struct rsi_hw *adapter);
+void rsi_disable_ps(struct rsi_hw *adapter);
+int rsi_handle_ps_confirm(struct rsi_hw *adapter, u8 *msg);
+void rsi_default_ps_params(struct rsi_hw *hw);
+int rsi_send_ps_request(struct rsi_hw *adapter, bool enable);
+void rsi_conf_uapsd(struct rsi_hw *adapter);
+#endif
diff --git a/drivers/net/wireless/rsi/rsi_sdio.h b/drivers/net/wireless/rsi/rsi_sdio.h
index 9fb73f6..95e4bed 100644
--- a/drivers/net/wireless/rsi/rsi_sdio.h
+++ b/drivers/net/wireless/rsi/rsi_sdio.h
@@ -41,6 +41,7 @@ enum sdio_interrupt_type {
#define PKT_BUFF_FULL 1
#define PKT_MGMT_BUFF_FULL 2
#define MSDU_PKT_PENDING 3
+#define RECV_NUM_BLOCKS 4
/* Interrupt Bit Related Macros */
#define PKT_BUFF_AVAILABLE 1
#define FW_ASSERT_IND 2
@@ -58,6 +59,7 @@ enum sdio_interrupt_type {
#define SDIO_READ_START_LVL 0x000FC
#define SDIO_READ_FIFO_CTL 0x000FD
#define SDIO_WRITE_FIFO_CTL 0x000FE
+#define SDIO_WAKEUP_REG 0x000FF
#define SDIO_FUN1_INTR_CLR_REG 0x0008
#define SDIO_REG_HIGH_SPEED 0x0013
@@ -103,7 +105,7 @@ struct receive_info {
struct rsi_91x_sdiodev {
struct sdio_func *pfunction;
- struct task_struct *in_sdio_litefi_irq;
+ struct task_struct *sdio_irq_task;
struct receive_info rx_info;
u32 next_read_delay;
u32 sdio_high_speed_enable;
@@ -112,6 +114,7 @@ struct rsi_91x_sdiodev {
u8 prev_desc[16];
u16 tx_blk_size;
u8 write_fail;
+ bool buff_status_updated;
};
void rsi_interrupt_handler(struct rsi_hw *adapter);
@@ -125,5 +128,5 @@ int rsi_sdio_write_register_multiple(struct rsi_hw *adapter, u32 addr,
int rsi_sdio_master_access_msword(struct rsi_hw *adapter, u16 ms_word);
void rsi_sdio_ack_intr(struct rsi_hw *adapter, u8 int_bit);
int rsi_sdio_determine_event_timeout(struct rsi_hw *adapter);
-int rsi_sdio_read_buffer_status_register(struct rsi_hw *adapter, u8 q_num);
+int rsi_sdio_check_buffer_status(struct rsi_hw *adapter, u8 q_num);
#endif
diff --git a/drivers/net/wireless/rsi/rsi_usb.h b/drivers/net/wireless/rsi/rsi_usb.h
index 59513ac..891daea 100644
--- a/drivers/net/wireless/rsi/rsi_usb.h
+++ b/drivers/net/wireless/rsi/rsi_usb.h
@@ -25,6 +25,7 @@
#define USB_INTERNAL_REG_1 0x25000
#define RSI_USB_READY_MAGIC_NUM 0xab
#define FW_STATUS_REG 0x41050012
+#define RSI_TA_HOLD_REG 0x22000844
#define USB_VENDOR_REGISTER_READ 0x15
#define USB_VENDOR_REGISTER_WRITE 0x16
@@ -32,10 +33,11 @@
#define MAX_RX_URBS 1
#define MAX_BULK_EP 8
-#define MGMT_EP 1
-#define DATA_EP 2
+#define WLAN_EP 1
+#define BT_EP 2
#define RSI_USB_BUF_SIZE 4096
+#define RSI_USB_CTRL_BUF_SIZE 0x04
struct rsi_91x_usbdev {
struct rsi_thread rx_thread;
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 60aaa85..c346c02 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -6016,6 +6016,8 @@ static int wl1271_register_hw(struct wl1271 *wl)
{
int ret;
u32 oui_addr = 0, nic_addr = 0;
+ struct platform_device *pdev = wl->pdev;
+ struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev);
if (wl->mac80211_registered)
return 0;
@@ -6040,6 +6042,27 @@ static int wl1271_register_hw(struct wl1271 *wl)
nic_addr = wl->fuse_nic_addr + 1;
}
+ if (oui_addr == 0xdeadbe && nic_addr == 0xef0000) {
+ wl1271_warning("Detected unconfigured mac address in nvs, derive from fuse instead.\n");
+ if (!strcmp(pdev_data->family->name, "wl18xx")) {
+ wl1271_warning("This default nvs file can be removed from the file system\n");
+ } else {
+ wl1271_warning("Your device performance is not optimized.\n");
+ wl1271_warning("Please use the calibrator tool to configure your device.\n");
+ }
+
+ if (wl->fuse_oui_addr == 0 && wl->fuse_nic_addr == 0) {
+ wl1271_warning("Fuse mac address is zero. using random mac\n");
+ /* Use TI oui and a random nic */
+ oui_addr = WLCORE_TI_OUI_ADDRESS;
+ nic_addr = get_random_int();
+ } else {
+ oui_addr = wl->fuse_oui_addr;
+ /* fuse has the BD_ADDR, the WLAN addresses are the next two */
+ nic_addr = wl->fuse_nic_addr + 1;
+ }
+ }
+
wl12xx_derive_mac_addresses(wl, oui_addr, nic_addr);
ret = ieee80211_register_hw(wl->hw);
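0xdeadbe:0xef0000 is the placeholder address shipped in the example nvs
file, so matching it means the nvs was never calibrated. The fallback
prefers the fused address and only invents one (TI OUI plus a random
NIC) when the fuse is blank as well; condensed (sketch):

/*
 * Sketch of the address selection above:
 *
 *   nvs address        fuse address   result
 *   calibrated         -              use the nvs address as-is
 *   0xdeadbe:0xef0000  non-zero       fuse OUI/NIC (+1 past the BD_ADDR)
 *   0xdeadbe:0xef0000  zero           TI OUI + random NIC
 */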
diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c
index 2fb3871..f8a1fea 100644
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c
@@ -230,6 +230,7 @@ static const struct wilink_family_data wl128x_data = {
static const struct wilink_family_data wl18xx_data = {
.name = "wl18xx",
.cfg_name = "ti-connectivity/wl18xx-conf.bin",
+ .nvs_name = "ti-connectivity/wl1271-nvs.bin",
};
static const struct of_device_id wlcore_sdio_of_match_table[] = {
diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c
index fdabb92..62ce54a 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -92,6 +92,7 @@ static const struct wilink_family_data wl128x_data = {
static const struct wilink_family_data wl18xx_data = {
.name = "wl18xx",
.cfg_name = "ti-connectivity/wl18xx-conf.bin",
+ .nvs_name = "ti-connectivity/wl1271-nvs.bin",
};
struct wl12xx_spi_glue {
diff --git a/drivers/net/wireless/ti/wlcore/sysfs.c b/drivers/net/wireless/ti/wlcore/sysfs.c
index a9218e5..b72e210 100644
--- a/drivers/net/wireless/ti/wlcore/sysfs.c
+++ b/drivers/net/wireless/ti/wlcore/sysfs.c
@@ -138,7 +138,7 @@ static ssize_t wl1271_sysfs_read_fwlog(struct file *filp, struct kobject *kobj,
return len;
}
-static struct bin_attribute fwlog_attr = {
+static const struct bin_attribute fwlog_attr = {
.attr = {.name = "fwlog", .mode = S_IRUSR},
.read = wl1271_sysfs_read_fwlog,
};
diff --git a/drivers/net/wireless/ti/wlcore/wlcore.h b/drivers/net/wireless/ti/wlcore/wlcore.h
index 1827546..95fbedc 100644
--- a/drivers/net/wireless/ti/wlcore/wlcore.h
+++ b/drivers/net/wireless/ti/wlcore/wlcore.h
@@ -40,6 +40,9 @@
/* wl12xx/wl18xx maximum transmission power (in dBm) */
#define WLCORE_MAX_TXPWR 25
+/* Texas Instruments pre assigned OUI */
+#define WLCORE_TI_OUI_ADDRESS 0x080028
+
/* forward declaration */
struct wl1271_tx_hw_descr;
enum wl_rx_buf_align;
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index acec0d9..da62220 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -965,7 +965,7 @@ static inline void wl3501_md_ind_interrupt(struct net_device *dev,
&addr4, sizeof(addr4));
if (!(addr4[0] == 0xAA && addr4[1] == 0xAA &&
addr4[2] == 0x03 && addr4[4] == 0x00)) {
- printk(KERN_INFO "Insupported packet type!\n");
+ printk(KERN_INFO "Unsupported packet type!\n");
return;
}
pkt_len = sig.size + 12 - 24 - 4 - 6;
diff --git a/drivers/net/wireless/zydas/zd1201.c b/drivers/net/wireless/zydas/zd1201.c
index 7f586d7..581e857 100644
--- a/drivers/net/wireless/zydas/zd1201.c
+++ b/drivers/net/wireless/zydas/zd1201.c
@@ -25,7 +25,7 @@
#include <linux/firmware.h>
#include "zd1201.h"
-static struct usb_device_id zd1201_table[] = {
+static const struct usb_device_id zd1201_table[] = {
{USB_DEVICE(0x0586, 0x3400)}, /* Peabird Wireless USB Adapter */
{USB_DEVICE(0x0ace, 0x1201)}, /* ZyDAS ZD1201 Wireless USB Adapter */
{USB_DEVICE(0x050d, 0x6051)}, /* Belkin F5D6051 usb adapter */
diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_rf_rf2959.c b/drivers/net/wireless/zydas/zd1211rw/zd_rf_rf2959.c
index a93f657..d4e512f 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_rf_rf2959.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_rf_rf2959.c
@@ -61,7 +61,7 @@ static void dump_regwrite(u32 rw)
switch (reg) {
case 0:
- PDEBUG("reg0 CFG1 ref_sel %d hybernate %d rf_vco_reg_en %d"
+ PDEBUG("reg0 CFG1 ref_sel %d hibernate %d rf_vco_reg_en %d"
" if_vco_reg_en %d if_vga_en %d",
bits(rw, 14, 15), bit(rw, 3), bit(rw, 2), bit(rw, 1),
bit(rw, 0));
diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
index 01ca1d5..c30bf11 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
@@ -35,7 +35,7 @@
#include "zd_mac.h"
#include "zd_usb.h"
-static struct usb_device_id usb_ids[] = {
+static const struct usb_device_id usb_ids[] = {
/* ZD1211 */
{ USB_DEVICE(0x0105, 0x145f), .driver_info = DEVICE_ZD1211 },
{ USB_DEVICE(0x0586, 0x3401), .driver_info = DEVICE_ZD1211 },
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index e322a86..ee8ed9da 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -551,8 +551,8 @@ int xenvif_init_queue(struct xenvif_queue *queue)
for (i = 0; i < MAX_PENDING_REQS; i++) {
queue->pending_tx_info[i].callback_struct = (struct ubuf_info)
{ .callback = xenvif_zerocopy_callback,
- .ctx = NULL,
- .desc = i };
+ { { .ctx = NULL,
+ .desc = i } } };
queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
}
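The extra brace levels track a layout change to struct ubuf_info from
the msg_zerocopy series: ctx and desc now live inside an anonymous
struct nested in an anonymous union, and designated initializers must
spell out each level. Roughly (a sketch of the shape, not the exact
upstream definition):

/* Sketch: why the initializer reads { { .ctx = ..., .desc = ... } }. */
struct ubuf_info_shape {
	void (*callback)(struct ubuf_info_shape *uarg, bool success);
	union {
		struct {
			unsigned long desc;
			void *ctx;
		};
		/* other members used for MSG_ZEROCOPY accounting */
	};
};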
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 7b61adb..523387e 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -611,7 +611,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
nskb = skb_copy(skb, GFP_ATOMIC);
if (!nskb)
goto drop;
- dev_kfree_skb_any(skb);
+ dev_consume_skb_any(skb);
skb = nskb;
page = virt_to_page(skb->data);
offset = offset_in_page(skb->data);
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index e0dbd6e..b14a000 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -421,19 +421,14 @@ int of_phy_register_fixed_link(struct device_node *np)
{
struct fixed_phy_status status = {};
struct device_node *fixed_link_node;
- const __be32 *fixed_link_prop;
- int link_gpio;
- int len, err;
- struct phy_device *phy;
+ u32 fixed_link_prop[5];
const char *managed;
+ int link_gpio = -1;
- err = of_property_read_string(np, "managed", &managed);
- if (err == 0) {
- if (strcmp(managed, "in-band-status") == 0) {
- /* status is zeroed, namely its .link member */
- phy = fixed_phy_register(PHY_POLL, &status, -1, np);
- return PTR_ERR_OR_ZERO(phy);
- }
+ if (of_property_read_string(np, "managed", &managed) == 0 &&
+ strcmp(managed, "in-band-status") == 0) {
+ /* status is zeroed, namely its .link member */
+ goto register_phy;
}
/* New binding */
@@ -456,23 +451,25 @@ int of_phy_register_fixed_link(struct device_node *np)
if (link_gpio == -EPROBE_DEFER)
return -EPROBE_DEFER;
- phy = fixed_phy_register(PHY_POLL, &status, link_gpio, np);
- return PTR_ERR_OR_ZERO(phy);
+ goto register_phy;
}
/* Old binding */
- fixed_link_prop = of_get_property(np, "fixed-link", &len);
- if (fixed_link_prop && len == (5 * sizeof(__be32))) {
+ if (of_property_read_u32_array(np, "fixed-link", fixed_link_prop,
+ ARRAY_SIZE(fixed_link_prop)) == 0) {
status.link = 1;
- status.duplex = be32_to_cpu(fixed_link_prop[1]);
- status.speed = be32_to_cpu(fixed_link_prop[2]);
- status.pause = be32_to_cpu(fixed_link_prop[3]);
- status.asym_pause = be32_to_cpu(fixed_link_prop[4]);
- phy = fixed_phy_register(PHY_POLL, &status, -1, np);
- return PTR_ERR_OR_ZERO(phy);
+ status.duplex = fixed_link_prop[1];
+ status.speed = fixed_link_prop[2];
+ status.pause = fixed_link_prop[3];
+ status.asym_pause = fixed_link_prop[4];
+ goto register_phy;
}
return -ENODEV;
+
+register_phy:
+ return PTR_ERR_OR_ZERO(fixed_phy_register(PHY_POLL, &status, link_gpio,
+ np));
}
EXPORT_SYMBOL(of_phy_register_fixed_link);
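For reference, the deprecated fixed-link binding is a bare five-cell
property; the refactor reads it with of_property_read_u32_array()
(which handles the be32 conversion) instead of open-coding
be32_to_cpu() on of_get_property() data. Cell layout (sketch):

/* Sketch: old-style "fixed-link = <id duplex speed pause asym>;" */
enum {
	FIXED_LINK_PHY_ID = 0,	/* not used by this code */
	FIXED_LINK_DUPLEX,	/* -> status.duplex */
	FIXED_LINK_SPEED,	/* -> status.speed */
	FIXED_LINK_PAUSE,	/* -> status.pause */
	FIXED_LINK_ASYM_PAUSE,	/* -> status.asym_pause */
};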
diff --git a/drivers/phy/marvell/Kconfig b/drivers/phy/marvell/Kconfig
index 048d889..68e3212 100644
--- a/drivers/phy/marvell/Kconfig
+++ b/drivers/phy/marvell/Kconfig
@@ -21,6 +21,17 @@ config PHY_BERLIN_USB
help
Enable this to support the USB PHY on Marvell Berlin SoCs.
+config PHY_MVEBU_CP110_COMPHY
+ tristate "Marvell CP110 comphy driver"
+ depends on ARCH_MVEBU || COMPILE_TEST
+ depends on OF
+ select GENERIC_PHY
+ help
+	  This driver allows control of the comphy, a hardware block providing
+	  shared serdes PHYs on Marvell Armada 7k/8k (in the CP110). Its serdes
+	  lanes can be used by various controllers (Ethernet, SATA, USB,
+	  PCIe...).
+
config PHY_MVEBU_SATA
def_bool y
depends on ARCH_DOVE || MACH_DOVE || MACH_KIRKWOOD
diff --git a/drivers/phy/marvell/Makefile b/drivers/phy/marvell/Makefile
index 3fc188f..0cf6a7c 100644
--- a/drivers/phy/marvell/Makefile
+++ b/drivers/phy/marvell/Makefile
@@ -1,6 +1,7 @@
obj-$(CONFIG_ARMADA375_USBCLUSTER_PHY) += phy-armada375-usb2.o
obj-$(CONFIG_PHY_BERLIN_SATA) += phy-berlin-sata.o
obj-$(CONFIG_PHY_BERLIN_USB) += phy-berlin-usb.o
+obj-$(CONFIG_PHY_MVEBU_CP110_COMPHY) += phy-mvebu-cp110-comphy.o
obj-$(CONFIG_PHY_MVEBU_SATA) += phy-mvebu-sata.o
obj-$(CONFIG_PHY_PXA_28NM_HSIC) += phy-pxa-28nm-hsic.o
obj-$(CONFIG_PHY_PXA_28NM_USB2) += phy-pxa-28nm-usb2.o
diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
new file mode 100644
index 0000000..73ebad6
--- /dev/null
+++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
@@ -0,0 +1,644 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+/* Relative to priv->base */
+#define MVEBU_COMPHY_SERDES_CFG0(n) (0x0 + (n) * 0x1000)
+#define MVEBU_COMPHY_SERDES_CFG0_PU_PLL BIT(1)
+#define MVEBU_COMPHY_SERDES_CFG0_GEN_RX(n) ((n) << 3)
+#define MVEBU_COMPHY_SERDES_CFG0_GEN_TX(n) ((n) << 7)
+#define MVEBU_COMPHY_SERDES_CFG0_PU_RX BIT(11)
+#define MVEBU_COMPHY_SERDES_CFG0_PU_TX BIT(12)
+#define MVEBU_COMPHY_SERDES_CFG0_HALF_BUS BIT(14)
+#define MVEBU_COMPHY_SERDES_CFG1(n) (0x4 + (n) * 0x1000)
+#define MVEBU_COMPHY_SERDES_CFG1_RESET BIT(3)
+#define MVEBU_COMPHY_SERDES_CFG1_RX_INIT BIT(4)
+#define MVEBU_COMPHY_SERDES_CFG1_CORE_RESET BIT(5)
+#define MVEBU_COMPHY_SERDES_CFG1_RF_RESET BIT(6)
+#define MVEBU_COMPHY_SERDES_CFG2(n) (0x8 + (n) * 0x1000)
+#define MVEBU_COMPHY_SERDES_CFG2_DFE_EN BIT(4)
+#define MVEBU_COMPHY_SERDES_STATUS0(n) (0x18 + (n) * 0x1000)
+#define MVEBU_COMPHY_SERDES_STATUS0_TX_PLL_RDY BIT(2)
+#define MVEBU_COMPHY_SERDES_STATUS0_RX_PLL_RDY BIT(3)
+#define MVEBU_COMPHY_SERDES_STATUS0_RX_INIT BIT(4)
+#define MVEBU_COMPHY_PWRPLL_CTRL(n) (0x804 + (n) * 0x1000)
+#define MVEBU_COMPHY_PWRPLL_CTRL_RFREQ(n) ((n) << 0)
+#define MVEBU_COMPHY_PWRPLL_PHY_MODE(n) ((n) << 5)
+#define MVEBU_COMPHY_IMP_CAL(n) (0x80c + (n) * 0x1000)
+#define MVEBU_COMPHY_IMP_CAL_TX_EXT(n) ((n) << 10)
+#define MVEBU_COMPHY_IMP_CAL_TX_EXT_EN BIT(15)
+#define MVEBU_COMPHY_DFE_RES(n) (0x81c + (n) * 0x1000)
+#define MVEBU_COMPHY_DFE_RES_FORCE_GEN_TBL BIT(15)
+#define MVEBU_COMPHY_COEF(n) (0x828 + (n) * 0x1000)
+#define MVEBU_COMPHY_COEF_DFE_EN BIT(14)
+#define MVEBU_COMPHY_COEF_DFE_CTRL BIT(15)
+#define MVEBU_COMPHY_GEN1_S0(n) (0x834 + (n) * 0x1000)
+#define MVEBU_COMPHY_GEN1_S0_TX_AMP(n) ((n) << 1)
+#define MVEBU_COMPHY_GEN1_S0_TX_EMPH(n) ((n) << 7)
+#define MVEBU_COMPHY_GEN1_S1(n) (0x838 + (n) * 0x1000)
+#define MVEBU_COMPHY_GEN1_S1_RX_MUL_PI(n) ((n) << 0)
+#define MVEBU_COMPHY_GEN1_S1_RX_MUL_PF(n) ((n) << 3)
+#define MVEBU_COMPHY_GEN1_S1_RX_MUL_FI(n) ((n) << 6)
+#define MVEBU_COMPHY_GEN1_S1_RX_MUL_FF(n) ((n) << 8)
+#define MVEBU_COMPHY_GEN1_S1_RX_DFE_EN BIT(10)
+#define MVEBU_COMPHY_GEN1_S1_RX_DIV(n) ((n) << 11)
+#define MVEBU_COMPHY_GEN1_S2(n) (0x8f4 + (n) * 0x1000)
+#define MVEBU_COMPHY_GEN1_S2_TX_EMPH(n) ((n) << 0)
+#define MVEBU_COMPHY_GEN1_S2_TX_EMPH_EN BIT(4)
+#define MVEBU_COMPHY_LOOPBACK(n) (0x88c + (n) * 0x1000)
+#define MVEBU_COMPHY_LOOPBACK_DBUS_WIDTH(n) ((n) << 1)
+#define MVEBU_COMPHY_VDD_CAL0(n) (0x908 + (n) * 0x1000)
+#define MVEBU_COMPHY_VDD_CAL0_CONT_MODE BIT(15)
+#define MVEBU_COMPHY_EXT_SELV(n) (0x914 + (n) * 0x1000)
+#define MVEBU_COMPHY_EXT_SELV_RX_SAMPL(n) ((n) << 5)
+#define MVEBU_COMPHY_MISC_CTRL0(n) (0x93c + (n) * 0x1000)
+#define MVEBU_COMPHY_MISC_CTRL0_ICP_FORCE BIT(5)
+#define MVEBU_COMPHY_MISC_CTRL0_REFCLK_SEL BIT(10)
+#define MVEBU_COMPHY_RX_CTRL1(n) (0x940 + (n) * 0x1000)
+#define MVEBU_COMPHY_RX_CTRL1_RXCLK2X_SEL BIT(11)
+#define MVEBU_COMPHY_RX_CTRL1_CLK8T_EN BIT(12)
+#define MVEBU_COMPHY_SPEED_DIV(n) (0x954 + (n) * 0x1000)
+#define MVEBU_COMPHY_SPEED_DIV_TX_FORCE BIT(7)
+#define MVEBU_SP_CALIB(n) (0x96c + (n) * 0x1000)
+#define MVEBU_SP_CALIB_SAMPLER(n) ((n) << 8)
+#define MVEBU_SP_CALIB_SAMPLER_EN BIT(12)
+#define MVEBU_COMPHY_TX_SLEW_RATE(n) (0x974 + (n) * 0x1000)
+#define MVEBU_COMPHY_TX_SLEW_RATE_EMPH(n) ((n) << 5)
+#define MVEBU_COMPHY_TX_SLEW_RATE_SLC(n) ((n) << 10)
+#define MVEBU_COMPHY_DLT_CTRL(n) (0x984 + (n) * 0x1000)
+#define MVEBU_COMPHY_DLT_CTRL_DTL_FLOOP_EN BIT(2)
+#define MVEBU_COMPHY_FRAME_DETECT0(n) (0xa14 + (n) * 0x1000)
+#define MVEBU_COMPHY_FRAME_DETECT0_PATN(n) ((n) << 7)
+#define MVEBU_COMPHY_FRAME_DETECT3(n) (0xa20 + (n) * 0x1000)
+#define MVEBU_COMPHY_FRAME_DETECT3_LOST_TIMEOUT_EN BIT(12)
+#define MVEBU_COMPHY_DME(n) (0xa28 + (n) * 0x1000)
+#define MVEBU_COMPHY_DME_ETH_MODE BIT(7)
+#define MVEBU_COMPHY_TRAINING0(n) (0xa68 + (n) * 0x1000)
+#define MVEBU_COMPHY_TRAINING0_P2P_HOLD BIT(15)
+#define MVEBU_COMPHY_TRAINING5(n) (0xaa4 + (n) * 0x1000)
+#define MVEBU_COMPHY_TRAINING5_RX_TIMER(n) ((n) << 0)
+#define MVEBU_COMPHY_TX_TRAIN_PRESET(n) (0xb1c + (n) * 0x1000)
+#define MVEBU_COMPHY_TX_TRAIN_PRESET_16B_AUTO_EN BIT(8)
+#define MVEBU_COMPHY_TX_TRAIN_PRESET_PRBS11 BIT(9)
+#define MVEBU_COMPHY_GEN1_S3(n) (0xc40 + (n) * 0x1000)
+#define MVEBU_COMPHY_GEN1_S3_FBCK_SEL BIT(9)
+#define MVEBU_COMPHY_GEN1_S4(n) (0xc44 + (n) * 0x1000)
+#define MVEBU_COMPHY_GEN1_S4_DFE_RES(n) ((n) << 8)
+#define MVEBU_COMPHY_TX_PRESET(n) (0xc68 + (n) * 0x1000)
+#define MVEBU_COMPHY_TX_PRESET_INDEX(n) ((n) << 0)
+#define MVEBU_COMPHY_GEN1_S5(n) (0xd38 + (n) * 0x1000)
+#define MVEBU_COMPHY_GEN1_S5_ICP(n) ((n) << 0)
+
+/* Relative to priv->regmap */
+#define MVEBU_COMPHY_CONF1(n) (0x1000 + (n) * 0x28)
+#define MVEBU_COMPHY_CONF1_PWRUP BIT(1)
+#define MVEBU_COMPHY_CONF1_USB_PCIE BIT(2) /* 0: Ethernet/SATA */
+#define MVEBU_COMPHY_CONF6(n) (0x1014 + (n) * 0x28)
+#define MVEBU_COMPHY_CONF6_40B BIT(18)
+#define MVEBU_COMPHY_SELECTOR 0x1140
+#define MVEBU_COMPHY_SELECTOR_PHY(n) ((n) * 0x4)
+
+#define MVEBU_COMPHY_LANES 6
+#define MVEBU_COMPHY_PORTS 3
+
+struct mvebu_comphy_conf {
+ enum phy_mode mode;
+ unsigned lane;
+ unsigned port;
+ u32 mux;
+};
+
+#define MVEBU_COMPHY_CONF(_lane, _port, _mode, _mux) \
+ { \
+ .lane = _lane, \
+ .port = _port, \
+ .mode = _mode, \
+ .mux = _mux, \
+ }
+
+static const struct mvebu_comphy_conf mvebu_comphy_cp110_modes[] = {
+ /* lane 0 */
+ MVEBU_COMPHY_CONF(0, 1, PHY_MODE_SGMII, 0x1),
+ /* lane 1 */
+ MVEBU_COMPHY_CONF(1, 2, PHY_MODE_SGMII, 0x1),
+ /* lane 2 */
+ MVEBU_COMPHY_CONF(2, 0, PHY_MODE_SGMII, 0x1),
+ MVEBU_COMPHY_CONF(2, 0, PHY_MODE_10GKR, 0x1),
+ /* lane 3 */
+ MVEBU_COMPHY_CONF(3, 1, PHY_MODE_SGMII, 0x2),
+ /* lane 4 */
+ MVEBU_COMPHY_CONF(4, 0, PHY_MODE_SGMII, 0x2),
+ MVEBU_COMPHY_CONF(4, 0, PHY_MODE_10GKR, 0x2),
+ MVEBU_COMPHY_CONF(4, 1, PHY_MODE_SGMII, 0x1),
+ /* lane 5 */
+ MVEBU_COMPHY_CONF(5, 2, PHY_MODE_SGMII, 0x1),
+};
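+/*
+ * Note from the table above: a port can be reached through more than one
+ * lane (e.g. port 0 through lane 2 or lane 4), and lane 4 is the only
+ * lane that can serve two different ports; the mux value selects the
+ * routing actually programmed into the selector register.
+ */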
+
+struct mvebu_comphy_priv {
+ void __iomem *base;
+ struct regmap *regmap;
+ struct device *dev;
+ int modes[MVEBU_COMPHY_LANES];
+};
+
+struct mvebu_comphy_lane {
+ struct mvebu_comphy_priv *priv;
+ unsigned id;
+ enum phy_mode mode;
+ int port;
+};
+
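+/* Map a (lane, port, mode) triplet to its selector mux value */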
+static int mvebu_comphy_get_mux(int lane, int port, enum phy_mode mode)
+{
+ int i, n = ARRAY_SIZE(mvebu_comphy_cp110_modes);
+
+ /* Unused PHY mux value is 0x0 */
+ if (mode == PHY_MODE_INVALID)
+ return 0;
+
+ for (i = 0; i < n; i++) {
+ if (mvebu_comphy_cp110_modes[i].lane == lane &&
+ mvebu_comphy_cp110_modes[i].port == port &&
+ mvebu_comphy_cp110_modes[i].mode == mode)
+ break;
+ }
+
+ if (i == n)
+ return -EINVAL;
+
+ return mvebu_comphy_cp110_modes[i].mux;
+}
+
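+/*
+ * Bring a lane into a sane state for Ethernet: power it up, select the
+ * SERDES generation for the requested mode (SGMII additionally runs the
+ * data bus at half width) and cycle the analog/core resets.
+ */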
+static void mvebu_comphy_ethernet_init_reset(struct mvebu_comphy_lane *lane,
+ enum phy_mode mode)
+{
+ struct mvebu_comphy_priv *priv = lane->priv;
+ u32 val;
+
+ regmap_read(priv->regmap, MVEBU_COMPHY_CONF1(lane->id), &val);
+ val &= ~MVEBU_COMPHY_CONF1_USB_PCIE;
+ val |= MVEBU_COMPHY_CONF1_PWRUP;
+ regmap_write(priv->regmap, MVEBU_COMPHY_CONF1(lane->id), val);
+
+ /* Select baud rates and PLLs */
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG0(lane->id));
+ val &= ~(MVEBU_COMPHY_SERDES_CFG0_PU_PLL |
+ MVEBU_COMPHY_SERDES_CFG0_PU_RX |
+ MVEBU_COMPHY_SERDES_CFG0_PU_TX |
+ MVEBU_COMPHY_SERDES_CFG0_HALF_BUS |
+ MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0xf) |
+ MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0xf));
+ if (mode == PHY_MODE_10GKR)
+ val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0xe) |
+ MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0xe);
+ else if (mode == PHY_MODE_SGMII)
+ val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0x6) |
+ MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0x6) |
+ MVEBU_COMPHY_SERDES_CFG0_HALF_BUS;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG0(lane->id));
+
+ /* reset */
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+ val &= ~(MVEBU_COMPHY_SERDES_CFG1_RESET |
+ MVEBU_COMPHY_SERDES_CFG1_CORE_RESET |
+ MVEBU_COMPHY_SERDES_CFG1_RF_RESET);
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+
+ /* de-assert reset */
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+ val |= MVEBU_COMPHY_SERDES_CFG1_RESET |
+ MVEBU_COMPHY_SERDES_CFG1_CORE_RESET;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+
+ /* wait until clocks are ready */
+ mdelay(1);
+
+ /* explicitly disable 40B, the bit isn't cleared on reset */
+ regmap_read(priv->regmap, MVEBU_COMPHY_CONF6(lane->id), &val);
+ val &= ~MVEBU_COMPHY_CONF6_40B;
+ regmap_write(priv->regmap, MVEBU_COMPHY_CONF6(lane->id), val);
+
+ /* refclk selection */
+ val = readl(priv->base + MVEBU_COMPHY_MISC_CTRL0(lane->id));
+ val &= ~MVEBU_COMPHY_MISC_CTRL0_REFCLK_SEL;
+ if (mode == PHY_MODE_10GKR)
+ val |= MVEBU_COMPHY_MISC_CTRL0_ICP_FORCE;
+ writel(val, priv->base + MVEBU_COMPHY_MISC_CTRL0(lane->id));
+
+ /* power and pll selection */
+ val = readl(priv->base + MVEBU_COMPHY_PWRPLL_CTRL(lane->id));
+ val &= ~(MVEBU_COMPHY_PWRPLL_CTRL_RFREQ(0x1f) |
+ MVEBU_COMPHY_PWRPLL_PHY_MODE(0x7));
+ val |= MVEBU_COMPHY_PWRPLL_CTRL_RFREQ(0x1) |
+ MVEBU_COMPHY_PWRPLL_PHY_MODE(0x4);
+ writel(val, priv->base + MVEBU_COMPHY_PWRPLL_CTRL(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_LOOPBACK(lane->id));
+ val &= ~MVEBU_COMPHY_LOOPBACK_DBUS_WIDTH(0x7);
+ val |= MVEBU_COMPHY_LOOPBACK_DBUS_WIDTH(0x1);
+ writel(val, priv->base + MVEBU_COMPHY_LOOPBACK(lane->id));
+}
+
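+/*
+ * Power up the PLLs and the RX/TX paths, then poll until both PLLs have
+ * locked and the receiver has completed its initialization.
+ */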
+static int mvebu_comphy_init_plls(struct mvebu_comphy_lane *lane,
+ enum phy_mode mode)
+{
+ struct mvebu_comphy_priv *priv = lane->priv;
+ u32 val;
+
+ /* SERDES external config */
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG0(lane->id));
+ val |= MVEBU_COMPHY_SERDES_CFG0_PU_PLL |
+ MVEBU_COMPHY_SERDES_CFG0_PU_RX |
+ MVEBU_COMPHY_SERDES_CFG0_PU_TX;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG0(lane->id));
+
+ /* check rx/tx pll */
+ readl_poll_timeout(priv->base + MVEBU_COMPHY_SERDES_STATUS0(lane->id),
+ val,
+ val & (MVEBU_COMPHY_SERDES_STATUS0_RX_PLL_RDY |
+ MVEBU_COMPHY_SERDES_STATUS0_TX_PLL_RDY),
+ 1000, 150000);
+ if (!(val & (MVEBU_COMPHY_SERDES_STATUS0_RX_PLL_RDY |
+ MVEBU_COMPHY_SERDES_STATUS0_TX_PLL_RDY)))
+ return -ETIMEDOUT;
+
+ /* rx init */
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+ val |= MVEBU_COMPHY_SERDES_CFG1_RX_INIT;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+
+ /* check rx */
+ readl_poll_timeout(priv->base + MVEBU_COMPHY_SERDES_STATUS0(lane->id),
+ val, val & MVEBU_COMPHY_SERDES_STATUS0_RX_INIT,
+ 1000, 10000);
+ if (!(val & MVEBU_COMPHY_SERDES_STATUS0_RX_INIT))
+ return -ETIMEDOUT;
+
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+ val &= ~MVEBU_COMPHY_SERDES_CFG1_RX_INIT;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+
+ return 0;
+}
+
+static int mvebu_comphy_set_mode_sgmii(struct phy *phy)
+{
+ struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
+ struct mvebu_comphy_priv *priv = lane->priv;
+ u32 val;
+
+ mvebu_comphy_ethernet_init_reset(lane, PHY_MODE_SGMII);
+
+ val = readl(priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id));
+ val &= ~MVEBU_COMPHY_RX_CTRL1_CLK8T_EN;
+ val |= MVEBU_COMPHY_RX_CTRL1_RXCLK2X_SEL;
+ writel(val, priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_DLT_CTRL(lane->id));
+ val &= ~MVEBU_COMPHY_DLT_CTRL_DTL_FLOOP_EN;
+ writel(val, priv->base + MVEBU_COMPHY_DLT_CTRL(lane->id));
+
+ regmap_read(priv->regmap, MVEBU_COMPHY_CONF1(lane->id), &val);
+ val &= ~MVEBU_COMPHY_CONF1_USB_PCIE;
+ val |= MVEBU_COMPHY_CONF1_PWRUP;
+ regmap_write(priv->regmap, MVEBU_COMPHY_CONF1(lane->id), val);
+
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S0(lane->id));
+ val &= ~MVEBU_COMPHY_GEN1_S0_TX_EMPH(0xf);
+ val |= MVEBU_COMPHY_GEN1_S0_TX_EMPH(0x1);
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S0(lane->id));
+
+ return mvebu_comphy_init_plls(lane, PHY_MODE_SGMII);
+}
+
+static int mvebu_comphy_set_mode_10gkr(struct phy *phy)
+{
+ struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
+ struct mvebu_comphy_priv *priv = lane->priv;
+ u32 val;
+
+ mvebu_comphy_ethernet_init_reset(lane, PHY_MODE_10GKR);
+
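+ /*
+ * The 10GBASE-KR tuning below enables DFE, forces the TX speed
+ * divider and programs the analog (amplitude, emphasis, impedance)
+ * and link-training parameters.
+ */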
+ val = readl(priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id));
+ val |= MVEBU_COMPHY_RX_CTRL1_RXCLK2X_SEL |
+ MVEBU_COMPHY_RX_CTRL1_CLK8T_EN;
+ writel(val, priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_DLT_CTRL(lane->id));
+ val |= MVEBU_COMPHY_DLT_CTRL_DTL_FLOOP_EN;
+ writel(val, priv->base + MVEBU_COMPHY_DLT_CTRL(lane->id));
+
+ /* Speed divider */
+ val = readl(priv->base + MVEBU_COMPHY_SPEED_DIV(lane->id));
+ val |= MVEBU_COMPHY_SPEED_DIV_TX_FORCE;
+ writel(val, priv->base + MVEBU_COMPHY_SPEED_DIV(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG2(lane->id));
+ val |= MVEBU_COMPHY_SERDES_CFG2_DFE_EN;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG2(lane->id));
+
+ /* DFE resolution */
+ val = readl(priv->base + MVEBU_COMPHY_DFE_RES(lane->id));
+ val |= MVEBU_COMPHY_DFE_RES_FORCE_GEN_TBL;
+ writel(val, priv->base + MVEBU_COMPHY_DFE_RES(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S0(lane->id));
+ val &= ~(MVEBU_COMPHY_GEN1_S0_TX_AMP(0x1f) |
+ MVEBU_COMPHY_GEN1_S0_TX_EMPH(0xf));
+ val |= MVEBU_COMPHY_GEN1_S0_TX_AMP(0x1c) |
+ MVEBU_COMPHY_GEN1_S0_TX_EMPH(0xe);
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S0(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S2(lane->id));
+ val &= ~MVEBU_COMPHY_GEN1_S2_TX_EMPH(0xf);
+ val |= MVEBU_COMPHY_GEN1_S2_TX_EMPH_EN;
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S2(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_TX_SLEW_RATE(lane->id));
+ val |= MVEBU_COMPHY_TX_SLEW_RATE_EMPH(0x3) |
+ MVEBU_COMPHY_TX_SLEW_RATE_SLC(0x3f);
+ writel(val, priv->base + MVEBU_COMPHY_TX_SLEW_RATE(lane->id));
+
+ /* Impedance calibration */
+ val = readl(priv->base + MVEBU_COMPHY_IMP_CAL(lane->id));
+ val &= ~MVEBU_COMPHY_IMP_CAL_TX_EXT(0x1f);
+ val |= MVEBU_COMPHY_IMP_CAL_TX_EXT(0xe) |
+ MVEBU_COMPHY_IMP_CAL_TX_EXT_EN;
+ writel(val, priv->base + MVEBU_COMPHY_IMP_CAL(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S5(lane->id));
+ val &= ~MVEBU_COMPHY_GEN1_S5_ICP(0xf);
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S5(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S1(lane->id));
+ val &= ~(MVEBU_COMPHY_GEN1_S1_RX_MUL_PI(0x7) |
+ MVEBU_COMPHY_GEN1_S1_RX_MUL_PF(0x7) |
+ MVEBU_COMPHY_GEN1_S1_RX_MUL_FI(0x3) |
+ MVEBU_COMPHY_GEN1_S1_RX_MUL_FF(0x3));
+ val |= MVEBU_COMPHY_GEN1_S1_RX_DFE_EN |
+ MVEBU_COMPHY_GEN1_S1_RX_MUL_PI(0x2) |
+ MVEBU_COMPHY_GEN1_S1_RX_MUL_PF(0x2) |
+ MVEBU_COMPHY_GEN1_S1_RX_MUL_FF(0x1) |
+ MVEBU_COMPHY_GEN1_S1_RX_DIV(0x3);
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S1(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_COEF(lane->id));
+ val &= ~(MVEBU_COMPHY_COEF_DFE_EN | MVEBU_COMPHY_COEF_DFE_CTRL);
+ writel(val, priv->base + MVEBU_COMPHY_COEF(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S4(lane->id));
+ val &= ~MVEBU_COMPHY_GEN1_S4_DFE_RES(0x3);
+ val |= MVEBU_COMPHY_GEN1_S4_DFE_RES(0x1);
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S4(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_GEN1_S3(lane->id));
+ val |= MVEBU_COMPHY_GEN1_S3_FBCK_SEL;
+ writel(val, priv->base + MVEBU_COMPHY_GEN1_S3(lane->id));
+
+ /* rx training timer */
+ val = readl(priv->base + MVEBU_COMPHY_TRAINING5(lane->id));
+ val &= ~MVEBU_COMPHY_TRAINING5_RX_TIMER(0x3ff);
+ val |= MVEBU_COMPHY_TRAINING5_RX_TIMER(0x13);
+ writel(val, priv->base + MVEBU_COMPHY_TRAINING5(lane->id));
+
+ /* tx train peak to peak hold */
+ val = readl(priv->base + MVEBU_COMPHY_TRAINING0(lane->id));
+ val |= MVEBU_COMPHY_TRAINING0_P2P_HOLD;
+ writel(val, priv->base + MVEBU_COMPHY_TRAINING0(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_TX_PRESET(lane->id));
+ val &= ~MVEBU_COMPHY_TX_PRESET_INDEX(0xf);
+ val |= MVEBU_COMPHY_TX_PRESET_INDEX(0x2); /* preset coeff */
+ writel(val, priv->base + MVEBU_COMPHY_TX_PRESET(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_FRAME_DETECT3(lane->id));
+ val &= ~MVEBU_COMPHY_FRAME_DETECT3_LOST_TIMEOUT_EN;
+ writel(val, priv->base + MVEBU_COMPHY_FRAME_DETECT3(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_TX_TRAIN_PRESET(lane->id));
+ val |= MVEBU_COMPHY_TX_TRAIN_PRESET_16B_AUTO_EN |
+ MVEBU_COMPHY_TX_TRAIN_PRESET_PRBS11;
+ writel(val, priv->base + MVEBU_COMPHY_TX_TRAIN_PRESET(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_FRAME_DETECT0(lane->id));
+ val &= ~MVEBU_COMPHY_FRAME_DETECT0_PATN(0x1ff);
+ val |= MVEBU_COMPHY_FRAME_DETECT0_PATN(0x88);
+ writel(val, priv->base + MVEBU_COMPHY_FRAME_DETECT0(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_DME(lane->id));
+ val |= MVEBU_COMPHY_DME_ETH_MODE;
+ writel(val, priv->base + MVEBU_COMPHY_DME(lane->id));
+
+ val = readl(priv->base + MVEBU_COMPHY_VDD_CAL0(lane->id));
+ val |= MVEBU_COMPHY_VDD_CAL0_CONT_MODE;
+ writel(val, priv->base + MVEBU_COMPHY_VDD_CAL0(lane->id));
+
+ val = readl(priv->base + MVEBU_SP_CALIB(lane->id));
+ val &= ~MVEBU_SP_CALIB_SAMPLER(0x3);
+ val |= MVEBU_SP_CALIB_SAMPLER(0x3) |
+ MVEBU_SP_CALIB_SAMPLER_EN;
+ writel(val, priv->base + MVEBU_SP_CALIB(lane->id));
+ val &= ~MVEBU_SP_CALIB_SAMPLER_EN;
+ writel(val, priv->base + MVEBU_SP_CALIB(lane->id));
+
+ /* External rx regulator */
+ val = readl(priv->base + MVEBU_COMPHY_EXT_SELV(lane->id));
+ val &= ~MVEBU_COMPHY_EXT_SELV_RX_SAMPL(0x1f);
+ val |= MVEBU_COMPHY_EXT_SELV_RX_SAMPL(0x1a);
+ writel(val, priv->base + MVEBU_COMPHY_EXT_SELV(lane->id));
+
+ return mvebu_comphy_init_plls(lane, PHY_MODE_10GKR);
+}
+
+static int mvebu_comphy_power_on(struct phy *phy)
+{
+ struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
+ struct mvebu_comphy_priv *priv = lane->priv;
+ int ret, mux;
+ u32 val;
+
+ mux = mvebu_comphy_get_mux(lane->id, lane->port, lane->mode);
+ if (mux < 0)
+ return -ENOTSUPP;
+
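+ /* Each lane owns a 4-bit field in the shared selector register */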
+ regmap_read(priv->regmap, MVEBU_COMPHY_SELECTOR, &val);
+ val &= ~(0xf << MVEBU_COMPHY_SELECTOR_PHY(lane->id));
+ val |= mux << MVEBU_COMPHY_SELECTOR_PHY(lane->id);
+ regmap_write(priv->regmap, MVEBU_COMPHY_SELECTOR, val);
+
+ switch (lane->mode) {
+ case PHY_MODE_SGMII:
+ ret = mvebu_comphy_set_mode_sgmii(phy);
+ break;
+ case PHY_MODE_10GKR:
+ ret = mvebu_comphy_set_mode_10gkr(phy);
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+
+ /* digital reset */
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+ val |= MVEBU_COMPHY_SERDES_CFG1_RF_RESET;
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+
+ return ret;
+}
+
+static int mvebu_comphy_set_mode(struct phy *phy, enum phy_mode mode)
+{
+ struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
+
+ if (mvebu_comphy_get_mux(lane->id, lane->port, mode) < 0)
+ return -EINVAL;
+
+ lane->mode = mode;
+ return 0;
+}
+
+static int mvebu_comphy_power_off(struct phy *phy)
+{
+ struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
+ struct mvebu_comphy_priv *priv = lane->priv;
+ u32 val;
+
+ val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+ val &= ~(MVEBU_COMPHY_SERDES_CFG1_RESET |
+ MVEBU_COMPHY_SERDES_CFG1_CORE_RESET |
+ MVEBU_COMPHY_SERDES_CFG1_RF_RESET);
+ writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id));
+
+ regmap_read(priv->regmap, MVEBU_COMPHY_SELECTOR, &val);
+ val &= ~(0xf << MVEBU_COMPHY_SELECTOR_PHY(lane->id));
+ regmap_write(priv->regmap, MVEBU_COMPHY_SELECTOR, val);
+
+ return 0;
+}
+
+static const struct phy_ops mvebu_comphy_ops = {
+ .power_on = mvebu_comphy_power_on,
+ .power_off = mvebu_comphy_power_off,
+ .set_mode = mvebu_comphy_set_mode,
+ .owner = THIS_MODULE,
+};
+
+static struct phy *mvebu_comphy_xlate(struct device *dev,
+ struct of_phandle_args *args)
+{
+ struct mvebu_comphy_lane *lane;
+ struct phy *phy;
+
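+ /* The PHY phandle argument selects the Ethernet port using this lane */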
+ if (WARN_ON(args->args[0] >= MVEBU_COMPHY_PORTS))
+ return ERR_PTR(-EINVAL);
+
+ phy = of_phy_simple_xlate(dev, args);
+ if (IS_ERR(phy))
+ return phy;
+
+ lane = phy_get_drvdata(phy);
+ if (lane->port >= 0)
+ return ERR_PTR(-EBUSY);
+ lane->port = args->args[0];
+
+ return phy;
+}
+
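+/*
+ * A minimal sketch of the DT layout this probe expects, inferred from
+ * the code above; node and label names are illustrative only:
+ *
+ *    comphy: phy@120000 {
+ *        compatible = "marvell,comphy-cp110";
+ *        marvell,system-controller = <&syscon>;
+ *
+ *        comphy2: phy@2 {
+ *            reg = <2>;         - the lane number
+ *            #phy-cells = <1>;  - the cell selects the Ethernet port
+ *        };
+ *    };
+ */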
+static int mvebu_comphy_probe(struct platform_device *pdev)
+{
+ struct mvebu_comphy_priv *priv;
+ struct phy_provider *provider;
+ struct device_node *child;
+ struct resource *res;
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->dev = &pdev->dev;
+ priv->regmap =
+ syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "marvell,system-controller");
+ if (IS_ERR(priv->regmap))
+ return PTR_ERR(priv->regmap);
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ priv->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
+ for_each_available_child_of_node(pdev->dev.of_node, child) {
+ struct mvebu_comphy_lane *lane;
+ struct phy *phy;
+ int ret;
+ u32 val;
+
+ ret = of_property_read_u32(child, "reg", &val);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "missing 'reg' property (%d)\n",
+ ret);
+ continue;
+ }
+
+ if (val >= MVEBU_COMPHY_LANES) {
+ dev_err(&pdev->dev, "invalid 'reg' property\n");
+ continue;
+ }
+
+ lane = devm_kzalloc(&pdev->dev, sizeof(*lane), GFP_KERNEL);
+ if (!lane) {
+ /* drop the child reference taken by the iterator */
+ of_node_put(child);
+ return -ENOMEM;
+ }
+
+ phy = devm_phy_create(&pdev->dev, child, &mvebu_comphy_ops);
+ if (IS_ERR(phy)) {
+ of_node_put(child);
+ return PTR_ERR(phy);
+ }
+
+ lane->priv = priv;
+ lane->mode = PHY_MODE_INVALID;
+ lane->id = val;
+ lane->port = -1;
+ phy_set_drvdata(phy, lane);
+
+ /*
+ * Once all modes are supported in this driver we should call
+ * mvebu_comphy_power_off(phy) here to avoid relying on the
+ * bootloader/firmware configuration.
+ */
+ }
+
+ dev_set_drvdata(&pdev->dev, priv);
+ provider = devm_of_phy_provider_register(&pdev->dev,
+ mvebu_comphy_xlate);
+ return PTR_ERR_OR_ZERO(provider);
+}
+
+static const struct of_device_id mvebu_comphy_of_match_table[] = {
+ { .compatible = "marvell,comphy-cp110" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, mvebu_comphy_of_match_table);
+
+static struct platform_driver mvebu_comphy_driver = {
+ .probe = mvebu_comphy_probe,
+ .driver = {
+ .name = "mvebu-comphy",
+ .of_match_table = mvebu_comphy_of_match_table,
+ },
+};
+module_platform_driver(mvebu_comphy_driver);
+
+MODULE_AUTHOR("Antoine Tenart <antoine.tenart@free-electrons.com>");
+MODULE_DESCRIPTION("Common PHY driver for mvebu SoCs");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/ptp/ptp_dte.c b/drivers/ptp/ptp_dte.c
index faf6f7a..6edd3b9 100644
--- a/drivers/ptp/ptp_dte.c
+++ b/drivers/ptp/ptp_dte.c
@@ -221,7 +221,7 @@ static int ptp_dte_enable(struct ptp_clock_info *ptp,
return -EOPNOTSUPP;
}
-static struct ptp_clock_info ptp_dte_caps = {
+static const struct ptp_clock_info ptp_dte_caps = {
.owner = THIS_MODULE,
.name = "DTE PTP timer",
.max_adj = 50000000,
diff --git a/drivers/ptp/ptp_ixp46x.c b/drivers/ptp/ptp_ixp46x.c
index 344a3ba..1171ffd 100644
--- a/drivers/ptp/ptp_ixp46x.c
+++ b/drivers/ptp/ptp_ixp46x.c
@@ -236,7 +236,7 @@ static int ptp_ixp_enable(struct ptp_clock_info *ptp,
return -EOPNOTSUPP;
}
-static struct ptp_clock_info ptp_ixp_caps = {
+static const struct ptp_clock_info ptp_ixp_caps = {
.owner = THIS_MODULE,
.name = "IXP46X timer",
.max_adj = 66666655,
diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c
index bb86569..2b1b212 100644
--- a/drivers/ptp/ptp_kvm.c
+++ b/drivers/ptp/ptp_kvm.c
@@ -150,7 +150,7 @@ static int ptp_kvm_enable(struct ptp_clock_info *ptp,
return -EOPNOTSUPP;
}
-static struct ptp_clock_info ptp_kvm_caps = {
+static const struct ptp_clock_info ptp_kvm_caps = {
.owner = THIS_MODULE,
.name = "KVM virtual PTP",
.max_adj = 0,
diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c
index 3aa22ae..b328517 100644
--- a/drivers/ptp/ptp_pch.c
+++ b/drivers/ptp/ptp_pch.c
@@ -509,7 +509,7 @@ static int ptp_pch_enable(struct ptp_clock_info *ptp,
return -EOPNOTSUPP;
}
-static struct ptp_clock_info ptp_pch_caps = {
+static const struct ptp_clock_info ptp_pch_caps = {
.owner = THIS_MODULE,
.name = "PCH timer",
.max_adj = 50000000,
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 2ade613..26363e0 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -305,7 +305,7 @@ static long ctcm_check_irb_error(struct ccw_device *cdev, struct irb *irb)
* ch The channel, the sense code belongs to.
* sense The sense code to inspect.
*/
-static inline void ccw_unit_check(struct channel *ch, __u8 sense)
+static void ccw_unit_check(struct channel *ch, __u8 sense)
{
CTCM_DBF_TEXT_(TRACE, CTC_DBF_DEBUG,
"%s(%s): %02x",
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index 619da81..d01b5c2 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -327,8 +327,7 @@ lcs_set_allowed_threads(struct lcs_card *card, unsigned long threads)
spin_unlock_irqrestore(&card->mask_lock, flags);
wake_up(&card->wait_q);
}
-static inline int
-lcs_threads_running(struct lcs_card *card, unsigned long threads)
+static int lcs_threads_running(struct lcs_card *card, unsigned long threads)
{
unsigned long flags;
int rc = 0;
@@ -346,8 +345,7 @@ lcs_wait_for_threads(struct lcs_card *card, unsigned long threads)
lcs_threads_running(card, threads) == 0);
}
-static inline int
-lcs_set_thread_start_bit(struct lcs_card *card, unsigned long thread)
+static int lcs_set_thread_start_bit(struct lcs_card *card, unsigned long thread)
{
unsigned long flags;
@@ -373,8 +371,7 @@ lcs_clear_thread_running_bit(struct lcs_card *card, unsigned long thread)
wake_up(&card->wait_q);
}
-static inline int
-__lcs_do_run_thread(struct lcs_card *card, unsigned long thread)
+static int __lcs_do_run_thread(struct lcs_card *card, unsigned long thread)
{
unsigned long flags;
int rc = 0;
@@ -444,8 +441,7 @@ lcs_setup_card(struct lcs_card *card)
INIT_LIST_HEAD(&card->lancmd_waiters);
}
-static inline void
-lcs_clear_multicast_list(struct lcs_card *card)
+static void lcs_clear_multicast_list(struct lcs_card *card)
{
#ifdef CONFIG_IP_MULTICAST
struct lcs_ipm_list *ipm;
@@ -656,8 +652,7 @@ __lcs_resume_channel(struct lcs_channel *channel)
/**
* Make a buffer ready for processing.
*/
-static inline void
-__lcs_ready_buffer_bits(struct lcs_channel *channel, int index)
+static void __lcs_ready_buffer_bits(struct lcs_channel *channel, int index)
{
int prev, next;
@@ -1169,8 +1164,8 @@ lcs_get_mac_for_ipm(__be32 ipm, char *mac, struct net_device *dev)
/**
* function called by net device to handle multicast address relevant things
*/
-static inline void
-lcs_remove_mc_addresses(struct lcs_card *card, struct in_device *in4_dev)
+static void lcs_remove_mc_addresses(struct lcs_card *card,
+ struct in_device *in4_dev)
{
struct ip_mc_list *im4;
struct list_head *l;
@@ -1196,8 +1191,9 @@ lcs_remove_mc_addresses(struct lcs_card *card, struct in_device *in4_dev)
spin_unlock_irqrestore(&card->ipm_lock, flags);
}
-static inline struct lcs_ipm_list *
-lcs_check_addr_entry(struct lcs_card *card, struct ip_mc_list *im4, char *buf)
+static struct lcs_ipm_list *lcs_check_addr_entry(struct lcs_card *card,
+ struct ip_mc_list *im4,
+ char *buf)
{
struct lcs_ipm_list *tmp, *ipm = NULL;
struct list_head *l;
@@ -1218,8 +1214,8 @@ lcs_check_addr_entry(struct lcs_card *card, struct ip_mc_list *im4, char *buf)
return ipm;
}
-static inline void
-lcs_set_mc_addresses(struct lcs_card *card, struct in_device *in4_dev)
+static void lcs_set_mc_addresses(struct lcs_card *card,
+ struct in_device *in4_dev)
{
struct ip_mc_list *im4;
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 7e0e6a4..b9c7c1e 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -249,14 +249,14 @@ struct ll_header {
* Compatibility macros for busy handling
* of network devices.
*/
-static inline void netiucv_clear_busy(struct net_device *dev)
+static void netiucv_clear_busy(struct net_device *dev)
{
struct netiucv_priv *priv = netdev_priv(dev);
clear_bit(0, &priv->tbusy);
netif_wake_queue(dev);
}
-static inline int netiucv_test_and_set_busy(struct net_device *dev)
+static int netiucv_test_and_set_busy(struct net_device *dev)
{
struct netiucv_priv *priv = netdev_priv(dev);
netif_stop_queue(dev);
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 7a0ffc7..59e0985 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -857,11 +857,6 @@ static inline int qeth_get_ip_version(struct sk_buff *skb)
}
}
-static inline int qeth_get_ip_protocol(struct sk_buff *skb)
-{
- return ip_hdr(skb)->protocol;
-}
-
static inline void qeth_put_buffer_pool_entry(struct qeth_card *card,
struct qeth_buffer_pool_entry *entry)
{
@@ -951,10 +946,13 @@ int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int);
int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb,
int extra_elems, int data_offset);
int qeth_get_elements_for_frags(struct sk_buff *);
-int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *,
- struct sk_buff *, struct qeth_hdr *, int, int);
-int qeth_do_send_packet(struct qeth_card *, struct qeth_qdio_out_q *,
- struct sk_buff *, struct qeth_hdr *, int);
+int qeth_do_send_packet_fast(struct qeth_card *card,
+ struct qeth_qdio_out_q *queue, struct sk_buff *skb,
+ struct qeth_hdr *hdr, unsigned int offset,
+ unsigned int hd_len);
+int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
+ struct sk_buff *skb, struct qeth_hdr *hdr,
+ unsigned int hd_len, unsigned int offset, int elements);
int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
int qeth_core_get_sset_count(struct net_device *, int);
void qeth_core_get_ethtool_stats(struct net_device *,
@@ -987,6 +985,7 @@ int qeth_set_features(struct net_device *, netdev_features_t);
int qeth_recover_features(struct net_device *);
netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
int qeth_vm_request_mac(struct qeth_card *card);
+int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len);
/* exports for OSN */
int qeth_osn_assist(struct net_device *, void *, int);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 4792cab..bae7440 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -101,7 +101,7 @@ void qeth_close_dev(struct qeth_card *card)
}
EXPORT_SYMBOL_GPL(qeth_close_dev);
-static inline const char *qeth_get_cardname(struct qeth_card *card)
+static const char *qeth_get_cardname(struct qeth_card *card)
{
if (card->info.guestlan) {
switch (card->info.type) {
@@ -330,7 +330,7 @@ static struct qeth_qdio_q *qeth_alloc_qdio_queue(void)
return q;
}
-static inline int qeth_cq_init(struct qeth_card *card)
+static int qeth_cq_init(struct qeth_card *card)
{
int rc;
@@ -352,7 +352,7 @@ out:
return rc;
}
-static inline int qeth_alloc_cq(struct qeth_card *card)
+static int qeth_alloc_cq(struct qeth_card *card)
{
int rc;
@@ -397,7 +397,7 @@ kmsg_out:
goto out;
}
-static inline void qeth_free_cq(struct qeth_card *card)
+static void qeth_free_cq(struct qeth_card *card)
{
if (card->qdio.c_q) {
--card->qdio.no_in_queues;
@@ -408,8 +408,9 @@ static inline void qeth_free_cq(struct qeth_card *card)
card->qdio.out_bufstates = NULL;
}
-static inline enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15,
- int delayed) {
+static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15,
+ int delayed)
+{
enum iucv_tx_notify n;
switch (sbalf15) {
@@ -432,8 +433,8 @@ static inline enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15,
return n;
}
-static inline void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q,
- int bidx, int forced_cleanup)
+static void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q, int bidx,
+ int forced_cleanup)
{
if (q->card->options.cq != QETH_CQ_ENABLED)
return;
@@ -475,8 +476,9 @@ static inline void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q,
}
-static inline void qeth_qdio_handle_aob(struct qeth_card *card,
- unsigned long phys_aob_addr) {
+static void qeth_qdio_handle_aob(struct qeth_card *card,
+ unsigned long phys_aob_addr)
+{
struct qaob *aob;
struct qeth_qdio_out_buffer *buffer;
enum iucv_tx_notify notification;
@@ -2228,7 +2230,7 @@ static int qeth_cm_setup(struct qeth_card *card)
}
-static inline int qeth_get_initial_mtu_for_card(struct qeth_card *card)
+static int qeth_get_initial_mtu_for_card(struct qeth_card *card)
{
switch (card->info.type) {
case QETH_CARD_TYPE_UNKNOWN:
@@ -2251,7 +2253,7 @@ static inline int qeth_get_initial_mtu_for_card(struct qeth_card *card)
}
}
-static inline int qeth_get_mtu_outof_framesize(int framesize)
+static int qeth_get_mtu_outof_framesize(int framesize)
{
switch (framesize) {
case 0x4000:
@@ -2267,7 +2269,7 @@ static inline int qeth_get_mtu_outof_framesize(int framesize)
}
}
-static inline int qeth_mtu_is_valid(struct qeth_card *card, int mtu)
+static int qeth_mtu_is_valid(struct qeth_card *card, int mtu)
{
switch (card->info.type) {
case QETH_CARD_TYPE_OSD:
@@ -2738,8 +2740,8 @@ static void qeth_initialize_working_pool_list(struct qeth_card *card)
}
}
-static inline struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry(
- struct qeth_card *card)
+static struct qeth_buffer_pool_entry *qeth_find_free_buffer_pool_entry(
+ struct qeth_card *card)
{
struct list_head *plh;
struct qeth_buffer_pool_entry *entry;
@@ -2870,7 +2872,7 @@ int qeth_init_qdio_queues(struct qeth_card *card)
}
EXPORT_SYMBOL_GPL(qeth_init_qdio_queues);
-static inline __u8 qeth_get_ipa_adp_type(enum qeth_link_types link_type)
+static __u8 qeth_get_ipa_adp_type(enum qeth_link_types link_type)
{
switch (link_type) {
case QETH_LINK_TYPE_HSTR:
@@ -3888,27 +3890,45 @@ int qeth_hdr_chk_and_bounce(struct sk_buff *skb, struct qeth_hdr **hdr, int len)
}
EXPORT_SYMBOL_GPL(qeth_hdr_chk_and_bounce);
-static inline void __qeth_fill_buffer(struct sk_buff *skb,
- struct qdio_buffer *buffer, int is_tso, int *next_element_to_fill,
- int offset)
+/**
+ * qeth_push_hdr() - push a qeth_hdr onto an skb.
+ * @skb: skb that the qeth_hdr should be pushed onto.
+ * @hdr: double pointer to a qeth_hdr. When returning with >= 0,
+ * it contains a valid pointer to a qeth_hdr.
+ * @len: length of the hdr that needs to be pushed on.
+ *
+ * Returns the pushed length. If the header can't be pushed on
+ * (e.g. because it would cross a page boundary), it is allocated from
+ * the cache instead and 0 is returned.
+ * A negative return value indicates an error while creating the hdr.
+ */
+int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len)
{
- int length = skb_headlen(skb);
- int length_here;
- int element;
- char *data;
- int first_lap, cnt;
- struct skb_frag_struct *frag;
-
- element = *next_element_to_fill;
- data = skb->data;
- first_lap = (is_tso == 0 ? 1 : 0);
-
- if (offset >= 0) {
- data = skb->data + offset;
- length -= offset;
- first_lap = 0;
+ if (skb_headroom(skb) >= len &&
+ qeth_get_elements_for_range((addr_t)skb->data - len,
+ (addr_t)skb->data) == 1) {
+ *hdr = skb_push(skb, len);
+ return len;
}
+ /* fall back */
+ *hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
+ if (!*hdr)
+ return -ENOMEM;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(qeth_push_hdr);
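+/*
+ * Typical use of qeth_push_hdr() (see qeth_l2_xmit_osa() in
+ * qeth_l2_main.c below): try the zero-copy push first; a return of 0
+ * means the hdr was cache-allocated and must be freed with
+ * kmem_cache_free() on the error path.
+ */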
+
+static void __qeth_fill_buffer(struct sk_buff *skb,
+ struct qeth_qdio_out_buffer *buf,
+ bool is_first_elem, unsigned int offset)
+{
+ struct qdio_buffer *buffer = buf->buffer;
+ int element = buf->next_element_to_fill;
+ int length = skb_headlen(skb) - offset;
+ char *data = skb->data + offset;
+ int length_here, cnt;
+ /* map linear part into buffer element(s) */
while (length > 0) {
/* length_here is the remaining amount of data in this page */
length_here = PAGE_SIZE - ((unsigned long) data % PAGE_SIZE);
@@ -3918,34 +3938,28 @@ static inline void __qeth_fill_buffer(struct sk_buff *skb,
buffer->element[element].addr = data;
buffer->element[element].length = length_here;
length -= length_here;
- if (!length) {
- if (first_lap)
- if (skb_shinfo(skb)->nr_frags)
- buffer->element[element].eflags =
- SBAL_EFLAGS_FIRST_FRAG;
- else
- buffer->element[element].eflags = 0;
- else
+ if (is_first_elem) {
+ is_first_elem = false;
+ if (length || skb_is_nonlinear(skb))
+ /* skb needs additional elements */
buffer->element[element].eflags =
- SBAL_EFLAGS_MIDDLE_FRAG;
- } else {
- if (first_lap)
- buffer->element[element].eflags =
- SBAL_EFLAGS_FIRST_FRAG;
+ SBAL_EFLAGS_FIRST_FRAG;
else
- buffer->element[element].eflags =
- SBAL_EFLAGS_MIDDLE_FRAG;
+ buffer->element[element].eflags = 0;
+ } else {
+ buffer->element[element].eflags =
+ SBAL_EFLAGS_MIDDLE_FRAG;
}
data += length_here;
element++;
- first_lap = 0;
}
+ /* map page frags into buffer element(s) */
for (cnt = 0; cnt < skb_shinfo(skb)->nr_frags; cnt++) {
- frag = &skb_shinfo(skb)->frags[cnt];
- data = (char *)page_to_phys(skb_frag_page(frag)) +
- frag->page_offset;
- length = frag->size;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[cnt];
+
+ data = skb_frag_address(frag);
+ length = skb_frag_size(frag);
while (length > 0) {
length_here = PAGE_SIZE -
((unsigned long) data % PAGE_SIZE);
@@ -3964,48 +3978,45 @@ static inline void __qeth_fill_buffer(struct sk_buff *skb,
if (buffer->element[element - 1].eflags)
buffer->element[element - 1].eflags = SBAL_EFLAGS_LAST_FRAG;
- *next_element_to_fill = element;
+ buf->next_element_to_fill = element;
}
-static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
- struct qeth_qdio_out_buffer *buf, struct sk_buff *skb,
- struct qeth_hdr *hdr, int offset, int hd_len)
+/**
+ * qeth_fill_buffer() - map skb into an output buffer
+ * @queue: QDIO queue to submit the buffer on
+ * @buf: buffer to transport the skb
+ * @skb: skb to map into the buffer
+ * @hdr: qeth_hdr for this skb. Either at skb->data, or allocated
+ * from qeth_core_header_cache.
+ * @offset: when mapping the skb, start at skb->data + offset
+ * @hd_len: if > 0, build a dedicated header element of this size
+ */
+static int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
+ struct qeth_qdio_out_buffer *buf,
+ struct sk_buff *skb, struct qeth_hdr *hdr,
+ unsigned int offset, unsigned int hd_len)
{
- struct qdio_buffer *buffer;
- int flush_cnt = 0, hdr_len, large_send = 0;
+ struct qdio_buffer *buffer = buf->buffer;
+ bool is_first_elem = true;
+ int flush_cnt = 0;
- buffer = buf->buffer;
refcount_inc(&skb->users);
skb_queue_tail(&buf->skb_list, skb);
- /*check first on TSO ....*/
- if (hdr->hdr.l3.id == QETH_HEADER_TYPE_TSO) {
+ /* build dedicated header element */
+ if (hd_len) {
int element = buf->next_element_to_fill;
+ is_first_elem = false;
- hdr_len = sizeof(struct qeth_hdr_tso) +
- ((struct qeth_hdr_tso *)hdr)->ext.dg_hdr_len;
- /*fill first buffer entry only with header information */
- buffer->element[element].addr = skb->data;
- buffer->element[element].length = hdr_len;
- buffer->element[element].eflags = SBAL_EFLAGS_FIRST_FRAG;
- buf->next_element_to_fill++;
- skb->data += hdr_len;
- skb->len -= hdr_len;
- large_send = 1;
- }
-
- if (offset >= 0) {
- int element = buf->next_element_to_fill;
buffer->element[element].addr = hdr;
- buffer->element[element].length = sizeof(struct qeth_hdr) +
- hd_len;
+ buffer->element[element].length = hd_len;
buffer->element[element].eflags = SBAL_EFLAGS_FIRST_FRAG;
- buf->is_header[element] = 1;
+ /* remember to free cache-allocated qeth_hdr: */
+ buf->is_header[element] = ((void *)hdr != skb->data);
buf->next_element_to_fill++;
}
- __qeth_fill_buffer(skb, buffer, large_send,
- (int *)&buf->next_element_to_fill, offset);
+ __qeth_fill_buffer(skb, buf, is_first_elem, offset);
if (!queue->do_pack) {
QETH_CARD_TEXT(queue->card, 6, "fillbfnp");
@@ -4030,8 +4041,9 @@ static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
}
int qeth_do_send_packet_fast(struct qeth_card *card,
- struct qeth_qdio_out_q *queue, struct sk_buff *skb,
- struct qeth_hdr *hdr, int offset, int hd_len)
+ struct qeth_qdio_out_q *queue, struct sk_buff *skb,
+ struct qeth_hdr *hdr, unsigned int offset,
+ unsigned int hd_len)
{
struct qeth_qdio_out_buffer *buffer;
int index;
@@ -4061,8 +4073,9 @@ out:
EXPORT_SYMBOL_GPL(qeth_do_send_packet_fast);
int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
- struct sk_buff *skb, struct qeth_hdr *hdr,
- int elements_needed)
+ struct sk_buff *skb, struct qeth_hdr *hdr,
+ unsigned int offset, unsigned int hd_len,
+ int elements_needed)
{
struct qeth_qdio_out_buffer *buffer;
int start_index;
@@ -4111,7 +4124,7 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
}
}
}
- tmp = qeth_fill_buffer(queue, buffer, skb, hdr, -1, 0);
+ tmp = qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len);
queue->next_buf_to_fill = (queue->next_buf_to_fill + tmp) %
QDIO_MAX_BUFFERS_PER_Q;
flush_count += tmp;
@@ -4834,7 +4847,7 @@ out:
}
EXPORT_SYMBOL_GPL(qeth_vm_request_mac);
-static inline int qeth_get_qdio_q_format(struct qeth_card *card)
+static int qeth_get_qdio_q_format(struct qeth_card *card)
{
if (card->info.type == QETH_CARD_TYPE_IQD)
return QDIO_IQDIO_QFMT;
@@ -4899,9 +4912,12 @@ out:
return;
}
-static inline void qeth_qdio_establish_cq(struct qeth_card *card,
- struct qdio_buffer **in_sbal_ptrs,
- void (**queue_start_poll) (struct ccw_device *, int, unsigned long)) {
+static void qeth_qdio_establish_cq(struct qeth_card *card,
+ struct qdio_buffer **in_sbal_ptrs,
+ void (**queue_start_poll)
+ (struct ccw_device *, int,
+ unsigned long))
+{
int i;
if (card->options.cq == QETH_CQ_ENABLED) {
@@ -5193,9 +5209,10 @@ out:
}
EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card);
-static inline int qeth_create_skb_frag(struct qeth_qdio_buffer *qethbuffer,
- struct qdio_buffer_element *element,
- struct sk_buff **pskb, int offset, int *pfrag, int data_len)
+static int qeth_create_skb_frag(struct qeth_qdio_buffer *qethbuffer,
+ struct qdio_buffer_element *element,
+ struct sk_buff **pskb, int offset, int *pfrag,
+ int data_len)
{
struct page *page = virt_to_page(element->addr);
if (*pskb == NULL) {
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index 6d255c2..d1ee9e3 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -78,7 +78,7 @@ static ssize_t qeth_dev_card_type_show(struct device *dev,
static DEVICE_ATTR(card_type, 0444, qeth_dev_card_type_show, NULL);
-static inline const char *qeth_get_bufsize_str(struct qeth_card *card)
+static const char *qeth_get_bufsize_str(struct qeth_card *card)
{
if (card->qdio.in_buf_size == 16384)
return "16k";
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index ad110ab..760b023 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -231,13 +231,7 @@ static void qeth_l2_del_all_macs(struct qeth_card *card)
spin_unlock_bh(&card->mclock);
}
-static inline u32 qeth_l2_mac_hash(const u8 *addr)
-{
- return get_unaligned((u32 *)(&addr[2]));
-}
-
-static inline int qeth_l2_get_cast_type(struct qeth_card *card,
- struct sk_buff *skb)
+static int qeth_l2_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
{
if (card->info.type == QETH_CARD_TYPE_OSN)
return RTN_UNSPEC;
@@ -248,8 +242,8 @@ static inline int qeth_l2_get_cast_type(struct qeth_card *card,
return RTN_UNSPEC;
}
-static inline void qeth_l2_hdr_csum(struct qeth_card *card,
- struct qeth_hdr *hdr, struct sk_buff *skb)
+static void qeth_l2_hdr_csum(struct qeth_card *card, struct qeth_hdr *hdr,
+ struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
@@ -265,13 +259,14 @@ static inline void qeth_l2_hdr_csum(struct qeth_card *card,
card->perf_stats.tx_csum++;
}
-static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
- struct sk_buff *skb, int cast_type)
+static void qeth_l2_fill_header(struct qeth_hdr *hdr, struct sk_buff *skb,
+ int cast_type, unsigned int data_len)
{
struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
memset(hdr, 0, sizeof(struct qeth_hdr));
hdr->hdr.l2.id = QETH_HEADER_TYPE_LAYER2;
+ hdr->hdr.l2.pkt_length = data_len;
/* set byte byte 3 to casting flags */
if (cast_type == RTN_MULTICAST)
@@ -281,7 +276,6 @@ static void qeth_l2_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
else
hdr->hdr.l2.flags[2] |= QETH_LAYER2_FLAG_UNICAST;
- hdr->hdr.l2.pkt_length = skb->len - sizeof(struct qeth_hdr);
/* VSWITCH relies on the VLAN
* information to be present in
* the QDIO header */
@@ -519,15 +513,6 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card)
/* fall back to alternative mechanism: */
}
- if (qeth_is_supported(card, IPA_SETADAPTERPARMS)) {
- rc = qeth_query_setadapterparms(card);
- if (rc) {
- QETH_DBF_MESSAGE(2, "could not query adapter "
- "parameters on device %s: x%x\n",
- CARD_BUS_ID(card), rc);
- }
- }
-
if (card->info.type == QETH_CARD_TYPE_IQD ||
card->info.type == QETH_CARD_TYPE_OSM ||
card->info.type == QETH_CARD_TYPE_OSX ||
@@ -615,13 +600,13 @@ static void qeth_promisc_to_bridge(struct qeth_card *card)
* only if there is not in the hash table storage already
*
*/
-static void
-qeth_l2_add_mac(struct qeth_card *card, struct netdev_hw_addr *ha, u8 is_uc)
+static void qeth_l2_add_mac(struct qeth_card *card, struct netdev_hw_addr *ha,
+ u8 is_uc)
{
+ u32 mac_hash = get_unaligned((u32 *)(&ha->addr[2]));
struct qeth_mac *mac;
- hash_for_each_possible(card->mac_htable, mac, hnode,
- qeth_l2_mac_hash(ha->addr)) {
+ hash_for_each_possible(card->mac_htable, mac, hnode, mac_hash) {
if (is_uc == mac->is_uc &&
!memcmp(ha->addr, mac->mac_addr, OSA_ADDR_LEN)) {
mac->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
@@ -638,9 +623,7 @@ qeth_l2_add_mac(struct qeth_card *card, struct netdev_hw_addr *ha, u8 is_uc)
mac->is_uc = is_uc;
mac->disp_flag = QETH_DISP_ADDR_ADD;
- hash_add(card->mac_htable, &mac->hnode,
- qeth_l2_mac_hash(mac->mac_addr));
-
+ hash_add(card->mac_htable, &mac->hnode, mac_hash);
}
static void qeth_l2_set_rx_mode(struct net_device *dev)
@@ -693,21 +676,127 @@ static void qeth_l2_set_rx_mode(struct net_device *dev)
qeth_promisc_to_bridge(card);
}
-static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
- struct net_device *dev)
+static int qeth_l2_xmit_iqd(struct qeth_card *card, struct sk_buff *skb,
+ struct qeth_qdio_out_q *queue, int cast_type)
{
+ unsigned int data_offset = ETH_HLEN;
+ struct qeth_hdr *hdr;
int rc;
+
+ hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
+ if (!hdr)
+ return -ENOMEM;
+ qeth_l2_fill_header(hdr, skb, cast_type, skb->len);
+ skb_copy_from_linear_data(skb, ((char *)hdr) + sizeof(*hdr),
+ data_offset);
+
+ if (!qeth_get_elements_no(card, skb, 1, data_offset)) {
+ rc = -E2BIG;
+ goto out;
+ }
+ rc = qeth_do_send_packet_fast(card, queue, skb, hdr, data_offset,
+ sizeof(*hdr) + data_offset);
+out:
+ if (rc)
+ kmem_cache_free(qeth_core_header_cache, hdr);
+ return rc;
+}
+
+static int qeth_l2_xmit_osa(struct qeth_card *card, struct sk_buff *skb,
+ struct qeth_qdio_out_q *queue, int cast_type)
+{
+ int push_len = sizeof(struct qeth_hdr);
+ unsigned int elements, nr_frags;
+ unsigned int hdr_elements = 0;
struct qeth_hdr *hdr = NULL;
- int elements = 0;
+ unsigned int hd_len = 0;
+ int rc;
+
+ /* fix hardware limitation: as long as we do not have sbal
+ * chaining we can not send long frag lists
+ */
+ if (!qeth_get_elements_no(card, skb, 0, 0)) {
+ rc = skb_linearize(skb);
+
+ if (card->options.performance_stats) {
+ if (rc)
+ card->perf_stats.tx_linfail++;
+ else
+ card->perf_stats.tx_lin++;
+ }
+ if (rc)
+ return rc;
+ }
+ nr_frags = skb_shinfo(skb)->nr_frags;
+
+ rc = skb_cow_head(skb, push_len);
+ if (rc)
+ return rc;
+ push_len = qeth_push_hdr(skb, &hdr, push_len);
+ if (push_len < 0)
+ return push_len;
+ if (!push_len) {
+ /* hdr was allocated from cache */
+ hd_len = sizeof(*hdr);
+ hdr_elements = 1;
+ }
+ qeth_l2_fill_header(hdr, skb, cast_type, skb->len - push_len);
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ qeth_l2_hdr_csum(card, hdr, skb);
+
+ elements = qeth_get_elements_no(card, skb, hdr_elements, 0);
+ if (!elements) {
+ rc = -E2BIG;
+ goto out;
+ }
+ elements += hdr_elements;
+
+ /* TODO: remove the skb_orphan() once TX completion is fast enough */
+ skb_orphan(skb);
+ rc = qeth_do_send_packet(card, queue, skb, hdr, 0, hd_len, elements);
+out:
+ if (!rc) {
+ if (card->options.performance_stats && nr_frags) {
+ card->perf_stats.sg_skbs_sent++;
+ /* nr_frags + skb->data */
+ card->perf_stats.sg_frags_sent += nr_frags + 1;
+ }
+ } else {
+ if (hd_len)
+ kmem_cache_free(qeth_core_header_cache, hdr);
+ if (rc == -EBUSY)
+ /* roll back to ETH header */
+ skb_pull(skb, push_len);
+ }
+ return rc;
+}
+
+static int qeth_l2_xmit_osn(struct qeth_card *card, struct sk_buff *skb,
+ struct qeth_qdio_out_q *queue)
+{
+ unsigned int elements;
+ struct qeth_hdr *hdr;
+
+ if (skb->protocol == htons(ETH_P_IPV6))
+ return -EPROTONOSUPPORT;
+
+ hdr = (struct qeth_hdr *)skb->data;
+ elements = qeth_get_elements_no(card, skb, 0, 0);
+ if (!elements)
+ return -E2BIG;
+ if (qeth_hdr_chk_and_bounce(skb, &hdr, sizeof(*hdr)))
+ return -EINVAL;
+ return qeth_do_send_packet(card, queue, skb, hdr, 0, 0, elements);
+}
+
+static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
struct qeth_card *card = dev->ml_priv;
- struct sk_buff *new_skb = skb;
int cast_type = qeth_l2_get_cast_type(card, skb);
struct qeth_qdio_out_q *queue;
int tx_bytes = skb->len;
- int data_offset = -1;
- int elements_needed = 0;
- int hd_len = 0;
- int nr_frags;
+ int rc;
if (card->qdio.do_prio_queueing || (cast_type &&
card->info.is_multicast_different))
@@ -721,118 +810,38 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
goto tx_drop;
}
- if ((card->info.type == QETH_CARD_TYPE_OSN) &&
- (skb->protocol == htons(ETH_P_IPV6)))
- goto tx_drop;
-
if (card->options.performance_stats) {
card->perf_stats.outbound_cnt++;
card->perf_stats.outbound_start_time = qeth_get_micros();
}
netif_stop_queue(dev);
- /* fix hardware limitation: as long as we do not have sbal
- * chaining we can not send long frag lists
- */
- if ((card->info.type != QETH_CARD_TYPE_IQD) &&
- !qeth_get_elements_no(card, new_skb, 0, 0)) {
- int lin_rc = skb_linearize(new_skb);
-
- if (card->options.performance_stats) {
- if (lin_rc)
- card->perf_stats.tx_linfail++;
- else
- card->perf_stats.tx_lin++;
- }
- if (lin_rc)
- goto tx_drop;
- }
-
- if (card->info.type == QETH_CARD_TYPE_OSN)
- hdr = (struct qeth_hdr *)skb->data;
- else {
- if (card->info.type == QETH_CARD_TYPE_IQD) {
- new_skb = skb;
- data_offset = ETH_HLEN;
- hd_len = ETH_HLEN;
- hdr = kmem_cache_alloc(qeth_core_header_cache,
- GFP_ATOMIC);
- if (!hdr)
- goto tx_drop;
- elements_needed++;
- skb_reset_mac_header(new_skb);
- qeth_l2_fill_header(card, hdr, new_skb, cast_type);
- hdr->hdr.l2.pkt_length = new_skb->len;
- memcpy(((char *)hdr) + sizeof(struct qeth_hdr),
- skb_mac_header(new_skb), ETH_HLEN);
- } else {
- /* create a clone with writeable headroom */
- new_skb = skb_realloc_headroom(skb,
- sizeof(struct qeth_hdr));
- if (!new_skb)
- goto tx_drop;
- hdr = skb_push(new_skb, sizeof(struct qeth_hdr));
- skb_set_mac_header(new_skb, sizeof(struct qeth_hdr));
- qeth_l2_fill_header(card, hdr, new_skb, cast_type);
- if (new_skb->ip_summed == CHECKSUM_PARTIAL)
- qeth_l2_hdr_csum(card, hdr, new_skb);
- }
- }
-
- elements = qeth_get_elements_no(card, new_skb, elements_needed,
- (data_offset > 0) ? data_offset : 0);
- if (!elements) {
- if (data_offset >= 0)
- kmem_cache_free(qeth_core_header_cache, hdr);
- goto tx_drop;
+ switch (card->info.type) {
+ case QETH_CARD_TYPE_OSN:
+ rc = qeth_l2_xmit_osn(card, skb, queue);
+ break;
+ case QETH_CARD_TYPE_IQD:
+ rc = qeth_l2_xmit_iqd(card, skb, queue, cast_type);
+ break;
+ default:
+ rc = qeth_l2_xmit_osa(card, skb, queue, cast_type);
}
- if (card->info.type != QETH_CARD_TYPE_IQD) {
- if (qeth_hdr_chk_and_bounce(new_skb, &hdr,
- sizeof(struct qeth_hdr_layer2)))
- goto tx_drop;
- rc = qeth_do_send_packet(card, queue, new_skb, hdr,
- elements);
- } else
- rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr,
- data_offset, hd_len);
if (!rc) {
card->stats.tx_packets++;
card->stats.tx_bytes += tx_bytes;
- if (card->options.performance_stats) {
- nr_frags = skb_shinfo(new_skb)->nr_frags;
- if (nr_frags) {
- card->perf_stats.sg_skbs_sent++;
- /* nr_frags + skb->data */
- card->perf_stats.sg_frags_sent += nr_frags + 1;
- }
- }
- if (new_skb != skb)
- dev_kfree_skb_any(skb);
- rc = NETDEV_TX_OK;
- } else {
- if (data_offset >= 0)
- kmem_cache_free(qeth_core_header_cache, hdr);
-
- if (rc == -EBUSY) {
- if (new_skb != skb)
- dev_kfree_skb_any(new_skb);
- return NETDEV_TX_BUSY;
- } else
- goto tx_drop;
- }
-
- netif_wake_queue(dev);
- if (card->options.performance_stats)
- card->perf_stats.outbound_time += qeth_get_micros() -
- card->perf_stats.outbound_start_time;
- return rc;
+ if (card->options.performance_stats)
+ card->perf_stats.outbound_time += qeth_get_micros() -
+ card->perf_stats.outbound_start_time;
+ netif_wake_queue(dev);
+ return NETDEV_TX_OK;
+ } else if (rc == -EBUSY) {
+ return NETDEV_TX_BUSY;
+ } /* else fall through */
tx_drop:
card->stats.tx_dropped++;
card->stats.tx_errors++;
- if ((new_skb != skb) && new_skb)
- dev_kfree_skb_any(new_skb);
dev_kfree_skb_any(skb);
netif_wake_queue(dev);
return NETDEV_TX_OK;
@@ -1010,6 +1019,12 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
card->dev->vlan_features |= NETIF_F_RXCSUM;
}
}
+ if (card->info.type != QETH_CARD_TYPE_OSN &&
+ card->info.type != QETH_CARD_TYPE_IQD) {
+ card->dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ card->dev->needed_headroom = sizeof(struct qeth_hdr);
+ }
+
card->info.broadcast_capable = 1;
qeth_l2_request_initial_mac(card);
card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
@@ -1744,11 +1759,26 @@ static int qeth_bridgeport_makerc(struct qeth_card *card,
return rc;
}
-static inline int ipa_cmd_sbp(struct qeth_card *card)
+static struct qeth_cmd_buffer *qeth_sbp_build_cmd(struct qeth_card *card,
+ enum qeth_ipa_sbp_cmd sbp_cmd,
+ unsigned int cmd_length)
{
- return (card->info.type == QETH_CARD_TYPE_IQD) ?
- IPA_CMD_SETBRIDGEPORT_IQD :
- IPA_CMD_SETBRIDGEPORT_OSA;
+ enum qeth_ipa_cmds ipa_cmd = (card->info.type == QETH_CARD_TYPE_IQD) ?
+ IPA_CMD_SETBRIDGEPORT_IQD :
+ IPA_CMD_SETBRIDGEPORT_OSA;
+ struct qeth_cmd_buffer *iob;
+ struct qeth_ipa_cmd *cmd;
+
+ iob = qeth_get_ipacmd_buffer(card, ipa_cmd, 0);
+ if (!iob)
+ return iob;
+ cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
+ cmd->data.sbp.hdr.cmdlength = sizeof(struct qeth_ipacmd_sbp_hdr) +
+ cmd_length;
+ cmd->data.sbp.hdr.command_code = sbp_cmd;
+ cmd->data.sbp.hdr.used_total = 1;
+ cmd->data.sbp.hdr.seq_no = 1;
+ return iob;
}
static int qeth_bridgeport_query_support_cb(struct qeth_card *card,
@@ -1778,21 +1808,13 @@ static int qeth_bridgeport_query_support_cb(struct qeth_card *card,
static void qeth_bridgeport_query_support(struct qeth_card *card)
{
struct qeth_cmd_buffer *iob;
- struct qeth_ipa_cmd *cmd;
struct _qeth_sbp_cbctl cbctl;
QETH_CARD_TEXT(card, 2, "brqsuppo");
- iob = qeth_get_ipacmd_buffer(card, ipa_cmd_sbp(card), 0);
+ iob = qeth_sbp_build_cmd(card, IPA_SBP_QUERY_COMMANDS_SUPPORTED,
+ sizeof(struct qeth_sbp_query_cmds_supp));
if (!iob)
return;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
- cmd->data.sbp.hdr.cmdlength =
- sizeof(struct qeth_ipacmd_sbp_hdr) +
- sizeof(struct qeth_sbp_query_cmds_supp);
- cmd->data.sbp.hdr.command_code =
- IPA_SBP_QUERY_COMMANDS_SUPPORTED;
- cmd->data.sbp.hdr.used_total = 1;
- cmd->data.sbp.hdr.seq_no = 1;
if (qeth_send_ipa_cmd(card, iob, qeth_bridgeport_query_support_cb,
(void *)&cbctl) ||
qeth_bridgeport_makerc(card, &cbctl,
@@ -1846,7 +1868,6 @@ int qeth_bridgeport_query_ports(struct qeth_card *card,
{
int rc = 0;
struct qeth_cmd_buffer *iob;
- struct qeth_ipa_cmd *cmd;
struct _qeth_sbp_cbctl cbctl = {
.data = {
.qports = {
@@ -1859,16 +1880,9 @@ int qeth_bridgeport_query_ports(struct qeth_card *card,
QETH_CARD_TEXT(card, 2, "brqports");
if (!(card->options.sbp.supported_funcs & IPA_SBP_QUERY_BRIDGE_PORTS))
return -EOPNOTSUPP;
- iob = qeth_get_ipacmd_buffer(card, ipa_cmd_sbp(card), 0);
+ iob = qeth_sbp_build_cmd(card, IPA_SBP_QUERY_BRIDGE_PORTS, 0);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
- cmd->data.sbp.hdr.cmdlength =
- sizeof(struct qeth_ipacmd_sbp_hdr);
- cmd->data.sbp.hdr.command_code =
- IPA_SBP_QUERY_BRIDGE_PORTS;
- cmd->data.sbp.hdr.used_total = 1;
- cmd->data.sbp.hdr.seq_no = 1;
rc = qeth_send_ipa_cmd(card, iob, qeth_bridgeport_query_ports_cb,
(void *)&cbctl);
if (rc < 0)
@@ -1900,7 +1914,6 @@ int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role)
int rc = 0;
int cmdlength;
struct qeth_cmd_buffer *iob;
- struct qeth_ipa_cmd *cmd;
struct _qeth_sbp_cbctl cbctl;
enum qeth_ipa_sbp_cmd setcmd;
@@ -1908,32 +1921,24 @@ int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role)
switch (role) {
case QETH_SBP_ROLE_NONE:
setcmd = IPA_SBP_RESET_BRIDGE_PORT_ROLE;
- cmdlength = sizeof(struct qeth_ipacmd_sbp_hdr) +
- sizeof(struct qeth_sbp_reset_role);
+ cmdlength = sizeof(struct qeth_sbp_reset_role);
break;
case QETH_SBP_ROLE_PRIMARY:
setcmd = IPA_SBP_SET_PRIMARY_BRIDGE_PORT;
- cmdlength = sizeof(struct qeth_ipacmd_sbp_hdr) +
- sizeof(struct qeth_sbp_set_primary);
+ cmdlength = sizeof(struct qeth_sbp_set_primary);
break;
case QETH_SBP_ROLE_SECONDARY:
setcmd = IPA_SBP_SET_SECONDARY_BRIDGE_PORT;
- cmdlength = sizeof(struct qeth_ipacmd_sbp_hdr) +
- sizeof(struct qeth_sbp_set_secondary);
+ cmdlength = sizeof(struct qeth_sbp_set_secondary);
break;
default:
return -EINVAL;
}
if (!(card->options.sbp.supported_funcs & setcmd))
return -EOPNOTSUPP;
- iob = qeth_get_ipacmd_buffer(card, ipa_cmd_sbp(card), 0);
+ iob = qeth_sbp_build_cmd(card, setcmd, cmdlength);
if (!iob)
return -ENOMEM;
- cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
- cmd->data.sbp.hdr.cmdlength = cmdlength;
- cmd->data.sbp.hdr.command_code = setcmd;
- cmd->data.sbp.hdr.used_total = 1;
- cmd->data.sbp.hdr.seq_no = 1;
rc = qeth_send_ipa_cmd(card, iob, qeth_bridgeport_set_cb,
(void *)&cbctl);
if (rc < 0)
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index d42e758..ab661a4 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -247,7 +247,8 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
return -ENOENT;
addr->ref_counter--;
- if (addr->type == QETH_IP_TYPE_NORMAL && addr->ref_counter > 0)
+ if (addr->ref_counter > 0 && (addr->type == QETH_IP_TYPE_NORMAL ||
+ addr->type == QETH_IP_TYPE_RXIP))
return rc;
if (addr->in_progress)
return -EINPROGRESS;
@@ -329,8 +330,9 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
kfree(addr);
}
} else {
- if (addr->type == QETH_IP_TYPE_NORMAL)
- addr->ref_counter++;
+ if (addr->type == QETH_IP_TYPE_NORMAL ||
+ addr->type == QETH_IP_TYPE_RXIP)
+ addr->ref_counter++;
}
return rc;
@@ -784,11 +786,11 @@ void qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
ipaddr = qeth_l3_get_addr_buffer(proto);
if (ipaddr) {
if (proto == QETH_PROT_IPV4) {
- QETH_CARD_TEXT(card, 2, "addrxip4");
+ QETH_CARD_TEXT(card, 2, "delrxip4");
memcpy(&ipaddr->u.a4.addr, addr, 4);
ipaddr->u.a4.mask = 0;
} else if (proto == QETH_PROT_IPV6) {
- QETH_CARD_TEXT(card, 2, "addrxip6");
+ QETH_CARD_TEXT(card, 2, "delrxip6");
memcpy(&ipaddr->u.a6.addr, addr, 16);
ipaddr->u.a6.pfxlen = 0;
}
@@ -867,7 +869,7 @@ static int qeth_l3_deregister_addr_entry(struct qeth_card *card,
return rc;
}
-static inline u8 qeth_l3_get_qeth_hdr_flags4(int cast_type)
+static u8 qeth_l3_get_qeth_hdr_flags4(int cast_type)
{
if (cast_type == RTN_MULTICAST)
return QETH_CAST_MULTICAST;
@@ -876,7 +878,7 @@ static inline u8 qeth_l3_get_qeth_hdr_flags4(int cast_type)
return QETH_CAST_UNICAST;
}
-static inline u8 qeth_l3_get_qeth_hdr_flags6(int cast_type)
+static u8 qeth_l3_get_qeth_hdr_flags6(int cast_type)
{
u8 ct = QETH_HDR_PASSTHRU | QETH_HDR_IPV6;
if (cast_type == RTN_MULTICAST)
@@ -890,22 +892,10 @@ static inline u8 qeth_l3_get_qeth_hdr_flags6(int cast_type)
static int qeth_l3_setadapter_parms(struct qeth_card *card)
{
- int rc;
+ int rc = 0;
QETH_DBF_TEXT(SETUP, 2, "setadprm");
- if (!qeth_is_supported(card, IPA_SETADAPTERPARMS)) {
- dev_info(&card->gdev->dev,
- "set adapter parameters not supported.\n");
- QETH_DBF_TEXT(SETUP, 2, " notsupp");
- return 0;
- }
- rc = qeth_query_setadapterparms(card);
- if (rc) {
- QETH_DBF_MESSAGE(2, "%s couldn't set adapter parameters: "
- "0x%x\n", dev_name(&card->gdev->dev), rc);
- return rc;
- }
if (qeth_adp_supported(card, IPA_SETADP_ALTER_MAC_ADDRESS)) {
rc = qeth_setadpparms_change_macaddr(card);
if (rc)
@@ -1656,9 +1646,8 @@ static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev,
return 0;
}
-static inline int qeth_l3_rebuild_skb(struct qeth_card *card,
- struct sk_buff *skb, struct qeth_hdr *hdr,
- unsigned short *vlan_id)
+static int qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
+ struct qeth_hdr *hdr, unsigned short *vlan_id)
{
__u16 prot;
struct iphdr *ip_hdr;
@@ -2408,7 +2397,7 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
return rc;
}
-inline int qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
+static int qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
{
int cast_type = RTN_UNSPEC;
struct neighbour *n = NULL;
@@ -2546,8 +2535,8 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
rcu_read_unlock();
}
-static inline void qeth_l3_hdr_csum(struct qeth_card *card,
- struct qeth_hdr *hdr, struct sk_buff *skb)
+static void qeth_l3_hdr_csum(struct qeth_card *card, struct qeth_hdr *hdr,
+ struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
@@ -2582,7 +2571,7 @@ static void qeth_tso_fill_header(struct qeth_card *card,
hdr->ext.hdr_len = 28;
/*insert non-fix values */
hdr->ext.mss = skb_shinfo(skb)->gso_size;
- hdr->ext.dg_hdr_len = (__u16)(iph->ihl*4 + tcph->doff*4);
+ hdr->ext.dg_hdr_len = (__u16)(ip_hdrlen(skb) + tcp_hdrlen(skb));
hdr->ext.payload_len = (__u16)(skb->len - hdr->ext.dg_hdr_len -
sizeof(struct qeth_hdr_tso));
tcph->check = 0;
@@ -2648,9 +2637,10 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
qeth_get_priority_queue(card, skb, ipv, cast_type) :
card->qdio.default_out_queue];
int tx_bytes = skb->len;
+ unsigned int hd_len = 0;
bool use_tso;
int data_offset = -1;
- int nr_frags;
+ unsigned int nr_frags;
if (((card->info.type == QETH_CARD_TYPE_IQD) &&
(((card->options.cq != QETH_CQ_ENABLED) && !ipv) ||
@@ -2675,11 +2665,12 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
/* Ignore segment size from skb_is_gso(), 1 page is always used. */
use_tso = skb_is_gso(skb) &&
- (qeth_get_ip_protocol(skb) == IPPROTO_TCP) && (ipv == 4);
+ (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4);
if (card->info.type == QETH_CARD_TYPE_IQD) {
new_skb = skb;
data_offset = ETH_HLEN;
+ hd_len = sizeof(*hdr);
hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
if (!hdr)
goto tx_drop;
@@ -2727,6 +2718,7 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
if (lin_rc)
goto tx_drop;
}
+ nr_frags = skb_shinfo(new_skb)->nr_frags;
if (use_tso) {
hdr = skb_push(new_skb, sizeof(struct qeth_hdr_tso));
@@ -2766,19 +2758,21 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
if (card->info.type != QETH_CARD_TYPE_IQD) {
int len;
- if (use_tso)
- len = ((unsigned long)tcp_hdr(new_skb) +
- tcp_hdrlen(new_skb)) -
- (unsigned long)new_skb->data;
- else
+ if (use_tso) {
+ hd_len = sizeof(struct qeth_hdr_tso) +
+ ip_hdrlen(new_skb) + tcp_hdrlen(new_skb);
+ len = hd_len;
+ } else {
len = sizeof(struct qeth_hdr_layer3);
+ }
if (qeth_hdr_chk_and_bounce(new_skb, &hdr, len))
goto tx_drop;
- rc = qeth_do_send_packet(card, queue, new_skb, hdr, elements);
+ rc = qeth_do_send_packet(card, queue, new_skb, hdr, hd_len,
+ hd_len, elements);
} else
rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr,
- data_offset, 0);
+ data_offset, hd_len);
if (!rc) {
card->stats.tx_packets++;
@@ -2786,7 +2780,6 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
if (new_skb != skb)
dev_kfree_skb_any(skb);
if (card->options.performance_stats) {
- nr_frags = skb_shinfo(new_skb)->nr_frags;
if (use_tso) {
card->perf_stats.large_send_bytes += tx_bytes;
card->perf_stats.large_send_cnt++;
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index 1a80ce4..e8bcc31 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -895,9 +895,26 @@ static ssize_t qeth_l3_dev_rxip_add4_show(struct device *dev,
static int qeth_l3_parse_rxipe(const char *buf, enum qeth_prot_versions proto,
u8 *addr)
{
+ __be32 ipv4_addr;
+ struct in6_addr ipv6_addr;
+
if (qeth_l3_string_to_ipaddr(buf, proto, addr)) {
return -EINVAL;
}
+ if (proto == QETH_PROT_IPV4) {
+ memcpy(&ipv4_addr, addr, sizeof(ipv4_addr));
+ if (ipv4_is_multicast(ipv4_addr)) {
+ QETH_DBF_MESSAGE(2, "multicast rxip not supported.\n");
+ return -EINVAL;
+ }
+ } else if (proto == QETH_PROT_IPV6) {
+ memcpy(&ipv6_addr, addr, sizeof(ipv6_addr));
+ if (ipv6_addr_is_multicast(&ipv6_addr)) {
+ QETH_DBF_MESSAGE(2, "multicast rxip not supported.\n");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
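
The two checks above reject multicast receive-IP addresses outright. As a rough
userspace sketch of the same predicates -- ipv4_is_multicast() tests for
224.0.0.0/4 and ipv6_addr_is_multicast() for ff00::/8; the is_v4_multicast and
is_v6_multicast helpers below are illustrative stand-ins, not kernel API:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

/* Mirrors what the kernel's ipv4_is_multicast() checks: 224.0.0.0/4. */
static int is_v4_multicast(uint32_t be_addr)
{
	return (be_addr & htonl(0xf0000000)) == htonl(0xe0000000);
}

/* Mirrors ipv6_addr_is_multicast(): the ff00::/8 prefix. */
static int is_v6_multicast(const struct in6_addr *a)
{
	return a->s6_addr[0] == 0xff;
}

int main(void)
{
	struct in_addr v4;
	struct in6_addr v6;

	inet_pton(AF_INET, "224.0.0.1", &v4);
	inet_pton(AF_INET6, "ff02::1", &v6);
	printf("224.0.0.1 multicast: %d\n", is_v4_multicast(v4.s_addr));
	printf("ff02::1   multicast: %d\n", is_v6_multicast(&v6));
	return 0;
}

Either predicate firing makes qeth_l3_parse_rxipe() return -EINVAL, so the
sysfs write is refused before any rxip entry is created.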
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 1b0a1be..5546839 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -24,6 +24,8 @@ menuconfig STAGING
if STAGING
+source "drivers/staging/irda/net/Kconfig"
+
source "drivers/staging/wlan-ng/Kconfig"
source "drivers/staging/comedi/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 2b61cbd..8951c37 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -2,6 +2,8 @@
obj-y += media/
obj-y += typec/
+obj-$(CONFIG_IRDA) += irda/net/
+obj-$(CONFIG_IRDA) += irda/drivers/
obj-$(CONFIG_PRISM2_USB) += wlan-ng/
obj-$(CONFIG_COMEDI) += comedi/
obj-$(CONFIG_FB_OLPC_DCON) += olpc_dcon/
diff --git a/drivers/staging/irda/TODO b/drivers/staging/irda/TODO
new file mode 100644
index 0000000..7d98a5c
--- /dev/null
+++ b/drivers/staging/irda/TODO
@@ -0,0 +1,4 @@
+The irda code will be removed soon from the kernel tree as it is old,
+obsolete, and broken.
+
+Don't worry about fixing up anything here, it's not needed.
diff --git a/drivers/net/irda/Kconfig b/drivers/staging/irda/drivers/Kconfig
index e070e12..e070e12 100644
--- a/drivers/net/irda/Kconfig
+++ b/drivers/staging/irda/drivers/Kconfig
diff --git a/drivers/net/irda/Makefile b/drivers/staging/irda/drivers/Makefile
index 4c34443..e2901b1 100644
--- a/drivers/net/irda/Makefile
+++ b/drivers/staging/irda/drivers/Makefile
@@ -5,6 +5,8 @@
# Rewritten to use lists instead of if-statements.
#
+subdir-ccflags-y += -I$(srctree)/drivers/staging/irda/include
+
# FIR drivers
obj-$(CONFIG_USB_IRDA) += irda-usb.o
obj-$(CONFIG_SIGMATEL_FIR) += stir4200.o
diff --git a/drivers/net/irda/act200l-sir.c b/drivers/staging/irda/drivers/act200l-sir.c
index e891751..e891751 100644
--- a/drivers/net/irda/act200l-sir.c
+++ b/drivers/staging/irda/drivers/act200l-sir.c
diff --git a/drivers/net/irda/actisys-sir.c b/drivers/staging/irda/drivers/actisys-sir.c
index e224b8b..e224b8b 100644
--- a/drivers/net/irda/actisys-sir.c
+++ b/drivers/staging/irda/drivers/actisys-sir.c
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/staging/irda/drivers/ali-ircc.c
index 35f198d..35f198d 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/staging/irda/drivers/ali-ircc.c
diff --git a/drivers/net/irda/ali-ircc.h b/drivers/staging/irda/drivers/ali-ircc.h
index c2d9747..c2d9747 100644
--- a/drivers/net/irda/ali-ircc.h
+++ b/drivers/staging/irda/drivers/ali-ircc.h
diff --git a/drivers/net/irda/au1k_ir.c b/drivers/staging/irda/drivers/au1k_ir.c
index be4ea6a..be4ea6a 100644
--- a/drivers/net/irda/au1k_ir.c
+++ b/drivers/staging/irda/drivers/au1k_ir.c
diff --git a/drivers/net/irda/bfin_sir.c b/drivers/staging/irda/drivers/bfin_sir.c
index 3151b58..3151b58 100644
--- a/drivers/net/irda/bfin_sir.c
+++ b/drivers/staging/irda/drivers/bfin_sir.c
diff --git a/drivers/net/irda/bfin_sir.h b/drivers/staging/irda/drivers/bfin_sir.h
index d47cf14..d47cf14 100644
--- a/drivers/net/irda/bfin_sir.h
+++ b/drivers/staging/irda/drivers/bfin_sir.h
diff --git a/drivers/net/irda/donauboe.c b/drivers/staging/irda/drivers/donauboe.c
index b337e6d..b337e6d 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/staging/irda/drivers/donauboe.c
diff --git a/drivers/net/irda/donauboe.h b/drivers/staging/irda/drivers/donauboe.h
index d92d54e..d92d54e 100644
--- a/drivers/net/irda/donauboe.h
+++ b/drivers/staging/irda/drivers/donauboe.h
diff --git a/drivers/net/irda/esi-sir.c b/drivers/staging/irda/drivers/esi-sir.c
index 019a3e8..019a3e8 100644
--- a/drivers/net/irda/esi-sir.c
+++ b/drivers/staging/irda/drivers/esi-sir.c
diff --git a/drivers/net/irda/girbil-sir.c b/drivers/staging/irda/drivers/girbil-sir.c
index 7e0a5b8..7e0a5b8 100644
--- a/drivers/net/irda/girbil-sir.c
+++ b/drivers/staging/irda/drivers/girbil-sir.c
diff --git a/drivers/net/irda/irda-usb.c b/drivers/staging/irda/drivers/irda-usb.c
index 6f3c805..723e49b 100644
--- a/drivers/net/irda/irda-usb.c
+++ b/drivers/staging/irda/drivers/irda-usb.c
@@ -72,7 +72,7 @@
static int qos_mtt_bits = 0;
/* These are the currently known IrDA USB dongles. Add new dongles here */
-static struct usb_device_id dongles[] = {
+static const struct usb_device_id dongles[] = {
/* ACTiSYS Corp., ACT-IR2000U FIR-USB Adapter */
{ USB_DEVICE(0x9c4, 0x011), .driver_info = IUC_SPEED_BUG | IUC_NO_WINDOW },
/* Look like ACTiSYS, Report : IBM Corp., IBM UltraPort IrDA */
diff --git a/drivers/net/irda/irda-usb.h b/drivers/staging/irda/drivers/irda-usb.h
index 8ac389f..8ac389f 100644
--- a/drivers/net/irda/irda-usb.h
+++ b/drivers/staging/irda/drivers/irda-usb.h
diff --git a/drivers/net/irda/irtty-sir.c b/drivers/staging/irda/drivers/irtty-sir.c
index 7a20a9a..7a20a9a 100644
--- a/drivers/net/irda/irtty-sir.c
+++ b/drivers/staging/irda/drivers/irtty-sir.c
diff --git a/drivers/net/irda/irtty-sir.h b/drivers/staging/irda/drivers/irtty-sir.h
index b132d8f..b132d8f 100644
--- a/drivers/net/irda/irtty-sir.h
+++ b/drivers/staging/irda/drivers/irtty-sir.h
diff --git a/drivers/net/irda/kingsun-sir.c b/drivers/staging/irda/drivers/kingsun-sir.c
index 24c0f16..4fd4ac2 100644
--- a/drivers/net/irda/kingsun-sir.c
+++ b/drivers/staging/irda/drivers/kingsun-sir.c
@@ -85,7 +85,7 @@
#define KING_PRODUCT_ID 0x4200
/* These are the currently known USB ids */
-static struct usb_device_id dongles[] = {
+static const struct usb_device_id dongles[] = {
/* KingSun Co,Ltd IrDA/USB Bridge */
{ USB_DEVICE(KING_VENDOR_ID, KING_PRODUCT_ID) },
{ }
diff --git a/drivers/net/irda/ks959-sir.c b/drivers/staging/irda/drivers/ks959-sir.c
index 3affded..8025741 100644
--- a/drivers/net/irda/ks959-sir.c
+++ b/drivers/staging/irda/drivers/ks959-sir.c
@@ -133,7 +133,7 @@
#define KS959_PRODUCT_ID 0x4959
/* These are the currently known USB ids */
-static struct usb_device_id dongles[] = {
+static const struct usb_device_id dongles[] = {
/* KingSun Co,Ltd IrDA/USB Bridge */
{USB_DEVICE(KS959_VENDOR_ID, KS959_PRODUCT_ID)},
{}
diff --git a/drivers/net/irda/ksdazzle-sir.c b/drivers/staging/irda/drivers/ksdazzle-sir.c
index 741452c..d2a0755 100644
--- a/drivers/net/irda/ksdazzle-sir.c
+++ b/drivers/staging/irda/drivers/ksdazzle-sir.c
@@ -97,7 +97,7 @@
#define KSDAZZLE_PRODUCT_ID 0x4100
/* These are the currently known USB ids */
-static struct usb_device_id dongles[] = {
+static const struct usb_device_id dongles[] = {
/* KingSun Co,Ltd IrDA/USB Bridge */
{USB_DEVICE(KSDAZZLE_VENDOR_ID, KSDAZZLE_PRODUCT_ID)},
{}
diff --git a/drivers/net/irda/litelink-sir.c b/drivers/staging/irda/drivers/litelink-sir.c
index 8eefcb4..8eefcb4 100644
--- a/drivers/net/irda/litelink-sir.c
+++ b/drivers/staging/irda/drivers/litelink-sir.c
diff --git a/drivers/net/irda/ma600-sir.c b/drivers/staging/irda/drivers/ma600-sir.c
index a764817..a764817 100644
--- a/drivers/net/irda/ma600-sir.c
+++ b/drivers/staging/irda/drivers/ma600-sir.c
diff --git a/drivers/net/irda/mcp2120-sir.c b/drivers/staging/irda/drivers/mcp2120-sir.c
index 2e33f91..2e33f91 100644
--- a/drivers/net/irda/mcp2120-sir.c
+++ b/drivers/staging/irda/drivers/mcp2120-sir.c
diff --git a/drivers/net/irda/mcs7780.c b/drivers/staging/irda/drivers/mcs7780.c
index 765de3b..c3f0b25 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/staging/irda/drivers/mcs7780.c
@@ -66,7 +66,7 @@
#define MCS_VENDOR_ID 0x9710
#define MCS_PRODUCT_ID 0x7780
-static struct usb_device_id mcs_table[] = {
+static const struct usb_device_id mcs_table[] = {
/* MosChip Corp., MCS7780 FIR-USB Adapter */
{USB_DEVICE(MCS_VENDOR_ID, MCS_PRODUCT_ID)},
{},
diff --git a/drivers/net/irda/mcs7780.h b/drivers/staging/irda/drivers/mcs7780.h
index a6e8f7d..a6e8f7d 100644
--- a/drivers/net/irda/mcs7780.h
+++ b/drivers/staging/irda/drivers/mcs7780.h
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/staging/irda/drivers/nsc-ircc.c
index 7beae147b..7beae147b 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/staging/irda/drivers/nsc-ircc.c
diff --git a/drivers/net/irda/nsc-ircc.h b/drivers/staging/irda/drivers/nsc-ircc.h
index 7be5acb..7be5acb 100644
--- a/drivers/net/irda/nsc-ircc.h
+++ b/drivers/staging/irda/drivers/nsc-ircc.h
diff --git a/drivers/net/irda/old_belkin-sir.c b/drivers/staging/irda/drivers/old_belkin-sir.c
index a7c2e99..a7c2e99 100644
--- a/drivers/net/irda/old_belkin-sir.c
+++ b/drivers/staging/irda/drivers/old_belkin-sir.c
diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/staging/irda/drivers/pxaficp_ir.c
index 1dba16b..1dba16b 100644
--- a/drivers/net/irda/pxaficp_ir.c
+++ b/drivers/staging/irda/drivers/pxaficp_ir.c
diff --git a/drivers/net/irda/sa1100_ir.c b/drivers/staging/irda/drivers/sa1100_ir.c
index b6e44ff..b6e44ff 100644
--- a/drivers/net/irda/sa1100_ir.c
+++ b/drivers/staging/irda/drivers/sa1100_ir.c
diff --git a/drivers/net/irda/sh_sir.c b/drivers/staging/irda/drivers/sh_sir.c
index fede686..fede686 100644
--- a/drivers/net/irda/sh_sir.c
+++ b/drivers/staging/irda/drivers/sh_sir.c
diff --git a/drivers/net/irda/sir-dev.h b/drivers/staging/irda/drivers/sir-dev.h
index f50b9c1..f50b9c1 100644
--- a/drivers/net/irda/sir-dev.h
+++ b/drivers/staging/irda/drivers/sir-dev.h
diff --git a/drivers/net/irda/sir_dev.c b/drivers/staging/irda/drivers/sir_dev.c
index 6af26a7..6af26a7 100644
--- a/drivers/net/irda/sir_dev.c
+++ b/drivers/staging/irda/drivers/sir_dev.c
diff --git a/drivers/net/irda/sir_dongle.c b/drivers/staging/irda/drivers/sir_dongle.c
index 7436f73..7436f73 100644
--- a/drivers/net/irda/sir_dongle.c
+++ b/drivers/staging/irda/drivers/sir_dongle.c
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/staging/irda/drivers/smsc-ircc2.c
index 19a55cb..19a55cb 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/staging/irda/drivers/smsc-ircc2.c
diff --git a/drivers/net/irda/smsc-ircc2.h b/drivers/staging/irda/drivers/smsc-ircc2.h
index 4829fa2..4829fa2 100644
--- a/drivers/net/irda/smsc-ircc2.h
+++ b/drivers/staging/irda/drivers/smsc-ircc2.h
diff --git a/drivers/net/irda/smsc-sio.h b/drivers/staging/irda/drivers/smsc-sio.h
index 59e20e6..59e20e6 100644
--- a/drivers/net/irda/smsc-sio.h
+++ b/drivers/staging/irda/drivers/smsc-sio.h
diff --git a/drivers/net/irda/stir4200.c b/drivers/staging/irda/drivers/stir4200.c
index 7ee5148..ee2cb70 100644
--- a/drivers/net/irda/stir4200.c
+++ b/drivers/staging/irda/drivers/stir4200.c
@@ -183,7 +183,7 @@ struct stir_cb {
/* These are the currently known USB ids */
-static struct usb_device_id dongles[] = {
+static const struct usb_device_id dongles[] = {
/* SigmaTel, Inc, STIr4200 IrDA/USB Bridge */
{ USB_DEVICE(0x066f, 0x4200) },
{ }
diff --git a/drivers/net/irda/tekram-sir.c b/drivers/staging/irda/drivers/tekram-sir.c
index 9dcf0c1..9dcf0c1 100644
--- a/drivers/net/irda/tekram-sir.c
+++ b/drivers/staging/irda/drivers/tekram-sir.c
diff --git a/drivers/net/irda/toim3232-sir.c b/drivers/staging/irda/drivers/toim3232-sir.c
index b977d6d..b977d6d 100644
--- a/drivers/net/irda/toim3232-sir.c
+++ b/drivers/staging/irda/drivers/toim3232-sir.c
diff --git a/drivers/net/irda/via-ircc.c b/drivers/staging/irda/drivers/via-ircc.c
index ca4442a..ca4442a 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/staging/irda/drivers/via-ircc.c
diff --git a/drivers/net/irda/via-ircc.h b/drivers/staging/irda/drivers/via-ircc.h
index ac15255..ac15255 100644
--- a/drivers/net/irda/via-ircc.h
+++ b/drivers/staging/irda/drivers/via-ircc.h
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/staging/irda/drivers/vlsi_ir.c
index 6638784..6638784 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/staging/irda/drivers/vlsi_ir.c
diff --git a/drivers/net/irda/vlsi_ir.h b/drivers/staging/irda/drivers/vlsi_ir.h
index f9db2ce..f9db2ce 100644
--- a/drivers/net/irda/vlsi_ir.h
+++ b/drivers/staging/irda/drivers/vlsi_ir.h
diff --git a/drivers/net/irda/w83977af.h b/drivers/staging/irda/drivers/w83977af.h
index 04476c2..04476c2 100644
--- a/drivers/net/irda/w83977af.h
+++ b/drivers/staging/irda/drivers/w83977af.h
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/staging/irda/drivers/w83977af_ir.c
index 282b6c9..282b6c9 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/staging/irda/drivers/w83977af_ir.c
diff --git a/drivers/net/irda/w83977af_ir.h b/drivers/staging/irda/drivers/w83977af_ir.h
index fefe9b1..fefe9b1 100644
--- a/drivers/net/irda/w83977af_ir.h
+++ b/drivers/staging/irda/drivers/w83977af_ir.h
diff --git a/include/net/irda/af_irda.h b/drivers/staging/irda/include/net/irda/af_irda.h
index 0df5749..0df5749 100644
--- a/include/net/irda/af_irda.h
+++ b/drivers/staging/irda/include/net/irda/af_irda.h
diff --git a/include/net/irda/crc.h b/drivers/staging/irda/include/net/irda/crc.h
index f202296..f202296 100644
--- a/include/net/irda/crc.h
+++ b/drivers/staging/irda/include/net/irda/crc.h
diff --git a/include/net/irda/discovery.h b/drivers/staging/irda/include/net/irda/discovery.h
index 63ae325..63ae325 100644
--- a/include/net/irda/discovery.h
+++ b/drivers/staging/irda/include/net/irda/discovery.h
diff --git a/include/net/irda/ircomm_core.h b/drivers/staging/irda/include/net/irda/ircomm_core.h
index 2a580ce..2a580ce 100644
--- a/include/net/irda/ircomm_core.h
+++ b/drivers/staging/irda/include/net/irda/ircomm_core.h
diff --git a/include/net/irda/ircomm_event.h b/drivers/staging/irda/include/net/irda/ircomm_event.h
index 5bbc329..5bbc329 100644
--- a/include/net/irda/ircomm_event.h
+++ b/drivers/staging/irda/include/net/irda/ircomm_event.h
diff --git a/include/net/irda/ircomm_lmp.h b/drivers/staging/irda/include/net/irda/ircomm_lmp.h
index 5042a50..5042a50 100644
--- a/include/net/irda/ircomm_lmp.h
+++ b/drivers/staging/irda/include/net/irda/ircomm_lmp.h
diff --git a/include/net/irda/ircomm_param.h b/drivers/staging/irda/include/net/irda/ircomm_param.h
index 1f67432..1f67432 100644
--- a/include/net/irda/ircomm_param.h
+++ b/drivers/staging/irda/include/net/irda/ircomm_param.h
diff --git a/include/net/irda/ircomm_ttp.h b/drivers/staging/irda/include/net/irda/ircomm_ttp.h
index c562728..c562728 100644
--- a/include/net/irda/ircomm_ttp.h
+++ b/drivers/staging/irda/include/net/irda/ircomm_ttp.h
diff --git a/include/net/irda/ircomm_tty.h b/drivers/staging/irda/include/net/irda/ircomm_tty.h
index 8d4f588..8d4f588 100644
--- a/include/net/irda/ircomm_tty.h
+++ b/drivers/staging/irda/include/net/irda/ircomm_tty.h
diff --git a/include/net/irda/ircomm_tty_attach.h b/drivers/staging/irda/include/net/irda/ircomm_tty_attach.h
index 20dcbdf..20dcbdf 100644
--- a/include/net/irda/ircomm_tty_attach.h
+++ b/drivers/staging/irda/include/net/irda/ircomm_tty_attach.h
diff --git a/include/net/irda/irda.h b/drivers/staging/irda/include/net/irda/irda.h
index 92c8fb5..92c8fb5 100644
--- a/include/net/irda/irda.h
+++ b/drivers/staging/irda/include/net/irda/irda.h
diff --git a/include/net/irda/irda_device.h b/drivers/staging/irda/include/net/irda/irda_device.h
index 664bf81..664bf81 100644
--- a/include/net/irda/irda_device.h
+++ b/drivers/staging/irda/include/net/irda/irda_device.h
diff --git a/include/net/irda/iriap.h b/drivers/staging/irda/include/net/irda/iriap.h
index fcc8964..fcc8964 100644
--- a/include/net/irda/iriap.h
+++ b/drivers/staging/irda/include/net/irda/iriap.h
diff --git a/include/net/irda/iriap_event.h b/drivers/staging/irda/include/net/irda/iriap_event.h
index 89747f0..89747f0 100644
--- a/include/net/irda/iriap_event.h
+++ b/drivers/staging/irda/include/net/irda/iriap_event.h
diff --git a/include/net/irda/irias_object.h b/drivers/staging/irda/include/net/irda/irias_object.h
index 83f7808..83f7808 100644
--- a/include/net/irda/irias_object.h
+++ b/drivers/staging/irda/include/net/irda/irias_object.h
diff --git a/include/net/irda/irlan_client.h b/drivers/staging/irda/include/net/irda/irlan_client.h
index fa8455e..fa8455e 100644
--- a/include/net/irda/irlan_client.h
+++ b/drivers/staging/irda/include/net/irda/irlan_client.h
diff --git a/include/net/irda/irlan_common.h b/drivers/staging/irda/include/net/irda/irlan_common.h
index 550c2d6..550c2d6 100644
--- a/include/net/irda/irlan_common.h
+++ b/drivers/staging/irda/include/net/irda/irlan_common.h
diff --git a/include/net/irda/irlan_eth.h b/drivers/staging/irda/include/net/irda/irlan_eth.h
index de5c816..de5c816 100644
--- a/include/net/irda/irlan_eth.h
+++ b/drivers/staging/irda/include/net/irda/irlan_eth.h
diff --git a/include/net/irda/irlan_event.h b/drivers/staging/irda/include/net/irda/irlan_event.h
index 018b5a7..018b5a7 100644
--- a/include/net/irda/irlan_event.h
+++ b/drivers/staging/irda/include/net/irda/irlan_event.h
diff --git a/include/net/irda/irlan_filter.h b/drivers/staging/irda/include/net/irda/irlan_filter.h
index a5a2539..a5a2539 100644
--- a/include/net/irda/irlan_filter.h
+++ b/drivers/staging/irda/include/net/irda/irlan_filter.h
diff --git a/include/net/irda/irlan_provider.h b/drivers/staging/irda/include/net/irda/irlan_provider.h
index 92f3b0e..92f3b0e 100644
--- a/include/net/irda/irlan_provider.h
+++ b/drivers/staging/irda/include/net/irda/irlan_provider.h
diff --git a/include/net/irda/irlap.h b/drivers/staging/irda/include/net/irda/irlap.h
index 6f23e82..6f23e82 100644
--- a/include/net/irda/irlap.h
+++ b/drivers/staging/irda/include/net/irda/irlap.h
diff --git a/include/net/irda/irlap_event.h b/drivers/staging/irda/include/net/irda/irlap_event.h
index e4325fe..e4325fe 100644
--- a/include/net/irda/irlap_event.h
+++ b/drivers/staging/irda/include/net/irda/irlap_event.h
diff --git a/include/net/irda/irlap_frame.h b/drivers/staging/irda/include/net/irda/irlap_frame.h
index cbc12a9..cbc12a9 100644
--- a/include/net/irda/irlap_frame.h
+++ b/drivers/staging/irda/include/net/irda/irlap_frame.h
diff --git a/include/net/irda/irlmp.h b/drivers/staging/irda/include/net/irda/irlmp.h
index f132924..f132924 100644
--- a/include/net/irda/irlmp.h
+++ b/drivers/staging/irda/include/net/irda/irlmp.h
diff --git a/include/net/irda/irlmp_event.h b/drivers/staging/irda/include/net/irda/irlmp_event.h
index 9e4ec17..9e4ec17 100644
--- a/include/net/irda/irlmp_event.h
+++ b/drivers/staging/irda/include/net/irda/irlmp_event.h
diff --git a/include/net/irda/irlmp_frame.h b/drivers/staging/irda/include/net/irda/irlmp_frame.h
index 1906eb7..1906eb7 100644
--- a/include/net/irda/irlmp_frame.h
+++ b/drivers/staging/irda/include/net/irda/irlmp_frame.h
diff --git a/include/net/irda/irmod.h b/drivers/staging/irda/include/net/irda/irmod.h
index 86f0dbb..86f0dbb 100644
--- a/include/net/irda/irmod.h
+++ b/drivers/staging/irda/include/net/irda/irmod.h
diff --git a/include/net/irda/irqueue.h b/drivers/staging/irda/include/net/irda/irqueue.h
index 37f512b..37f512b 100644
--- a/include/net/irda/irqueue.h
+++ b/drivers/staging/irda/include/net/irda/irqueue.h
diff --git a/include/net/irda/irttp.h b/drivers/staging/irda/include/net/irda/irttp.h
index 98682d4..98682d4 100644
--- a/include/net/irda/irttp.h
+++ b/drivers/staging/irda/include/net/irda/irttp.h
diff --git a/include/net/irda/parameters.h b/drivers/staging/irda/include/net/irda/parameters.h
index 2d9cd00..2d9cd00 100644
--- a/include/net/irda/parameters.h
+++ b/drivers/staging/irda/include/net/irda/parameters.h
diff --git a/include/net/irda/qos.h b/drivers/staging/irda/include/net/irda/qos.h
index 05a5a24..05a5a24 100644
--- a/include/net/irda/qos.h
+++ b/drivers/staging/irda/include/net/irda/qos.h
diff --git a/include/net/irda/timer.h b/drivers/staging/irda/include/net/irda/timer.h
index d784f24..d784f24 100644
--- a/include/net/irda/timer.h
+++ b/drivers/staging/irda/include/net/irda/timer.h
diff --git a/include/net/irda/wrapper.h b/drivers/staging/irda/include/net/irda/wrapper.h
index eef53eb..eef53eb 100644
--- a/include/net/irda/wrapper.h
+++ b/drivers/staging/irda/include/net/irda/wrapper.h
diff --git a/net/irda/Kconfig b/drivers/staging/irda/net/Kconfig
index c8671a7..6abeae6 100644
--- a/net/irda/Kconfig
+++ b/drivers/staging/irda/net/Kconfig
@@ -27,11 +27,11 @@ menuconfig IRDA
comment "IrDA protocols"
depends on IRDA
-source "net/irda/irlan/Kconfig"
+source "drivers/staging/irda/net/irlan/Kconfig"
-source "net/irda/irnet/Kconfig"
+source "drivers/staging/irda/net/irnet/Kconfig"
-source "net/irda/ircomm/Kconfig"
+source "drivers/staging/irda/net/ircomm/Kconfig"
config IRDA_ULTRA
bool "Ultra (connectionless) protocol"
@@ -92,5 +92,5 @@ config IRDA_DEBUG
If unsure, say Y (since it makes it easier to find the bugs).
-source "drivers/net/irda/Kconfig"
+source "drivers/staging/irda/drivers/Kconfig"
diff --git a/net/irda/Makefile b/drivers/staging/irda/net/Makefile
index 187f6c5..bd1a635 100644
--- a/net/irda/Makefile
+++ b/drivers/staging/irda/net/Makefile
@@ -2,6 +2,8 @@
# Makefile for the Linux IrDA protocol layer.
#
+subdir-ccflags-y += -I$(srctree)/drivers/staging/irda/include
+
obj-$(CONFIG_IRDA) += irda.o
obj-$(CONFIG_IRLAN) += irlan/
obj-$(CONFIG_IRNET) += irnet/
diff --git a/net/irda/af_irda.c b/drivers/staging/irda/net/af_irda.c
index 23fa7c8..23fa7c8 100644
--- a/net/irda/af_irda.c
+++ b/drivers/staging/irda/net/af_irda.c
diff --git a/net/irda/discovery.c b/drivers/staging/irda/net/discovery.c
index 364d70a..364d70a 100644
--- a/net/irda/discovery.c
+++ b/drivers/staging/irda/net/discovery.c
diff --git a/net/irda/ircomm/Kconfig b/drivers/staging/irda/net/ircomm/Kconfig
index 19492c1..19492c1 100644
--- a/net/irda/ircomm/Kconfig
+++ b/drivers/staging/irda/net/ircomm/Kconfig
diff --git a/net/irda/ircomm/Makefile b/drivers/staging/irda/net/ircomm/Makefile
index ab23b5b..ab23b5b 100644
--- a/net/irda/ircomm/Makefile
+++ b/drivers/staging/irda/net/ircomm/Makefile
diff --git a/net/irda/ircomm/ircomm_core.c b/drivers/staging/irda/net/ircomm/ircomm_core.c
index 3af2195..3af2195 100644
--- a/net/irda/ircomm/ircomm_core.c
+++ b/drivers/staging/irda/net/ircomm/ircomm_core.c
diff --git a/net/irda/ircomm/ircomm_event.c b/drivers/staging/irda/net/ircomm/ircomm_event.c
index b0730ac..b0730ac 100644
--- a/net/irda/ircomm/ircomm_event.c
+++ b/drivers/staging/irda/net/ircomm/ircomm_event.c
diff --git a/net/irda/ircomm/ircomm_lmp.c b/drivers/staging/irda/net/ircomm/ircomm_lmp.c
index e4cc847..e4cc847 100644
--- a/net/irda/ircomm/ircomm_lmp.c
+++ b/drivers/staging/irda/net/ircomm/ircomm_lmp.c
diff --git a/net/irda/ircomm/ircomm_param.c b/drivers/staging/irda/net/ircomm/ircomm_param.c
index 5728e76..5728e76 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/drivers/staging/irda/net/ircomm/ircomm_param.c
diff --git a/net/irda/ircomm/ircomm_ttp.c b/drivers/staging/irda/net/ircomm/ircomm_ttp.c
index 4b81e09..4b81e09 100644
--- a/net/irda/ircomm/ircomm_ttp.c
+++ b/drivers/staging/irda/net/ircomm/ircomm_ttp.c
diff --git a/net/irda/ircomm/ircomm_tty.c b/drivers/staging/irda/net/ircomm/ircomm_tty.c
index ec157c3..ec157c3 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/drivers/staging/irda/net/ircomm/ircomm_tty.c
diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/drivers/staging/irda/net/ircomm/ircomm_tty_attach.c
index 0a41101..0a41101 100644
--- a/net/irda/ircomm/ircomm_tty_attach.c
+++ b/drivers/staging/irda/net/ircomm/ircomm_tty_attach.c
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/drivers/staging/irda/net/ircomm/ircomm_tty_ioctl.c
index 171c3de..171c3de 100644
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ b/drivers/staging/irda/net/ircomm/ircomm_tty_ioctl.c
diff --git a/net/irda/irda_device.c b/drivers/staging/irda/net/irda_device.c
index 890b90d0..890b90d0 100644
--- a/net/irda/irda_device.c
+++ b/drivers/staging/irda/net/irda_device.c
diff --git a/net/irda/iriap.c b/drivers/staging/irda/net/iriap.c
index 1138eaf..1138eaf 100644
--- a/net/irda/iriap.c
+++ b/drivers/staging/irda/net/iriap.c
diff --git a/net/irda/iriap_event.c b/drivers/staging/irda/net/iriap_event.c
index e6098b2..e6098b2 100644
--- a/net/irda/iriap_event.c
+++ b/drivers/staging/irda/net/iriap_event.c
diff --git a/net/irda/irias_object.c b/drivers/staging/irda/net/irias_object.c
index 53b86d0..53b86d0 100644
--- a/net/irda/irias_object.c
+++ b/drivers/staging/irda/net/irias_object.c
diff --git a/net/irda/irlan/Kconfig b/drivers/staging/irda/net/irlan/Kconfig
index 951abc2e..951abc2e 100644
--- a/net/irda/irlan/Kconfig
+++ b/drivers/staging/irda/net/irlan/Kconfig
diff --git a/net/irda/irlan/Makefile b/drivers/staging/irda/net/irlan/Makefile
index 94eefbc..94eefbc 100644
--- a/net/irda/irlan/Makefile
+++ b/drivers/staging/irda/net/irlan/Makefile
diff --git a/net/irda/irlan/irlan_client.c b/drivers/staging/irda/net/irlan/irlan_client.c
index c5837a4..c5837a4 100644
--- a/net/irda/irlan/irlan_client.c
+++ b/drivers/staging/irda/net/irlan/irlan_client.c
diff --git a/net/irda/irlan/irlan_client_event.c b/drivers/staging/irda/net/irlan/irlan_client_event.c
index cc93fab..cc93fab 100644
--- a/net/irda/irlan/irlan_client_event.c
+++ b/drivers/staging/irda/net/irlan/irlan_client_event.c
diff --git a/net/irda/irlan/irlan_common.c b/drivers/staging/irda/net/irlan/irlan_common.c
index 481bbc2..481bbc2 100644
--- a/net/irda/irlan/irlan_common.c
+++ b/drivers/staging/irda/net/irlan/irlan_common.c
diff --git a/net/irda/irlan/irlan_eth.c b/drivers/staging/irda/net/irlan/irlan_eth.c
index 3be8528..3be8528 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/drivers/staging/irda/net/irlan/irlan_eth.c
diff --git a/net/irda/irlan/irlan_event.c b/drivers/staging/irda/net/irlan/irlan_event.c
index 9a1cc11..9a1cc11 100644
--- a/net/irda/irlan/irlan_event.c
+++ b/drivers/staging/irda/net/irlan/irlan_event.c
diff --git a/net/irda/irlan/irlan_filter.c b/drivers/staging/irda/net/irlan/irlan_filter.c
index e755e90..e755e90 100644
--- a/net/irda/irlan/irlan_filter.c
+++ b/drivers/staging/irda/net/irlan/irlan_filter.c
diff --git a/net/irda/irlan/irlan_provider.c b/drivers/staging/irda/net/irlan/irlan_provider.c
index 15c292c..15c292c 100644
--- a/net/irda/irlan/irlan_provider.c
+++ b/drivers/staging/irda/net/irlan/irlan_provider.c
diff --git a/net/irda/irlan/irlan_provider_event.c b/drivers/staging/irda/net/irlan/irlan_provider_event.c
index 9c4f7f5..9c4f7f5 100644
--- a/net/irda/irlan/irlan_provider_event.c
+++ b/drivers/staging/irda/net/irlan/irlan_provider_event.c
diff --git a/net/irda/irlap.c b/drivers/staging/irda/net/irlap.c
index 1cde711..1cde711 100644
--- a/net/irda/irlap.c
+++ b/drivers/staging/irda/net/irlap.c
diff --git a/net/irda/irlap_event.c b/drivers/staging/irda/net/irlap_event.c
index 0e1b4d7..0e1b4d7 100644
--- a/net/irda/irlap_event.c
+++ b/drivers/staging/irda/net/irlap_event.c
diff --git a/net/irda/irlap_frame.c b/drivers/staging/irda/net/irlap_frame.c
index debda3d..debda3d 100644
--- a/net/irda/irlap_frame.c
+++ b/drivers/staging/irda/net/irlap_frame.c
diff --git a/net/irda/irlmp.c b/drivers/staging/irda/net/irlmp.c
index 4396459..4396459 100644
--- a/net/irda/irlmp.c
+++ b/drivers/staging/irda/net/irlmp.c
diff --git a/net/irda/irlmp_event.c b/drivers/staging/irda/net/irlmp_event.c
index e306cf2..e306cf2 100644
--- a/net/irda/irlmp_event.c
+++ b/drivers/staging/irda/net/irlmp_event.c
diff --git a/net/irda/irlmp_frame.c b/drivers/staging/irda/net/irlmp_frame.c
index 38b0f99..38b0f99 100644
--- a/net/irda/irlmp_frame.c
+++ b/drivers/staging/irda/net/irlmp_frame.c
diff --git a/net/irda/irmod.c b/drivers/staging/irda/net/irmod.c
index c5e35b8..4319f4f 100644
--- a/net/irda/irmod.c
+++ b/drivers/staging/irda/net/irmod.c
@@ -190,7 +190,7 @@ static void __exit irda_cleanup(void)
*
* Jean II
*/
-subsys_initcall(irda_init);
+device_initcall(irda_init);
module_exit(irda_cleanup);
MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no> & Jean Tourrilhes <jt@hpl.hp.com>");
diff --git a/net/irda/irnet/Kconfig b/drivers/staging/irda/net/irnet/Kconfig
index 28c557f..28c557f 100644
--- a/net/irda/irnet/Kconfig
+++ b/drivers/staging/irda/net/irnet/Kconfig
diff --git a/net/irda/irnet/Makefile b/drivers/staging/irda/net/irnet/Makefile
index 61c365c..61c365c 100644
--- a/net/irda/irnet/Makefile
+++ b/drivers/staging/irda/net/irnet/Makefile
diff --git a/net/irda/irnet/irnet.h b/drivers/staging/irda/net/irnet/irnet.h
index 9d451f8..9d451f8 100644
--- a/net/irda/irnet/irnet.h
+++ b/drivers/staging/irda/net/irnet/irnet.h
diff --git a/net/irda/irnet/irnet_irda.c b/drivers/staging/irda/net/irnet/irnet_irda.c
index e390bce..e390bce 100644
--- a/net/irda/irnet/irnet_irda.c
+++ b/drivers/staging/irda/net/irnet/irnet_irda.c
diff --git a/net/irda/irnet/irnet_irda.h b/drivers/staging/irda/net/irnet/irnet_irda.h
index 3e40895..3e40895 100644
--- a/net/irda/irnet/irnet_irda.h
+++ b/drivers/staging/irda/net/irnet/irnet_irda.h
diff --git a/net/irda/irnet/irnet_ppp.c b/drivers/staging/irda/net/irnet/irnet_ppp.c
index 7025dcb..7025dcb 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/drivers/staging/irda/net/irnet/irnet_ppp.c
diff --git a/net/irda/irnet/irnet_ppp.h b/drivers/staging/irda/net/irnet/irnet_ppp.h
index 3206144..3206144 100644
--- a/net/irda/irnet/irnet_ppp.h
+++ b/drivers/staging/irda/net/irnet/irnet_ppp.h
diff --git a/net/irda/irnetlink.c b/drivers/staging/irda/net/irnetlink.c
index 7fc340e..7fc340e 100644
--- a/net/irda/irnetlink.c
+++ b/drivers/staging/irda/net/irnetlink.c
diff --git a/net/irda/irproc.c b/drivers/staging/irda/net/irproc.c
index 77cfdde..77cfdde 100644
--- a/net/irda/irproc.c
+++ b/drivers/staging/irda/net/irproc.c
diff --git a/net/irda/irqueue.c b/drivers/staging/irda/net/irqueue.c
index 160dc89..160dc89 100644
--- a/net/irda/irqueue.c
+++ b/drivers/staging/irda/net/irqueue.c
diff --git a/net/irda/irsysctl.c b/drivers/staging/irda/net/irsysctl.c
index 873da5e..873da5e 100644
--- a/net/irda/irsysctl.c
+++ b/drivers/staging/irda/net/irsysctl.c
diff --git a/net/irda/irttp.c b/drivers/staging/irda/net/irttp.c
index b6ab41d..b6ab41d 100644
--- a/net/irda/irttp.c
+++ b/drivers/staging/irda/net/irttp.c
diff --git a/net/irda/parameters.c b/drivers/staging/irda/net/parameters.c
index 16ce32f..16ce32f 100644
--- a/net/irda/parameters.c
+++ b/drivers/staging/irda/net/parameters.c
diff --git a/net/irda/qos.c b/drivers/staging/irda/net/qos.c
index 25ba850..25ba850 100644
--- a/net/irda/qos.c
+++ b/drivers/staging/irda/net/qos.c
diff --git a/net/irda/timer.c b/drivers/staging/irda/net/timer.c
index f2280f7..f2280f7 100644
--- a/net/irda/timer.c
+++ b/drivers/staging/irda/net/timer.c
diff --git a/net/irda/wrapper.c b/drivers/staging/irda/net/wrapper.c
index 40a0f99..40a0f99 100644
--- a/net/irda/wrapper.c
+++ b/drivers/staging/irda/net/wrapper.c
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 06d0448..58585ec 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -533,6 +533,7 @@ static void handle_tx(struct vhost_net *net)
ubuf->callback = vhost_zerocopy_callback;
ubuf->ctx = nvq->ubufs;
ubuf->desc = nvq->upend_idx;
+ refcount_set(&ubuf->refcnt, 1);
msg.msg_control = ubuf;
msg.msg_controllen = sizeof(ubuf);
ubufs = nvq->ubufs;
@@ -634,8 +635,13 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
preempt_enable();
- if (vhost_enable_notify(&net->dev, vq))
+ if (!vhost_vq_avail_empty(&net->dev, vq))
vhost_poll_queue(&vq->poll);
+ else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+ vhost_disable_notify(&net->dev, vq);
+ vhost_poll_queue(&vq->poll);
+ }
+
mutex_unlock(&vq->mutex);
len = peek_head_len(rvq, sk);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5e1b548..9aaa177 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -391,7 +391,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
vq->desc_state[head].data = data;
if (indirect)
vq->desc_state[head].indir_desc = desc;
- if (ctx)
+ else
vq->desc_state[head].indir_desc = ctx;
/* Put entry in available array (but don't update avail->idx until they
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 100b207..c05f1f1 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -12,7 +12,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/errno.h>
-#include <rxrpc/packet.h>
#include "internal.h"
#include "afs_fs.h"
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 02781e7..0bf191f 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -14,7 +14,6 @@
#include <net/sock.h>
#include <net/af_rxrpc.h>
-#include <rxrpc/packet.h>
#include "internal.h"
#include "afs_cm.h"
@@ -293,6 +292,19 @@ static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
}
/*
+ * Advance the AFS call state when the RxRPC call ends the transmit phase.
+ */
+static void afs_notify_end_request_tx(struct sock *sock,
+ struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
+{
+ struct afs_call *call = (struct afs_call *)call_user_ID;
+
+ if (call->state == AFS_CALL_REQUESTING)
+ call->state = AFS_CALL_AWAIT_REPLY;
+}
+
+/*
* attach the data from a bunch of pages on an inode to a call
*/
static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
@@ -311,14 +323,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
bytes = msg->msg_iter.count;
nr = msg->msg_iter.nr_segs;
- /* Have to change the state *before* sending the last
- * packet as RxRPC might give us the reply before it
- * returns from sending the request.
- */
- if (first + nr - 1 >= last)
- call->state = AFS_CALL_AWAIT_REPLY;
- ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
- msg, bytes);
+ ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, msg,
+ bytes, afs_notify_end_request_tx);
for (loop = 0; loop < nr; loop++)
put_page(bv[loop].bv_page);
if (ret < 0)
@@ -410,7 +416,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
if (!call->send_pages)
call->state = AFS_CALL_AWAIT_REPLY;
ret = rxrpc_kernel_send_data(afs_socket, rxcall,
- &msg, call->request_size);
+ &msg, call->request_size,
+ afs_notify_end_request_tx);
if (ret < 0)
goto error_do_abort;
@@ -742,6 +749,20 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
}
/*
+ * Advance the AFS call state when an RxRPC service call ends the transmit
+ * phase.
+ */
+static void afs_notify_end_reply_tx(struct sock *sock,
+ struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
+{
+ struct afs_call *call = (struct afs_call *)call_user_ID;
+
+ if (call->state == AFS_CALL_REPLYING)
+ call->state = AFS_CALL_AWAIT_ACK;
+}
+
+/*
* send an empty reply
*/
void afs_send_empty_reply(struct afs_call *call)
@@ -760,7 +781,8 @@ void afs_send_empty_reply(struct afs_call *call)
msg.msg_flags = 0;
call->state = AFS_CALL_AWAIT_ACK;
- switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0)) {
+ switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0,
+ afs_notify_end_reply_tx)) {
case 0:
_leave(" [replied]");
return;
@@ -798,7 +820,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
msg.msg_flags = 0;
call->state = AFS_CALL_AWAIT_ACK;
- n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len);
+ n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len,
+ afs_notify_end_reply_tx);
if (n >= 0) {
/* Success */
_leave(" [replied]");
diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h
index 5de0673..8cf829d 100644
--- a/include/keys/rxrpc-type.h
+++ b/include/keys/rxrpc-type.h
@@ -127,4 +127,27 @@ struct rxrpc_key_data_v1 {
#define AFSTOKEN_K5_ADDRESSES_MAX 16 /* max K5 addresses */
#define AFSTOKEN_K5_AUTHDATA_MAX 16 /* max K5 pieces of auth data */
+/*
+ * Truncate a time64_t to the range from 1970 to 2106 as in the network
+ * protocol.
+ */
+static inline u32 rxrpc_time64_to_u32(time64_t time)
+{
+ if (time < 0)
+ return 0;
+
+ if (time > UINT_MAX)
+ return UINT_MAX;
+
+ return (u32)time;
+}
+
+/*
+ * Extend u32 back to time64_t using the same 1970-2106 range.
+ */
+static inline time64_t rxrpc_u32_to_time64(u32 time)
+{
+ return (time64_t)time;
+}
+
#endif /* _KEYS_RXRPC_TYPE_H */
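
The two helpers above pin rxrpc key expiry handling to the protocol's unsigned
32-bit time window. A small, self-contained demonstration of the clamping
behaviour (demo_time64_to_u32 is an illustrative copy for userspace, not the
kernel function itself):

#include <stdio.h>
#include <stdint.h>
#include <limits.h>

/* Same clamping as rxrpc_time64_to_u32(): [0, UINT_MAX] = 1970..2106. */
static uint32_t demo_time64_to_u32(int64_t t)
{
	if (t < 0)
		return 0;
	if (t > UINT_MAX)
		return UINT_MAX;
	return (uint32_t)t;
}

int main(void)
{
	printf("%u\n", demo_time64_to_u32(-1));         /* clamps to 0 (1970) */
	printf("%u\n", demo_time64_to_u32(1234567890)); /* passes through */
	printf("%u\n", demo_time64_to_u32(1LL << 40));  /* clamps to UINT_MAX (2106) */
	return 0;
}

The reverse direction needs no clamp: every u32 fits in a time64_t, which is
why rxrpc_u32_to_time64() is a plain cast.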
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index c893b95..2b03844 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -133,6 +133,8 @@ enum virtchnl_ops {
VIRTCHNL_OP_CONFIG_RSS_LUT = 24,
VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25,
VIRTCHNL_OP_SET_RSS_HENA = 26,
+ VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27,
+ VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28,
};
/* This macro is used to generate a compilation error if a structure
@@ -223,7 +225,7 @@ struct virtchnl_vsi_resource {
VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
-/* VF offload flags
+/* VF capability flags
* VIRTCHNL_VF_OFFLOAD_L2 flag is inclusive of base mode L2 offloads including
* TX/RX Checksum offloading and TSO for non-tunnelled packets.
*/
@@ -251,7 +253,7 @@ struct virtchnl_vf_resource {
u16 max_vectors;
u16 max_mtu;
- u32 vf_offload_flags;
+ u32 vf_cap_flags;
u32 rss_key_size;
u32 rss_lut_size;
@@ -686,6 +688,9 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
case VIRTCHNL_OP_SET_RSS_HENA:
valid_len = sizeof(struct virtchnl_rss_hena);
break;
+ case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+ case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+ break;
/* These are always errors coming from the VF. */
case VIRTCHNL_OP_EVENT:
case VIRTCHNL_OP_UNKNOWN:
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b69e7a5..c2cb1b5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -14,8 +14,10 @@
#include <linux/percpu.h>
#include <linux/err.h>
#include <linux/rbtree_latch.h>
+#include <linux/numa.h>
struct perf_event;
+struct bpf_prog;
struct bpf_map;
/* map is generic key/value storage optionally accessible by eBPF programs */
@@ -48,6 +50,7 @@ struct bpf_map {
u32 map_flags;
u32 pages;
u32 id;
+ int numa_node;
struct user_struct *user;
const struct bpf_map_ops *ops;
struct work_struct work;
@@ -117,39 +120,27 @@ enum bpf_access_type {
};
/* types of values stored in eBPF registers */
+/* Pointer types represent:
+ * pointer
+ * pointer + imm
+ * pointer + (u16) var
+ * pointer + (u16) var + imm
+ * if (range > 0) then [ptr, ptr + range - off) is safe to access
+ * if (id > 0) means that some 'var' was added
+ * if (off > 0) means that 'imm' was added
+ */
enum bpf_reg_type {
NOT_INIT = 0, /* nothing was written into register */
- UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */
+ SCALAR_VALUE, /* reg doesn't contain a valid pointer */
PTR_TO_CTX, /* reg points to bpf_context */
CONST_PTR_TO_MAP, /* reg points to struct bpf_map */
PTR_TO_MAP_VALUE, /* reg points to map element value */
PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
- FRAME_PTR, /* reg == frame_pointer */
- PTR_TO_STACK, /* reg == frame_pointer + imm */
- CONST_IMM, /* constant integer value */
-
- /* PTR_TO_PACKET represents:
- * skb->data
- * skb->data + imm
- * skb->data + (u16) var
- * skb->data + (u16) var + imm
- * if (range > 0) then [ptr, ptr + range - off) is safe to access
- * if (id > 0) means that some 'var' was added
- * if (off > 0) menas that 'imm' was added
- */
- PTR_TO_PACKET,
+ PTR_TO_STACK, /* reg == frame_pointer + offset */
+ PTR_TO_PACKET, /* reg points to skb->data */
PTR_TO_PACKET_END, /* skb->data + headlen */
-
- /* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map
- * elem value. We only allow this if we can statically verify that
- * access from this register are going to fall within the size of the
- * map element.
- */
- PTR_TO_MAP_VALUE_ADJ,
};
-struct bpf_prog;
-
/* The information passed from prog-specific *_is_valid_access
* back to the verifier.
*/
@@ -262,6 +253,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type);
struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
void bpf_prog_sub(struct bpf_prog *prog, int i);
struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog);
+struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog);
void bpf_prog_put(struct bpf_prog *prog);
int __bpf_prog_charge(struct user_struct *user, u32 pages);
void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
@@ -272,7 +264,7 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
int bpf_map_precharge_memlock(u32 pages);
-void *bpf_map_area_alloc(size_t size);
+void *bpf_map_area_alloc(size_t size, int numa_node);
void bpf_map_area_free(void *base);
extern int sysctl_unprivileged_bpf_disabled;
@@ -318,6 +310,19 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
/* verify correctness of eBPF program */
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
+
+/* Map specifics */
+struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
+void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
+void __dev_map_flush(struct bpf_map *map);
+
+/* Return map's numa specified by userspace */
+static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
+{
+ return (attr->map_flags & BPF_F_NUMA_NODE) ?
+ attr->numa_node : NUMA_NO_NODE;
+}
+
#else
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -348,6 +353,12 @@ static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog)
return ERR_PTR(-EOPNOTSUPP);
}
+static inline struct bpf_prog *__must_check
+bpf_prog_inc_not_zero(struct bpf_prog *prog)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
return 0;
@@ -356,8 +367,39 @@ static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
}
+
+static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map,
+ u32 key)
+{
+ return NULL;
+}
+
+static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index)
+{
+}
+
+static inline void __dev_map_flush(struct bpf_map *map)
+{
+}
#endif /* CONFIG_BPF_SYSCALL */
+#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL)
+struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
+int sock_map_attach_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
+#else
+static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+ return NULL;
+}
+
+static inline int sock_map_attach_prog(struct bpf_map *map,
+ struct bpf_prog *prog,
+ u32 type)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
/* verifier prototypes for helper functions called from eBPF programs */
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
@@ -374,6 +416,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
+extern const struct bpf_func_proto bpf_sock_map_update_proto;
/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
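
bpf_map_attr_numa_node() gives map implementations one place to read the
user's BPF_F_NUMA_NODE request. A hedged sketch of how an allocation path
might combine it with the widened bpf_map_area_alloc() -- struct my_map and
my_map_alloc are illustrative stand-ins, not an existing map type:

#include <linux/bpf.h>
#include <linux/err.h>

struct my_map {
	struct bpf_map map;
	/* ... implementation-private data ... */
};

static struct bpf_map *my_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct my_map *m;

	/* Falls back to NUMA_NO_NODE when BPF_F_NUMA_NODE wasn't set. */
	m = bpf_map_area_alloc(sizeof(*m), numa_node);
	if (!m)
		return ERR_PTR(-ENOMEM);
	m->map.numa_node = numa_node;
	return &m->map;
}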
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 3d137c3..6f1a567 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -11,6 +11,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb_prog_ops)
#endif
#ifdef CONFIG_BPF_EVENTS
BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops)
@@ -35,3 +36,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
+#ifdef CONFIG_NET
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
+#ifdef CONFIG_STREAM_PARSER
+BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
+#endif
+#endif
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 8e5d31f..b8d200f 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -9,41 +9,75 @@
#include <linux/bpf.h> /* for enum bpf_reg_type */
#include <linux/filter.h> /* for MAX_BPF_STACK */
+#include <linux/tnum.h>
- /* Just some arbitrary values so we can safely do math without overflowing and
- * are obviously wrong for any sort of memory access.
- */
-#define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024)
-#define BPF_REGISTER_MIN_RANGE -1
+/* Maximum variable offset umax_value permitted when resolving memory accesses.
+ * In practice this is far bigger than any realistic pointer offset; this limit
+ * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
+ */
+#define BPF_MAX_VAR_OFF (1ULL << 31)
+/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures
+ * that converting umax_value to int cannot overflow.
+ */
+#define BPF_MAX_VAR_SIZ INT_MAX
+
+/* Liveness marks, used for registers and spilled-regs (in stack slots).
+ * Read marks propagate upwards until they find a write mark; they record that
+ * "one of this state's descendants read this reg" (and therefore the reg is
+ * relevant for states_equal() checks).
+ * Write marks collect downwards and do not propagate; they record that "the
+ * straight-line code that reached this state (from its parent) wrote this reg"
+ * (and therefore that reads propagated from this state or its descendants
+ * should not propagate to its parent).
+ * A state with a write mark can receive read marks; it just won't propagate
+ * them to its parent, since the write mark is a property, not of the state,
+ * but of the link between it and its parent. See mark_reg_read() and
+ * mark_stack_slot_read() in kernel/bpf/verifier.c.
+ */
+enum bpf_reg_liveness {
+ REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
+ REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */
+ REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */
+};
struct bpf_reg_state {
enum bpf_reg_type type;
union {
- /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
- s64 imm;
-
- /* valid when type == PTR_TO_PACKET* */
- struct {
- u16 off;
- u16 range;
- };
+ /* valid when type == PTR_TO_PACKET */
+ u16 range;
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
* PTR_TO_MAP_VALUE_OR_NULL
*/
struct bpf_map *map_ptr;
};
+ /* Fixed part of pointer offset, pointer types only */
+ s32 off;
+ /* For PTR_TO_PACKET, used to find other pointers with the same variable
+ * offset, so they can share range knowledge.
+ * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
+ * came from, when one is tested for != NULL.
+ */
u32 id;
+ /* Ordering of fields matters. See states_equal() */
+ /* For scalar types (SCALAR_VALUE), this represents our knowledge of
+ * the actual value.
+ * For pointer types, this represents the variable part of the offset
+ * from the pointed-to object, and is shared with all bpf_reg_states
+ * with the same id as us.
+ */
+ struct tnum var_off;
/* Used to determine if any memory access using this register will
- * result in a bad access. These two fields must be last.
- * See states_equal()
+ * result in a bad access.
+ * These refer to the same value as var_off, not necessarily the actual
+ * contents of the register.
*/
- s64 min_value;
- u64 max_value;
- u32 min_align;
- u32 aux_off;
- u32 aux_off_align;
- bool value_from_signed;
+ s64 smin_value; /* minimum possible (s64)value */
+ s64 smax_value; /* maximum possible (s64)value */
+ u64 umin_value; /* minimum possible (u64)value */
+ u64 umax_value; /* maximum possible (u64)value */
+ /* This field must be last, for states_equal() reasons. */
+ enum bpf_reg_liveness live;
};
enum bpf_stack_slot_type {
@@ -61,6 +95,7 @@ struct bpf_verifier_state {
struct bpf_reg_state regs[MAX_BPF_REG];
u8 stack_slot_type[MAX_BPF_STACK];
struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
+ struct bpf_verifier_state *parent;
};
/* linked list of verifier states used to prune search */
@@ -103,7 +138,6 @@ struct bpf_verifier_env {
u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
bool seen_direct_write;
- bool varlen_map_value_access;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
};
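
The reworked bpf_reg_state replaces the old imm/min_value/max_value trio with
four explicit signed and unsigned bounds. As a toy, userspace-only
illustration of what those bounds mean when a constant is added to a register
(the real verifier also checks each bound for overflow before updating, which
this sketch omits):

#include <stdio.h>
#include <stdint.h>

/* Toy model of the four bounds the new bpf_reg_state tracks. */
struct bounds {
	int64_t smin, smax;   /* minimum/maximum possible (s64)value */
	uint64_t umin, umax;  /* minimum/maximum possible (u64)value */
};

static void add_const(struct bounds *b, int64_t imm)
{
	b->smin += imm;
	b->smax += imm;
	b->umin += imm;
	b->umax += imm;
}

int main(void)
{
	/* e.g. a byte loaded from a packet: known to lie in [0, 255] */
	struct bounds r = { 0, 255, 0, 255 };

	add_const(&r, 16);
	printf("smin=%lld smax=%lld umin=%llu umax=%llu\n",
	       (long long)r.smin, (long long)r.smax,
	       (unsigned long long)r.umin, (unsigned long long)r.umax);
	return 0;
}

Tracking signed and unsigned ranges separately is what lets the verifier keep
precise knowledge across both kinds of comparison, with var_off (a tnum)
carrying the known-bits view alongside.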
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 83cc986..4587a4c 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -137,6 +137,17 @@ struct ethtool_link_ksettings {
__set_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name)
/**
+ * ethtool_link_ksettings_del_link_mode - clear bit in link_ksettings
+ * link mode mask
+ * @ptr : pointer to struct ethtool_link_ksettings
+ * @name : one of supported/advertising/lp_advertising
+ * @mode : one of the ETHTOOL_LINK_MODE_*_BIT
+ * (not atomic, no bound checking)
+ */
+#define ethtool_link_ksettings_del_link_mode(ptr, name, mode) \
+ __clear_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name)
+
+/**
* ethtool_link_ksettings_test_link_mode - test bit in ksettings link mode mask
* @ptr : pointer to struct ethtool_link_ksettings
* @name : one of supported/advertising/lp_advertising
@@ -374,5 +385,9 @@ struct ethtool_ops {
struct ethtool_link_ksettings *);
int (*set_link_ksettings)(struct net_device *,
const struct ethtool_link_ksettings *);
+ int (*get_fecparam)(struct net_device *,
+ struct ethtool_fecparam *);
+ int (*set_fecparam)(struct net_device *,
+ struct ethtool_fecparam *);
};
#endif /* _LINUX_ETHTOOL_H */
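
The new ethtool_link_ksettings_del_link_mode() mirrors the existing _add_
helper. A hedged driver-style sketch (my_get_link_ksettings is an
illustrative stand-in) showing a bit being set and then cleared again:

#include <linux/ethtool.h>
#include <linux/netdevice.h>

static int my_get_link_ksettings(struct net_device *dev,
				 struct ethtool_link_ksettings *ks)
{
	ethtool_link_ksettings_zero_link_mode(ks, supported);
	ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseT_Full);
	ethtool_link_ksettings_add_link_mode(ks, supported, 1000baseT_Full);
	/* The new helper mirrors _add_: clear a previously set mode bit,
	 * e.g. after probing shows the lower speed is unusable. */
	ethtool_link_ksettings_del_link_mode(ks, supported, 1000baseT_Full);
	return 0;
}

Like the _add_ and _test_ variants, it is a thin wrapper over a bitop
(__clear_bit here), so it is neither atomic nor bounds-checked.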
diff --git a/include/linux/filter.h b/include/linux/filter.h
index bfef1e5..d29e58f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -711,7 +711,24 @@ bool bpf_helper_changes_pkt_data(void *func);
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
const struct bpf_insn *patch, u32 len);
+
+/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
+ * same cpu context. Further for best results no more than a single map
+ * for the do_redirect/do_flush pair should be used. This limitation is
+ * because we only track one map and force a flush when the map changes.
+ * This does not appear to be a real limitation for existing software.
+ */
+int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
+ struct bpf_prog *prog);
+int xdp_do_redirect(struct net_device *dev,
+ struct xdp_buff *xdp,
+ struct bpf_prog *prog);
+void xdp_do_flush_map(void);
+
void bpf_warn_invalid_xdp_action(u32 act);
+void bpf_warn_invalid_xdp_redirect(u32 ifindex);
+
+struct sock *do_sk_redirect_map(void);
#ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable;
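
The comment above fixes the calling convention: all redirects and the single
flush must happen on one CPU, against one map. A schematic receive-poll
sketch under those rules -- every my_* symbol is a stand-in for
driver-private code, not a real API:

/* Schematic only: my_ring, my_rx_pending() and my_fill_xdp_buff() are
 * hypothetical driver internals. */
static int my_rx_poll(struct my_ring *ring, int budget)
{
	int done = 0;

	while (done < budget && my_rx_pending(ring)) {
		struct xdp_buff xdp;

		my_fill_xdp_buff(ring, &xdp);
		if (bpf_prog_run_xdp(ring->xdp_prog, &xdp) == XDP_REDIRECT)
			xdp_do_redirect(ring->netdev, &xdp, ring->xdp_prog);
		done++;
	}
	/* One flush per poll, on the CPU that issued the redirects. */
	xdp_do_flush_map();
	return done;
}

Batching the flush this way is what lets the devmap amortize doorbell writes
across a whole NAPI poll instead of kicking the target device per packet.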
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 07650d0..e4bbf7d 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1016,13 +1016,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel,
enum vmbus_packet_type type,
u32 flags);
-extern int vmbus_sendpacket_ctl(struct vmbus_channel *channel,
- void *buffer,
- u32 bufferLen,
- u64 requestid,
- enum vmbus_packet_type type,
- u32 flags);
-
extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
struct hv_page_buffer pagebuffers[],
u32 pagecount,
@@ -1030,20 +1023,6 @@ extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
u32 bufferlen,
u64 requestid);
-extern int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
- struct hv_page_buffer pagebuffers[],
- u32 pagecount,
- void *buffer,
- u32 bufferlen,
- u64 requestid,
- u32 flags);
-
-extern int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
- struct hv_multipage_buffer *mpb,
- void *buffer,
- u32 bufferlen,
- u64 requestid);
-
extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
struct vmbus_packet_mpb_array *mpb,
u32 desc_size,
diff --git a/include/linux/idr.h b/include/linux/idr.h
index bf70b3e..7c3a365 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -80,19 +80,75 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val)
*/
void idr_preload(gfp_t gfp_mask);
-int idr_alloc(struct idr *, void *entry, int start, int end, gfp_t);
+
+int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index,
+ unsigned long start, unsigned long end, gfp_t gfp,
+ bool ext);
+
+/**
+ * idr_alloc - allocate an id
+ * @idr: idr handle
+ * @ptr: pointer to be associated with the new id
+ * @start: the minimum id (inclusive)
+ * @end: the maximum id (exclusive)
+ * @gfp: memory allocation flags
+ *
+ * Allocates an unused ID in the range [start, end). Returns -ENOSPC
+ * if there are no unused IDs in that range.
+ *
+ * Note that @end is treated as max when <= 0. This is to always allow
+ * using @start + N as @end as long as N is inside integer range.
+ *
+ * Simultaneous modifications to the @idr are not allowed and should be
+ * prevented by the user, usually with a lock. idr_alloc() may be called
+ * concurrently with read-only accesses to the @idr, such as idr_find() and
+ * idr_for_each_entry().
+ */
+static inline int idr_alloc(struct idr *idr, void *ptr,
+ int start, int end, gfp_t gfp)
+{
+ unsigned long id;
+ int ret;
+
+ if (WARN_ON_ONCE(start < 0))
+ return -EINVAL;
+
+ ret = idr_alloc_cmn(idr, ptr, &id, start, end, gfp, false);
+
+ if (ret)
+ return ret;
+
+ return id;
+}
+
+static inline int idr_alloc_ext(struct idr *idr, void *ptr,
+ unsigned long *index,
+ unsigned long start,
+ unsigned long end,
+ gfp_t gfp)
+{
+ return idr_alloc_cmn(idr, ptr, index, start, end, gfp, true);
+}
+
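/*
 * Illustrative sketch, not part of the patch: typical idr_alloc() usage.
 * The my_* names are hypothetical; the spinlock serializes modifications
 * as the comment above requires, while idr_preload() pre-allocates so
 * GFP_NOWAIT can be used under the lock.
 */
static DEFINE_IDR(my_idr);
static DEFINE_SPINLOCK(my_lock);

static int my_assign_id(void *obj)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock(&my_lock);
	id = idr_alloc(&my_idr, obj, 1, 0, GFP_NOWAIT);	/* end <= 0: no upper cap */
	spin_unlock(&my_lock);
	idr_preload_end();

	return id;	/* >= 1 on success, -ENOSPC/-ENOMEM on failure */
}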
int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t);
int idr_for_each(const struct idr *,
int (*fn)(int id, void *p, void *data), void *data);
void *idr_get_next(struct idr *, int *nextid);
+void *idr_get_next_ext(struct idr *idr, unsigned long *nextid);
void *idr_replace(struct idr *, void *, int id);
+void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id);
void idr_destroy(struct idr *);
-static inline void *idr_remove(struct idr *idr, int id)
+static inline void *idr_remove_ext(struct idr *idr, unsigned long id)
{
return radix_tree_delete_item(&idr->idr_rt, id, NULL);
}
+static inline void *idr_remove(struct idr *idr, int id)
+{
+ return idr_remove_ext(idr, id);
+}
+
static inline void idr_init(struct idr *idr)
{
INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
@@ -128,11 +184,16 @@ static inline void idr_preload_end(void)
* This function can be called under rcu_read_lock(), given that the leaf
* pointers lifetimes are correctly managed.
*/
-static inline void *idr_find(const struct idr *idr, int id)
+static inline void *idr_find_ext(const struct idr *idr, unsigned long id)
{
return radix_tree_lookup(&idr->idr_rt, id);
}
+static inline void *idr_find(const struct idr *idr, int id)
+{
+ return idr_find_ext(idr, id);
+}
+
/**
* idr_for_each_entry - iterate over an idr's elements of a given type
* @idr: idr handle
@@ -145,6 +206,8 @@ static inline void *idr_find(const struct idr *idr, int id)
*/
#define idr_for_each_entry(idr, entry, id) \
for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id)
+#define idr_for_each_entry_ext(idr, entry, id) \
+ for (id = 0; ((entry) = idr_get_next_ext(idr, &(id))) != NULL; ++id)
/**
* idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 97caf18..f823185 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -118,7 +118,8 @@ extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
struct ip_msfilter __user *optval, int __user *optlen);
extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
struct group_filter __user *optval, int __user *optlen);
-extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, int dif);
+extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt,
+ int dif, int sdif);
extern void ip_mc_init_dev(struct in_device *);
extern void ip_mc_destroy_dev(struct in_device *);
extern void ip_mc_up(struct in_device *);
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 65da430..ee251c5 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -25,6 +25,13 @@ struct inet_diag_handler {
struct inet_diag_msg *r,
void *info);
+ int (*idiag_get_aux)(struct sock *sk,
+ bool net_admin,
+ struct sk_buff *skb);
+
+ size_t (*idiag_get_aux_size)(struct sock *sk,
+ bool net_admin);
+
int (*destroy)(struct sk_buff *in_skb,
const struct inet_diag_req_v2 *req);
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 474d6bb..ac2da4e 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -159,6 +159,16 @@ static inline bool inet6_is_jumbogram(const struct sk_buff *skb)
}
/* can not be used in TCP layer after tcp_v6_fill_cb */
+static inline int inet6_sdif(const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (skb && ipv6_l3mdev_skb(IP6CB(skb)->flags))
+ return IP6CB(skb)->iif;
+#endif
+ return 0;
+}
+
+/* can not be used in TCP layer after tcp_v6_fill_cb */
static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb)
{
#if defined(CONFIG_NET_L3_MASTER_DEV)
diff --git a/include/linux/mdio-mux.h b/include/linux/mdio-mux.h
index 61f5b21..a5d58f2 100644
--- a/include/linux/mdio-mux.h
+++ b/include/linux/mdio-mux.h
@@ -12,7 +12,16 @@
#include <linux/device.h>
#include <linux/phy.h>
+/* mdio_mux_init() - Initialize an MDIO mux
+ * @dev The device owning the MDIO mux
+ * @mux_node The device node of the MDIO mux
+ * @switch_fn The function called to switch to the target MDIO child bus
+ * @mux_handle A pointer to a (void *) used internally by mdio-mux
+ * @data Private data used by switch_fn()
+ * @mux_bus An optional parent bus (otherwise the parent_bus property is used)
+ */
int mdio_mux_init(struct device *dev,
+ struct device_node *mux_node,
int (*switch_fn) (int cur, int desired, void *data),
void **mux_handle,
void *data,
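/*
 * Illustrative sketch, not part of the patch: the shape of a switch_fn
 * passed to mdio_mux_init(). struct my_mux and MY_MUX_SEL are
 * hypothetical; the contract is simply "route child bus @desired to the
 * parent bus and return 0".
 */
static int my_mdio_mux_switch_fn(int cur, int desired, void *data)
{
	struct my_mux *mux = data;

	if (cur != desired)
		writel(desired, mux->base + MY_MUX_SEL);	/* hypothetical register */
	return 0;
}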
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index c8a63e1..b0a57e0 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -224,6 +224,7 @@ enum {
MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35,
MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP = 1ULL << 36,
MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT = 1ULL << 37,
+ MLX4_DEV_CAP_FLAG2_USER_MAC_EN = 1ULL << 38,
};
enum {
@@ -524,6 +525,14 @@ struct mlx4_phys_caps {
u32 base_tunnel_sqpn;
};
+struct mlx4_spec_qps {
+ u32 qp0_qkey;
+ u32 qp0_proxy;
+ u32 qp0_tunnel;
+ u32 qp1_proxy;
+ u32 qp1_tunnel;
+};
+
struct mlx4_caps {
u64 fw_ver;
u32 function;
@@ -553,11 +562,7 @@ struct mlx4_caps {
int max_qp_init_rdma;
int max_qp_dest_rdma;
int max_tc_eth;
- u32 *qp0_qkey;
- u32 *qp0_proxy;
- u32 *qp1_proxy;
- u32 *qp0_tunnel;
- u32 *qp1_tunnel;
+ struct mlx4_spec_qps *spec_qps;
int num_srqs;
int max_srq_wqes;
int max_srq_sge;
@@ -1381,6 +1386,7 @@ int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port);
int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
+int mlx4_SET_PORT_user_mac(struct mlx4_dev *dev, u8 port, u8 *user_mac);
int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu);
int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
u8 promisc);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index c13d71d..eaf4ad2 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -48,7 +48,7 @@
/* helper macros */
#define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0)
#define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld)
-#define __mlx5_bit_off(typ, fld) ((unsigned)(unsigned long)(&(__mlx5_nullp(typ)->fld)))
+#define __mlx5_bit_off(typ, fld) (offsetof(struct mlx5_ifc_##typ##_bits, fld))
#define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32)
#define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64)
#define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0x1f))
@@ -714,7 +714,7 @@ static inline int mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
return (cqe->op_own >> 2) & 0x3;
}
-static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
+static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
{
return (cqe->lro_tcppsh_abort_dupack >> 6) & 1;
}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b3fc9d5..02ff700 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -561,6 +561,7 @@ struct mlx5_fc_stats {
unsigned long sampling_interval; /* jiffies */
};
+struct mlx5_mpfs;
struct mlx5_eswitch;
struct mlx5_lag;
struct mlx5_pagefault;
@@ -656,7 +657,11 @@ struct mlx5_priv {
struct list_head ctx_list;
spinlock_t ctx_lock;
+ struct list_head waiting_events_list;
+ bool is_accum_events;
+
struct mlx5_flow_steering *steering;
+ struct mlx5_mpfs *mpfs;
struct mlx5_eswitch *eswitch;
struct mlx5_core_sriov sriov;
struct mlx5_lag *lag;
@@ -886,8 +891,6 @@ static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
return buf->direct.buf + offset;
}
-extern struct workqueue_struct *mlx5_core_wq;
-
#define STRUCT_FIELD(header, field) \
.struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
.struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b7338a2..a528b35 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -607,7 +607,7 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
u8 enhanced_multi_pkt_send_wqe[0x1];
u8 tunnel_lso_const_out_ip_id[0x1];
u8 reserved_at_1c[0x2];
- u8 tunnel_statless_gre[0x1];
+ u8 tunnel_stateless_gre[0x1];
u8 tunnel_stateless_vxlan[0x1];
u8 swp[0x1];
@@ -969,7 +969,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_2a0[0x10];
u8 max_wqe_sz_rq[0x10];
- u8 reserved_at_2c0[0x10];
+ u8 max_flow_counter_31_16[0x10];
u8 max_wqe_sz_sq_dc[0x10];
u8 reserved_at_2e0[0x7];
@@ -987,7 +987,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_340[0x8];
u8 log_max_flow_counter_bulk[0x8];
- u8 max_flow_counter[0x10];
+ u8 max_flow_counter_15_0[0x10];
u8 reserved_at_360[0x3];
@@ -1078,9 +1078,7 @@ struct mlx5_ifc_dest_format_struct_bits {
};
struct mlx5_ifc_flow_counter_list_bits {
- u8 clear[0x1];
- u8 num_of_counters[0xf];
- u8 flow_counter_id[0x10];
+ u8 flow_counter_id[0x20];
u8 reserved_at_20[0x20];
};
@@ -1548,7 +1546,17 @@ struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits {
u8 port_transmit_wait_low[0x20];
- u8 reserved_at_40[0x780];
+ u8 reserved_at_40[0x100];
+
+ u8 rx_buffer_almost_full_high[0x20];
+
+ u8 rx_buffer_almost_full_low[0x20];
+
+ u8 rx_buffer_full_high[0x20];
+
+ u8 rx_buffer_full_low[0x20];
+
+ u8 reserved_at_1c0[0x600];
};
struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits {
@@ -1864,7 +1872,19 @@ struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits {
u8 crc_error_tlp[0x20];
- u8 reserved_at_140[0x680];
+ u8 tx_overflow_buffer_pkt_high[0x20];
+
+ u8 tx_overflow_buffer_pkt_low[0x20];
+
+ u8 outbound_stalled_reads[0x20];
+
+ u8 outbound_stalled_writes[0x20];
+
+ u8 outbound_stalled_reads_events[0x20];
+
+ u8 outbound_stalled_writes_events[0x20];
+
+ u8 reserved_at_200[0x5c0];
};
struct mlx5_ifc_cmd_inter_comp_event_bits {
@@ -4458,8 +4478,7 @@ struct mlx5_ifc_query_flow_counter_in_bits {
u8 reserved_at_c1[0xf];
u8 num_of_counters[0x10];
- u8 reserved_at_e0[0x10];
- u8 flow_counter_id[0x10];
+ u8 flow_counter_id[0x20];
};
struct mlx5_ifc_query_esw_vport_context_out_bits {
@@ -6351,8 +6370,7 @@ struct mlx5_ifc_dealloc_flow_counter_in_bits {
u8 reserved_at_20[0x10];
u8 op_mod[0x10];
- u8 reserved_at_40[0x10];
- u8 flow_counter_id[0x10];
+ u8 flow_counter_id[0x20];
u8 reserved_at_60[0x20];
};
@@ -7177,8 +7195,7 @@ struct mlx5_ifc_alloc_flow_counter_out_bits {
u8 syndrome[0x20];
- u8 reserved_at_40[0x10];
- u8 flow_counter_id[0x10];
+ u8 flow_counter_id[0x20];
u8 reserved_at_60[0x20];
};
@@ -7797,8 +7814,9 @@ struct mlx5_ifc_peir_reg_bits {
};
struct mlx5_ifc_pcam_enhanced_features_bits {
- u8 reserved_at_0[0x7c];
+ u8 reserved_at_0[0x7b];
+ u8 rx_buffer_fullness_counters[0x1];
u8 ptys_connector_type[0x1];
u8 reserved_at_7d[0x1];
u8 ppcnt_discard_group[0x1];
@@ -7828,8 +7846,9 @@ struct mlx5_ifc_pcam_reg_bits {
};
struct mlx5_ifc_mcam_enhanced_features_bits {
- u8 reserved_at_0[0x7d];
-
+ u8 reserved_at_0[0x7b];
+ u8 pcie_outbound_stalled[0x1];
+ u8 tx_overflow_buffer_pkt[0x1];
u8 mtpps_enh_out_per_adj[0x1];
u8 mtpps_fs[0x1];
u8 pcie_performance_group[0x1];
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index b733eb4..abacd54 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -39,6 +39,7 @@
#define SDIO_DEVICE_ID_BROADCOM_43455 0xa9bf
#define SDIO_DEVICE_ID_BROADCOM_4354 0x4354
#define SDIO_DEVICE_ID_BROADCOM_4356 0x4356
+#define SDIO_DEVICE_ID_CYPRESS_4373 0x4373
#define SDIO_VENDOR_ID_INTEL 0x0089
#define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX 0x1402
diff --git a/include/linux/net.h b/include/linux/net.h
index ebeb48c..d97d80d 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -190,8 +190,16 @@ struct proto_ops {
struct pipe_inode_info *pipe, size_t len, unsigned int flags);
int (*set_peek_off)(struct sock *sk, int val);
int (*peek_len)(struct socket *sock);
+
+ /* The following functions are called internally by the kernel with
+ * the sock lock already held.
+ */
int (*read_sock)(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
+ int (*sendpage_locked)(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
+ int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
+ size_t size);
};
#define DECLARE_SOCKADDR(type, dst, src) \
@@ -279,6 +287,8 @@ do { \
int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
size_t num, size_t len);
+int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
+ struct kvec *vec, size_t num, size_t len);
int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
size_t num, size_t len, int flags);
@@ -297,6 +307,8 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
unsigned int optlen);
int kernel_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
+int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags);
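/*
 * Illustrative sketch, not part of the patch: the _locked variants assume
 * the caller already holds the socket lock, e.g. from a strparser or
 * sockmap-style context. my_locked_write() is hypothetical.
 */
static int my_locked_write(struct sock *sk, void *buf, size_t len)
{
	struct kvec vec = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };

	/* lock_sock(sk) has already been taken by our caller */
	return kernel_sendmsg_locked(sk, &msg, &vec, 1, len);
}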
int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 1d4737c..dc8b489 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -36,7 +36,6 @@ enum {
/**/NETIF_F_GSO_SHIFT, /* keep the order of SKB_GSO_* bits */
NETIF_F_TSO_BIT /* ... TCPv4 segmentation */
= NETIF_F_GSO_SHIFT,
- NETIF_F_UFO_BIT, /* ... UDPv4 fragmentation */
NETIF_F_GSO_ROBUST_BIT, /* ... ->SKB_GSO_DODGY */
NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */
NETIF_F_TSO_MANGLEID_BIT, /* ... IPV4 ID mangling allowed */
@@ -76,6 +75,7 @@ enum {
NETIF_F_HW_TC_BIT, /* Offload TC infrastructure */
NETIF_F_HW_ESP_BIT, /* Hardware ESP transformation offload */
NETIF_F_HW_ESP_TX_CSUM_BIT, /* ESP with TX checksum offload */
+ NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */
/*
* Add your fresh new feature above and remember to update
@@ -118,7 +118,6 @@ enum {
#define NETIF_F_TSO6 __NETIF_F(TSO6)
#define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN)
#define NETIF_F_TSO __NETIF_F(TSO)
-#define NETIF_F_UFO __NETIF_F(UFO)
#define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED)
#define NETIF_F_RXFCS __NETIF_F(RXFCS)
#define NETIF_F_RXALL __NETIF_F(RXALL)
@@ -140,6 +139,7 @@ enum {
#define NETIF_F_HW_TC __NETIF_F(HW_TC)
#define NETIF_F_HW_ESP __NETIF_F(HW_ESP)
#define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM)
+#define NETIF_F_RX_UDP_TUNNEL_PORT __NETIF_F(RX_UDP_TUNNEL_PORT)
#define for_each_netdev_feature(mask_addr, bit) \
for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
@@ -172,7 +172,7 @@ enum {
NETIF_F_FSO)
/* List of features with software fallbacks. */
-#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \
+#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \
NETIF_F_GSO_SCTP)
/*
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 461bd57..f535779 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -35,7 +35,6 @@
#include <linux/percpu.h>
#include <linux/rculist.h>
-#include <linux/dmaengine.h>
#include <linux/workqueue.h>
#include <linux/dynamic_queue_limits.h>
@@ -66,6 +65,7 @@ struct mpls_dev;
/* UDP Tunnel offloads */
struct udp_tunnel_info;
struct bpf_prog;
+struct xdp_buff;
void netdev_set_default_ethtool_ops(struct net_device *dev,
const struct ethtool_ops *ops);
@@ -693,10 +693,9 @@ struct netdev_rx_queue {
*/
struct rx_queue_attribute {
struct attribute attr;
- ssize_t (*show)(struct netdev_rx_queue *queue,
- struct rx_queue_attribute *attr, char *buf);
+ ssize_t (*show)(struct netdev_rx_queue *queue, char *buf);
ssize_t (*store)(struct netdev_rx_queue *queue,
- struct rx_queue_attribute *attr, const char *buf, size_t len);
+ const char *buf, size_t len);
};
#ifdef CONFIG_XPS
@@ -770,31 +769,14 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
struct sk_buff *skb);
-/* These structures hold the attributes of qdisc and classifiers
- * that are being passed to the netdevice through the setup_tc op.
- */
-enum {
+enum tc_setup_type {
TC_SETUP_MQPRIO,
TC_SETUP_CLSU32,
TC_SETUP_CLSFLOWER,
- TC_SETUP_MATCHALL,
+ TC_SETUP_CLSMATCHALL,
TC_SETUP_CLSBPF,
};
-struct tc_cls_u32_offload;
-
-struct tc_to_netdev {
- unsigned int type;
- union {
- struct tc_cls_u32_offload *cls_u32;
- struct tc_cls_flower_offload *cls_flower;
- struct tc_cls_matchall_offload *cls_mall;
- struct tc_cls_bpf_offload *cls_bpf;
- struct tc_mqprio_qopt *mqprio;
- };
- bool egress_dev;
-};
-
/* These structures hold the attributes of xdp state that are being passed
* to the netdevice through the xdp op.
*/
@@ -977,8 +959,8 @@ struct xfrmdev_ops {
* with PF and querying it may introduce a theoretical security risk.
* int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting);
* int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
- * int (*ndo_setup_tc)(struct net_device *dev, u32 handle, u32 chain_index,
- * __be16 protocol, struct tc_to_netdev *tc);
+ * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type,
+ * void *type_data);
* Called to setup any 'tc' scheduler, classifier or action on @dev.
* This is always called from the stack with the rtnl lock held and netif
* tx queues stopped. This allows the netdevice to perform queue
@@ -1138,7 +1120,12 @@ struct xfrmdev_ops {
* int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp);
* This function is used to set or query state related to XDP on the
* netdevice. See definition of enum xdp_netdev_command for details.
- *
+ * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp);
+ * This function is used to submit an XDP packet for transmit on a
+ * netdevice.
+ * void (*ndo_xdp_flush)(struct net_device *dev);
+ * This function is used to inform the driver to flush a particular
+ * xdp tx queue. Must be called on the same CPU as ndo_xdp_xmit.
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1221,9 +1208,8 @@ struct net_device_ops {
struct net_device *dev,
int vf, bool setting);
int (*ndo_setup_tc)(struct net_device *dev,
- u32 handle, u32 chain_index,
- __be16 protocol,
- struct tc_to_netdev *tc);
+ enum tc_setup_type type,
+ void *type_data);
#if IS_ENABLED(CONFIG_FCOE)
int (*ndo_fcoe_enable)(struct net_device *dev);
int (*ndo_fcoe_disable)(struct net_device *dev);
@@ -1323,6 +1309,9 @@ struct net_device_ops {
int needed_headroom);
int (*ndo_xdp)(struct net_device *dev,
struct netdev_xdp *xdp);
+ int (*ndo_xdp_xmit)(struct net_device *dev,
+ struct xdp_buff *xdp);
+ void (*ndo_xdp_flush)(struct net_device *dev);
};
/**
@@ -1802,7 +1791,7 @@ struct net_device {
#endif
struct netdev_queue __rcu *ingress_queue;
#ifdef CONFIG_NETFILTER_INGRESS
- struct nf_hook_entry __rcu *nf_hooks_ingress;
+ struct nf_hook_entries __rcu *nf_hooks_ingress;
#endif
unsigned char broadcast[MAX_ADDR_LEN];
@@ -2308,6 +2297,7 @@ struct netdev_lag_lower_state_info {
#define NETDEV_PRECHANGEUPPER 0x001A
#define NETDEV_CHANGELOWERSTATE 0x001B
#define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C
+#define NETDEV_UDP_TUNNEL_DROP_INFO 0x001D
#define NETDEV_CHANGE_TX_QUEUE_LEN 0x001E
int register_netdevice_notifier(struct notifier_block *nb);
@@ -2423,8 +2413,8 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
struct net_device *__dev_get_by_name(struct net *net, const char *name);
int dev_alloc_name(struct net_device *dev, const char *name);
int dev_open(struct net_device *dev);
-int dev_close(struct net_device *dev);
-int dev_close_many(struct list_head *head, bool unlink);
+void dev_close(struct net_device *dev);
+void dev_close_many(struct list_head *head, bool unlink);
void dev_disable_lro(struct net_device *dev);
int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
int dev_queue_xmit(struct sk_buff *skb);
@@ -3251,6 +3241,8 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
__dev_kfree_skb_any(skb, SKB_REASON_CONSUMED);
}
+void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
int netif_rx(struct sk_buff *skb);
int netif_rx_ni(struct sk_buff *skb);
int netif_receive_skb(struct sk_buff *skb);
@@ -4021,22 +4013,22 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi
return rc;
}
-int netdev_class_create_file_ns(struct class_attribute *class_attr,
+int netdev_class_create_file_ns(const struct class_attribute *class_attr,
const void *ns);
-void netdev_class_remove_file_ns(struct class_attribute *class_attr,
+void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
const void *ns);
-static inline int netdev_class_create_file(struct class_attribute *class_attr)
+static inline int netdev_class_create_file(const struct class_attribute *class_attr)
{
return netdev_class_create_file_ns(class_attr, NULL);
}
-static inline void netdev_class_remove_file(struct class_attribute *class_attr)
+static inline void netdev_class_remove_file(const struct class_attribute *class_attr)
{
netdev_class_remove_file_ns(class_attr, NULL);
}
-extern struct kobj_ns_type_operations net_ns_type_operations;
+extern const struct kobj_ns_type_operations net_ns_type_operations;
const char *netdev_drivername(const struct net_device *dev);
@@ -4091,7 +4083,6 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
/* check flags correspondence */
BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT));
- BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT));
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 22f0810..f84bca1 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -72,25 +72,32 @@ struct nf_hook_ops {
};
struct nf_hook_entry {
- struct nf_hook_entry __rcu *next;
nf_hookfn *hook;
void *priv;
- const struct nf_hook_ops *orig_ops;
};
-static inline void
-nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops)
-{
- entry->next = NULL;
- entry->hook = ops->hook;
- entry->priv = ops->priv;
- entry->orig_ops = ops;
-}
+struct nf_hook_entries {
+ u16 num_hook_entries;
+ /* padding */
+ struct nf_hook_entry hooks[];
+
+ /* trailer: pointers to original orig_ops of each hook.
+ *
+ * This is not part of struct nf_hook_entry since it's only
+ * needed in the slow path (hook register/unregister).
+ *
+ * const struct nf_hook_ops *orig_ops[]
+ */
+};
-static inline int
-nf_hook_entry_priority(const struct nf_hook_entry *entry)
+static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e)
{
- return entry->orig_ops->priority;
+ unsigned int n = e->num_hook_entries;
+ const void *hook_end;
+
+ hook_end = &e->hooks[n]; /* this is *past* ->hooks[]! */
+
+ return (struct nf_hook_ops **)hook_end;
}
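/*
 * Illustrative sketch, not part of the patch: the single allocation
 * implied by the layout above - hooks[] followed by the orig_ops
 * trailer. my_alloc_hook_entries() is a hypothetical helper.
 */
static struct nf_hook_entries *my_alloc_hook_entries(u16 num)
{
	size_t alloc = sizeof(struct nf_hook_entries) +
		       num * sizeof(struct nf_hook_entry) +
		       num * sizeof(struct nf_hook_ops *);
	struct nf_hook_entries *e = kvzalloc(alloc, GFP_KERNEL);

	if (e)
		e->num_hook_entries = num;
	return e;
}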
static inline int
@@ -100,12 +107,6 @@ nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb,
return entry->hook(entry->priv, skb, state);
}
-static inline const struct nf_hook_ops *
-nf_hook_entry_ops(const struct nf_hook_entry *entry)
-{
- return entry->orig_ops;
-}
-
static inline void nf_hook_state_init(struct nf_hook_state *p,
unsigned int hook,
u_int8_t pf,
@@ -168,7 +169,7 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
#endif
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry *entry);
+ const struct nf_hook_entries *e, unsigned int i);
/**
* nf_hook - call a netfilter hook
@@ -182,7 +183,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
struct net_device *indev, struct net_device *outdev,
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
{
- struct nf_hook_entry *hook_head;
+ struct nf_hook_entries *hook_head;
int ret = 1;
#ifdef HAVE_JUMP_LABEL
@@ -200,7 +201,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
nf_hook_state_init(&state, hook, pf, indev, outdev,
sk, net, okfn);
- ret = nf_hook_slow(skb, &state, hook_head);
+ ret = nf_hook_slow(skb, &state, hook_head, 0);
}
rcu_read_unlock();
diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h
index 074790c..0fc458b 100644
--- a/include/linux/netfilter/xt_hashlimit.h
+++ b/include/linux/netfilter/xt_hashlimit.h
@@ -5,5 +5,6 @@
#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \
- XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES)
+ XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES |\
+ XT_HASHLIMIT_RATE_MATCH)
#endif /*_XT_HASHLIMIT_H*/
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 5947606..8d5dae1 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -17,7 +17,7 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
/* caller must hold rcu_read_lock */
static inline int nf_hook_ingress(struct sk_buff *skb)
{
- struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress);
+ struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress);
struct nf_hook_state state;
int ret;
@@ -30,7 +30,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
nf_hook_state_init(&state, NF_NETDEV_INGRESS,
NFPROTO_NETDEV, skb->dev, NULL, NULL,
dev_net(skb->dev), NULL);
- ret = nf_hook_slow(skb, &state, e);
+ ret = nf_hook_slow(skb, &state, e, 0);
if (ret == 0)
return -1;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 0bb5b21..d78cd01 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -182,6 +182,7 @@ static inline const char *phy_modes(phy_interface_t interface)
#define MII_ADDR_C45 (1<<30)
struct device;
+struct phylink;
struct sk_buff;
/*
@@ -469,11 +470,13 @@ struct phy_device {
struct mutex lock;
+ struct phylink *phylink;
struct net_device *attached_dev;
u8 mdix;
u8 mdix_ctrl;
+ void (*phy_link_change)(struct phy_device *, bool up, bool do_carrier);
void (*adjust_link)(struct net_device *dev);
};
#define to_phy_device(d) container_of(to_mdio_device(d), \
@@ -667,6 +670,24 @@ struct phy_fixup {
int (*run)(struct phy_device *phydev);
};
+const char *phy_speed_to_str(int speed);
+const char *phy_duplex_to_str(unsigned int duplex);
+
+/* A structure for mapping a particular speed and duplex
+ * combination to a particular SUPPORTED and ADVERTISED value
+ */
+struct phy_setting {
+ u32 speed;
+ u8 duplex;
+ u8 bit;
+};
+
+const struct phy_setting *
+phy_lookup_setting(int speed, int duplex, const unsigned long *mask,
+ size_t maxbit, bool exact);
+size_t phy_speeds(unsigned int *speeds, size_t size,
+ unsigned long *mask, size_t maxbit);
+
/**
* phy_read_mmd - Convenience function for reading a register
* from an MMD on a given PHY.
diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index 78bb0d7..e694d40 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -27,6 +27,8 @@ enum phy_mode {
PHY_MODE_USB_HOST,
PHY_MODE_USB_DEVICE,
PHY_MODE_USB_OTG,
+ PHY_MODE_SGMII,
+ PHY_MODE_10GKR,
};
/**
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
new file mode 100644
index 0000000..af67edd
--- /dev/null
+++ b/include/linux/phylink.h
@@ -0,0 +1,148 @@
+#ifndef NETDEV_PCS_H
+#define NETDEV_PCS_H
+
+#include <linux/phy.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+struct device_node;
+struct ethtool_cmd;
+struct net_device;
+
+enum {
+ MLO_PAUSE_NONE,
+ MLO_PAUSE_ASYM = BIT(0),
+ MLO_PAUSE_SYM = BIT(1),
+ MLO_PAUSE_RX = BIT(2),
+ MLO_PAUSE_TX = BIT(3),
+ MLO_PAUSE_TXRX_MASK = MLO_PAUSE_TX | MLO_PAUSE_RX,
+ MLO_PAUSE_AN = BIT(4),
+
+ MLO_AN_PHY = 0, /* Conventional PHY */
+ MLO_AN_FIXED, /* Fixed-link mode */
+ MLO_AN_SGMII, /* Cisco SGMII protocol */
+ MLO_AN_8023Z, /* 1000base-X protocol */
+};
+
+static inline bool phylink_autoneg_inband(unsigned int mode)
+{
+ return mode == MLO_AN_SGMII || mode == MLO_AN_8023Z;
+}
+
+struct phylink_link_state {
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising);
+ phy_interface_t interface; /* PHY_INTERFACE_xxx */
+ int speed;
+ int duplex;
+ int pause;
+ unsigned int link:1;
+ unsigned int an_enabled:1;
+ unsigned int an_complete:1;
+};
+
+struct phylink_mac_ops {
+ /**
+ * validate: validate and update the link configuration
+ * @ndev: net_device structure associated with MAC
+ * @config: configuration to validate
+ *
+ * Update the %config->supported and %config->advertised masks,
+ * clearing bits that cannot be supported.
+ *
+ * Note: the PHY may be able to transform from one connection
+ * technology to another, so, e.g., don't clear 1000BaseX just
+ * because the MAC is unable to support it. This is more about
+ * clearing unsupported speeds and duplex settings.
+ *
+ * If the %config->interface mode is %PHY_INTERFACE_MODE_1000BASEX
+ * or %PHY_INTERFACE_MODE_2500BASEX, select the appropriate mode
+ * based on %config->advertised and/or %config->speed.
+ */
+ void (*validate)(struct net_device *ndev, unsigned long *supported,
+ struct phylink_link_state *state);
+
+ /* Read the current link state from the hardware */
+ int (*mac_link_state)(struct net_device *, struct phylink_link_state *);
+
+ /* Configure the MAC */
+ /**
+ * mac_config: configure the MAC for the selected mode and state
+ * @ndev: net_device structure for the MAC
+ * @mode: one of MLO_AN_FIXED, MLO_AN_PHY, MLO_AN_8023Z, MLO_AN_SGMII
+ * @state: state structure
+ *
+ * The action performed depends on the currently selected mode:
+ *
+ * %MLO_AN_FIXED, %MLO_AN_PHY:
+ * set the specified speed, duplex, pause mode, and phy interface
+ * mode in the provided @state.
+ * %MLO_AN_8023Z:
+ * place the link in 1000base-X mode, advertising the parameters
+ * given in advertising in @state.
+ * %MLO_AN_SGMII:
+ * place the link in Cisco SGMII mode - there is no advertisement
+ * to make as the PHY communicates the speed and duplex to the
+ * MAC over the in-band control word. Configuration of the pause
+ * mode is as per MLO_AN_PHY since this is not included.
+ */
+ void (*mac_config)(struct net_device *ndev, unsigned int mode,
+ const struct phylink_link_state *state);
+
+ /**
+ * mac_an_restart: restart 802.3z BaseX autonegotiation
+ * @ndev: net_device structure for the MAC
+ */
+ void (*mac_an_restart)(struct net_device *ndev);
+
+ void (*mac_link_down)(struct net_device *, unsigned int mode);
+ void (*mac_link_up)(struct net_device *, unsigned int mode,
+ struct phy_device *);
+};
+
+struct phylink *phylink_create(struct net_device *, struct device_node *,
+ phy_interface_t iface, const struct phylink_mac_ops *ops);
+void phylink_destroy(struct phylink *);
+
+int phylink_connect_phy(struct phylink *, struct phy_device *);
+int phylink_of_phy_connect(struct phylink *, struct device_node *);
+void phylink_disconnect_phy(struct phylink *);
+
+void phylink_mac_change(struct phylink *, bool up);
+
+void phylink_start(struct phylink *);
+void phylink_stop(struct phylink *);
+
+void phylink_ethtool_get_wol(struct phylink *, struct ethtool_wolinfo *);
+int phylink_ethtool_set_wol(struct phylink *, struct ethtool_wolinfo *);
+
+int phylink_ethtool_ksettings_get(struct phylink *,
+ struct ethtool_link_ksettings *);
+int phylink_ethtool_ksettings_set(struct phylink *,
+ const struct ethtool_link_ksettings *);
+int phylink_ethtool_nway_reset(struct phylink *);
+void phylink_ethtool_get_pauseparam(struct phylink *,
+ struct ethtool_pauseparam *);
+int phylink_ethtool_set_pauseparam(struct phylink *,
+ struct ethtool_pauseparam *);
+int phylink_ethtool_get_module_info(struct phylink *, struct ethtool_modinfo *);
+int phylink_ethtool_get_module_eeprom(struct phylink *,
+ struct ethtool_eeprom *, u8 *);
+int phylink_init_eee(struct phylink *, bool);
+int phylink_get_eee_err(struct phylink *);
+int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *);
+int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *);
+int phylink_mii_ioctl(struct phylink *, struct ifreq *, int);
+
+#define phylink_zero(bm) \
+ bitmap_zero(bm, __ETHTOOL_LINK_MODE_MASK_NBITS)
+#define __phylink_do_bit(op, bm, mode) \
+ op(ETHTOOL_LINK_MODE_ ## mode ## _BIT, bm)
+
+#define phylink_set(bm, mode) __phylink_do_bit(__set_bit, bm, mode)
+#define phylink_clear(bm, mode) __phylink_do_bit(__clear_bit, bm, mode)
+#define phylink_test(bm, mode) __phylink_do_bit(test_bit, bm, mode)
+
+void phylink_set_port_modes(unsigned long *bits);
+
+#endif
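/*
 * Illustrative sketch, not part of the patch: the shape of a MAC
 * driver's validate() callback using the phylink_* bitmap helpers
 * defined above. my_mac_validate() is hypothetical; it builds the mask
 * of what the MAC can do and intersects it with what was requested.
 */
static void my_mac_validate(struct net_device *ndev, unsigned long *supported,
			    struct phylink_link_state *state)
{
	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };

	phylink_set(mask, Autoneg);
	phylink_set(mask, 100baseT_Full);
	phylink_set(mask, 1000baseT_Full);
	phylink_set_port_modes(mask);

	bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
	bitmap_and(state->advertising, state->advertising, mask,
		   __ETHTOOL_LINK_MODE_MASK_NBITS);
}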
diff --git a/include/linux/platform_data/mdio-bcm-unimac.h b/include/linux/platform_data/mdio-bcm-unimac.h
new file mode 100644
index 0000000..8a5f9f0
--- /dev/null
+++ b/include/linux/platform_data/mdio-bcm-unimac.h
@@ -0,0 +1,13 @@
+#ifndef __MDIO_BCM_UNIMAC_PDATA_H
+#define __MDIO_BCM_UNIMAC_PDATA_H
+
+struct unimac_mdio_pdata {
+ u32 phy_mask;
+ int (*wait_func)(void *data);
+ void *wait_func_data;
+ const char *bus_name;
+};
+
+#define UNIMAC_MDIO_DRV_NAME "unimac-mdio"
+
+#endif /* __MDIO_BCM_UNIMAC_PDATA_H */
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 0eef0a2..d60de4a 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -323,6 +323,7 @@ struct qed_eth_ops {
int (*configure_arfs_searcher)(struct qed_dev *cdev,
bool en_searcher);
+ int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle);
};
const struct qed_eth_ops *qed_get_eth_ops(void);
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index ef39c7f..cc646ca 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -161,6 +161,18 @@ enum qed_nvm_images {
QED_NVM_IMAGE_FCOE_CFG,
};
+struct qed_link_eee_params {
+ u32 tx_lpi_timer;
+#define QED_EEE_1G_ADV BIT(0)
+#define QED_EEE_10G_ADV BIT(1)
+
+ /* Capabilities are represented using QED_EEE_*_ADV values */
+ u8 adv_caps;
+ u8 lp_adv_caps;
+ bool enable;
+ bool tx_lpi_enable;
+};
+
enum qed_led_mode {
QED_LED_MODE_OFF,
QED_LED_MODE_ON,
@@ -172,8 +184,9 @@ enum qed_led_mode {
#define DIRECT_REG_RD(reg_addr) readl((void __iomem *)(reg_addr))
-#define QED_COALESCE_MAX 0xFF
+#define QED_COALESCE_MAX 0x1FF
#define QED_DEFAULT_RX_USECS 12
+#define QED_DEFAULT_TX_USECS 48
/* forward */
struct qed_dev;
@@ -408,6 +421,7 @@ struct qed_link_params {
#define QED_LINK_OVERRIDE_SPEED_FORCED_SPEED BIT(2)
#define QED_LINK_OVERRIDE_PAUSE_CONFIG BIT(3)
#define QED_LINK_OVERRIDE_LOOPBACK_MODE BIT(4)
+#define QED_LINK_OVERRIDE_EEE_CONFIG BIT(5)
u32 override_flags;
bool autoneg;
u32 adv_speeds;
@@ -422,6 +436,7 @@ struct qed_link_params {
#define QED_LINK_LOOPBACK_EXT BIT(3)
#define QED_LINK_LOOPBACK_MAC BIT(4)
u32 loopback_mode;
+ struct qed_link_eee_params eee;
};
struct qed_link_output {
@@ -437,6 +452,12 @@ struct qed_link_output {
u8 port; /* In PORT defs */
bool autoneg;
u32 pause_config;
+
+ /* EEE - capability & param */
+ bool eee_supported;
+ bool eee_active;
+ u8 sup_caps;
+ struct qed_link_eee_params eee;
};
struct qed_probe_params {
@@ -654,16 +675,6 @@ struct qed_common_ops {
enum qed_nvm_images type, u8 *buf, u16 len);
/**
- * @brief get_coalesce - Get coalesce parameters in usec
- *
- * @param cdev
- * @param rx_coal - Rx coalesce value in usec
- * @param tx_coal - Tx coalesce value in usec
- *
- */
- void (*get_coalesce)(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal);
-
-/**
* @brief set_coalesce - Configure Rx coalesce value in usec
*
* @param cdev
@@ -674,8 +685,8 @@ struct qed_common_ops {
*
* @return 0 on success, error otherwise.
*/
- int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
- u16 qid, u16 sb_id);
+ int (*set_coalesce)(struct qed_dev *cdev,
+ u16 rx_coal, u16 tx_coal, void *handle);
/**
* @brief set_led - Configure LED mode
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 3e57350..567ebb5 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -357,8 +357,25 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index,
unsigned new_order);
int radix_tree_join(struct radix_tree_root *, unsigned long index,
unsigned new_order, void *);
-void __rcu **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *,
- gfp_t, int end);
+
+void __rcu **idr_get_free_cmn(struct radix_tree_root *root,
+ struct radix_tree_iter *iter, gfp_t gfp,
+ unsigned long max);
+static inline void __rcu **idr_get_free(struct radix_tree_root *root,
+ struct radix_tree_iter *iter,
+ gfp_t gfp,
+ int end)
+{
+ return idr_get_free_cmn(root, iter, gfp, end > 0 ? end - 1 : INT_MAX);
+}
+
+static inline void __rcu **idr_get_free_ext(struct radix_tree_root *root,
+ struct radix_tree_iter *iter,
+ gfp_t gfp,
+ unsigned long end)
+{
+ return idr_get_free_cmn(root, iter, gfp, end - 1);
+}
enum {
RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 5d5415e..3c07e41 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -36,7 +36,8 @@ struct user_struct {
struct hlist_node uidhash_node;
kuid_t uid;
-#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
+#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \
+ defined(CONFIG_NET)
atomic_long_t locked_vm;
#endif
};
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 99e8664..82b171e 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -273,87 +273,85 @@ struct sctp_init_chunk {
/* Section 3.3.2.1. IPv4 Address Parameter (5) */
-typedef struct sctp_ipv4addr_param {
+struct sctp_ipv4addr_param {
struct sctp_paramhdr param_hdr;
- struct in_addr addr;
-} sctp_ipv4addr_param_t;
+ struct in_addr addr;
+};
/* Section 3.3.2.1. IPv6 Address Parameter (6) */
-typedef struct sctp_ipv6addr_param {
+struct sctp_ipv6addr_param {
struct sctp_paramhdr param_hdr;
struct in6_addr addr;
-} sctp_ipv6addr_param_t;
+};
/* Section 3.3.2.1 Cookie Preservative (9) */
-typedef struct sctp_cookie_preserve_param {
+struct sctp_cookie_preserve_param {
struct sctp_paramhdr param_hdr;
- __be32 lifespan_increment;
-} sctp_cookie_preserve_param_t;
+ __be32 lifespan_increment;
+};
/* Section 3.3.2.1 Host Name Address (11) */
-typedef struct sctp_hostname_param {
+struct sctp_hostname_param {
struct sctp_paramhdr param_hdr;
uint8_t hostname[0];
-} sctp_hostname_param_t;
+};
/* Section 3.3.2.1 Supported Address Types (12) */
-typedef struct sctp_supported_addrs_param {
+struct sctp_supported_addrs_param {
struct sctp_paramhdr param_hdr;
__be16 types[0];
-} sctp_supported_addrs_param_t;
-
-/* Appendix A. ECN Capable (32768) */
-typedef struct sctp_ecn_capable_param {
- struct sctp_paramhdr param_hdr;
-} sctp_ecn_capable_param_t;
+};
/* ADDIP Section 3.2.6 Adaptation Layer Indication */
-typedef struct sctp_adaptation_ind_param {
+struct sctp_adaptation_ind_param {
struct sctp_paramhdr param_hdr;
__be32 adaptation_ind;
-} sctp_adaptation_ind_param_t;
+};
/* ADDIP Section 4.2.7 Supported Extensions Parameter */
-typedef struct sctp_supported_ext_param {
+struct sctp_supported_ext_param {
struct sctp_paramhdr param_hdr;
__u8 chunks[0];
-} sctp_supported_ext_param_t;
+};
/* AUTH Section 3.1 Random */
-typedef struct sctp_random_param {
+struct sctp_random_param {
struct sctp_paramhdr param_hdr;
__u8 random_val[0];
-} sctp_random_param_t;
+};
/* AUTH Section 3.2 Chunk List */
-typedef struct sctp_chunks_param {
+struct sctp_chunks_param {
struct sctp_paramhdr param_hdr;
__u8 chunks[0];
-} sctp_chunks_param_t;
+};
/* AUTH Section 3.3 HMAC Algorithm */
-typedef struct sctp_hmac_algo_param {
+struct sctp_hmac_algo_param {
struct sctp_paramhdr param_hdr;
__be16 hmac_ids[0];
-} sctp_hmac_algo_param_t;
+};
/* RFC 2960. Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2):
* The INIT ACK chunk is used to acknowledge the initiation of an SCTP
* association.
*/
-typedef struct sctp_init_chunk sctp_initack_chunk_t;
+struct sctp_initack_chunk {
+ struct sctp_chunkhdr chunk_hdr;
+ struct sctp_inithdr init_hdr;
+};
/* Section 3.3.3.1 State Cookie (7) */
-typedef struct sctp_cookie_param {
+struct sctp_cookie_param {
struct sctp_paramhdr p;
__u8 body[0];
-} sctp_cookie_param_t;
+};
/* Section 3.3.3.1 Unrecognized Parameters (8) */
-typedef struct sctp_unrecognized_param {
+struct sctp_unrecognized_param {
struct sctp_paramhdr param_hdr;
struct sctp_paramhdr unrecognized;
-} sctp_unrecognized_param_t;
+};
@@ -365,30 +363,28 @@ typedef struct sctp_unrecognized_param {
* subsequences of DATA chunks as represented by their TSNs.
*/
-typedef struct sctp_gap_ack_block {
+struct sctp_gap_ack_block {
__be16 start;
__be16 end;
-} sctp_gap_ack_block_t;
-
-typedef __be32 sctp_dup_tsn_t;
+};
-typedef union {
- sctp_gap_ack_block_t gab;
- sctp_dup_tsn_t dup;
-} sctp_sack_variable_t;
+union sctp_sack_variable {
+ struct sctp_gap_ack_block gab;
+ __be32 dup;
+};
-typedef struct sctp_sackhdr {
+struct sctp_sackhdr {
__be32 cum_tsn_ack;
__be32 a_rwnd;
__be16 num_gap_ack_blocks;
__be16 num_dup_tsns;
- sctp_sack_variable_t variable[0];
-} sctp_sackhdr_t;
+ union sctp_sack_variable variable[0];
+};
-typedef struct sctp_sack_chunk {
+struct sctp_sack_chunk {
struct sctp_chunkhdr chunk_hdr;
- sctp_sackhdr_t sack_hdr;
-} sctp_sack_chunk_t;
+ struct sctp_sackhdr sack_hdr;
+};
/* RFC 2960. Section 3.3.5 Heartbeat Request (HEARTBEAT) (4):
@@ -398,49 +394,49 @@ typedef struct sctp_sack_chunk {
* the present association.
*/
-typedef struct sctp_heartbeathdr {
+struct sctp_heartbeathdr {
struct sctp_paramhdr info;
-} sctp_heartbeathdr_t;
+};
-typedef struct sctp_heartbeat_chunk {
+struct sctp_heartbeat_chunk {
struct sctp_chunkhdr chunk_hdr;
- sctp_heartbeathdr_t hb_hdr;
-} sctp_heartbeat_chunk_t;
+ struct sctp_heartbeathdr hb_hdr;
+};
/* For the abort and shutdown ACK we must carry the init tag in the
* common header. Just the common header is all that is needed with a
* chunk descriptor.
*/
-typedef struct sctp_abort_chunk {
+struct sctp_abort_chunk {
struct sctp_chunkhdr uh;
-} sctp_abort_chunk_t;
+};
/* For the graceful shutdown we must carry the tag (in common header)
* and the highest consecutive acking value.
*/
-typedef struct sctp_shutdownhdr {
+struct sctp_shutdownhdr {
__be32 cum_tsn_ack;
-} sctp_shutdownhdr_t;
+};
-struct sctp_shutdown_chunk_t {
+struct sctp_shutdown_chunk {
struct sctp_chunkhdr chunk_hdr;
- sctp_shutdownhdr_t shutdown_hdr;
+ struct sctp_shutdownhdr shutdown_hdr;
};
/* RFC 2960. Section 3.3.10 Operation Error (ERROR) (9) */
-typedef struct sctp_errhdr {
+struct sctp_errhdr {
__be16 cause;
__be16 length;
__u8 variable[0];
-} sctp_errhdr_t;
+};
-typedef struct sctp_operr_chunk {
+struct sctp_operr_chunk {
struct sctp_chunkhdr chunk_hdr;
- sctp_errhdr_t err_hdr;
-} sctp_operr_chunk_t;
+ struct sctp_errhdr err_hdr;
+};
/* RFC 2960 3.3.10 - Operation Error
*
@@ -461,7 +457,7 @@ typedef struct sctp_operr_chunk {
* 9 No User Data
* 10 Cookie Received While Shutting Down
*/
-typedef enum {
+enum sctp_error {
SCTP_ERROR_NO_ERROR = cpu_to_be16(0x00),
SCTP_ERROR_INV_STRM = cpu_to_be16(0x01),
@@ -516,33 +512,28 @@ typedef enum {
* 0x0105 Unsupported HMAC Identifier
*/
SCTP_ERROR_UNSUP_HMAC = cpu_to_be16(0x0105)
-} sctp_error_t;
+};
/* RFC 2960. Appendix A. Explicit Congestion Notification.
* Explicit Congestion Notification Echo (ECNE) (12)
*/
-typedef struct sctp_ecnehdr {
+struct sctp_ecnehdr {
__be32 lowest_tsn;
-} sctp_ecnehdr_t;
+};
-typedef struct sctp_ecne_chunk {
+struct sctp_ecne_chunk {
struct sctp_chunkhdr chunk_hdr;
- sctp_ecnehdr_t ence_hdr;
-} sctp_ecne_chunk_t;
+ struct sctp_ecnehdr ence_hdr;
+};
/* RFC 2960. Appendix A. Explicit Congestion Notification.
* Congestion Window Reduced (CWR) (13)
*/
-typedef struct sctp_cwrhdr {
+struct sctp_cwrhdr {
__be32 lowest_tsn;
-} sctp_cwrhdr_t;
-
-typedef struct sctp_cwr_chunk {
- struct sctp_chunkhdr chunk_hdr;
- sctp_cwrhdr_t cwr_hdr;
-} sctp_cwr_chunk_t;
+};
/* PR-SCTP
* 3.2 Forward Cumulative TSN Chunk Definition (FORWARD TSN)
@@ -638,20 +629,20 @@ struct sctp_fwdtsn_chunk {
* The ASCONF Parameter Response is used in the ASCONF-ACK to
* report status of ASCONF processing.
*/
-typedef struct sctp_addip_param {
- struct sctp_paramhdr param_hdr;
- __be32 crr_id;
-} sctp_addip_param_t;
+struct sctp_addip_param {
+ struct sctp_paramhdr param_hdr;
+ __be32 crr_id;
+};
-typedef struct sctp_addiphdr {
+struct sctp_addiphdr {
__be32 serial;
__u8 params[0];
-} sctp_addiphdr_t;
+};
-typedef struct sctp_addip_chunk {
+struct sctp_addip_chunk {
struct sctp_chunkhdr chunk_hdr;
- sctp_addiphdr_t addip_hdr;
-} sctp_addip_chunk_t;
+ struct sctp_addiphdr addip_hdr;
+};
/* AUTH
* Section 4.1 Authentication Chunk (AUTH)
@@ -702,16 +693,16 @@ typedef struct sctp_addip_chunk {
* HMAC: n bytes (unsigned integer) This hold the result of the HMAC
* calculation.
*/
-typedef struct sctp_authhdr {
+struct sctp_authhdr {
__be16 shkey_id;
__be16 hmac_id;
__u8 hmac[0];
-} sctp_authhdr_t;
+};
-typedef struct sctp_auth_chunk {
+struct sctp_auth_chunk {
struct sctp_chunkhdr chunk_hdr;
- sctp_authhdr_t auth_hdr;
-} sctp_auth_chunk_t;
+ struct sctp_authhdr auth_hdr;
+};
struct sctp_infox {
struct sctp_info *sctpinfo;
diff --git a/include/linux/seg6_local.h b/include/linux/seg6_local.h
new file mode 100644
index 0000000..ee63e76
--- /dev/null
+++ b/include/linux/seg6_local.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SEG6_LOCAL_H
+#define _LINUX_SEG6_LOCAL_H
+
+#include <uapi/linux/seg6_local.h>
+
+#endif
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
new file mode 100644
index 0000000..4a906f5
--- /dev/null
+++ b/include/linux/sfp.h
@@ -0,0 +1,434 @@
+#ifndef LINUX_SFP_H
+#define LINUX_SFP_H
+
+#include <linux/phy.h>
+
+struct __packed sfp_eeprom_base {
+ u8 phys_id;
+ u8 phys_ext_id;
+ u8 connector;
+#if defined __BIG_ENDIAN_BITFIELD
+ u8 e10g_base_er:1;
+ u8 e10g_base_lrm:1;
+ u8 e10g_base_lr:1;
+ u8 e10g_base_sr:1;
+ u8 if_1x_sx:1;
+ u8 if_1x_lx:1;
+ u8 if_1x_copper_active:1;
+ u8 if_1x_copper_passive:1;
+
+ u8 escon_mmf_1310_led:1;
+ u8 escon_smf_1310_laser:1;
+ u8 sonet_oc192_short_reach:1;
+ u8 sonet_reach_bit1:1;
+ u8 sonet_reach_bit2:1;
+ u8 sonet_oc48_long_reach:1;
+ u8 sonet_oc48_intermediate_reach:1;
+ u8 sonet_oc48_short_reach:1;
+
+ u8 unallocated_5_7:1;
+ u8 sonet_oc12_smf_long_reach:1;
+ u8 sonet_oc12_smf_intermediate_reach:1;
+ u8 sonet_oc12_short_reach:1;
+ u8 unallocated_5_3:1;
+ u8 sonet_oc3_smf_long_reach:1;
+ u8 sonet_oc3_smf_intermediate_reach:1;
+ u8 sonet_oc3_short_reach:1;
+
+ u8 e_base_px:1;
+ u8 e_base_bx10:1;
+ u8 e100_base_fx:1;
+ u8 e100_base_lx:1;
+ u8 e1000_base_t:1;
+ u8 e1000_base_cx:1;
+ u8 e1000_base_lx:1;
+ u8 e1000_base_sx:1;
+
+ u8 fc_ll_v:1;
+ u8 fc_ll_s:1;
+ u8 fc_ll_i:1;
+ u8 fc_ll_l:1;
+ u8 fc_ll_m:1;
+ u8 fc_tech_sa:1;
+ u8 fc_tech_lc:1;
+ u8 fc_tech_electrical_inter_enclosure:1;
+
+ u8 fc_tech_electrical_intra_enclosure:1;
+ u8 fc_tech_sn:1;
+ u8 fc_tech_sl:1;
+ u8 fc_tech_ll:1;
+ u8 sfp_ct_active:1;
+ u8 sfp_ct_passive:1;
+ u8 unallocated_8_1:1;
+ u8 unallocated_8_0:1;
+
+ u8 fc_media_tw:1;
+ u8 fc_media_tp:1;
+ u8 fc_media_mi:1;
+ u8 fc_media_tv:1;
+ u8 fc_media_m6:1;
+ u8 fc_media_m5:1;
+ u8 unallocated_9_1:1;
+ u8 fc_media_sm:1;
+
+ u8 fc_speed_1200:1;
+ u8 fc_speed_800:1;
+ u8 fc_speed_1600:1;
+ u8 fc_speed_400:1;
+ u8 fc_speed_3200:1;
+ u8 fc_speed_200:1;
+ u8 unallocated_10_1:1;
+ u8 fc_speed_100:1;
+#elif defined __LITTLE_ENDIAN_BITFIELD
+ u8 if_1x_copper_passive:1;
+ u8 if_1x_copper_active:1;
+ u8 if_1x_lx:1;
+ u8 if_1x_sx:1;
+ u8 e10g_base_sr:1;
+ u8 e10g_base_lr:1;
+ u8 e10g_base_lrm:1;
+ u8 e10g_base_er:1;
+
+ u8 sonet_oc3_short_reach:1;
+ u8 sonet_oc3_smf_intermediate_reach:1;
+ u8 sonet_oc3_smf_long_reach:1;
+ u8 unallocated_5_3:1;
+ u8 sonet_oc12_short_reach:1;
+ u8 sonet_oc12_smf_intermediate_reach:1;
+ u8 sonet_oc12_smf_long_reach:1;
+ u8 unallocated_5_7:1;
+
+ u8 sonet_oc48_short_reach:1;
+ u8 sonet_oc48_intermediate_reach:1;
+ u8 sonet_oc48_long_reach:1;
+ u8 sonet_reach_bit2:1;
+ u8 sonet_reach_bit1:1;
+ u8 sonet_oc192_short_reach:1;
+ u8 escon_smf_1310_laser:1;
+ u8 escon_mmf_1310_led:1;
+
+ u8 e1000_base_sx:1;
+ u8 e1000_base_lx:1;
+ u8 e1000_base_cx:1;
+ u8 e1000_base_t:1;
+ u8 e100_base_lx:1;
+ u8 e100_base_fx:1;
+ u8 e_base_bx10:1;
+ u8 e_base_px:1;
+
+ u8 fc_tech_electrical_inter_enclosure:1;
+ u8 fc_tech_lc:1;
+ u8 fc_tech_sa:1;
+ u8 fc_ll_m:1;
+ u8 fc_ll_l:1;
+ u8 fc_ll_i:1;
+ u8 fc_ll_s:1;
+ u8 fc_ll_v:1;
+
+ u8 unallocated_8_0:1;
+ u8 unallocated_8_1:1;
+ u8 sfp_ct_passive:1;
+ u8 sfp_ct_active:1;
+ u8 fc_tech_ll:1;
+ u8 fc_tech_sl:1;
+ u8 fc_tech_sn:1;
+ u8 fc_tech_electrical_intra_enclosure:1;
+
+ u8 fc_media_sm:1;
+ u8 unallocated_9_1:1;
+ u8 fc_media_m5:1;
+ u8 fc_media_m6:1;
+ u8 fc_media_tv:1;
+ u8 fc_media_mi:1;
+ u8 fc_media_tp:1;
+ u8 fc_media_tw:1;
+
+ u8 fc_speed_100:1;
+ u8 unallocated_10_1:1;
+ u8 fc_speed_200:1;
+ u8 fc_speed_3200:1;
+ u8 fc_speed_400:1;
+ u8 fc_speed_1600:1;
+ u8 fc_speed_800:1;
+ u8 fc_speed_1200:1;
+#else
+#error Unknown Endian
+#endif
+ u8 encoding;
+ u8 br_nominal;
+ u8 rate_id;
+ u8 link_len[6];
+ char vendor_name[16];
+ u8 extended_cc;
+ char vendor_oui[3];
+ char vendor_pn[16];
+ char vendor_rev[4];
+ union {
+ __be16 optical_wavelength;
+ u8 cable_spec;
+ };
+ u8 reserved62;
+ u8 cc_base;
+};
+
+struct __packed sfp_eeprom_ext {
+ __be16 options;
+ u8 br_max;
+ u8 br_min;
+ char vendor_sn[16];
+ char datecode[8];
+ u8 diagmon;
+ u8 enhopts;
+ u8 sff8472_compliance;
+ u8 cc_ext;
+};
+
+struct __packed sfp_eeprom_id {
+ struct sfp_eeprom_base base;
+ struct sfp_eeprom_ext ext;
+};
+
+/* SFP EEPROM registers */
+enum {
+ SFP_PHYS_ID = 0x00,
+ SFP_PHYS_EXT_ID = 0x01,
+ SFP_CONNECTOR = 0x02,
+ SFP_COMPLIANCE = 0x03,
+ SFP_ENCODING = 0x0b,
+ SFP_BR_NOMINAL = 0x0c,
+ SFP_RATE_ID = 0x0d,
+ SFP_LINK_LEN_SM_KM = 0x0e,
+ SFP_LINK_LEN_SM_100M = 0x0f,
+ SFP_LINK_LEN_50UM_OM2_10M = 0x10,
+ SFP_LINK_LEN_62_5UM_OM1_10M = 0x11,
+ SFP_LINK_LEN_COPPER_1M = 0x12,
+ SFP_LINK_LEN_50UM_OM4_10M = 0x12,
+ SFP_LINK_LEN_50UM_OM3_10M = 0x13,
+ SFP_VENDOR_NAME = 0x14,
+ SFP_VENDOR_OUI = 0x25,
+ SFP_VENDOR_PN = 0x28,
+ SFP_VENDOR_REV = 0x38,
+ SFP_OPTICAL_WAVELENGTH_MSB = 0x3c,
+ SFP_OPTICAL_WAVELENGTH_LSB = 0x3d,
+ SFP_CABLE_SPEC = 0x3c,
+ SFP_CC_BASE = 0x3f,
+ SFP_OPTIONS = 0x40, /* 2 bytes, MSB, LSB */
+ SFP_BR_MAX = 0x42,
+ SFP_BR_MIN = 0x43,
+ SFP_VENDOR_SN = 0x44,
+ SFP_DATECODE = 0x54,
+ SFP_DIAGMON = 0x5c,
+ SFP_ENHOPTS = 0x5d,
+ SFP_SFF8472_COMPLIANCE = 0x5e,
+ SFP_CC_EXT = 0x5f,
+
+ SFP_PHYS_ID_SFP = 0x03,
+ SFP_PHYS_EXT_ID_SFP = 0x04,
+ SFP_CONNECTOR_UNSPEC = 0x00,
+ /* codes 01-05 not supportable on SFP, but some modules have single SC */
+ SFP_CONNECTOR_SC = 0x01,
+ SFP_CONNECTOR_FIBERJACK = 0x06,
+ SFP_CONNECTOR_LC = 0x07,
+ SFP_CONNECTOR_MT_RJ = 0x08,
+ SFP_CONNECTOR_MU = 0x09,
+ SFP_CONNECTOR_SG = 0x0a,
+ SFP_CONNECTOR_OPTICAL_PIGTAIL = 0x0b,
+ SFP_CONNECTOR_MPO_1X12 = 0x0c,
+ SFP_CONNECTOR_MPO_2X16 = 0x0d,
+ SFP_CONNECTOR_HSSDC_II = 0x20,
+ SFP_CONNECTOR_COPPER_PIGTAIL = 0x21,
+ SFP_CONNECTOR_RJ45 = 0x22,
+ SFP_CONNECTOR_NOSEPARATE = 0x23,
+ SFP_CONNECTOR_MXC_2X16 = 0x24,
+ SFP_ENCODING_UNSPEC = 0x00,
+ SFP_ENCODING_8B10B = 0x01,
+ SFP_ENCODING_4B5B = 0x02,
+ SFP_ENCODING_NRZ = 0x03,
+ SFP_ENCODING_8472_MANCHESTER = 0x04,
+ SFP_ENCODING_8472_SONET = 0x05,
+ SFP_ENCODING_8472_64B66B = 0x06,
+ SFP_ENCODING_256B257B = 0x07,
+ SFP_ENCODING_PAM4 = 0x08,
+ SFP_OPTIONS_HIGH_POWER_LEVEL = BIT(13),
+ SFP_OPTIONS_PAGING_A2 = BIT(12),
+ SFP_OPTIONS_RETIMER = BIT(11),
+ SFP_OPTIONS_COOLED_XCVR = BIT(10),
+ SFP_OPTIONS_POWER_DECL = BIT(9),
+ SFP_OPTIONS_RX_LINEAR_OUT = BIT(8),
+ SFP_OPTIONS_RX_DECISION_THRESH = BIT(7),
+ SFP_OPTIONS_TUNABLE_TX = BIT(6),
+ SFP_OPTIONS_RATE_SELECT = BIT(5),
+ SFP_OPTIONS_TX_DISABLE = BIT(4),
+ SFP_OPTIONS_TX_FAULT = BIT(3),
+ SFP_OPTIONS_LOS_INVERTED = BIT(2),
+ SFP_OPTIONS_LOS_NORMAL = BIT(1),
+ SFP_DIAGMON_DDM = BIT(6),
+ SFP_DIAGMON_INT_CAL = BIT(5),
+ SFP_DIAGMON_EXT_CAL = BIT(4),
+ SFP_DIAGMON_RXPWR_AVG = BIT(3),
+ SFP_DIAGMON_ADDRMODE = BIT(2),
+ SFP_ENHOPTS_ALARMWARN = BIT(7),
+ SFP_ENHOPTS_SOFT_TX_DISABLE = BIT(6),
+ SFP_ENHOPTS_SOFT_TX_FAULT = BIT(5),
+ SFP_ENHOPTS_SOFT_RX_LOS = BIT(4),
+ SFP_ENHOPTS_SOFT_RATE_SELECT = BIT(3),
+ SFP_ENHOPTS_APP_SELECT_SFF8079 = BIT(2),
+ SFP_ENHOPTS_SOFT_RATE_SFF8431 = BIT(1),
+ SFP_SFF8472_COMPLIANCE_NONE = 0x00,
+ SFP_SFF8472_COMPLIANCE_REV9_3 = 0x01,
+ SFP_SFF8472_COMPLIANCE_REV9_5 = 0x02,
+ SFP_SFF8472_COMPLIANCE_REV10_2 = 0x03,
+ SFP_SFF8472_COMPLIANCE_REV10_4 = 0x04,
+ SFP_SFF8472_COMPLIANCE_REV11_0 = 0x05,
+ SFP_SFF8472_COMPLIANCE_REV11_3 = 0x06,
+ SFP_SFF8472_COMPLIANCE_REV11_4 = 0x07,
+ SFP_SFF8472_COMPLIANCE_REV12_0 = 0x08,
+};
+
+/* SFP Diagnostics */
+enum {
+ /* Alarms and warnings are stored MSB at the lower address, then the LSB */
+ SFP_TEMP_HIGH_ALARM = 0x00,
+ SFP_TEMP_LOW_ALARM = 0x02,
+ SFP_TEMP_HIGH_WARN = 0x04,
+ SFP_TEMP_LOW_WARN = 0x06,
+ SFP_VOLT_HIGH_ALARM = 0x08,
+ SFP_VOLT_LOW_ALARM = 0x0a,
+ SFP_VOLT_HIGH_WARN = 0x0c,
+ SFP_VOLT_LOW_WARN = 0x0e,
+ SFP_BIAS_HIGH_ALARM = 0x10,
+ SFP_BIAS_LOW_ALARM = 0x12,
+ SFP_BIAS_HIGH_WARN = 0x14,
+ SFP_BIAS_LOW_WARN = 0x16,
+ SFP_TXPWR_HIGH_ALARM = 0x18,
+ SFP_TXPWR_LOW_ALARM = 0x1a,
+ SFP_TXPWR_HIGH_WARN = 0x1c,
+ SFP_TXPWR_LOW_WARN = 0x1e,
+ SFP_RXPWR_HIGH_ALARM = 0x20,
+ SFP_RXPWR_LOW_ALARM = 0x22,
+ SFP_RXPWR_HIGH_WARN = 0x24,
+ SFP_RXPWR_LOW_WARN = 0x26,
+ SFP_LASER_TEMP_HIGH_ALARM = 0x28,
+ SFP_LASER_TEMP_LOW_ALARM = 0x2a,
+ SFP_LASER_TEMP_HIGH_WARN = 0x2c,
+ SFP_LASER_TEMP_LOW_WARN = 0x2e,
+ SFP_TEC_CUR_HIGH_ALARM = 0x30,
+ SFP_TEC_CUR_LOW_ALARM = 0x32,
+ SFP_TEC_CUR_HIGH_WARN = 0x34,
+ SFP_TEC_CUR_LOW_WARN = 0x36,
+ SFP_CAL_RXPWR4 = 0x38,
+ SFP_CAL_RXPWR3 = 0x3c,
+ SFP_CAL_RXPWR2 = 0x40,
+ SFP_CAL_RXPWR1 = 0x44,
+ SFP_CAL_RXPWR0 = 0x48,
+ SFP_CAL_TXI_SLOPE = 0x4c,
+ SFP_CAL_TXI_OFFSET = 0x4e,
+ SFP_CAL_TXPWR_SLOPE = 0x50,
+ SFP_CAL_TXPWR_OFFSET = 0x52,
+ SFP_CAL_T_SLOPE = 0x54,
+ SFP_CAL_T_OFFSET = 0x56,
+ SFP_CAL_V_SLOPE = 0x58,
+ SFP_CAL_V_OFFSET = 0x5a,
+ SFP_CHKSUM = 0x5f,
+
+ SFP_TEMP = 0x60,
+ SFP_VCC = 0x62,
+ SFP_TX_BIAS = 0x64,
+ SFP_TX_POWER = 0x66,
+ SFP_RX_POWER = 0x68,
+ SFP_LASER_TEMP = 0x6a,
+ SFP_TEC_CUR = 0x6c,
+
+ SFP_STATUS = 0x6e,
+ SFP_ALARM = 0x70,
+
+ SFP_EXT_STATUS = 0x76,
+ SFP_VSL = 0x78,
+ SFP_PAGE = 0x7f,
+};
+
+struct device_node;
+struct ethtool_eeprom;
+struct ethtool_modinfo;
+struct net_device;
+struct sfp_bus;
+
+struct sfp_upstream_ops {
+ int (*module_insert)(void *, const struct sfp_eeprom_id *id);
+ void (*module_remove)(void *);
+ void (*link_down)(void *);
+ void (*link_up)(void *);
+ int (*connect_phy)(void *, struct phy_device *);
+ void (*disconnect_phy)(void *);
+};
+
+#if IS_ENABLED(CONFIG_SFP)
+int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
+ unsigned long *support);
+phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id);
+void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
+ unsigned long *support);
+
+int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
+int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
+ u8 *data);
+void sfp_upstream_start(struct sfp_bus *bus);
+void sfp_upstream_stop(struct sfp_bus *bus);
+struct sfp_bus *sfp_register_upstream(struct device_node *np,
+ struct net_device *ndev, void *upstream,
+ const struct sfp_upstream_ops *ops);
+void sfp_unregister_upstream(struct sfp_bus *bus);
+#else
+static inline int sfp_parse_port(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *support)
+{
+ return PORT_OTHER;
+}
+
+static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id)
+{
+ return PHY_INTERFACE_MODE_NA;
+}
+
+static inline void sfp_parse_support(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id,
+ unsigned long *support)
+{
+}
+
+static inline int sfp_get_module_info(struct sfp_bus *bus,
+ struct ethtool_modinfo *modinfo)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int sfp_get_module_eeprom(struct sfp_bus *bus,
+ struct ethtool_eeprom *ee, u8 *data)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void sfp_upstream_start(struct sfp_bus *bus)
+{
+}
+
+static inline void sfp_upstream_stop(struct sfp_bus *bus)
+{
+}
+
+static inline struct sfp_bus *sfp_register_upstream(struct device_node *np,
+ struct net_device *ndev, void *upstream,
+ const struct sfp_upstream_ops *ops)
+{
+ return (struct sfp_bus *)-1;
+}
+
+static inline void sfp_unregister_upstream(struct sfp_bus *bus)
+{
+}
+#endif
+
+#endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d67a818..f751f3b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -22,6 +22,7 @@
#include <linux/cache.h>
#include <linux/rbtree.h>
#include <linux/socket.h>
+#include <linux/refcount.h>
#include <linux/atomic.h>
#include <asm/types.h>
@@ -345,6 +346,42 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
frag->size -= delta;
}
+static inline bool skb_frag_must_loop(struct page *p)
+{
+#if defined(CONFIG_HIGHMEM)
+ if (PageHighMem(p))
+ return true;
+#endif
+ return false;
+}
+
+/**
+ * skb_frag_foreach_page - loop over pages in a fragment
+ *
+ * @f: skb frag to operate on
+ * @f_off: offset from start of f->page.p
+ * @f_len: length from f_off to loop over
+ * @p: (temp var) current page
+ * @p_off: (temp var) offset from start of current page,
+ * non-zero only on first page.
+ * @p_len: (temp var) length in current page,
+ * < PAGE_SIZE only on first and last page.
+ * @copied: (temp var) length so far, excluding current p_len.
+ *
+ * A fragment can hold a compound page, in which case per-page
+ * operations, notably kmap_atomic, must be called for each
+ * regular page.
+ */
+#define skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied) \
+ for (p = skb_frag_page(f) + ((f_off) >> PAGE_SHIFT), \
+ p_off = (f_off) & (PAGE_SIZE - 1), \
+ p_len = skb_frag_must_loop(p) ? \
+ min_t(u32, f_len, PAGE_SIZE - p_off) : f_len, \
+ copied = 0; \
+ copied < f_len; \
+ copied += p_len, p++, p_off = 0, \
+ p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \
+
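A usage sketch for the macro above, modeled on the copy loops this series converts: each regular page of a (possibly highmem, possibly compound) fragment page is mapped and copied individually. dst is assumed to be a plain kernel buffer.

    skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
    struct page *p;
    u32 p_off, p_len, copied;

    skb_frag_foreach_page(frag, frag->page_offset, skb_frag_size(frag),
                          p, p_off, p_len, copied) {
            void *vaddr = kmap_atomic(p);

            memcpy(dst + copied, vaddr + p_off, p_len);
            kunmap_atomic(vaddr);
    }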
#define HAVE_HW_TIME_STAMP
/**
@@ -393,6 +430,7 @@ enum {
SKBTX_SCHED_TSTAMP = 1 << 6,
};
+#define SKBTX_ZEROCOPY_FRAG (SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG)
#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \
SKBTX_SCHED_TSTAMP)
#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
@@ -407,10 +445,46 @@ enum {
*/
struct ubuf_info {
void (*callback)(struct ubuf_info *, bool zerocopy_success);
- void *ctx;
- unsigned long desc;
+ union {
+ struct {
+ unsigned long desc;
+ void *ctx;
+ };
+ struct {
+ u32 id;
+ u16 len;
+ u16 zerocopy:1;
+ u32 bytelen;
+ };
+ };
+ refcount_t refcnt;
+
+ struct mmpin {
+ struct user_struct *user;
+ unsigned int num_pg;
+ } mmp;
};
+#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+
+struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
+struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
+ struct ubuf_info *uarg);
+
+static inline void sock_zerocopy_get(struct ubuf_info *uarg)
+{
+ refcount_inc(&uarg->refcnt);
+}
+
+void sock_zerocopy_put(struct ubuf_info *uarg);
+void sock_zerocopy_put_abort(struct ubuf_info *uarg);
+
+void sock_zerocopy_callback(struct ubuf_info *uarg, bool success);
+
+int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
+ struct msghdr *msg, int len,
+ struct ubuf_info *uarg);
+
/* This data is invariant across clones and lives at
* the end of the header data, ie. at skb->end.
*/
@@ -463,39 +537,38 @@ enum {
enum {
SKB_GSO_TCPV4 = 1 << 0,
- SKB_GSO_UDP = 1 << 1,
/* This indicates the skb is from an untrusted source. */
- SKB_GSO_DODGY = 1 << 2,
+ SKB_GSO_DODGY = 1 << 1,
/* This indicates the tcp segment has CWR set. */
- SKB_GSO_TCP_ECN = 1 << 3,
+ SKB_GSO_TCP_ECN = 1 << 2,
- SKB_GSO_TCP_FIXEDID = 1 << 4,
+ SKB_GSO_TCP_FIXEDID = 1 << 3,
- SKB_GSO_TCPV6 = 1 << 5,
+ SKB_GSO_TCPV6 = 1 << 4,
- SKB_GSO_FCOE = 1 << 6,
+ SKB_GSO_FCOE = 1 << 5,
- SKB_GSO_GRE = 1 << 7,
+ SKB_GSO_GRE = 1 << 6,
- SKB_GSO_GRE_CSUM = 1 << 8,
+ SKB_GSO_GRE_CSUM = 1 << 7,
- SKB_GSO_IPXIP4 = 1 << 9,
+ SKB_GSO_IPXIP4 = 1 << 8,
- SKB_GSO_IPXIP6 = 1 << 10,
+ SKB_GSO_IPXIP6 = 1 << 9,
- SKB_GSO_UDP_TUNNEL = 1 << 11,
+ SKB_GSO_UDP_TUNNEL = 1 << 10,
- SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12,
+ SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
- SKB_GSO_PARTIAL = 1 << 13,
+ SKB_GSO_PARTIAL = 1 << 12,
- SKB_GSO_TUNNEL_REMCSUM = 1 << 14,
+ SKB_GSO_TUNNEL_REMCSUM = 1 << 13,
- SKB_GSO_SCTP = 1 << 15,
+ SKB_GSO_SCTP = 1 << 14,
- SKB_GSO_ESP = 1 << 16,
+ SKB_GSO_ESP = 1 << 15,
};
#if BITS_PER_LONG > 32
@@ -945,12 +1018,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
}
-struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
-static inline struct sk_buff *alloc_skb_head(gfp_t priority)
-{
- return __alloc_skb_head(priority, -1);
-}
-
struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
@@ -1145,8 +1212,6 @@ static inline __u32 skb_get_hash(struct sk_buff *skb)
return skb->hash;
}
-__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6);
-
static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
{
if (!skb->l4_hash && !skb->sw_hash) {
@@ -1159,20 +1224,6 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6
return skb->hash;
}
-__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl);
-
-static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
-{
- if (!skb->l4_hash && !skb->sw_hash) {
- struct flow_keys keys;
- __u32 hash = __get_hash_from_flowi4(fl4, &keys);
-
- __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
- }
-
- return skb->hash;
-}
-
__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb);
static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
@@ -1217,6 +1268,50 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
return &skb_shinfo(skb)->hwtstamps;
}
+static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
+{
+ bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY;
+
+ return is_zcopy ? skb_uarg(skb) : NULL;
+}
+
+static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
+{
+ if (skb && uarg && !skb_zcopy(skb)) {
+ sock_zerocopy_get(uarg);
+ skb_shinfo(skb)->destructor_arg = uarg;
+ skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
+ }
+}
+
+/* Release a reference on a zerocopy structure */
+static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
+{
+ struct ubuf_info *uarg = skb_zcopy(skb);
+
+ if (uarg) {
+ if (uarg->callback == sock_zerocopy_callback) {
+ uarg->zerocopy = uarg->zerocopy && zerocopy;
+ sock_zerocopy_put(uarg);
+ } else {
+ uarg->callback(uarg, zerocopy);
+ }
+
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
+ }
+}
+
+/* Abort a zerocopy operation and revert zckey on error in send syscall */
+static inline void skb_zcopy_abort(struct sk_buff *skb)
+{
+ struct ubuf_info *uarg = skb_zcopy(skb);
+
+ if (uarg) {
+ sock_zerocopy_put_abort(uarg);
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
+ }
+}
+
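How these pair up on a stream send path, as a sketch modeled on the TCP MSG_ZEROCOPY usage elsewhere in this series (error handling abbreviated): the uarg returned by sock_zerocopy_realloc() carries the caller's reference, each attached skb takes its own, and skb_zcopy_clear() drops it when the skb is consumed.

    struct ubuf_info *uarg = NULL;

    if (flags & MSG_ZEROCOPY && size) {
            uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
            if (!uarg)
                    return -ENOBUFS;
    }
    ...
    err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
    ...
    sock_zerocopy_put(uarg);        /* drop the sendmsg reference; each skb
                                     * keeps its own until consumed */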
/**
* skb_queue_empty - check if a queue is empty
* @list: queue head
@@ -1799,13 +1894,18 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb)
return skb->len - skb->data_len;
}
-static inline unsigned int skb_pagelen(const struct sk_buff *skb)
+static inline unsigned int __skb_pagelen(const struct sk_buff *skb)
{
unsigned int i, len = 0;
for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--)
len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
- return len + skb_headlen(skb);
+ return len;
+}
+
+static inline unsigned int skb_pagelen(const struct sk_buff *skb)
+{
+ return skb_headlen(skb) + __skb_pagelen(skb);
}
/**
@@ -2450,7 +2550,17 @@ static inline void skb_orphan(struct sk_buff *skb)
*/
static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
{
- if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)))
+ if (likely(!skb_zcopy(skb)))
+ return 0;
+ if (skb_uarg(skb)->callback == sock_zerocopy_callback)
+ return 0;
+ return skb_copy_ubufs(skb, gfp_mask);
+}
+
+/* Frags must be orphaned, even if refcounted, if skb might loop to rx path */
+static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask)
+{
+ if (likely(!skb_zcopy(skb)))
return 0;
return skb_copy_ubufs(skb, gfp_mask);
}
@@ -2899,6 +3009,8 @@ static inline int skb_add_data(struct sk_buff *skb,
static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
const struct page *page, int off)
{
+ if (skb_zcopy(skb))
+ return false;
if (i) {
const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
@@ -3153,6 +3265,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int len,
unsigned int flags);
+int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
+ int len);
+int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len);
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
diff --git a/include/linux/soc/ti/knav_dma.h b/include/linux/soc/ti/knav_dma.h
index 2b78826..66693bc 100644
--- a/include/linux/soc/ti/knav_dma.h
+++ b/include/linux/soc/ti/knav_dma.h
@@ -17,6 +17,8 @@
#ifndef __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__
#define __SOC_TI_KEYSTONE_NAVIGATOR_DMA_H__
+#include <linux/dmaengine.h>
+
/*
* PKTDMA descriptor manipulation macros for host packet descriptor
*/
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 8b13db5..8ad963c 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -287,6 +287,7 @@ struct ucred {
#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */
#define MSG_EOF MSG_FIN
+#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */
#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file
descriptor received through
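
From userspace the flag is opt-in, and completions arrive on the socket error queue rather than inline; a sketch (see msg_zerocopy.rst in this series for the full protocol):

    #include <sys/socket.h>
    #include <linux/errqueue.h>

    int one = 1;

    setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
    send(fd, buf, len, MSG_ZEROCOPY);

    /* later: reap the completion notification */
    char control[CMSG_SPACE(sizeof(struct sock_extended_err))];
    struct msghdr msg = { .msg_control = control,
                          .msg_controllen = sizeof(control) };

    if (recvmsg(fd, &msg, MSG_ERRQUEUE) >= 0) {
            struct sock_extended_err *serr =
                    (void *)CMSG_DATA(CMSG_FIRSTHDR(&msg));

            /* serr->ee_origin == SO_EE_ORIGIN_ZEROCOPY; the sends
             * numbered [ee_info, ee_data] have completed */
    }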
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d4dfac8..88951b7 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -173,8 +173,20 @@ extern struct trace_event_functions exit_syscall_print_funcs;
static struct syscall_metadata __used \
__attribute__((section("__syscalls_metadata"))) \
*__p_syscall_meta_##sname = &__syscall_meta_##sname;
+
+static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
+{
+ return tp_event->class == &event_class_syscall_enter ||
+ tp_event->class == &event_class_syscall_exit;
+}
+
#else
#define SYSCALL_METADATA(sname, nb, ...)
+
+static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
+{
+ return 0;
+}
#endif
#define SYSCALL_DEFINE0(sname) \
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 542ca1a..4aa40ef 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -192,15 +192,6 @@ struct tcp_sock {
struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
- /* Data for direct copy to user */
- struct {
- struct sk_buff_head prequeue;
- struct task_struct *task;
- struct msghdr *msg;
- int memory;
- int len;
- } ucopy;
-
u32 snd_wl1; /* Sequence for window update */
u32 snd_wnd; /* The window we expect to receive */
u32 max_window; /* Maximal window ever seen from peer */
@@ -273,7 +264,7 @@ struct tcp_sock {
u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
u32 snd_cwnd_used;
u32 snd_cwnd_stamp;
- u32 prior_cwnd; /* Congestion window at start of Recovery. */
+ u32 prior_cwnd; /* cwnd right before starting loss recovery */
u32 prr_delivered; /* Number of newly delivered packets to
* receiver in Recovery. */
u32 prr_out; /* Total number of pkts sent during Recovery. */
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
new file mode 100644
index 0000000..0d2d3da
--- /dev/null
+++ b/include/linux/tnum.h
@@ -0,0 +1,81 @@
+/* tnum: tracked (or tristate) numbers
+ *
+ * A tnum tracks knowledge about the bits of a value. Each bit can be either
+ * known (0 or 1), or unknown (x). Arithmetic operations on tnums will
+ * propagate the unknown bits such that the tnum result represents all the
+ * possible results for possible values of the operands.
+ */
+#include <linux/types.h>
+
+struct tnum {
+ u64 value;
+ u64 mask;
+};
+
+/* Constructors */
+/* Represent a known constant as a tnum. */
+struct tnum tnum_const(u64 value);
+/* A completely unknown value */
+extern const struct tnum tnum_unknown;
+/* A value that's unknown except that @min <= value <= @max */
+struct tnum tnum_range(u64 min, u64 max);
+
+/* Arithmetic and logical ops */
+/* Shift a tnum left (by a fixed shift) */
+struct tnum tnum_lshift(struct tnum a, u8 shift);
+/* Shift a tnum right (by a fixed shift) */
+struct tnum tnum_rshift(struct tnum a, u8 shift);
+/* Add two tnums, return @a + @b */
+struct tnum tnum_add(struct tnum a, struct tnum b);
+/* Subtract two tnums, return @a - @b */
+struct tnum tnum_sub(struct tnum a, struct tnum b);
+/* Bitwise-AND, return @a & @b */
+struct tnum tnum_and(struct tnum a, struct tnum b);
+/* Bitwise-OR, return @a | @b */
+struct tnum tnum_or(struct tnum a, struct tnum b);
+/* Bitwise-XOR, return @a ^ @b */
+struct tnum tnum_xor(struct tnum a, struct tnum b);
+/* Multiply two tnums, return @a * @b */
+struct tnum tnum_mul(struct tnum a, struct tnum b);
+
+/* Return a tnum representing numbers satisfying both @a and @b */
+struct tnum tnum_intersect(struct tnum a, struct tnum b);
+
+/* Return @a with all but the lowest @size bytes cleared */
+struct tnum tnum_cast(struct tnum a, u8 size);
+
+/* Returns true if @a is a known constant */
+static inline bool tnum_is_const(struct tnum a)
+{
+ return !a.mask;
+}
+
+/* Returns true if @a == tnum_const(@b) */
+static inline bool tnum_equals_const(struct tnum a, u64 b)
+{
+ return tnum_is_const(a) && a.value == b;
+}
+
+/* Returns true if @a is completely unknown */
+static inline bool tnum_is_unknown(struct tnum a)
+{
+ return !~a.mask;
+}
+
+/* Returns true if @a is known to be a multiple of @size.
+ * @size must be a power of two.
+ */
+bool tnum_is_aligned(struct tnum a, u64 size);
+
+/* Returns true if @b represents a subset of @a. */
+bool tnum_in(struct tnum a, struct tnum b);
+
+/* Formatting functions. These have snprintf-like semantics: they will write
+ * up to @size bytes (including the terminating NUL byte), and return the number
+ * of bytes (excluding the terminating NUL) which would have been written had
+ * sufficient space been available. (Thus tnum_sbin always returns 64.)
+ */
+/* Format a tnum as a pair of hex numbers (value; mask) */
+int tnum_strn(char *str, size_t size, struct tnum a);
+/* Format a tnum as tristate binary expansion */
+int tnum_sbin(char *str, size_t size, struct tnum a);
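
To make the semantics concrete, a hand-worked illustration (annotation only): tracking (x & 0xf0) | 0x1 for a completely unknown x. value holds the known-one bits, mask the unknown bits.

    struct tnum t;

    t = tnum_and(tnum_unknown, tnum_const(0xf0));
    /* t = {.value = 0x00, .mask = 0xf0}: low four bits known zero */
    t = tnum_or(t, tnum_const(0x1));
    /* t = {.value = 0x01, .mask = 0xf0}: bit 0 now known one */

    tnum_is_const(t);       /* false: bits 4-7 still unknown */
    tnum_is_aligned(t, 2);  /* false: bit 0 is known to be set */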
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 5209b5e..32fb046 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -18,9 +18,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
case VIRTIO_NET_HDR_GSO_TCPV6:
gso_type = SKB_GSO_TCPV6;
break;
- case VIRTIO_NET_HDR_GSO_UDP:
- gso_type = SKB_GSO_UDP;
- break;
default:
return -EINVAL;
}
@@ -73,8 +70,6 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
else if (sinfo->gso_type & SKB_GSO_TCPV6)
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
- else if (sinfo->gso_type & SKB_GSO_UDP)
- hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
else
return -EINVAL;
if (sinfo->gso_type & SKB_GSO_TCP_ECN)
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 26ffd83..8f3d5d8 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -10,12 +10,9 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
-
-struct tcf_hashinfo {
- struct hlist_head *htab;
- unsigned int hmask;
- spinlock_t lock;
- u32 index;
+struct tcf_idrinfo {
+ spinlock_t lock;
+ struct idr action_idr;
};
struct tc_action_ops;
@@ -25,9 +22,8 @@ struct tc_action {
__u32 type; /* for backward compat(TCA_OLD_COMPAT) */
__u32 order;
struct list_head list;
- struct tcf_hashinfo *hinfo;
+ struct tcf_idrinfo *idrinfo;
- struct hlist_node tcfa_head;
u32 tcfa_index;
int tcfa_refcnt;
int tcfa_bindcnt;
@@ -44,7 +40,6 @@ struct tc_action {
struct tc_cookie *act_cookie;
struct tcf_chain *goto_chain;
};
-#define tcf_head common.tcfa_head
#define tcf_index common.tcfa_index
#define tcf_refcnt common.tcfa_refcnt
#define tcf_bindcnt common.tcfa_bindcnt
@@ -57,27 +52,6 @@ struct tc_action {
#define tcf_lock common.tcfa_lock
#define tcf_rcu common.tcfa_rcu
-static inline unsigned int tcf_hash(u32 index, unsigned int hmask)
-{
- return index & hmask;
-}
-
-static inline int tcf_hashinfo_init(struct tcf_hashinfo *hf, unsigned int mask)
-{
- int i;
-
- spin_lock_init(&hf->lock);
- hf->index = 0;
- hf->hmask = mask;
- hf->htab = kzalloc((mask + 1) * sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!hf->htab)
- return -ENOMEM;
- for (i = 0; i < mask + 1; i++)
- INIT_HLIST_HEAD(&hf->htab[i]);
- return 0;
-}
-
/* Update lastuse only if needed, to avoid dirtying a cache line.
* We use a temp variable to avoid fetching jiffies twice.
*/
@@ -126,53 +100,51 @@ struct tc_action_ops {
};
struct tc_action_net {
- struct tcf_hashinfo *hinfo;
+ struct tcf_idrinfo *idrinfo;
const struct tc_action_ops *ops;
};
static inline
int tc_action_net_init(struct tc_action_net *tn,
- const struct tc_action_ops *ops, unsigned int mask)
+ const struct tc_action_ops *ops)
{
int err = 0;
- tn->hinfo = kmalloc(sizeof(*tn->hinfo), GFP_KERNEL);
- if (!tn->hinfo)
+ tn->idrinfo = kmalloc(sizeof(*tn->idrinfo), GFP_KERNEL);
+ if (!tn->idrinfo)
return -ENOMEM;
tn->ops = ops;
- err = tcf_hashinfo_init(tn->hinfo, mask);
- if (err)
- kfree(tn->hinfo);
+ spin_lock_init(&tn->idrinfo->lock);
+ idr_init(&tn->idrinfo->action_idr);
return err;
}
-void tcf_hashinfo_destroy(const struct tc_action_ops *ops,
- struct tcf_hashinfo *hinfo);
+void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
+ struct tcf_idrinfo *idrinfo);
static inline void tc_action_net_exit(struct tc_action_net *tn)
{
- tcf_hashinfo_destroy(tn->ops, tn->hinfo);
- kfree(tn->hinfo);
+ tcf_idrinfo_destroy(tn->ops, tn->idrinfo);
+ kfree(tn->idrinfo);
}
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct netlink_callback *cb, int type,
const struct tc_action_ops *ops);
-int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
-u32 tcf_hash_new_index(struct tc_action_net *tn);
-bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
+int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index);
+bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
int bind);
-int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
- struct tc_action **a, const struct tc_action_ops *ops, int bind,
- bool cpustats);
-void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est);
-void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a);
+int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
+ struct tc_action **a, const struct tc_action_ops *ops,
+ int bind, bool cpustats);
+void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est);
+void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
-int __tcf_hash_release(struct tc_action *a, bool bind, bool strict);
+int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
-static inline int tcf_hash_release(struct tc_action *a, bool bind)
+static inline int tcf_idr_release(struct tc_action *a, bool bind)
{
- return __tcf_hash_release(a, bind, false);
+ return __tcf_idr_release(a, bind, false);
}
int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
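
After this conversion, a pernet init in an action module reduces to the shape below (sketch modeled on the act_* callers updated elsewhere in the series; bpf_net_id and act_bpf_ops belong to act_bpf). The hash mask argument to tc_action_net_init() is simply gone.

    static __net_init int bpf_init_net(struct net *net)
    {
            struct tc_action_net *tn = net_generic(net, bpf_net_id);

            return tc_action_net_init(tn, &act_bpf_ops);
    }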
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index c172709..3ac7915 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -19,8 +19,22 @@ struct sock;
struct socket;
struct rxrpc_call;
+/*
+ * Call completion condition (state == RXRPC_CALL_COMPLETE).
+ */
+enum rxrpc_call_completion {
+ RXRPC_CALL_SUCCEEDED, /* - Normal termination */
+ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
+ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
+ RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */
+ RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
+ NR__RXRPC_CALL_COMPLETIONS
+};
+
typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *,
unsigned long);
+typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *,
+ unsigned long);
typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *,
unsigned long);
typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long);
@@ -37,7 +51,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
gfp_t,
rxrpc_notify_rx_t);
int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
- struct msghdr *, size_t);
+ struct msghdr *, size_t,
+ rxrpc_notify_end_tx_t);
int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
void *, size_t, size_t *, bool, u32 *);
bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
@@ -48,5 +63,9 @@ void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
+int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
+ struct sockaddr_rxrpc *, struct key *);
+int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *,
+ enum rxrpc_call_completion *, u32 *);
#endif /* _NET_RXRPC_H */
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 53b1a2c..afb37f8 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -58,7 +58,6 @@ struct unix_sock {
struct list_head link;
atomic_long_t inflight;
spinlock_t lock;
- unsigned char recursion_level;
unsigned long gc_flags;
#define UNIX_GC_CANDIDATE 0
#define UNIX_GC_MAYBE_CYCLE 1
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 0148719..020142b 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -233,7 +233,7 @@ static inline void bacpy(bdaddr_t *dst, const bdaddr_t *src)
memcpy(dst, src, sizeof(bdaddr_t));
}
-void baswap(bdaddr_t *dst, bdaddr_t *src);
+void baswap(bdaddr_t *dst, const bdaddr_t *src);
/* Common socket structures and functions */
diff --git a/include/net/devlink.h b/include/net/devlink.h
index ed7687b..b9654e1 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -178,7 +178,6 @@ struct devlink_dpipe_table_ops;
* struct devlink_dpipe_table - table object
* @priv: private
* @name: table name
- * @size: maximum number of entries
* @counters_enabled: indicates if counters are active
* @counter_control_extern: indicates if counter control is in dpipe or
* external tool
@@ -189,7 +188,6 @@ struct devlink_dpipe_table {
void *priv;
struct list_head list;
const char *name;
- u64 size;
bool counters_enabled;
bool counter_control_extern;
struct devlink_dpipe_table_ops *table_ops;
@@ -204,6 +202,7 @@ struct devlink_dpipe_table {
* @counters_set_update - when changing the counter status hardware sync
* maybe needed to allocate/free counter related
* resources
+ * @size_get - get the table's current size (maximum number of entries)
*/
struct devlink_dpipe_table_ops {
int (*actions_dump)(void *priv, struct sk_buff *skb);
@@ -211,6 +210,7 @@ struct devlink_dpipe_table_ops {
int (*entries_dump)(void *priv, bool counters_enabled,
struct devlink_dpipe_dump_ctx *dump_ctx);
int (*counters_set_update)(void *priv, bool enable);
+ u64 (*size_get)(void *priv);
};
/**
@@ -311,8 +311,7 @@ void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index);
int devlink_dpipe_table_register(struct devlink *devlink,
const char *table_name,
struct devlink_dpipe_table_ops *table_ops,
- void *priv, u64 size,
- bool counter_control_extern);
+ void *priv, bool counter_control_extern);
void devlink_dpipe_table_unregister(struct devlink *devlink,
const char *table_name);
int devlink_dpipe_headers_register(struct devlink *devlink,
@@ -324,10 +323,14 @@ int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx);
int devlink_dpipe_entry_ctx_append(struct devlink_dpipe_dump_ctx *dump_ctx,
struct devlink_dpipe_entry *entry);
int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx);
+void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry);
int devlink_dpipe_action_put(struct sk_buff *skb,
struct devlink_dpipe_action *action);
int devlink_dpipe_match_put(struct sk_buff *skb,
struct devlink_dpipe_match *match);
+extern struct devlink_dpipe_header devlink_dpipe_header_ethernet;
+extern struct devlink_dpipe_header devlink_dpipe_header_ipv4;
+extern struct devlink_dpipe_header devlink_dpipe_header_ipv6;
#else
@@ -400,8 +403,7 @@ static inline int
devlink_dpipe_table_register(struct devlink *devlink,
const char *table_name,
struct devlink_dpipe_table_ops *table_ops,
- void *priv, u64 size,
- bool counter_control_extern)
+ void *priv, bool counter_control_extern)
{
return 0;
}
@@ -447,6 +449,11 @@ devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
return 0;
}
+static inline void
+devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
+{
+}
+
static inline int
devlink_dpipe_action_put(struct sk_buff *skb,
struct devlink_dpipe_action *action)
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 58969b9..dd44d6c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -101,6 +101,14 @@ struct dsa_platform_data {
struct packet_type;
+struct dsa_device_ops {
+ struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
+ struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt);
+ int (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
+ int *offset);
+};
+
struct dsa_switch_tree {
struct list_head list;
@@ -125,8 +133,7 @@ struct dsa_switch_tree {
/* Copy of tag_ops->rcv for faster access in hot path */
struct sk_buff * (*rcv)(struct sk_buff *skb,
struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev);
+ struct packet_type *pt);
/*
* The switch port to which the CPU is attached.
@@ -236,6 +243,9 @@ struct dsa_switch {
/* devlink used to represent this switch device */
struct devlink *devlink;
+ /* Number of switch port queues */
+ unsigned int num_tx_queues;
+
/* Dynamically allocated ports, keep last */
size_t num_ports;
struct dsa_port ports[];
@@ -256,11 +266,6 @@ static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p)
return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p);
}
-static inline bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
-{
- return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev;
-}
-
static inline u8 dsa_upstream_port(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst = ds->dst;
@@ -277,6 +282,8 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds)
return ds->rtable[dst->cpu_dp->ds->index];
}
+typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
+ bool is_static, void *data);
struct dsa_switch_ops {
/*
* Legacy probing.
@@ -337,13 +344,12 @@ struct dsa_switch_ops {
struct phy_device *phy);
/*
- * EEE setttings
+ * Port's MAC EEE settings
*/
- int (*set_eee)(struct dsa_switch *ds, int port,
- struct phy_device *phydev,
- struct ethtool_eee *e);
- int (*get_eee)(struct dsa_switch *ds, int port,
- struct ethtool_eee *e);
+ int (*set_mac_eee)(struct dsa_switch *ds, int port,
+ struct ethtool_eee *e);
+ int (*get_mac_eee)(struct dsa_switch *ds, int port,
+ struct ethtool_eee *e);
/* EEPROM access */
int (*get_eeprom_len)(struct dsa_switch *ds);
@@ -384,24 +390,15 @@ struct dsa_switch_ops {
struct switchdev_trans *trans);
int (*port_vlan_del)(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_vlan *vlan);
- int (*port_vlan_dump)(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb);
-
/*
* Forwarding database
*/
- int (*port_fdb_prepare)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans);
- void (*port_fdb_add)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans);
+ int (*port_fdb_add)(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid);
int (*port_fdb_del)(struct dsa_switch *ds, int port,
- const struct switchdev_obj_port_fdb *fdb);
+ const unsigned char *addr, u16 vid);
int (*port_fdb_dump)(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb);
+ dsa_fdb_dump_cb_t *cb, void *data);
/*
* Multicast database
@@ -414,10 +411,6 @@ struct dsa_switch_ops {
struct switchdev_trans *trans);
int (*port_mdb_del)(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb);
- int (*port_mdb_dump)(struct dsa_switch *ds, int port,
- struct switchdev_obj_port_mdb *mdb,
- switchdev_obj_dump_cb_t *cb);
-
/*
* RXNFC
*/
diff --git a/include/net/dst.h b/include/net/dst.h
index f73611e..93568bd 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -14,6 +14,7 @@
#include <linux/rcupdate.h>
#include <linux/bug.h>
#include <linux/jiffies.h>
+#include <linux/refcount.h>
#include <net/neighbour.h>
#include <asm/processor.h>
@@ -107,7 +108,7 @@ struct dst_entry {
struct dst_metrics {
u32 metrics[RTAX_MAX];
- atomic_t refcnt;
+ refcount_t refcnt;
};
extern const struct dst_metrics dst_default_metrics;
diff --git a/include/net/erspan.h b/include/net/erspan.h
new file mode 100644
index 0000000..ca94fc8
--- /dev/null
+++ b/include/net/erspan.h
@@ -0,0 +1,61 @@
+#ifndef __LINUX_ERSPAN_H
+#define __LINUX_ERSPAN_H
+
+/*
+ * GRE header for ERSPAN encapsulation (8 octets, frame bytes [34:41])
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |0|0|0|1|0|00000|000000000|00000| Protocol Type for ERSPAN |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Sequence Number (increments per packet per session) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Note that in the above GRE header [RFC1701] out of the C, R, K, S,
+ * s, Recur, Flags, Version fields only S (bit 03) is set to 1. The
+ * other fields are set to zero, so only a sequence number follows.
+ *
+ * ERSPAN Type II header (8 octets [42:49])
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Ver | VLAN | COS | En|T| Session ID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Reserved | Index |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB
+ */
+
+#define ERSPAN_VERSION 0x1
+
+#define VER_MASK 0xf000
+#define VLAN_MASK 0x0fff
+#define COS_MASK 0xe000
+#define EN_MASK 0x1800
+#define T_MASK 0x0400
+#define ID_MASK 0x03ff
+#define INDEX_MASK 0xfffff
+
+enum erspan_encap_type {
+ ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */
+ ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */
+ ERSPAN_ENCAP_8021Q = 0x2, /* originally 802.1Q encapsulated */
+	ERSPAN_ENCAP_INFRAME = 0x3,	/* VLAN tag preserved in frame */
+};
+
+struct erspan_metadata {
+ __be32 index; /* type II */
+};
+
+struct erspanhdr {
+ __be16 ver_vlan;
+#define VER_OFFSET 12
+ __be16 session_id;
+#define COS_OFFSET 13
+#define EN_OFFSET 11
+#define T_OFFSET 10
+ struct erspan_metadata md;
+};
+
+#endif
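
A decoding sketch for the type II fields (annotation only; ershdr is assumed already pulled and validated), following the masks and offsets defined above:

    u16 ver_vlan = ntohs(ershdr->ver_vlan);
    u16 sid_word = ntohs(ershdr->session_id);

    u8  ver   = (ver_vlan & VER_MASK) >> VER_OFFSET;
    u16 vlan  = ver_vlan & VLAN_MASK;
    u8  cos   = (sid_word & COS_MASK) >> COS_OFFSET;
    u16 sid   = sid_word & ID_MASK;
    u32 index = ntohl(ershdr->md.index) & INDEX_MASK;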
diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h
new file mode 100644
index 0000000..669b971
--- /dev/null
+++ b/include/net/fib_notifier.h
@@ -0,0 +1,46 @@
+#ifndef __NET_FIB_NOTIFIER_H
+#define __NET_FIB_NOTIFIER_H
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <net/net_namespace.h>
+
+struct fib_notifier_info {
+ struct net *net;
+ int family;
+};
+
+enum fib_event_type {
+ FIB_EVENT_ENTRY_REPLACE,
+ FIB_EVENT_ENTRY_APPEND,
+ FIB_EVENT_ENTRY_ADD,
+ FIB_EVENT_ENTRY_DEL,
+ FIB_EVENT_RULE_ADD,
+ FIB_EVENT_RULE_DEL,
+ FIB_EVENT_NH_ADD,
+ FIB_EVENT_NH_DEL,
+};
+
+struct fib_notifier_ops {
+ int family;
+ struct list_head list;
+ unsigned int (*fib_seq_read)(struct net *net);
+ int (*fib_dump)(struct net *net, struct notifier_block *nb);
+ struct module *owner;
+ struct rcu_head rcu;
+};
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+int register_fib_notifier(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb));
+int unregister_fib_notifier(struct notifier_block *nb);
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net);
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops);
+
+#endif
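
Consumer-side sketch (my_fib_event and my_fib_dump_flush are hypothetical names): one notifier block now covers all address families, demultiplexed on info->family.

    static int my_fib_event(struct notifier_block *nb,
                            unsigned long event, void *ptr)
    {
            struct fib_notifier_info *info = ptr;

            if (info->family != AF_INET && info->family != AF_INET6)
                    return NOTIFY_DONE;

            switch (event) {
            case FIB_EVENT_ENTRY_ADD:
            case FIB_EVENT_ENTRY_DEL:
                    /* offload or unoffload the route */
                    break;
            }
            return NOTIFY_OK;
    }

    static struct notifier_block my_fib_nb = {
            .notifier_call = my_fib_event,
    };

    err = register_fib_notifier(&my_fib_nb, my_fib_dump_flush);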
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index c487bfa..3d7f1ce 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -8,6 +8,7 @@
#include <linux/refcount.h>
#include <net/flow.h>
#include <net/rtnetlink.h>
+#include <net/fib_notifier.h>
struct fib_kuid_range {
kuid_t start;
@@ -57,6 +58,7 @@ struct fib_rules_ops {
int addr_size;
int unresolved_rules;
int nr_goto_rules;
+ unsigned int fib_rules_seq;
int (*action)(struct fib_rule *,
struct flowi *, int,
@@ -89,6 +91,11 @@ struct fib_rules_ops {
struct rcu_head rcu;
};
+struct fib_rule_notifier_info {
+ struct fib_notifier_info info; /* must be first */
+ struct fib_rule *rule;
+};
+
#define FRA_GENERIC_POLICY \
[FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
[FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
@@ -143,6 +150,8 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
u32 flags);
bool fib_rule_matchall(const struct fib_rule *rule);
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family);
+unsigned int fib_rules_seq_read(struct net *net, int family);
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
diff --git a/include/net/flow.h b/include/net/flow.h
index bae198b..eb60cee3 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -149,6 +149,7 @@ struct flowi6 {
#define fl6_ipsec_spi uli.spi
#define fl6_mh_type uli.mht.type
#define fl6_gre_key uli.gre_key
+ __u32 mp_hash;
} __attribute__((__aligned__(BITS_PER_LONG/8)));
struct flowidn {
@@ -218,40 +219,6 @@ static inline unsigned int flow_key_size(u16 family)
return 0;
}
-#define FLOW_DIR_IN 0
-#define FLOW_DIR_OUT 1
-#define FLOW_DIR_FWD 2
-
-struct net;
-struct sock;
-struct flow_cache_ops;
-
-struct flow_cache_object {
- const struct flow_cache_ops *ops;
-};
-
-struct flow_cache_ops {
- struct flow_cache_object *(*get)(struct flow_cache_object *);
- int (*check)(struct flow_cache_object *);
- void (*delete)(struct flow_cache_object *);
-};
-
-typedef struct flow_cache_object *(*flow_resolve_t)(
- struct net *net, const struct flowi *key, u16 family,
- u8 dir, struct flow_cache_object *oldobj, void *ctx);
-
-struct flow_cache_object *flow_cache_lookup(struct net *net,
- const struct flowi *key, u16 family,
- u8 dir, flow_resolve_t resolver,
- void *ctx);
-int flow_cache_init(struct net *net);
-void flow_cache_fini(struct net *net);
-void flow_cache_hp_init(void);
-
-void flow_cache_flush(struct net *net);
-void flow_cache_flush_deferred(struct net *net);
-extern atomic_t flow_cache_genid;
-
__u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys);
static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6)
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index e2663e9..fc3dce7 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -19,6 +19,14 @@ struct flow_dissector_key_control {
#define FLOW_DIS_FIRST_FRAG BIT(1)
#define FLOW_DIS_ENCAPSULATION BIT(2)
+enum flow_dissect_ret {
+ FLOW_DISSECT_RET_OUT_GOOD,
+ FLOW_DISSECT_RET_OUT_BAD,
+ FLOW_DISSECT_RET_PROTO_AGAIN,
+ FLOW_DISSECT_RET_IPPROTO_AGAIN,
+ FLOW_DISSECT_RET_CONTINUE,
+};
+
/**
* struct flow_dissector_key_basic:
* @thoff: Transport header offset
diff --git a/include/net/flowcache.h b/include/net/flowcache.h
deleted file mode 100644
index 51eb971..0000000
--- a/include/net/flowcache.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _NET_FLOWCACHE_H
-#define _NET_FLOWCACHE_H
-
-#include <linux/interrupt.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/notifier.h>
-
-struct flow_cache_percpu {
- struct hlist_head *hash_table;
- unsigned int hash_count;
- u32 hash_rnd;
- int hash_rnd_recalc;
- struct tasklet_struct flush_tasklet;
-};
-
-struct flow_cache {
- u32 hash_shift;
- struct flow_cache_percpu __percpu *percpu;
- struct hlist_node node;
- unsigned int low_watermark;
- unsigned int high_watermark;
- struct timer_list rnd_timer;
-};
-#endif /* _NET_FLOWCACHE_H */
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index b87beca..6e91e38 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -49,7 +49,8 @@ struct sock *__inet6_lookup_established(struct net *net,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
- const u16 hnum, const int dif);
+ const u16 hnum, const int dif,
+ const int sdif);
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
@@ -57,7 +58,8 @@ struct sock *inet6_lookup_listener(struct net *net,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
- const unsigned short hnum, const int dif);
+ const unsigned short hnum,
+ const int dif, const int sdif);
static inline struct sock *__inet6_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
@@ -66,24 +68,25 @@ static inline struct sock *__inet6_lookup(struct net *net,
const __be16 sport,
const struct in6_addr *daddr,
const u16 hnum,
- const int dif,
+ const int dif, const int sdif,
bool *refcounted)
{
struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr,
- sport, daddr, hnum, dif);
+ sport, daddr, hnum,
+ dif, sdif);
*refcounted = true;
if (sk)
return sk;
*refcounted = false;
return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
- daddr, hnum, dif);
+ daddr, hnum, dif, sdif);
}
static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be16 sport,
const __be16 dport,
- int iif,
+ int iif, int sdif,
bool *refcounted)
{
struct sock *sk = skb_steal_sock(skb);
@@ -95,7 +98,7 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
doff, &ipv6_hdr(skb)->saddr, sport,
&ipv6_hdr(skb)->daddr, ntohs(dport),
- iif, refcounted);
+ iif, sdif, refcounted);
}
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
@@ -107,13 +110,14 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
int inet6_hash(struct sock *sk);
#endif /* IS_ENABLED(CONFIG_IPV6) */
-#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \
+#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif, __sdif) \
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_family == AF_INET6) && \
ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr)) && \
ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr)) && \
(!(__sk)->sk_bound_dev_if || \
- ((__sk)->sk_bound_dev_if == (__dif))) && \
+ ((__sk)->sk_bound_dev_if == (__dif)) || \
+ ((__sk)->sk_bound_dev_if == (__sdif))) && \
net_eq(sock_net(__sk), (__net)))
#endif /* _INET6_HASHTABLES_H */
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 6fdcd24..fc59e07 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -1,14 +1,9 @@
#ifndef __NET_FRAG_H__
#define __NET_FRAG_H__
-#include <linux/percpu_counter.h>
-
struct netns_frags {
- /* The percpu_counter "mem" need to be cacheline aligned.
- * mem.count must not share cacheline with other writers
- */
- struct percpu_counter mem ____cacheline_aligned_in_smp;
-
+ /* Keep atomic mem on separate cachelines in structs that include it */
+ atomic_t mem ____cacheline_aligned_in_smp;
/* sysctls */
int timeout;
int high_thresh;
@@ -108,15 +103,10 @@ struct inet_frags {
int inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *);
-static inline int inet_frags_init_net(struct netns_frags *nf)
-{
- return percpu_counter_init(&nf->mem, 0, GFP_KERNEL);
-}
-static inline void inet_frags_uninit_net(struct netns_frags *nf)
+static inline void inet_frags_init_net(struct netns_frags *nf)
{
- percpu_counter_destroy(&nf->mem);
+ atomic_set(&nf->mem, 0);
}
-
void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
@@ -140,31 +130,24 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q)
/* Memory Tracking Functions. */
-/* The default percpu_counter batch size is not big enough to scale to
- * fragmentation mem acct sizes.
- * The mem size of a 64K fragment is approx:
- * (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes
- */
-static unsigned int frag_percpu_counter_batch = 130000;
-
static inline int frag_mem_limit(struct netns_frags *nf)
{
- return percpu_counter_read(&nf->mem);
+ return atomic_read(&nf->mem);
}
static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
{
- percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch);
+ atomic_sub(i, &nf->mem);
}
static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
{
- percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch);
+ atomic_add(i, &nf->mem);
}
-static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
+static inline int sum_frag_mem_limit(struct netns_frags *nf)
{
- return percpu_counter_sum_positive(&nf->mem);
+ return atomic_read(&nf->mem);
}
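
The call pattern in the defrag paths is unchanged; only the backing counter type is. Typical use (sketch, e.g. the allocation-time check in inet_frag_alloc()):

    if (frag_mem_limit(nf) > nf->high_thresh)
            return NULL;                            /* over limit: drop */
    ...
    add_frag_mem_limit(nf, skb->truesize);          /* charge a fragment */
    ...
    sub_frag_mem_limit(nf, skb->truesize);          /* on reassembly/eviction */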
/* RFC 3168 support :
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 5026b1f..2dbbbff 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -221,16 +221,16 @@ struct sock *__inet_lookup_listener(struct net *net,
const __be32 saddr, const __be16 sport,
const __be32 daddr,
const unsigned short hnum,
- const int dif);
+ const int dif, const int sdif);
static inline struct sock *inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport, int dif)
+ __be32 daddr, __be16 dport, int dif, int sdif)
{
return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
- daddr, ntohs(dport), dif);
+ daddr, ntohs(dport), dif, sdif);
}
/* Socket demux engine toys. */
@@ -262,22 +262,24 @@ static inline struct sock *inet_lookup_listener(struct net *net,
(((__force __u64)(__be32)(__daddr)) << 32) | \
((__force __u64)(__be32)(__saddr)))
#endif /* __BIG_ENDIAN */
-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
+#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_addrpair == (__cookie)) && \
(!(__sk)->sk_bound_dev_if || \
- ((__sk)->sk_bound_dev_if == (__dif))) && \
+ ((__sk)->sk_bound_dev_if == (__dif)) || \
+ ((__sk)->sk_bound_dev_if == (__sdif))) && \
net_eq(sock_net(__sk), (__net)))
#else /* 32-bit arch */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
const int __name __deprecated __attribute__((unused))
-#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
+#define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
(((__sk)->sk_portpair == (__ports)) && \
((__sk)->sk_daddr == (__saddr)) && \
((__sk)->sk_rcv_saddr == (__daddr)) && \
(!(__sk)->sk_bound_dev_if || \
- ((__sk)->sk_bound_dev_if == (__dif))) && \
+ ((__sk)->sk_bound_dev_if == (__dif)) || \
+ ((__sk)->sk_bound_dev_if == (__sdif))) && \
net_eq(sock_net(__sk), (__net)))
#endif /* 64-bit arch */
@@ -288,7 +290,7 @@ struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 hnum,
- const int dif);
+ const int dif, const int sdif);
static inline struct sock *
inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo,
@@ -297,7 +299,7 @@ static inline struct sock *
const int dif)
{
return __inet_lookup_established(net, hashinfo, saddr, sport, daddr,
- ntohs(dport), dif);
+ ntohs(dport), dif, 0);
}
static inline struct sock *__inet_lookup(struct net *net,
@@ -305,20 +307,20 @@ static inline struct sock *__inet_lookup(struct net *net,
struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
- const int dif,
+ const int dif, const int sdif,
bool *refcounted)
{
u16 hnum = ntohs(dport);
struct sock *sk;
sk = __inet_lookup_established(net, hashinfo, saddr, sport,
- daddr, hnum, dif);
+ daddr, hnum, dif, sdif);
*refcounted = true;
if (sk)
return sk;
*refcounted = false;
return __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
- sport, daddr, hnum, dif);
+ sport, daddr, hnum, dif, sdif);
}
static inline struct sock *inet_lookup(struct net *net,
@@ -332,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net,
bool refcounted;
sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
- dport, dif, &refcounted);
+ dport, dif, 0, &refcounted);
if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -344,6 +346,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
int doff,
const __be16 sport,
const __be16 dport,
+ const int sdif,
bool *refcounted)
{
struct sock *sk = skb_steal_sock(skb);
@@ -355,7 +358,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
doff, iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb),
+ iph->daddr, dport, inet_iif(skb), sdif,
refcounted);
}
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index f2a215f..950ed18 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -33,18 +33,12 @@ struct inetpeer_addr {
};
struct inet_peer {
- /* group together avl_left,avl_right,v4daddr to speedup lookups */
- struct inet_peer __rcu *avl_left, *avl_right;
+ struct rb_node rb_node;
struct inetpeer_addr daddr;
- __u32 avl_height;
u32 metrics[RTAX_MAX];
u32 rate_tokens; /* rate limiting for ICMP */
unsigned long rate_last;
- union {
- struct list_head gc_list;
- struct rcu_head gc_rcu;
- };
/*
* Once inet_peer is queued for deletion (refcnt == 0), following field
* is not available: rid
@@ -55,7 +49,6 @@ struct inet_peer {
atomic_t rid; /* Frag reception counter */
};
struct rcu_head rcu;
- struct inet_peer *gc_next;
};
/* following fields might be frequently dirtied */
@@ -64,7 +57,7 @@ struct inet_peer {
};
struct inet_peer_base {
- struct inet_peer __rcu *root;
+ struct rb_root rb_root;
seqlock_t lock;
int total;
};
diff --git a/include/net/ip.h b/include/net/ip.h
index 0cf7f5a..9896f46 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -78,6 +78,16 @@ struct ipcm_cookie {
#define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
#define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb))
+/* return enslaved device index if relevant */
+static inline int inet_sdif(struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
+ return IPCB(skb)->iif;
+#endif
+ return 0;
+}
+
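Sketch of the consumer this enables (the socket-lookup signatures above grow a matching sdif argument): the rx path computes the slave device index once per packet and threads it into the demux.

    int sdif = inet_sdif(skb);

    sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
                           th->source, th->dest, sdif, &refcounted);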
struct ip_ra_chain {
struct ip_ra_chain __rcu *next;
struct sock *sk;
@@ -567,11 +577,12 @@ int ip_forward(struct sk_buff *skb);
void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
__be32 daddr, struct rtable *rt, int is_frag);
-int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
- const struct ip_options *sopt);
-static inline int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb)
+int __ip_options_echo(struct net *net, struct ip_options *dopt,
+ struct sk_buff *skb, const struct ip_options *sopt);
+static inline int ip_options_echo(struct net *net, struct ip_options *dopt,
+ struct sk_buff *skb)
{
- return __ip_options_echo(dopt, skb, &IPCB(skb)->opt);
+ return __ip_options_echo(net, dopt, skb, &IPCB(skb)->opt);
}
void ip_options_fragment(struct sk_buff *skb);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index af509f8..d060d71 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -16,10 +16,12 @@
#include <linux/ipv6_route.h>
#include <linux/rtnetlink.h>
#include <linux/spinlock.h>
+#include <linux/notifier.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/netlink.h>
#include <net/inetpeer.h>
+#include <net/fib_notifier.h>
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
#define FIB6_TABLE_HASHSZ 256
@@ -119,6 +121,8 @@ struct rt6_info {
atomic_t rt6i_ref;
+ unsigned int rt6i_nh_flags;
+
/* These are in a separate cache line. */
struct rt6key rt6i_dst ____cacheline_aligned_in_smp;
u32 rt6i_flags;
@@ -213,6 +217,22 @@ static inline void ip6_rt_put(struct rt6_info *rt)
dst_release(&rt->dst);
}
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt);
+
+static inline void rt6_hold(struct rt6_info *rt)
+{
+ atomic_inc(&rt->rt6i_ref);
+}
+
+static inline void rt6_release(struct rt6_info *rt)
+{
+ if (atomic_dec_and_test(&rt->rt6i_ref)) {
+ rt6_free_pcpu(rt);
+ dst_dev_put(&rt->dst);
+ dst_release(&rt->dst);
+ }
+}
+
enum fib6_walk_state {
#ifdef CONFIG_IPV6_SUBTREES
FWS_S,
@@ -261,6 +281,7 @@ struct fib6_table {
struct fib6_node tb6_root;
struct inet_peer_base tb6_peers;
unsigned int flags;
+ unsigned int fib_seq;
#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0)
};
@@ -284,6 +305,11 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
struct fib6_table *,
struct flowi6 *, int);
+struct fib6_entry_notifier_info {
+ struct fib_notifier_info info; /* must be first */
+ struct rt6_info *rt;
+};
+
/*
* exported functions
*/
@@ -320,9 +346,24 @@ int fib6_init(void);
int ipv6_route_open(struct inode *inode, struct file *file);
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+
+int __net_init fib6_notifier_init(struct net *net);
+void __net_exit fib6_notifier_exit(struct net *net);
+
+unsigned int fib6_tables_seq_read(struct net *net);
+int fib6_tables_dump(struct net *net, struct notifier_block *nb);
+
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
int fib6_rules_init(void);
void fib6_rules_cleanup(void);
+bool fib6_rule_default(const struct fib_rule *rule);
+int fib6_rules_dump(struct net *net, struct notifier_block *nb);
+unsigned int fib6_rules_seq_read(struct net *net);
#else
static inline int fib6_rules_init(void)
{
@@ -332,5 +373,17 @@ static inline void fib6_rules_cleanup(void)
{
return ;
}
+static inline bool fib6_rule_default(const struct fib_rule *rule)
+{
+ return true;
+}
+static inline int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return 0;
+}
+static inline unsigned int fib6_rules_seq_read(struct net *net)
+{
+ return 0;
+}
#endif
#endif
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 1990569..ee96f40 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -115,6 +115,7 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
const struct in6_addr *saddr, int oif, int flags);
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6);
@@ -163,6 +164,16 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
+static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
+{
+ const struct dst_entry *dst = skb_dst(skb);
+ const struct rt6_info *rt6 = NULL;
+
+ if (dst)
+ rt6 = container_of(dst, struct rt6_info, dst);
+
+ return rt6;
+}
/*
* Store a destination cache entry in a socket
@@ -194,7 +205,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
struct rt6_info *rt = (struct rt6_info *)dst;
return rt->rt6i_flags & RTF_ANYCAST ||
- (rt->rt6i_dst.plen != 128 &&
+ (rt->rt6i_dst.plen < 127 &&
ipv6_addr_equal(&rt->rt6i_dst.addr, daddr));
}
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 41d580c..1a7f7e4 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -19,6 +19,7 @@
#include <net/flow.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
+#include <net/fib_notifier.h>
#include <net/fib_rules.h>
#include <net/inetpeer.h>
#include <linux/percpu.h>
@@ -124,7 +125,6 @@ struct fib_info {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_weight;
#endif
- unsigned int fib_offload_cnt;
struct rcu_head rcu;
struct fib_nh fib_nh[0];
#define fib_dev fib_nh[0].nh_dev
@@ -177,18 +177,6 @@ struct fib_result_nl {
__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
-static inline void fib_info_offload_inc(struct fib_info *fi)
-{
- fi->fib_offload_cnt++;
- fi->fib_flags |= RTNH_F_OFFLOAD;
-}
-
-static inline void fib_info_offload_dec(struct fib_info *fi)
-{
- if (--fi->fib_offload_cnt == 0)
- fi->fib_flags &= ~RTNH_F_OFFLOAD;
-}
-
#define FIB_RES_SADDR(net, res) \
((FIB_RES_NH(res).nh_saddr_genid == \
atomic_read(&(net)->ipv4.dev_addr_genid)) ? \
@@ -201,10 +189,6 @@ static inline void fib_info_offload_dec(struct fib_info *fi)
#define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \
FIB_RES_SADDR(net, res))
-struct fib_notifier_info {
- struct net *net;
-};
-
struct fib_entry_notifier_info {
struct fib_notifier_info info; /* must be first */
u32 dst;
@@ -215,44 +199,21 @@ struct fib_entry_notifier_info {
u32 tb_id;
};
-struct fib_rule_notifier_info {
- struct fib_notifier_info info; /* must be first */
- struct fib_rule *rule;
-};
-
struct fib_nh_notifier_info {
struct fib_notifier_info info; /* must be first */
struct fib_nh *fib_nh;
};
-enum fib_event_type {
- FIB_EVENT_ENTRY_REPLACE,
- FIB_EVENT_ENTRY_APPEND,
- FIB_EVENT_ENTRY_ADD,
- FIB_EVENT_ENTRY_DEL,
- FIB_EVENT_RULE_ADD,
- FIB_EVENT_RULE_DEL,
- FIB_EVENT_NH_ADD,
- FIB_EVENT_NH_DEL,
-};
-
-int register_fib_notifier(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb));
-int unregister_fib_notifier(struct notifier_block *nb);
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_notifier_info *info);
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
struct fib_notifier_info *info);
+int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info);
+
+int __net_init fib4_notifier_init(struct net *net);
+void __net_exit fib4_notifier_exit(struct net *net);
void fib_notify(struct net *net, struct notifier_block *nb);
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-void fib_rules_notify(struct net *net, struct notifier_block *nb);
-#else
-static inline void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
-}
-#endif
struct fib_table {
struct hlist_node tb_hlist;
@@ -325,6 +286,16 @@ static inline bool fib4_rule_default(const struct fib_rule *rule)
return true;
}
+static inline int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline unsigned int fib4_rules_seq_read(struct net *net)
+{
+ return 0;
+}
+
#else /* CONFIG_IP_MULTIPLE_TABLES */
int __net_init fib4_rules_init(struct net *net);
void __net_exit fib4_rules_exit(struct net *net);
@@ -370,6 +341,8 @@ out:
}
bool fib4_rule_default(const struct fib_rule *rule);
+int fib4_rules_dump(struct net *net, struct notifier_block *nb);
+unsigned int fib4_rules_seq_read(struct net *net);
#endif /* CONFIG_IP_MULTIPLE_TABLES */
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 5208099..9926528 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -115,6 +115,9 @@ struct ip_tunnel {
u32 o_seqno; /* The last output seqno */
int tun_hlen; /* Precalculated header length */
+ /* This field used only by ERSPAN */
+ u32 index; /* ERSPAN type II index */
+
struct dst_cache dst_cache;
struct ip_tunnel_parm parms;
@@ -151,8 +154,10 @@ struct ip_tunnel {
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
#define TUNNEL_NOCACHE __cpu_to_be16(0x2000)
+#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000)
-#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
+#define TUNNEL_OPTIONS_PRESENT \
+ (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
struct tnl_ptk_info {
__be16 flags;
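With TUNNEL_ERSPAN_OPT folded into TUNNEL_OPTIONS_PRESENT, collect-metadata users can test for ERSPAN options the same way as for Geneve or VXLAN ones. A hedged sketch, with an invented helper name:

#include <net/ip_tunnels.h>

/* Illustrative only: does this tunnel key carry ERSPAN metadata? */
static bool demo_tun_has_erspan_opt(const struct ip_tunnel_key *key)
{
	return !!(key->tun_flags & TUNNEL_ERSPAN_OPT);
}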
diff --git a/include/net/ncsi.h b/include/net/ncsi.h
index 68680ba..fdc60ff 100644
--- a/include/net/ncsi.h
+++ b/include/net/ncsi.h
@@ -28,12 +28,24 @@ struct ncsi_dev {
};
#ifdef CONFIG_NET_NCSI
+int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid);
+int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid);
struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
void (*notifier)(struct ncsi_dev *nd));
int ncsi_start_dev(struct ncsi_dev *nd);
void ncsi_stop_dev(struct ncsi_dev *nd);
void ncsi_unregister_dev(struct ncsi_dev *nd);
#else /* !CONFIG_NET_NCSI */
+static inline int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ return -EINVAL;
+}
+
+static inline int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ return -EINVAL;
+}
+
static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
void (*notifier)(struct ncsi_dev *nd))
{
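The exported/stubbed pair above lets an NCSI-managed NIC driver hook VLAN filtering straight into its netdev ops, with the stubs returning -EINVAL when CONFIG_NET_NCSI is off. A sketch with a hypothetical ops table (the usual callbacks are elided):

#include <linux/netdevice.h>
#include <net/ncsi.h>

static const struct net_device_ops demo_ncsi_netdev_ops = {
	/* ...usual open/stop/start_xmit callbacks elided... */
	.ndo_vlan_rx_add_vid	= ncsi_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= ncsi_vlan_rx_kill_vid,
};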
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 1c401bd..57faa37 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -88,6 +88,7 @@ struct net {
/* core fib_rules */
struct list_head rules_ops;
+ struct list_head fib_notifier_ops; /* protected by net_mutex */
struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 4840756..fdc9c64 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -44,12 +44,6 @@ union nf_conntrack_expect_proto {
#include <linux/types.h>
#include <linux/skbuff.h>
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NF_CT_ASSERT(x) WARN_ON(!(x))
-#else
-#define NF_CT_ASSERT(x)
-#endif
-
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -159,7 +153,7 @@ nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
/* decrement reference count on a conntrack */
static inline void nf_ct_put(struct nf_conn *ct)
{
- NF_CT_ASSERT(ct);
+ WARN_ON(!ct);
nf_conntrack_put(&ct->ct_general);
}
@@ -224,6 +218,9 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
enum ip_conntrack_dir dir,
u32 seq);
+/* Set all unconfirmed conntrack as dying */
+void nf_ct_unconfirmed_destroy(struct net *);
+
/* Iterate over all conntracks: if iter returns true, it's deleted. */
void nf_ct_iterate_cleanup_net(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 2ba54fe..818def0 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -107,6 +107,11 @@ void nf_ct_remove_expectations(struct nf_conn *ct);
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp);
bool nf_ct_remove_expect(struct nf_conntrack_expect *exp);
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data), void *data);
+void nf_ct_expect_iterate_net(struct net *net,
+ bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data, u32 portid, int report);
+
/* Allocate space for an expectation: this is mandatory before calling
nf_ct_expect_related. You will have to call put afterwards. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me);
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 6d14b36..6269dee 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -20,8 +20,8 @@ struct nf_conntrack_l3proto {
/* L3 Protocol Family number. ex) PF_INET */
u_int16_t l3proto;
- /* Protocol name */
- const char *name;
+ /* size of tuple nlattr, fills a hole */
+ u16 nla_size;
/*
* Try to fill in the third arg: nhoff is offset of l3 proto
@@ -37,10 +37,6 @@ struct nf_conntrack_l3proto {
bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_tuple *orig);
- /* Print out the per-protocol part of the tuple. */
- void (*print_tuple)(struct seq_file *s,
- const struct nf_conntrack_tuple *);
-
/*
* Called before tracking.
* *dataoff: offset of protocol header (TCP, UDP,...) in skb
@@ -49,23 +45,17 @@ struct nf_conntrack_l3proto {
int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum);
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
int (*tuple_to_nlattr)(struct sk_buff *skb,
const struct nf_conntrack_tuple *t);
-
- /* Called when netns wants to use connection tracking */
- int (*net_ns_get)(struct net *);
- void (*net_ns_put)(struct net *);
-
- /*
- * Calculate size of tuple nlattr
- */
- int (*nlattr_tuple_size)(void);
-
int (*nlattr_to_tuple)(struct nlattr *tb[],
struct nf_conntrack_tuple *t);
const struct nla_policy *nla_policy;
+#endif
- size_t nla_size;
+ /* Called when netns wants to use connection tracking */
+ int (*net_ns_get)(struct net *);
+ void (*net_ns_put)(struct net *);
/* Module (if any) which this is connected to. */
struct module *me;
@@ -73,26 +63,11 @@ struct nf_conntrack_l3proto {
extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
-#ifdef CONFIG_SYSCTL
-/* Protocol pernet registration. */
-int nf_ct_l3proto_pernet_register(struct net *net,
- struct nf_conntrack_l3proto *proto);
-#else
-static inline int nf_ct_l3proto_pernet_register(struct net *n,
- struct nf_conntrack_l3proto *p)
-{
- return 0;
-}
-#endif
-
-void nf_ct_l3proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l3proto *proto);
-
/* Protocol global registration. */
-int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto);
-void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto);
+int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto);
+void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto);
-struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
+const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
/* Existing built-in protocols */
extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 7032e04..738a030 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -43,7 +43,6 @@ struct nf_conntrack_l4proto {
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts);
 /* Called when a new connection for this protocol is found;
@@ -61,13 +60,6 @@ struct nf_conntrack_l4proto {
/* called by gc worker if table is full */
bool (*can_early_drop)(const struct nf_conn *ct);
- /* Print out the per-protocol part of the tuple. Return like seq_* */
- void (*print_tuple)(struct seq_file *s,
- const struct nf_conntrack_tuple *);
-
- /* Print out the private part of the conntrack. */
- void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
-
/* Return the array of timeouts for this protocol. */
unsigned int *(*get_timeouts)(struct net *net);
@@ -92,15 +84,19 @@ struct nf_conntrack_l4proto {
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
struct {
- size_t obj_size;
int (*nlattr_to_obj)(struct nlattr *tb[],
struct net *net, void *data);
int (*obj_to_nlattr)(struct sk_buff *skb, const void *data);
- unsigned int nlattr_max;
+ u16 obj_size;
+ u16 nlattr_max;
const struct nla_policy *nla_policy;
} ctnl_timeout;
#endif
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
+ /* Print out the private part of the conntrack. */
+ void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
+#endif
unsigned int *net_id;
/* Init l4proto pernet data */
int (*init_net)(struct net *net, u_int16_t proto);
@@ -108,9 +104,6 @@ struct nf_conntrack_l4proto {
/* Return the per-net protocol part. */
struct nf_proto_net *(*get_net_proto)(struct net *net);
- /* Protocol name */
- const char *name;
-
/* Module (if any) which this is connected to. */
struct module *me;
};
@@ -120,28 +113,28 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
#define MAX_NF_CT_PROTO 256
-struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
+const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
u_int8_t l4proto);
-struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
u_int8_t l4proto);
-void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p);
+void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p);
/* Protocol pernet registration. */
int nf_ct_l4proto_pernet_register_one(struct net *net,
- struct nf_conntrack_l4proto *proto);
+ const struct nf_conntrack_l4proto *proto);
void nf_ct_l4proto_pernet_unregister_one(struct net *net,
- struct nf_conntrack_l4proto *proto);
+ const struct nf_conntrack_l4proto *proto);
int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *proto[],
+ struct nf_conntrack_l4proto *const proto[],
unsigned int num_proto);
void nf_ct_l4proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l4proto *proto[],
- unsigned int num_proto);
+ struct nf_conntrack_l4proto *const proto[],
+ unsigned int num_proto);
/* Protocol global registration. */
int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto);
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto);
+void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[],
unsigned int num_proto);
void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[],
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index d40b893..483d104 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -16,7 +16,7 @@ struct ctnl_timeout {
refcount_t refcnt;
char name[CTNL_TIMEOUT_NAME_MAX];
__u16 l3num;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
char data[0];
};
@@ -68,7 +68,7 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
static inline unsigned int *
nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
struct nf_conn_timeout *timeout_ext;
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 4454719f..3946872 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -10,9 +10,9 @@ struct nf_queue_entry {
struct list_head list;
struct sk_buff *skb;
unsigned int id;
+ unsigned int hook_index; /* index in hook_entries->hook[] */
struct nf_hook_state state;
- struct nf_hook_entry *hook;
u16 size; /* sizeof(entry) + saved route keys */
/* extra space to store route keys */
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index bd5be0d..0f5b12a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -396,7 +396,7 @@ void nft_unregister_set(struct nft_set_type *type);
struct nft_set {
struct list_head list;
struct list_head bindings;
- char name[NFT_SET_MAXNAMELEN];
+ char *name;
u32 ktype;
u32 dtype;
u32 objtype;
@@ -859,7 +859,7 @@ struct nft_chain {
u16 level;
u8 flags:6,
genmask:2;
- char name[NFT_CHAIN_MAXNAMELEN];
+ char *name;
};
enum nft_chain_type {
@@ -957,7 +957,7 @@ struct nft_table {
u32 use;
u16 flags:14,
genmask:2;
- char name[NFT_TABLE_MAXNAMELEN];
+ char *name;
};
enum nft_af_flags {
@@ -1007,21 +1007,21 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
*
* @list: table stateful object list node
* @table: table this object belongs to
- * @type: pointer to object type
- * @data: pointer to object data
* @name: name of this stateful object
* @genmask: generation mask
* @use: number of references to this stateful object
* @data: object data, layout depends on type
+ * @ops: object operations
*/
struct nft_object {
struct list_head list;
- char name[NFT_OBJ_MAXNAMELEN];
+ char *name;
struct nft_table *table;
u32 genmask:2,
use:30;
/* runtime data below here */
- const struct nft_object_type *type ____cacheline_aligned;
+ const struct nft_object_ops *ops ____cacheline_aligned;
unsigned char data[]
__attribute__((aligned(__alignof__(u64))));
};
@@ -1044,27 +1044,39 @@ void nft_obj_notify(struct net *net, struct nft_table *table,
/**
* struct nft_object_type - stateful object type
*
- * @eval: stateful object evaluation function
+ * @select_ops: function to select nft_object_ops
+ * @ops: default ops, used when no select_ops functions is present
* @list: list node in list of object types
* @type: stateful object numeric type
- * @size: stateful object size
* @owner: module owner
* @maxattr: maximum netlink attribute
* @policy: netlink attribute policy
+ */
+struct nft_object_type {
+ const struct nft_object_ops *(*select_ops)(const struct nft_ctx *,
+ const struct nlattr * const tb[]);
+ const struct nft_object_ops *ops;
+ struct list_head list;
+ u32 type;
+ unsigned int maxattr;
+ struct module *owner;
+ const struct nla_policy *policy;
+};
+
+/**
+ * struct nft_object_ops - stateful object operations
+ *
+ * @eval: stateful object evaluation function
+ * @size: stateful object size
* @init: initialize object from netlink attributes
* @destroy: release existing stateful object
* @dump: netlink dump stateful object
*/
-struct nft_object_type {
+struct nft_object_ops {
void (*eval)(struct nft_object *obj,
struct nft_regs *regs,
const struct nft_pktinfo *pkt);
- struct list_head list;
- u32 type;
unsigned int size;
- unsigned int maxattr;
- struct module *owner;
- const struct nla_policy *policy;
int (*init)(const struct nft_ctx *ctx,
const struct nlattr *const tb[],
struct nft_object *obj);
@@ -1072,6 +1084,7 @@ struct nft_object_type {
int (*dump)(struct sk_buff *skb,
struct nft_object *obj,
bool reset);
+ const struct nft_object_type *type;
};
int nft_register_obj(struct nft_object_type *obj_type);
@@ -1272,7 +1285,7 @@ struct nft_trans_set {
struct nft_trans_chain {
bool update;
- char name[NFT_CHAIN_MAXNAMELEN];
+ char *name;
struct nft_stats __percpu *stats;
u8 policy;
};
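The nft_object_type/nft_object_ops split lets a type either supply fixed ops or pick them per object via select_ops. A minimal sketch of the fixed-ops case, modelled on the structures above; every demo_* name and the numeric type value are hypothetical:

#include <linux/atomic.h>
#include <linux/module.h>
#include <net/netfilter/nf_tables.h>

struct demo_obj_priv {
	atomic64_t hits;
};

static void demo_obj_eval(struct nft_object *obj, struct nft_regs *regs,
			  const struct nft_pktinfo *pkt)
{
	struct demo_obj_priv *priv = nft_obj_data(obj);

	atomic64_inc(&priv->hits);
}

static struct nft_object_type demo_obj_type;
static const struct nft_object_ops demo_obj_ops = {
	.type	= &demo_obj_type,
	.size	= sizeof(struct demo_obj_priv),
	.eval	= demo_obj_eval,
};

static struct nft_object_type demo_obj_type __read_mostly = {
	.type	= 100,			/* hypothetical NFT_OBJECT_* value */
	.ops	= &demo_obj_ops,	/* no .select_ops: one default ops */
	.owner	= THIS_MODULE,
};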
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 8f690ef..424684c 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -49,6 +49,8 @@ struct nft_payload_set {
};
extern const struct nft_expr_ops nft_payload_fast_ops;
+
+extern struct static_key_false nft_counters_enabled;
extern struct static_key_false nft_trace_enabled;
#endif /* _NET_NF_TABLES_CORE_H */
diff --git a/include/net/netlink.h b/include/net/netlink.h
index ef8e6c3..e51cf5f 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -178,6 +178,7 @@ enum {
NLA_S16,
NLA_S32,
NLA_S64,
+ NLA_BITFIELD32,
__NLA_TYPE_MAX,
};
@@ -206,6 +207,7 @@ enum {
* NLA_MSECS Leaving the length field zero will verify the
* given type fits, using it verifies minimum length
* just like "All other"
+ * NLA_BITFIELD32 A 32-bit bitmap/bitselector attribute
* All other Minimum length of attribute payload
*
* Example:
@@ -213,11 +215,13 @@ enum {
* [ATTR_FOO] = { .type = NLA_U16 },
* [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
* [ATTR_BAZ] = { .len = sizeof(struct mystruct) },
+ * [ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags },
* };
*/
struct nla_policy {
u16 type;
u16 len;
+ void *validation_data;
};
/**
@@ -247,6 +251,7 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
int nla_policy_len(const struct nla_policy *, int);
struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype);
size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize);
+char *nla_strdup(const struct nlattr *nla, gfp_t flags);
int nla_memcpy(void *dest, const struct nlattr *src, int count);
int nla_memcmp(const struct nlattr *nla, const void *data, size_t size);
int nla_strcmp(const struct nlattr *nla, const char *str);
@@ -1203,6 +1208,18 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla)
}
/**
+ * nla_get_bitfield32 - return payload of 32 bitfield attribute
+ * @nla: nla_bitfield32 attribute
+ */
+static inline struct nla_bitfield32 nla_get_bitfield32(const struct nlattr *nla)
+{
+ struct nla_bitfield32 tmp;
+
+ nla_memcpy(&tmp, nla, sizeof(tmp));
+ return tmp;
+}
+
+/**
* nla_memdup - duplicate attribute memory (kmemdup)
* @src: netlink attribute to duplicate from
* @gfp: GFP mask
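A hedged sketch of the new attribute type in use: the policy entry points validation_data at the set of acceptable bits, and nla_get_bitfield32() copies out the value/selector pair. The ATTR_DEMO_* names and the mask are invented for illustration:

#include <net/netlink.h>

enum {
	ATTR_DEMO_UNSPEC,
	ATTR_DEMO_FLAGS,
	__ATTR_DEMO_MAX,
};
#define ATTR_DEMO_MAX (__ATTR_DEMO_MAX - 1)

/* Only the low three bits may be selected or set. */
static u32 demo_valid_flags = 0x7;

static const struct nla_policy demo_policy[ATTR_DEMO_MAX + 1] = {
	[ATTR_DEMO_FLAGS] = { .type = NLA_BITFIELD32,
			      .validation_data = &demo_valid_flags },
};

static u32 demo_effective_flags(struct nlattr *tb[])
{
	struct nla_bitfield32 bf = nla_get_bitfield32(tb[ATTR_DEMO_FLAGS]);

	/* Only bits named in the selector are meaningful in the value. */
	return bf.value & bf.selector;
}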
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 9a14a08..20d061c 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -159,6 +159,7 @@ struct netns_ipv4 {
int sysctl_fib_multipath_hash_policy;
#endif
+ struct fib_notifier_ops *notifier_ops;
unsigned int fib_seq; /* protected by rtnl_mutex */
atomic_t rt_genid;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index de7745e..2544f97 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -36,6 +36,7 @@ struct netns_sysctl_ipv6 {
int idgen_retries;
int idgen_delay;
int flowlabel_state_ranges;
+ int flowlabel_reflect;
};
struct netns_ipv6 {
@@ -65,6 +66,7 @@ struct netns_ipv6 {
unsigned int ip6_rt_gc_expire;
unsigned long ip6_rt_last_gc;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ bool fib6_has_custom_rules;
struct rt6_info *ip6_prohibit_entry;
struct rt6_info *ip6_blk_hole_entry;
struct fib6_table *fib6_local_tbl;
@@ -86,6 +88,7 @@ struct netns_ipv6 {
atomic_t dev_addr_genid;
atomic_t fib6_sernum;
struct seg6_pernet_data *seg6_data;
+ struct fib_notifier_ops *notifier_ops;
};
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index cea396b..72d66c8 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -16,7 +16,7 @@ struct netns_nf {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *nf_log_dir_header;
#endif
- struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+ struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
bool defrag_ipv4;
#endif
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 27bb963..6115216 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -6,7 +6,6 @@
#include <linux/workqueue.h>
#include <linux/xfrm.h>
#include <net/dst_ops.h>
-#include <net/flowcache.h>
struct ctl_table_header;
@@ -73,16 +72,6 @@ struct netns_xfrm {
spinlock_t xfrm_state_lock;
spinlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex;
-
- /* flow cache part */
- struct flow_cache flow_cache_global;
- atomic_t flow_cache_genid;
- struct list_head flow_cache_gc_list;
- atomic_t flow_cache_gc_count;
- spinlock_t flow_cache_gc_lock;
- struct work_struct flow_cache_gc_work;
- struct work_struct flow_cache_flush_work;
- struct mutex flow_flush_sem;
};
#endif
diff --git a/include/net/nsh.h b/include/net/nsh.h
new file mode 100644
index 0000000..a1eaea2
--- /dev/null
+++ b/include/net/nsh.h
@@ -0,0 +1,307 @@
+#ifndef __NET_NSH_H
+#define __NET_NSH_H 1
+
+#include <linux/skbuff.h>
+
+/*
+ * Network Service Header:
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Ver|O|U| TTL | Length |U|U|U|U|MD Type| Next Protocol |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Service Path Identifier (SPI) | Service Index |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * ~ Mandatory/Optional Context Headers ~
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Version: The version field is used to ensure backward compatibility
+ * going forward with future NSH specification updates. It MUST be set
+ * to 0x0 by the sender, in this first revision of NSH. Given the
+ * widespread implementation of existing hardware that uses the first
+ * nibble after an MPLS label stack for ECMP decision processing, this
+ * document reserves version 01b and this value MUST NOT be used in
+ * future versions of the protocol. Please see [RFC7325] for further
+ * discussion of MPLS-related forwarding requirements.
+ *
+ * O bit: Setting this bit indicates an Operations, Administration, and
+ * Maintenance (OAM) packet. The actual format and processing of SFC
+ * OAM packets is outside the scope of this specification (see for
+ * example [I-D.ietf-sfc-oam-framework] for one approach).
+ *
+ * The O bit MUST be set for OAM packets and MUST NOT be set for non-OAM
+ * packets. The O bit MUST NOT be modified along the SFP.
+ *
+ * SF/SFF/SFC Proxy/Classifier implementations that do not support SFC
+ * OAM procedures SHOULD discard packets with O bit set, but MAY support
+ * a configurable parameter to enable forwarding received SFC OAM
+ * packets unmodified to the next element in the chain. Forwarding OAM
+ * packets unmodified by SFC elements that do not support SFC OAM
+ * procedures may be acceptable for a subset of OAM functions, but can
+ * result in unexpected outcomes for others, thus it is recommended to
+ * analyze the impact of forwarding an OAM packet for all OAM functions
+ * prior to enabling this behavior. The configurable parameter MUST be
+ * disabled by default.
+ *
+ * TTL: Indicates the maximum SFF hops for an SFP. This field is used
+ * for service plane loop detection. The initial TTL value SHOULD be
+ * configurable via the control plane; the configured initial value can
+ * be specific to one or more SFPs. If no initial value is explicitly
+ * provided, the default initial TTL value of 63 MUST be used. Each SFF
+ * involved in forwarding an NSH packet MUST decrement the TTL value by
+ * 1 prior to NSH forwarding lookup. Decrementing by 1 from an incoming
+ * value of 0 shall result in a TTL value of 63. The packet MUST NOT be
+ * forwarded if TTL is, after decrement, 0.
+ *
+ * All other flag fields, marked U, are unassigned and available for
+ * future use, see Section 11.2.1. Unassigned bits MUST be set to zero
+ * upon origination, and MUST be ignored and preserved unmodified by
+ * other NSH supporting elements. Elements which do not understand the
+ * meaning of any of these bits MUST NOT modify their actions based on
+ * those unknown bits.
+ *
+ * Length: The total length, in 4-byte words, of NSH including the Base
+ * Header, the Service Path Header, the Fixed Length Context Header or
+ * Variable Length Context Header(s). The length MUST be 0x6 for MD
+ * Type equal to 0x1, and MUST be 0x2 or greater for MD Type equal to
+ * 0x2. The length of the NSH header MUST be an integer multiple of 4
+ * bytes, thus variable length metadata is always padded out to a
+ * multiple of 4 bytes.
+ *
+ * MD Type: Indicates the format of NSH beyond the mandatory Base Header
+ * and the Service Path Header. MD Type defines the format of the
+ * metadata being carried.
+ *
+ * 0x0 - This is a reserved value. Implementations SHOULD silently
+ * discard packets with MD Type 0x0.
+ *
+ * 0x1 - This indicates that the format of the header includes a fixed
+ * length Context Header (see Figure 4 below).
+ *
+ * 0x2 - This does not mandate any headers beyond the Base Header and
+ * Service Path Header, but may contain optional variable length Context
+ * Header(s). The semantics of the variable length Context Header(s)
+ * are not defined in this document. The format of the optional
+ * variable length Context Headers is provided in Section 2.5.1.
+ *
+ * 0xF - This value is reserved for experimentation and testing, as per
+ * [RFC3692]. Implementations not explicitly configured to be part of
+ * an experiment SHOULD silently discard packets with MD Type 0xF.
+ *
+ * Next Protocol: indicates the protocol type of the encapsulated data.
+ * NSH does not alter the inner payload, and the semantics on the inner
+ * protocol remain unchanged due to NSH service function chaining.
+ * Please see the IANA Considerations section below, Section 11.2.5.
+ *
+ * This document defines the following Next Protocol values:
+ *
+ * 0x1: IPv4
+ * 0x2: IPv6
+ * 0x3: Ethernet
+ * 0x4: NSH
+ * 0x5: MPLS
+ * 0xFE: Experiment 1
+ * 0xFF: Experiment 2
+ *
+ * Packets with Next Protocol values not supported SHOULD be silently
+ * dropped by default, although an implementation MAY provide a
+ * configuration parameter to forward them. Additionally, an
+ * implementation not explicitly configured for a specific experiment
+ * [RFC3692] SHOULD silently drop packets with Next Protocol values 0xFE
+ * and 0xFF.
+ *
+ * Service Path Identifier (SPI): Identifies a service path.
+ * Participating nodes MUST use this identifier for Service Function
+ * Path selection. The initial classifier MUST set the appropriate SPI
+ * for a given classification result.
+ *
+ * Service Index (SI): Provides location within the SFP. The initial
+ * classifier for a given SFP SHOULD set the SI to 255, however the
+ * control plane MAY configure the initial value of SI as appropriate
+ * (i.e., taking into account the length of the service function path).
+ * The Service Index MUST be decremented by a value of 1 by Service
+ * Functions or by SFC Proxy nodes after performing required services
+ * and the new decremented SI value MUST be used in the egress packet's
+ * NSH. The initial Classifier MUST send the packet to the first SFF in
+ * the identified SFP for forwarding along an SFP. If re-classification
+ * occurs, and that re-classification results in a new SPI, the
+ * (re)classifier is, in effect, the initial classifier for the
+ * resultant SPI.
+ *
+ * The SI is used in conjunction with the Service Path Identifier for
+ * Service Function Path Selection and for determining the next SFF/SF
+ * in the path. The SI is also valuable when troubleshooting or
+ * reporting service paths. Additionally, while the TTL field is the
+ * main mechanism for service plane loop detection, the SI can also be
+ * used for detecting service plane loops.
+ *
+ * When the Base Header specifies MD Type = 0x1, a Fixed Length Context
+ * Header (16-bytes) MUST be present immediately following the Service
+ * Path Header. The value of a Fixed Length Context
+ * Header that carries no metadata MUST be set to zero.
+ *
+ * When the base header specifies MD Type = 0x2, zero or more Variable
+ * Length Context Headers MAY be added, immediately following the
+ * Service Path Header (see Figure 5). Therefore, Length = 0x2,
+ * indicates that only the Base Header followed by the Service Path
+ * Header are present. The optional Variable Length Context Headers
+ * MUST be of an integer number of 4-bytes. The base header Length
+ * field MUST be used to determine the offset to locate the original
+ * packet or frame for SFC nodes that require access to that
+ * information.
+ *
+ * The format of the optional variable length Context Headers
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Metadata Class | Type |U| Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Variable Metadata |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Metadata Class (MD Class): Defines the scope of the 'Type' field to
+ * provide a hierarchical namespace. The IANA Considerations
+ * Section 11.2.4 defines how the MD Class values can be allocated to
+ * standards bodies, vendors, and others.
+ *
+ * Type: Indicates the explicit type of metadata being carried. The
+ * definition of the Type is the responsibility of the MD Class owner.
+ *
+ * Unassigned bit: One unassigned bit is available for future use. This
+ * bit MUST NOT be set, and MUST be ignored on receipt.
+ *
+ * Length: Indicates the length of the variable metadata, in bytes. In
+ * case the metadata length is not an integer number of 4-byte words,
+ * the sender MUST add pad bytes immediately following the last metadata
+ * byte to extend the metadata to an integer number of 4-byte words.
+ * The receiver MUST round up the length field to the nearest 4-byte
+ * word boundary, to locate and process the next field in the packet.
+ * The receiver MUST access only those bytes in the metadata indicated
+ * by the length field (i.e., actual number of bytes) and MUST ignore
+ * the remaining bytes up to the nearest 4-byte word boundary. The
+ * Length may be 0 or greater.
+ *
+ * A value of 0 denotes a Context Header without a Variable Metadata
+ * field.
+ *
+ * [0] https://datatracker.ietf.org/doc/draft-ietf-sfc-nsh/
+ */
+
+/**
+ * struct nsh_md1_ctx - Keeps track of NSH context data
+ * @context: NSH contexts 1-4 (nshc1 through nshc4).
+ */
+struct nsh_md1_ctx {
+ __be32 context[4];
+};
+
+struct nsh_md2_tlv {
+ __be16 md_class;
+ u8 type;
+ u8 length;
+ u8 md_value[];
+};
+
+struct nshhdr {
+ __be16 ver_flags_ttl_len;
+ u8 mdtype;
+ u8 np;
+ __be32 path_hdr;
+ union {
+ struct nsh_md1_ctx md1;
+ struct nsh_md2_tlv md2;
+ };
+};
+
+/* Masking NSH header fields. */
+#define NSH_VER_MASK 0xc000
+#define NSH_VER_SHIFT 14
+#define NSH_FLAGS_MASK 0x3000
+#define NSH_FLAGS_SHIFT 12
+#define NSH_TTL_MASK 0x0fc0
+#define NSH_TTL_SHIFT 6
+#define NSH_LEN_MASK 0x003f
+#define NSH_LEN_SHIFT 0
+
+#define NSH_MDTYPE_MASK 0x0f
+#define NSH_MDTYPE_SHIFT 0
+
+#define NSH_SPI_MASK 0xffffff00
+#define NSH_SPI_SHIFT 8
+#define NSH_SI_MASK 0x000000ff
+#define NSH_SI_SHIFT 0
+
+/* MD Type Registry. */
+#define NSH_M_TYPE1 0x01
+#define NSH_M_TYPE2 0x02
+#define NSH_M_EXP1 0xFE
+#define NSH_M_EXP2 0xFF
+
+/* NSH Base Header Length */
+#define NSH_BASE_HDR_LEN 8
+
+/* NSH MD Type 1 header Length. */
+#define NSH_M_TYPE1_LEN 24
+
+/* NSH header maximum Length. */
+#define NSH_HDR_MAX_LEN 256
+
+/* NSH context headers maximum Length. */
+#define NSH_CTX_HDRS_MAX_LEN 248
+
+static inline struct nshhdr *nsh_hdr(struct sk_buff *skb)
+{
+ return (struct nshhdr *)skb_network_header(skb);
+}
+
+static inline u16 nsh_hdr_len(const struct nshhdr *nsh)
+{
+ return ((ntohs(nsh->ver_flags_ttl_len) & NSH_LEN_MASK)
+ >> NSH_LEN_SHIFT) << 2;
+}
+
+static inline u8 nsh_get_ver(const struct nshhdr *nsh)
+{
+ return (ntohs(nsh->ver_flags_ttl_len) & NSH_VER_MASK)
+ >> NSH_VER_SHIFT;
+}
+
+static inline u8 nsh_get_flags(const struct nshhdr *nsh)
+{
+ return (ntohs(nsh->ver_flags_ttl_len) & NSH_FLAGS_MASK)
+ >> NSH_FLAGS_SHIFT;
+}
+
+static inline u8 nsh_get_ttl(const struct nshhdr *nsh)
+{
+ return (ntohs(nsh->ver_flags_ttl_len) & NSH_TTL_MASK)
+ >> NSH_TTL_SHIFT;
+}
+
+static inline void __nsh_set_xflag(struct nshhdr *nsh, u16 xflag, u16 xmask)
+{
+ nsh->ver_flags_ttl_len
+ = (nsh->ver_flags_ttl_len & ~htons(xmask)) | htons(xflag);
+}
+
+static inline void nsh_set_flags_and_ttl(struct nshhdr *nsh, u8 flags, u8 ttl)
+{
+ __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) |
+ ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK),
+ NSH_FLAGS_MASK | NSH_TTL_MASK);
+}
+
+static inline void nsh_set_flags_ttl_len(struct nshhdr *nsh, u8 flags,
+ u8 ttl, u8 len)
+{
+ len = len >> 2;
+ __nsh_set_xflag(nsh, ((flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK) |
+ ((ttl << NSH_TTL_SHIFT) & NSH_TTL_MASK) |
+ ((len << NSH_LEN_SHIFT) & NSH_LEN_MASK),
+ NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
+}
+
+#endif /* __NET_NSH_H */
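To make the field layout above concrete, a rough sketch of pulling and sanity-checking an NSH base header with the helpers from this file. It assumes skb->data already points at the NSH header (i.e. the network header has been reset to it); demo_nsh_peek is an invented name and error handling is abbreviated:

#include <net/nsh.h>

static int demo_nsh_peek(struct sk_buff *skb)
{
	struct nshhdr *nsh;
	u16 len;

	if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN))
		return -EINVAL;

	len = nsh_hdr_len(nsh_hdr(skb));	/* Length is in 4-byte words */
	if (len < NSH_BASE_HDR_LEN || !pskb_may_pull(skb, len))
		return -EINVAL;

	nsh = nsh_hdr(skb);	/* re-read: pskb_may_pull may move data */
	if (nsh_get_ver(nsh) != 0)
		return -EINVAL;

	/* SPI and SI share path_hdr, both in network byte order. */
	pr_debug("nsh: spi %u si %u ttl %u\n",
		 (ntohl(nsh->path_hdr) & NSH_SPI_MASK) >> NSH_SPI_SHIFT,
		 ntohl(nsh->path_hdr) & NSH_SI_MASK,
		 nsh_get_ttl(nsh));
	return 0;
}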
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 537d0a0..e80edd8 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -11,7 +11,7 @@ struct tcf_walker {
int stop;
int skip;
int count;
- int (*fn)(struct tcf_proto *, unsigned long node, struct tcf_walker *);
+ int (*fn)(struct tcf_proto *, void *node, struct tcf_walker *);
};
int register_tcf_proto_ops(struct tcf_proto_ops *ops);
@@ -113,36 +113,6 @@ static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police)
return 0;
}
-/**
- * tcf_exts_is_predicative - check if a predicative extension is present
- * @exts: tc filter extensions handle
- *
- * Returns 1 if a predicative extension is present, i.e. an extension which
- * might cause further actions and thus overrule the regular tcf_result.
- */
-static inline int
-tcf_exts_is_predicative(struct tcf_exts *exts)
-{
-#ifdef CONFIG_NET_CLS_ACT
- return exts->nr_actions;
-#else
- return 0;
-#endif
-}
-
-/**
- * tcf_exts_is_available - check if at least one extension is present
- * @exts: tc filter extensions handle
- *
- * Returns 1 if at least one extension is present.
- */
-static inline int
-tcf_exts_is_available(struct tcf_exts *exts)
-{
- /* All non-predicative extensions must be added here. */
- return tcf_exts_is_predicative(exts);
-}
-
static inline void tcf_exts_to_list(const struct tcf_exts *exts,
struct list_head *actions)
{
@@ -177,46 +147,61 @@ tcf_exts_stats_update(const struct tcf_exts *exts,
}
/**
+ * tcf_exts_has_actions - check if at least one action is present
+ * @exts: tc filter extensions handle
+ *
+ * Returns true if at least one action is present.
+ */
+static inline bool tcf_exts_has_actions(struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ return exts->nr_actions;
+#else
+ return false;
+#endif
+}
+
+/**
+ * tcf_exts_has_one_action - check if exactly one action is present
+ * @exts: tc filter extensions handle
+ *
+ * Returns true if exactly one action is present.
+ */
+static inline bool tcf_exts_has_one_action(struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ return exts->nr_actions == 1;
+#else
+ return false;
+#endif
+}
+
+/**
* tcf_exts_exec - execute tc filter extensions
* @skb: socket buffer
* @exts: tc filter extensions handle
* @res: desired result
*
- * Executes all configured extensions. Returns 0 on a normal execution,
+ * Executes all configured extensions. Returns TC_ACT_OK on a normal execution,
* a negative number if the filter must be considered unmatched or
* a positive action code (TC_ACT_*) which must be returned to the
* underlying layer.
*/
static inline int
tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
- struct tcf_result *res)
+ struct tcf_result *res)
{
#ifdef CONFIG_NET_CLS_ACT
- if (exts->nr_actions)
- return tcf_action_exec(skb, exts->actions, exts->nr_actions,
- res);
+ return tcf_action_exec(skb, exts->actions, exts->nr_actions, res);
#endif
- return 0;
+ return TC_ACT_OK;
}
-#ifdef CONFIG_NET_CLS_ACT
-
-#define tc_no_actions(_exts) ((_exts)->nr_actions == 0)
-#define tc_single_action(_exts) ((_exts)->nr_actions == 1)
-
-#else /* CONFIG_NET_CLS_ACT */
-
-#define tc_no_actions(_exts) true
-#define tc_single_action(_exts) false
-
-#endif /* CONFIG_NET_CLS_ACT */
-
int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
struct nlattr **tb, struct nlattr *rate_tlv,
struct tcf_exts *exts, bool ovr);
void tcf_exts_destroy(struct tcf_exts *exts);
-void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
- struct tcf_exts *src);
+void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
@@ -333,26 +318,6 @@ int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *,
struct tcf_pkt_info *);
/**
- * tcf_em_tree_change - replace ematch tree of a running classifier
- *
- * @tp: classifier kind handle
- * @dst: destination ematch tree variable
- * @src: source ematch tree (temporary tree from tcf_em_tree_validate)
- *
- * This functions replaces the ematch tree in @dst with the ematch
- * tree in @src. The classifier in charge of the ematch tree may be
- * running.
- */
-static inline void tcf_em_tree_change(struct tcf_proto *tp,
- struct tcf_ematch_tree *dst,
- struct tcf_ematch_tree *src)
-{
- tcf_tree_lock(tp);
- memcpy(dst, src, sizeof(*dst));
- tcf_tree_unlock(tp);
-}
-
-/**
 * tcf_em_tree_match - evaluate an ematch tree
*
* @skb: socket buffer of the packet in question
@@ -386,7 +351,6 @@ struct tcf_ematch_tree {
#define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0)
#define tcf_em_tree_destroy(t) do { (void)(t); } while(0)
#define tcf_em_tree_dump(skb, t, tlv) (0)
-#define tcf_em_tree_change(tp, dst, src) do { } while(0)
#define tcf_em_tree_match(skb, t, info) ((void)(info), 1)
#endif /* CONFIG_NET_EMATCH */
@@ -441,6 +405,23 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
}
#endif /* CONFIG_NET_CLS_IND */
+struct tc_cls_common_offload {
+ u32 chain_index;
+ __be16 protocol;
+ u32 prio;
+ u32 classid;
+};
+
+static inline void
+tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
+ const struct tcf_proto *tp)
+{
+ cls_common->chain_index = tp->chain->index;
+ cls_common->protocol = tp->protocol;
+ cls_common->prio = tp->prio;
+ cls_common->classid = tp->classid;
+}
+
struct tc_cls_u32_knode {
struct tcf_exts *exts;
struct tc_u32_sel *sel;
@@ -467,6 +448,7 @@ enum tc_clsu32_command {
};
struct tc_cls_u32_offload {
+ struct tc_cls_common_offload common;
/* knode values */
enum tc_clsu32_command command;
union {
@@ -475,19 +457,12 @@ struct tc_cls_u32_offload {
};
};
-static inline bool tc_can_offload(const struct net_device *dev,
- const struct tcf_proto *tp)
+static inline bool tc_can_offload(const struct net_device *dev)
{
- const struct Qdisc *sch = tp->q;
- const struct Qdisc_class_ops *cops = sch->ops->cl_ops;
-
if (!(dev->features & NETIF_F_HW_TC))
return false;
if (!dev->netdev_ops->ndo_setup_tc)
return false;
- if (cops && cops->tcf_cl_offload)
- return cops->tcf_cl_offload(tp->classid);
-
return true;
}
@@ -496,12 +471,11 @@ static inline bool tc_skip_hw(u32 flags)
return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false;
}
-static inline bool tc_should_offload(const struct net_device *dev,
- const struct tcf_proto *tp, u32 flags)
+static inline bool tc_should_offload(const struct net_device *dev, u32 flags)
{
if (tc_skip_hw(flags))
return false;
- return tc_can_offload(dev, tp);
+ return tc_can_offload(dev);
}
static inline bool tc_skip_sw(u32 flags)
@@ -533,13 +507,14 @@ enum tc_fl_command {
};
struct tc_cls_flower_offload {
+ struct tc_cls_common_offload common;
enum tc_fl_command command;
- u32 prio;
unsigned long cookie;
struct flow_dissector *dissector;
struct fl_flow_key *mask;
struct fl_flow_key *key;
struct tcf_exts *exts;
+ bool egress_dev;
};
enum tc_matchall_command {
@@ -548,6 +523,7 @@ enum tc_matchall_command {
};
struct tc_cls_matchall_offload {
+ struct tc_cls_common_offload common;
enum tc_matchall_command command;
struct tcf_exts *exts;
unsigned long cookie;
@@ -561,6 +537,7 @@ enum tc_clsbpf_command {
};
struct tc_cls_bpf_offload {
+ struct tc_cls_common_offload common;
enum tc_clsbpf_command command;
struct tcf_exts *exts;
struct bpf_prog *prog;
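Putting the new pieces together on the classifier side: tc_cls_common_offload_init() stamps chain/protocol/prio/classid into every offload request, and the tcf_exts_has_*() helpers replace the old tc_no_actions/tc_single_action macros. A hedged sketch; the demo_* name is invented and the dissector/mask/key/exts setup is elided:

#include <net/pkt_cls.h>

static int demo_prep_flower_offload(struct net_device *dev,
				    struct tcf_proto *tp,
				    struct tc_cls_flower_offload *cls,
				    u32 flags)
{
	if (!tc_should_offload(dev, flags))
		return tc_skip_sw(flags) ? -EINVAL : 0;

	tc_cls_common_offload_init(&cls->common, tp);
	cls->command = TC_CLSFLOWER_REPLACE;
	/* dissector/mask/key/exts would be filled in from the filter here */
	return 0;
}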
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 2579c20..259bc19 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -5,6 +5,7 @@
#include <linux/ktime.h>
#include <linux/if_vlan.h>
#include <net/sch_generic.h>
+#include <uapi/linux/pkt_sched.h>
#define DEFAULT_TX_QUEUE_LEN 1000
@@ -132,4 +133,17 @@ static inline unsigned int psched_mtu(const struct net_device *dev)
return dev->mtu + dev->hard_header_len;
}
+static inline bool is_classid_clsact_ingress(u32 classid)
+{
+ /* This also returns true for ingress qdisc */
+ return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) &&
+ TC_H_MIN(classid) != TC_H_MIN(TC_H_MIN_EGRESS);
+}
+
+static inline bool is_classid_clsact_egress(u32 classid)
+{
+ return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) &&
+ TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS);
+}
+
#endif
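A small usage sketch for the two classid predicates above (the function name is illustrative):

#include <net/pkt_sched.h>

static const char *demo_clsact_dir(u32 classid)
{
	if (is_classid_clsact_ingress(classid))
		return "ingress";
	if (is_classid_clsact_egress(classid))
		return "egress";
	return "not clsact";
}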
diff --git a/include/net/raw.h b/include/net/raw.h
index 57c33dd..99d26d0 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -26,7 +26,7 @@ extern struct proto raw_prot;
extern struct raw_hashinfo raw_v4_hashinfo;
struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
unsigned short num, __be32 raddr,
- __be32 laddr, int dif);
+ __be32 laddr, int dif, int sdif);
int raw_abort(struct sock *sk, int err);
void raw_icmp_error(struct sk_buff *, int, u32);
diff --git a/include/net/rawv6.h b/include/net/rawv6.h
index cbe4e9d..4addc5c 100644
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -6,7 +6,7 @@
extern struct raw_hashinfo raw_v6_hashinfo;
struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
unsigned short num, const struct in6_addr *loc_addr,
- const struct in6_addr *rmt_addr, int dif);
+ const struct in6_addr *rmt_addr, int dif, int sdif);
int raw_abort(struct sock *sk, int err);
diff --git a/include/net/route.h b/include/net/route.h
index cb0a76d..1b09a93 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -189,10 +189,11 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
rcu_read_lock();
err = ip_route_input_noref(skb, dst, src, tos, devin);
- if (!err)
+ if (!err) {
skb_dst_force_safe(skb);
- if (!skb_dst(skb))
- err = -EINVAL;
+ if (!skb_dst(skb))
+ err = -EINVAL;
+ }
rcu_read_unlock();
return err;
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index abe6b73..21837ca 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -7,12 +7,15 @@
typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *,
struct netlink_ext_ack *);
typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
-typedef u16 (*rtnl_calcit_func)(struct sk_buff *, struct nlmsghdr *);
+
+enum rtnl_link_flags {
+ RTNL_FLAG_DOIT_UNLOCKED = 1,
+};
int __rtnl_register(int protocol, int msgtype,
- rtnl_doit_func, rtnl_dumpit_func, rtnl_calcit_func);
+ rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
void rtnl_register(int protocol, int msgtype,
- rtnl_doit_func, rtnl_dumpit_func, rtnl_calcit_func);
+ rtnl_doit_func, rtnl_dumpit_func, unsigned int flags);
int rtnl_unregister(int protocol, int msgtype);
void rtnl_unregister_all(int protocol);
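With the calcit hook gone, registrations pass a flags word instead; RTNL_FLAG_DOIT_UNLOCKED asks rtnetlink to call the doit handler without the rtnl mutex held. A sketch with a hypothetical message type:

#include <linux/init.h>
#include <linux/rtnetlink.h>
#include <net/rtnetlink.h>

#define DEMO_RTM_MSGTYPE 100		/* hypothetical message type */

static int demo_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	/* Must do its own locking when registered as DOIT_UNLOCKED. */
	return 0;
}

static void __init demo_rtnl_setup(void)
{
	rtnl_register(PF_UNSPEC, DEMO_RTM_MSGTYPE, demo_doit, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
}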
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c1109cd..135f5a2 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -75,7 +75,6 @@ struct Qdisc {
struct hlist_node hash;
u32 handle;
u32 parent;
- void *u32_node;
struct netdev_queue *dev_queue;
@@ -154,8 +153,7 @@ struct Qdisc_class_ops {
void (*qlen_notify)(struct Qdisc *, unsigned long);
/* Class manipulation routines */
- unsigned long (*get)(struct Qdisc *, u32 classid);
- void (*put)(struct Qdisc *, unsigned long);
+ unsigned long (*find)(struct Qdisc *, u32 classid);
int (*change)(struct Qdisc *, u32, u32,
struct nlattr **, unsigned long *);
int (*delete)(struct Qdisc *, unsigned long);
@@ -163,7 +161,6 @@ struct Qdisc_class_ops {
/* Filter manipulation */
struct tcf_block * (*tcf_block)(struct Qdisc *, unsigned long);
- bool (*tcf_cl_offload)(u32 classid);
unsigned long (*bind_tcf)(struct Qdisc *, unsigned long,
u32 classid);
void (*unbind_tcf)(struct Qdisc *, unsigned long);
@@ -220,16 +217,17 @@ struct tcf_proto_ops {
int (*init)(struct tcf_proto*);
void (*destroy)(struct tcf_proto*);
- unsigned long (*get)(struct tcf_proto*, u32 handle);
+ void* (*get)(struct tcf_proto*, u32 handle);
int (*change)(struct net *net, struct sk_buff *,
struct tcf_proto*, unsigned long,
u32 handle, struct nlattr **,
- unsigned long *, bool);
- int (*delete)(struct tcf_proto*, unsigned long, bool*);
+ void **, bool);
+ int (*delete)(struct tcf_proto*, void *, bool*);
void (*walk)(struct tcf_proto*, struct tcf_walker *arg);
+ void (*bind_class)(void *, u32, unsigned long);
/* rtnetlink specific */
- int (*dump)(struct net*, struct tcf_proto*, unsigned long,
+ int (*dump)(struct net*, struct tcf_proto*, void *,
struct sk_buff *skb, struct tcmsg*);
struct module *owner;
@@ -401,6 +399,9 @@ qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id)
struct Qdisc_class_common *cl;
unsigned int h;
+ if (!id)
+ return NULL;
+
h = qdisc_class_hash(id, hash->hashmask);
hlist_for_each_entry(cl, &hash->hash[h], hnode) {
if (cl->classid == id)
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index d4679e7..b55c6a4 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -40,7 +40,7 @@
#include <net/sctp/structs.h>
-typedef enum {
+enum sctp_verb {
SCTP_CMD_NOP = 0, /* Do nothing. */
SCTP_CMD_NEW_ASOC, /* Register a new association. */
SCTP_CMD_DELETE_TCB, /* Delete the current association. */
@@ -108,16 +108,16 @@ typedef enum {
SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/
SCTP_CMD_SET_ASOC, /* Restore association context */
SCTP_CMD_LAST
-} sctp_verb_t;
+};
-/* How many commands can you put in an sctp_cmd_seq_t?
+/* How many commands can you put in a struct sctp_cmd_seq?
* This is a rather arbitrary number, ideally derived from a careful
* analysis of the state functions, but in reality just taken from
 * thin air in the hopes that we don't trigger a kernel panic.
*/
#define SCTP_MAX_NUM_COMMANDS 20
-typedef union {
+union sctp_arg {
void *zero_all; /* Set to NULL to clear the entire union */
__s32 i32;
__u32 u32;
@@ -126,8 +126,8 @@ typedef union {
__u8 u8;
int error;
__be16 err;
- sctp_state_t state;
- sctp_event_timeout_t to;
+ enum sctp_state state;
+ enum sctp_event_timeout to;
struct sctp_chunk *chunk;
struct sctp_association *asoc;
struct sctp_transport *transport;
@@ -135,26 +135,26 @@ typedef union {
struct sctp_init_chunk *init;
struct sctp_ulpevent *ulpevent;
struct sctp_packet *packet;
- sctp_sackhdr_t *sackh;
+ struct sctp_sackhdr *sackh;
struct sctp_datamsg *msg;
-} sctp_arg_t;
+};
/* We are simulating ML type constructors here.
*
* SCTP_ARG_CONSTRUCTOR(NAME, TYPE, ELT) builds a function called
* SCTP_NAME() which takes an argument of type TYPE and returns an
- * sctp_arg_t. It does this by inserting the sole argument into the
- * ELT union element of a local sctp_arg_t.
+ * union sctp_arg. It does this by inserting the sole argument into
+ * the ELT union element of a local union sctp_arg.
*
* E.g., SCTP_ARG_CONSTRUCTOR(I32, __s32, i32) builds SCTP_I32(arg),
- * which takes an __s32 and returns a sctp_arg_t containing the
+ * which takes an __s32 and returns a union sctp_arg containing the
* __s32. So, after foo = SCTP_I32(arg), foo.i32 == arg.
*/
#define SCTP_ARG_CONSTRUCTOR(name, type, elt) \
-static inline sctp_arg_t \
+static inline union sctp_arg \
SCTP_## name (type arg) \
-{ sctp_arg_t retval;\
+{ union sctp_arg retval;\
retval.zero_all = NULL;\
retval.elt = arg;\
return retval;\
@@ -167,8 +167,8 @@ SCTP_ARG_CONSTRUCTOR(U16, __u16, u16)
SCTP_ARG_CONSTRUCTOR(U8, __u8, u8)
SCTP_ARG_CONSTRUCTOR(ERROR, int, error)
SCTP_ARG_CONSTRUCTOR(PERR, __be16, err) /* protocol error */
-SCTP_ARG_CONSTRUCTOR(STATE, sctp_state_t, state)
-SCTP_ARG_CONSTRUCTOR(TO, sctp_event_timeout_t, to)
+SCTP_ARG_CONSTRUCTOR(STATE, enum sctp_state, state)
+SCTP_ARG_CONSTRUCTOR(TO, enum sctp_event_timeout, to)
SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk)
SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc)
SCTP_ARG_CONSTRUCTOR(TRANSPORT, struct sctp_transport *, transport)
@@ -176,42 +176,42 @@ SCTP_ARG_CONSTRUCTOR(BA, struct sctp_bind_addr *, bp)
SCTP_ARG_CONSTRUCTOR(PEER_INIT, struct sctp_init_chunk *, init)
SCTP_ARG_CONSTRUCTOR(ULPEVENT, struct sctp_ulpevent *, ulpevent)
SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet)
-SCTP_ARG_CONSTRUCTOR(SACKH, sctp_sackhdr_t *, sackh)
+SCTP_ARG_CONSTRUCTOR(SACKH, struct sctp_sackhdr *, sackh)
SCTP_ARG_CONSTRUCTOR(DATAMSG, struct sctp_datamsg *, msg)
-static inline sctp_arg_t SCTP_FORCE(void)
+static inline union sctp_arg SCTP_FORCE(void)
{
return SCTP_I32(1);
}
-static inline sctp_arg_t SCTP_NOFORCE(void)
+static inline union sctp_arg SCTP_NOFORCE(void)
{
return SCTP_I32(0);
}
-static inline sctp_arg_t SCTP_NULL(void)
+static inline union sctp_arg SCTP_NULL(void)
{
- sctp_arg_t retval;
+ union sctp_arg retval;
retval.zero_all = NULL;
return retval;
}
-typedef struct {
- sctp_arg_t obj;
- sctp_verb_t verb;
-} sctp_cmd_t;
+struct sctp_cmd {
+ union sctp_arg obj;
+ enum sctp_verb verb;
+};
-typedef struct {
- sctp_cmd_t cmds[SCTP_MAX_NUM_COMMANDS];
- sctp_cmd_t *last_used_slot;
- sctp_cmd_t *next_cmd;
-} sctp_cmd_seq_t;
+struct sctp_cmd_seq {
+ struct sctp_cmd cmds[SCTP_MAX_NUM_COMMANDS];
+ struct sctp_cmd *last_used_slot;
+ struct sctp_cmd *next_cmd;
+};
/* Initialize a block of memory as a command sequence.
* Return 0 if the initialization fails.
*/
-static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq)
+static inline int sctp_init_cmd_seq(struct sctp_cmd_seq *seq)
{
/* cmds[] is filled backwards to simplify the overflow BUG() check */
seq->last_used_slot = seq->cmds + SCTP_MAX_NUM_COMMANDS;
@@ -220,15 +220,15 @@ static inline int sctp_init_cmd_seq(sctp_cmd_seq_t *seq)
}
-/* Add a command to an sctp_cmd_seq_t.
+/* Add a command to a struct sctp_cmd_seq.
*
* Use the SCTP_* constructors defined by SCTP_ARG_CONSTRUCTOR() above
* to wrap data which goes in the obj argument.
*/
-static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb,
- sctp_arg_t obj)
+static inline void sctp_add_cmd_sf(struct sctp_cmd_seq *seq,
+ enum sctp_verb verb, union sctp_arg obj)
{
- sctp_cmd_t *cmd = seq->last_used_slot - 1;
+ struct sctp_cmd *cmd = seq->last_used_slot - 1;
BUG_ON(cmd < seq->cmds);
@@ -240,7 +240,7 @@ static inline void sctp_add_cmd_sf(sctp_cmd_seq_t *seq, sctp_verb_t verb,
/* Return the next command structure in an sctp_cmd_seq.
* Return NULL at the end of the sequence.
*/
-static inline sctp_cmd_t *sctp_next_cmd(sctp_cmd_seq_t *seq)
+static inline struct sctp_cmd *sctp_next_cmd(struct sctp_cmd_seq *seq)
{
if (seq->next_cmd <= seq->last_used_slot)
return NULL;
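The intended calling pattern for the renamed command-sequence types, as a sketch; the surrounding state-machine context is omitted and demo_run is an invented name:

#include <net/sctp/command.h>

static void demo_run(struct sctp_cmd_seq *seq)
{
	struct sctp_cmd *cmd;

	sctp_init_cmd_seq(seq);
	/* SCTP_TO() wraps the timeout id in a union sctp_arg. */
	sctp_add_cmd_sf(seq, SCTP_CMD_TIMER_RESTART,
			SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));

	while ((cmd = sctp_next_cmd(seq)) != NULL) {
		/* dispatch on cmd->verb, with cmd->obj as its argument */
	}
}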
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 9b18044..deaafa9 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -42,7 +42,7 @@
#include <linux/sctp.h>
#include <linux/ipv6.h> /* For ipv6hdr. */
-#include <net/tcp_states.h> /* For TCP states used in sctp_sock_state_t */
+#include <net/tcp_states.h> /* For TCP states used in enum sctp_sock_state */
/* Value used for stream negotiation. */
enum { SCTP_MAX_STREAM = 0xffff };
@@ -71,20 +71,18 @@ enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM };
SCTP_NUM_AUTH_CHUNK_TYPES)
/* These are the different flavours of event. */
-typedef enum {
-
+enum sctp_event {
SCTP_EVENT_T_CHUNK = 1,
SCTP_EVENT_T_TIMEOUT,
SCTP_EVENT_T_OTHER,
SCTP_EVENT_T_PRIMITIVE
-
-} sctp_event_t;
+};
/* As a convenience for the state machine, we append SCTP_EVENT_* and
* SCTP_ULP_* to the list of possible chunks.
*/
-typedef enum {
+enum sctp_event_timeout {
SCTP_EVENT_TIMEOUT_NONE = 0,
SCTP_EVENT_TIMEOUT_T1_COOKIE,
SCTP_EVENT_TIMEOUT_T1_INIT,
@@ -96,21 +94,21 @@ typedef enum {
SCTP_EVENT_TIMEOUT_RECONF,
SCTP_EVENT_TIMEOUT_SACK,
SCTP_EVENT_TIMEOUT_AUTOCLOSE,
-} sctp_event_timeout_t;
+};
#define SCTP_EVENT_TIMEOUT_MAX SCTP_EVENT_TIMEOUT_AUTOCLOSE
#define SCTP_NUM_TIMEOUT_TYPES (SCTP_EVENT_TIMEOUT_MAX + 1)
-typedef enum {
+enum sctp_event_other {
SCTP_EVENT_NO_PENDING_TSN = 0,
SCTP_EVENT_ICMP_PROTO_UNREACH,
-} sctp_event_other_t;
+};
#define SCTP_EVENT_OTHER_MAX SCTP_EVENT_ICMP_PROTO_UNREACH
#define SCTP_NUM_OTHER_TYPES (SCTP_EVENT_OTHER_MAX + 1)
/* These are primitive requests from the ULP. */
-typedef enum {
+enum sctp_event_primitive {
SCTP_PRIMITIVE_ASSOCIATE = 0,
SCTP_PRIMITIVE_SHUTDOWN,
SCTP_PRIMITIVE_ABORT,
@@ -118,7 +116,7 @@ typedef enum {
SCTP_PRIMITIVE_REQUESTHEARTBEAT,
SCTP_PRIMITIVE_ASCONF,
SCTP_PRIMITIVE_RECONF,
-} sctp_event_primitive_t;
+};
#define SCTP_EVENT_PRIMITIVE_MAX SCTP_PRIMITIVE_RECONF
#define SCTP_NUM_PRIMITIVE_TYPES (SCTP_EVENT_PRIMITIVE_MAX + 1)
@@ -126,25 +124,25 @@ typedef enum {
/* We define here a utility type for manipulating subtypes.
* The subtype constructors all work like this:
*
- * sctp_subtype_t foo = SCTP_ST_CHUNK(SCTP_CID_INIT);
+ * union sctp_subtype foo = SCTP_ST_CHUNK(SCTP_CID_INIT);
*/
-typedef union {
+union sctp_subtype {
enum sctp_cid chunk;
- sctp_event_timeout_t timeout;
- sctp_event_other_t other;
- sctp_event_primitive_t primitive;
-} sctp_subtype_t;
+ enum sctp_event_timeout timeout;
+ enum sctp_event_other other;
+ enum sctp_event_primitive primitive;
+};
#define SCTP_SUBTYPE_CONSTRUCTOR(_name, _type, _elt) \
-static inline sctp_subtype_t \
+static inline union sctp_subtype \
SCTP_ST_## _name (_type _arg) \
-{ sctp_subtype_t _retval; _retval._elt = _arg; return _retval; }
+{ union sctp_subtype _retval; _retval._elt = _arg; return _retval; }
SCTP_SUBTYPE_CONSTRUCTOR(CHUNK, enum sctp_cid, chunk)
-SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, sctp_event_timeout_t, timeout)
-SCTP_SUBTYPE_CONSTRUCTOR(OTHER, sctp_event_other_t, other)
-SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive)
+SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT, enum sctp_event_timeout, timeout)
+SCTP_SUBTYPE_CONSTRUCTOR(OTHER, enum sctp_event_other, other)
+SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, enum sctp_event_primitive, primitive)
#define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA)
@@ -155,8 +153,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive)
- sizeof(struct sctp_data_chunk)))
/* Internal error codes */
-typedef enum {
-
+enum sctp_ierror {
SCTP_IERROR_NO_ERROR = 0,
SCTP_IERROR_BASE = 1000,
SCTP_IERROR_NO_COOKIE,
@@ -177,12 +174,12 @@ typedef enum {
SCTP_IERROR_PROTO_VIOLATION,
SCTP_IERROR_ERROR,
SCTP_IERROR_ABORT,
-} sctp_ierror_t;
+};
/* SCTP state defines for internal state machine */
-typedef enum {
+enum sctp_state {
SCTP_STATE_CLOSED = 0,
SCTP_STATE_COOKIE_WAIT = 1,
@@ -193,7 +190,7 @@ typedef enum {
SCTP_STATE_SHUTDOWN_RECEIVED = 6,
SCTP_STATE_SHUTDOWN_ACK_SENT = 7,
-} sctp_state_t;
+};
#define SCTP_STATE_MAX SCTP_STATE_SHUTDOWN_ACK_SENT
#define SCTP_STATE_NUM_STATES (SCTP_STATE_MAX + 1)
@@ -214,19 +211,19 @@ typedef enum {
* - A socket in SCTP_SS_ESTABLISHED state indicates that it has a single
* association.
*/
-typedef enum {
+enum sctp_sock_state {
SCTP_SS_CLOSED = TCP_CLOSE,
SCTP_SS_LISTENING = TCP_LISTEN,
SCTP_SS_ESTABLISHING = TCP_SYN_SENT,
SCTP_SS_ESTABLISHED = TCP_ESTABLISHED,
SCTP_SS_CLOSING = TCP_CLOSE_WAIT,
-} sctp_sock_state_t;
+};
/* These functions map various type to printable names. */
-const char *sctp_cname(const sctp_subtype_t); /* chunk types */
-const char *sctp_oname(const sctp_subtype_t); /* other events */
-const char *sctp_tname(const sctp_subtype_t); /* timeouts */
-const char *sctp_pname(const sctp_subtype_t); /* primitives */
+const char *sctp_cname(const union sctp_subtype id); /* chunk types */
+const char *sctp_oname(const union sctp_subtype id); /* other events */
+const char *sctp_tname(const union sctp_subtype id); /* timeouts */
+const char *sctp_pname(const union sctp_subtype id); /* primitives */
/* This is a table of printable names of sctp_state_t's. */
extern const char *const sctp_state_tbl[];
@@ -312,19 +309,19 @@ enum { SCTP_MAX_GABS = 16 };
/* These return values describe the success or failure of a number of
* routines which form the lower interface to SCTP_outqueue.
*/
-typedef enum {
+enum sctp_xmit {
SCTP_XMIT_OK,
SCTP_XMIT_PMTU_FULL,
SCTP_XMIT_RWND_FULL,
SCTP_XMIT_DELAY,
-} sctp_xmit_t;
+};
/* These are the commands for manipulating transports. */
-typedef enum {
+enum sctp_transport_cmd {
SCTP_TRANSPORT_UP,
SCTP_TRANSPORT_DOWN,
SCTP_TRANSPORT_PF,
-} sctp_transport_cmd_t;
+};
/* These are the address scopes defined mainly for IPv4 addresses
* based on draft of SCTP IPv4 scoping <draft-stewart-tsvwg-sctp-ipv4-00.txt>.
@@ -333,20 +330,22 @@ typedef enum {
* At this point, the IPv6 scopes will be mapped to these internal scopes
* as much as possible.
*/
-typedef enum {
+enum sctp_scope {
SCTP_SCOPE_GLOBAL, /* IPv4 global addresses */
SCTP_SCOPE_PRIVATE, /* IPv4 private addresses */
SCTP_SCOPE_LINK, /* IPv4 link local address */
SCTP_SCOPE_LOOPBACK, /* IPv4 loopback address */
SCTP_SCOPE_UNUSABLE, /* IPv4 unusable addresses */
-} sctp_scope_t;
+};
-typedef enum {
+enum {
SCTP_SCOPE_POLICY_DISABLE, /* Disable IPv4 address scoping */
SCTP_SCOPE_POLICY_ENABLE, /* Enable IPv4 address scoping */
SCTP_SCOPE_POLICY_PRIVATE, /* Follow draft but allow IPv4 private addresses */
SCTP_SCOPE_POLICY_LINK, /* Follow draft but allow IPv4 link local addresses */
-} sctp_scope_policy_t;
+};
+
+#define SCTP_SCOPE_POLICY_MAX SCTP_SCOPE_POLICY_LINK
/* Based on IPv4 scoping <draft-stewart-tsvwg-sctp-ipv4-00.txt>,
* SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 198.18.0.0/24,
@@ -370,20 +369,20 @@ typedef enum {
peer */
/* Reasons to retransmit. */
-typedef enum {
+enum sctp_retransmit_reason {
SCTP_RTXR_T3_RTX,
SCTP_RTXR_FAST_RTX,
SCTP_RTXR_PMTUD,
SCTP_RTXR_T1_RTX,
-} sctp_retransmit_reason_t;
+};
/* Reasons to lower cwnd. */
-typedef enum {
+enum sctp_lower_cwnd {
SCTP_LOWER_CWND_T3_RTX,
SCTP_LOWER_CWND_FAST_RTX,
SCTP_LOWER_CWND_ECNE,
SCTP_LOWER_CWND_INACTIVE,
-} sctp_lower_cwnd_t;
+};
/* SCTP-AUTH Necessary constants */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 45fd4c6..06b4f51 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -94,8 +94,8 @@
/*
* sctp/protocol.c
*/
-int sctp_copy_local_addr_list(struct net *, struct sctp_bind_addr *,
- sctp_scope_t, gfp_t gfp, int flags);
+int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *addr,
+ enum sctp_scope, gfp_t gfp, int flags);
struct sctp_pf *sctp_get_pf_specific(sa_family_t family);
int sctp_register_pf(struct sctp_pf *, sa_family_t);
void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int);
@@ -479,13 +479,13 @@ for (pos.v = chunk->member;\
_sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
#define _sctp_walk_errors(err, chunk_hdr, end)\
-for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
+for (err = (struct sctp_errhdr *)((void *)chunk_hdr + \
sizeof(struct sctp_chunkhdr));\
- ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\
+ ((void *)err + offsetof(struct sctp_errhdr, length) + sizeof(err->length) <=\
(void *)chunk_hdr + end) &&\
(void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
- ntohs(err->length) >= sizeof(sctp_errhdr_t); \
- err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
+ ntohs(err->length) >= sizeof(struct sctp_errhdr); \
+ err = (struct sctp_errhdr *)((void *)err + SCTP_PAD4(ntohs(err->length))))
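
For context, a minimal sketch of how a consumer might walk the error causes
of a received ERROR chunk with the reworked macro; the helper name and the
pr_debug() reporting are illustrative, not part of this patch:

	static void log_error_causes(struct sctp_chunkhdr *ch)
	{
		struct sctp_errhdr *err;

		sctp_walk_errors(err, ch)
			pr_debug("cause 0x%x, length %u\n",
				 ntohs(err->cause), ntohs(err->length));
	}
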
#define sctp_walk_fwdtsn(pos, chunk)\
_sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk))
@@ -550,7 +550,8 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport)
/* Is a socket of this style? */
#define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style))
-static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style)
+static inline int __sctp_style(const struct sock *sk,
+ enum sctp_socket_type style)
{
return sctp_sk(sk)->type == style;
}
@@ -558,14 +559,15 @@ static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style)
/* Is the association in this state? */
#define sctp_state(asoc, state) __sctp_state((asoc), (SCTP_STATE_##state))
static inline int __sctp_state(const struct sctp_association *asoc,
- sctp_state_t state)
+ enum sctp_state state)
{
return asoc->state == state;
}
/* Is the socket in this state? */
#define sctp_sstate(sk, state) __sctp_sstate((sk), (SCTP_SS_##state))
-static inline int __sctp_sstate(const struct sock *sk, sctp_sock_state_t state)
+static inline int __sctp_sstate(const struct sock *sk,
+ enum sctp_sock_state state)
{
return sk->sk_state == state;
}
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 860f378..2db3d3a 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -53,7 +53,7 @@
/*
* Possible values for the disposition are:
*/
-typedef enum {
+enum sctp_disposition {
SCTP_DISPOSITION_DISCARD, /* No further processing. */
SCTP_DISPOSITION_CONSUME, /* Process return values normally. */
SCTP_DISPOSITION_NOMEM, /* We ran out of memory--recover. */
@@ -63,24 +63,20 @@ typedef enum {
SCTP_DISPOSITION_NOT_IMPL, /* This entry is not implemented. */
SCTP_DISPOSITION_ERROR, /* This is plain old user error. */
SCTP_DISPOSITION_BUG, /* This is a bug. */
-} sctp_disposition_t;
-
-typedef struct {
- int name;
- int action;
-} sctp_sm_command_t;
-
-typedef sctp_disposition_t (sctp_state_fn_t) (struct net *,
- const struct sctp_endpoint *,
- const struct sctp_association *,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *);
+};
+
+typedef enum sctp_disposition (sctp_state_fn_t) (
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands);
typedef void (sctp_timer_event_t) (unsigned long);
-typedef struct {
+struct sctp_sm_table_entry {
sctp_state_fn_t *fn;
const char *name;
-} sctp_sm_table_entry_t;
+};
/* A naming convention of "sctp_sf_xxx" applies to all the state functions
* currently in use.
@@ -175,10 +171,11 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire;
/* Prototypes for utility support functions. */
__u8 sctp_get_chunk_type(struct sctp_chunk *chunk);
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *,
- sctp_event_t,
- sctp_state_t,
- sctp_subtype_t);
+const struct sctp_sm_table_entry *sctp_sm_lookup_event(
+ struct net *net,
+ enum sctp_event event_type,
+ enum sctp_state state,
+ union sctp_subtype event_subtype);
int sctp_chunk_iif(const struct sctp_chunk *);
struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *,
struct sctp_chunk *,
@@ -187,68 +184,69 @@ __u32 sctp_generate_verification_tag(void);
void sctp_populate_tie_tags(__u8 *cookie, __u32 curTag, __u32 hisTag);
/* Prototypes for chunk-building functions. */
-struct sctp_chunk *sctp_make_init(const struct sctp_association *,
- const struct sctp_bind_addr *,
- gfp_t gfp, int vparam_len);
-struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *,
- const struct sctp_chunk *,
- const gfp_t gfp,
- const int unkparam_len);
-struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *,
- const struct sctp_chunk *);
-struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *,
- const struct sctp_chunk *);
-struct sctp_chunk *sctp_make_cwr(const struct sctp_association *,
+struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
+ const struct sctp_bind_addr *bp,
+ gfp_t gfp, int vparam_len);
+struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ const gfp_t gfp, const int unkparam_len);
+struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk);
+struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk);
+struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
const __u32 lowest_tsn,
- const struct sctp_chunk *);
-struct sctp_chunk * sctp_make_datafrag_empty(struct sctp_association *,
- const struct sctp_sndrcvinfo *sinfo,
- int len, const __u8 flags,
- __u16 ssn, gfp_t gfp);
-struct sctp_chunk *sctp_make_ecne(const struct sctp_association *,
- const __u32);
-struct sctp_chunk *sctp_make_sack(const struct sctp_association *);
+ const struct sctp_chunk *chunk);
+struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
+ const struct sctp_sndrcvinfo *sinfo,
+ int len, const __u8 flags,
+ __u16 ssn, gfp_t gfp);
+struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
+ const __u32 lowest_tsn);
+struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc);
struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
const struct sctp_chunk *chunk);
struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
- const struct sctp_chunk *);
-struct sctp_chunk *sctp_make_shutdown_complete(const struct sctp_association *,
- const struct sctp_chunk *);
-void sctp_init_cause(struct sctp_chunk *, __be16 cause, size_t);
-struct sctp_chunk *sctp_make_abort(const struct sctp_association *,
- const struct sctp_chunk *,
- const size_t hint);
-struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *,
- const struct sctp_chunk *,
- __u32 tsn);
-struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *,
- struct msghdr *, size_t msg_len);
-struct sctp_chunk *sctp_make_abort_violation(const struct sctp_association *,
- const struct sctp_chunk *,
- const __u8 *,
- const size_t );
-struct sctp_chunk *sctp_make_violation_paramlen(const struct sctp_association *,
- const struct sctp_chunk *,
- struct sctp_paramhdr *);
-struct sctp_chunk *sctp_make_violation_max_retrans(const struct sctp_association *,
- const struct sctp_chunk *);
-struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *,
- const struct sctp_transport *);
-struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *,
- const struct sctp_chunk *,
- const void *payload,
- const size_t paylen);
-struct sctp_chunk *sctp_make_op_error(const struct sctp_association *,
- const struct sctp_chunk *chunk,
- __be16 cause_code,
- const void *payload,
- size_t paylen,
- size_t reserve_tail);
-
-struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *,
- union sctp_addr *,
- struct sockaddr *,
- int, __be16);
+ const struct sctp_chunk *chunk);
+struct sctp_chunk *sctp_make_shutdown_complete(
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk);
+void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen);
+struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ const size_t hint);
+struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ __u32 tsn);
+struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc,
+ struct msghdr *msg, size_t msg_len);
+struct sctp_chunk *sctp_make_abort_violation(
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ const __u8 *payload,
+ const size_t paylen);
+struct sctp_chunk *sctp_make_violation_paramlen(
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ struct sctp_paramhdr *param);
+struct sctp_chunk *sctp_make_violation_max_retrans(
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk);
+struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
+ const struct sctp_transport *transport);
+struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ const void *payload,
+ const size_t paylen);
+struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ __be16 cause_code, const void *payload,
+ size_t paylen, size_t reserve_tail);
+
+struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
+ union sctp_addr *laddr,
+ struct sockaddr *addrs,
+ int addrcnt, __be16 flags);
struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
union sctp_addr *addr);
bool sctp_verify_asconf(const struct sctp_association *asoc,
@@ -262,27 +260,25 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
__u32 new_cum_tsn, size_t nstreams,
struct sctp_fwdtsn_skip *skiplist);
struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc);
-struct sctp_chunk *sctp_make_strreset_req(
- const struct sctp_association *asoc,
- __u16 stream_num, __u16 *stream_list,
- bool out, bool in);
+struct sctp_chunk *sctp_make_strreset_req(const struct sctp_association *asoc,
+ __u16 stream_num, __u16 *stream_list,
+ bool out, bool in);
struct sctp_chunk *sctp_make_strreset_tsnreq(
- const struct sctp_association *asoc);
+ const struct sctp_association *asoc);
struct sctp_chunk *sctp_make_strreset_addstrm(
- const struct sctp_association *asoc,
- __u16 out, __u16 in);
-struct sctp_chunk *sctp_make_strreset_resp(
- const struct sctp_association *asoc,
- __u32 result, __u32 sn);
-struct sctp_chunk *sctp_make_strreset_tsnresp(
- struct sctp_association *asoc,
- __u32 result, __u32 sn,
- __u32 sender_tsn, __u32 receiver_tsn);
+ const struct sctp_association *asoc,
+ __u16 out, __u16 in);
+struct sctp_chunk *sctp_make_strreset_resp(const struct sctp_association *asoc,
+ __u32 result, __u32 sn);
+struct sctp_chunk *sctp_make_strreset_tsnresp(struct sctp_association *asoc,
+ __u32 result, __u32 sn,
+ __u32 sender_tsn,
+ __u32 receiver_tsn);
bool sctp_verify_reconf(const struct sctp_association *asoc,
struct sctp_chunk *chunk,
struct sctp_paramhdr **errp);
-void sctp_chunk_assign_tsn(struct sctp_chunk *);
-void sctp_chunk_assign_ssn(struct sctp_chunk *);
+void sctp_chunk_assign_tsn(struct sctp_chunk *chunk);
+void sctp_chunk_assign_ssn(struct sctp_chunk *chunk);
/* Prototypes for stream-processing functions. */
struct sctp_chunk *sctp_process_strreset_outreq(
@@ -312,12 +308,10 @@ struct sctp_chunk *sctp_process_strreset_resp(
/* Prototypes for statetable processing. */
-int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
- sctp_state_t state,
- struct sctp_endpoint *,
- struct sctp_association *asoc,
- void *event_arg,
- gfp_t gfp);
+int sctp_do_sm(struct net *net, enum sctp_event event_type,
+ union sctp_subtype subtype, enum sctp_state state,
+ struct sctp_endpoint *ep, struct sctp_association *asoc,
+ void *event_arg, gfp_t gfp);
/* 2nd level prototypes */
void sctp_generate_t3_rtx_event(unsigned long peer);
@@ -327,11 +321,12 @@ void sctp_generate_proto_unreach_event(unsigned long peer);
void sctp_ootb_pkt_free(struct sctp_packet *packet);
-struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- struct sctp_chunk *chunk,
- gfp_t gfp, int *err,
- struct sctp_chunk **err_chk_p);
+struct sctp_association *sctp_unpack_cookie(
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ struct sctp_chunk *chunk,
+ gfp_t gfp, int *err,
+ struct sctp_chunk **err_chk_p);
/* 3rd level prototypes */
__u32 sctp_generate_tag(const struct sctp_endpoint *ep);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 5ab29af..0477945 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -150,18 +150,18 @@ extern struct sctp_globals {
#define sctp_checksum_disable (sctp_globals.checksum_disable)
/* SCTP Socket type: UDP or TCP style. */
-typedef enum {
+enum sctp_socket_type {
SCTP_SOCKET_UDP = 0,
SCTP_SOCKET_UDP_HIGH_BANDWIDTH,
SCTP_SOCKET_TCP
-} sctp_socket_type_t;
+};
/* Per socket SCTP information. */
struct sctp_sock {
/* inet_sock has to be the first member of sctp_sock */
struct inet_sock inet;
/* What kind of a socket is this? */
- sctp_socket_type_t type;
+ enum sctp_socket_type type;
/* PF_ family specific functions. */
struct sctp_pf *pf;
@@ -371,12 +371,12 @@ union sctp_params {
* chunk is sent and the destination transport address to which this
* HEARTBEAT is sent (see Section 8.3).
*/
-typedef struct sctp_sender_hb_info {
+struct sctp_sender_hb_info {
struct sctp_paramhdr param_hdr;
union sctp_addr daddr;
unsigned long sent_at;
__u64 hb_nonce;
-} sctp_sender_hb_info_t;
+};
int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
gfp_t gfp);
@@ -449,7 +449,7 @@ struct sctp_af {
int (*addr_valid) (union sctp_addr *,
struct sctp_sock *,
const struct sk_buff *);
- sctp_scope_t (*scope) (union sctp_addr *);
+ enum sctp_scope (*scope)(union sctp_addr *);
void (*inaddr_any) (union sctp_addr *, __be16);
int (*is_any) (const union sctp_addr *);
int (*available) (union sctp_addr *,
@@ -657,8 +657,6 @@ struct sctp_sockaddr_entry {
#define SCTP_ADDRESS_TICK_DELAY 500
-typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *);
-
/* This structure holds lists of chunks as we are assembling for
* transmission.
*/
@@ -697,10 +695,11 @@ struct sctp_packet {
void sctp_packet_init(struct sctp_packet *, struct sctp_transport *,
__u16 sport, __u16 dport);
void sctp_packet_config(struct sctp_packet *, __u32 vtag, int);
-sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *,
- struct sctp_chunk *, int, gfp_t);
-sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *,
- struct sctp_chunk *);
+enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
+ struct sctp_chunk *chunk,
+ int one_packet, gfp_t gfp);
+enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
+ struct sctp_chunk *chunk);
int sctp_packet_transmit(struct sctp_packet *, gfp_t);
void sctp_packet_free(struct sctp_packet *);
@@ -950,7 +949,8 @@ int sctp_transport_hold(struct sctp_transport *);
void sctp_transport_put(struct sctp_transport *);
void sctp_transport_update_rto(struct sctp_transport *, __u32);
void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32);
-void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t);
+void sctp_transport_lower_cwnd(struct sctp_transport *t,
+ enum sctp_lower_cwnd reason);
void sctp_transport_burst_limited(struct sctp_transport *);
void sctp_transport_burst_reset(struct sctp_transport *);
unsigned long sctp_transport_timeout(struct sctp_transport *);
@@ -1053,8 +1053,8 @@ int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *);
int sctp_outq_is_empty(const struct sctp_outq *);
void sctp_outq_restart(struct sctp_outq *);
-void sctp_retransmit(struct sctp_outq *, struct sctp_transport *,
- sctp_retransmit_reason_t);
+void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
+ enum sctp_retransmit_reason reason);
void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
void sctp_prsctp_prune(struct sctp_association *asoc,
@@ -1110,7 +1110,7 @@ void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port);
void sctp_bind_addr_free(struct sctp_bind_addr *);
int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
const struct sctp_bind_addr *src,
- sctp_scope_t scope, gfp_t gfp,
+ enum sctp_scope scope, gfp_t gfp,
int flags);
int sctp_bind_addr_dup(struct sctp_bind_addr *dest,
const struct sctp_bind_addr *src,
@@ -1134,17 +1134,18 @@ union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp,
int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len,
__u16 port, gfp_t gfp);
-sctp_scope_t sctp_scope(const union sctp_addr *);
-int sctp_in_scope(struct net *net, const union sctp_addr *addr, const sctp_scope_t scope);
+enum sctp_scope sctp_scope(const union sctp_addr *addr);
+int sctp_in_scope(struct net *net, const union sctp_addr *addr,
+ const enum sctp_scope scope);
int sctp_is_any(struct sock *sk, const union sctp_addr *addr);
int sctp_is_ep_boundall(struct sock *sk);
/* What type of endpoint? */
-typedef enum {
+enum sctp_endpoint_type {
SCTP_EP_TYPE_SOCKET,
SCTP_EP_TYPE_ASSOCIATION,
-} sctp_endpoint_type_t;
+};
/*
* A common base class to bridge the implementation view of a
@@ -1168,7 +1169,7 @@ struct sctp_ep_common {
int hashent;
/* Runtime type information. What kind of endpoint is this? */
- sctp_endpoint_type_t type;
+ enum sctp_endpoint_type type;
/* Some fields to help us manage this object.
* refcnt - Reference count access to this object.
@@ -1556,9 +1557,9 @@ struct sctp_association {
* and authenticated chunk list. All that is part of the
* cookie and these are just pointers to those locations
*/
- sctp_random_param_t *peer_random;
- sctp_chunks_param_t *peer_chunks;
- sctp_hmac_algo_param_t *peer_hmacs;
+ struct sctp_random_param *peer_random;
+ struct sctp_chunks_param *peer_chunks;
+ struct sctp_hmac_algo_param *peer_hmacs;
} peer;
/* State : A state variable indicating what state the
@@ -1574,7 +1575,7 @@ struct sctp_association {
*
* State takes values from SCTP_STATE_*.
*/
- sctp_state_t state;
+ enum sctp_state state;
/* Overall : The overall association error count.
* Error Count : [Clear this any time I get something.]
@@ -1924,8 +1925,8 @@ static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base)
struct sctp_association *
-sctp_association_new(const struct sctp_endpoint *, const struct sock *,
- sctp_scope_t scope, gfp_t gfp);
+sctp_association_new(const struct sctp_endpoint *ep, const struct sock *sk,
+ enum sctp_scope scope, gfp_t gfp);
void sctp_association_free(struct sctp_association *);
void sctp_association_put(struct sctp_association *);
void sctp_association_hold(struct sctp_association *);
@@ -1945,9 +1946,10 @@ void sctp_assoc_del_peer(struct sctp_association *asoc,
const union sctp_addr *addr);
void sctp_assoc_rm_peer(struct sctp_association *asoc,
struct sctp_transport *peer);
-void sctp_assoc_control_transport(struct sctp_association *,
- struct sctp_transport *,
- sctp_transport_cmd_t, sctp_sn_error_t);
+void sctp_assoc_control_transport(struct sctp_association *asoc,
+ struct sctp_transport *transport,
+ enum sctp_transport_cmd command,
+ sctp_sn_error_t error);
struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32);
struct sctp_transport *sctp_assoc_is_match(struct sctp_association *,
struct net *,
@@ -1966,8 +1968,8 @@ void sctp_assoc_set_primary(struct sctp_association *,
struct sctp_transport *);
void sctp_assoc_del_nonprimary_peers(struct sctp_association *,
struct sctp_transport *);
-int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *,
- sctp_scope_t, gfp_t);
+int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
+ enum sctp_scope scope, gfp_t gfp);
int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *,
struct sctp_cookie*,
gfp_t gfp);
@@ -1983,16 +1985,16 @@ int sctp_cmp_addr_exact(const union sctp_addr *ss1,
struct sctp_chunk *sctp_get_ecne_prepend(struct sctp_association *asoc);
/* A convenience structure to parse out SCTP specific CMSGs. */
-typedef struct sctp_cmsgs {
+struct sctp_cmsgs {
struct sctp_initmsg *init;
struct sctp_sndrcvinfo *srinfo;
struct sctp_sndinfo *sinfo;
-} sctp_cmsgs_t;
+};
/* Structure for tracking memory objects */
-typedef struct {
+struct sctp_dbg_objcnt_entry {
char *label;
atomic_t *counter;
-} sctp_dbg_objcnt_entry_t;
+};
#endif /* __sctp_structs_h__ */
diff --git a/include/net/seg6.h b/include/net/seg6.h
index 4e03575..099bad5 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -56,7 +56,12 @@ extern int seg6_init(void);
extern void seg6_exit(void);
extern int seg6_iptunnel_init(void);
extern void seg6_iptunnel_exit(void);
+extern int seg6_local_init(void);
+extern void seg6_local_exit(void);
extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
+extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ int proto);
+extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index aeeec62..03a3625 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -294,6 +294,7 @@ struct sock_common {
* @sk_stamp: time stamp of last packet received
* @sk_tsflags: SO_TIMESTAMPING socket options
* @sk_tskey: counter to disambiguate concurrent tstamp requests
+ * @sk_zckey: counter to order MSG_ZEROCOPY notifications
* @sk_socket: Identd and reporting IO signals
* @sk_user_data: RPC layer private data
* @sk_frag: cached page frag
@@ -462,6 +463,7 @@ struct sock {
u16 sk_tsflags;
u8 sk_shutdown;
u32 sk_tskey;
+ atomic_t sk_zckey;
struct socket *sk_socket;
void *sk_user_data;
#ifdef CONFIG_SECURITY
@@ -1529,6 +1531,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
gfp_t priority);
void __sock_wfree(struct sk_buff *skb);
void sock_wfree(struct sk_buff *skb);
+struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+ gfp_t priority);
void skb_orphan_partial(struct sk_buff *skb);
void sock_rfree(struct sk_buff *skb);
void sock_efree(struct sk_buff *skb);
@@ -1580,11 +1584,14 @@ int sock_no_shutdown(struct socket *, int);
int sock_no_getsockopt(struct socket *, int , int, char __user *, int __user *);
int sock_no_setsockopt(struct socket *, int, int, char __user *, unsigned int);
int sock_no_sendmsg(struct socket *, struct msghdr *, size_t);
+int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len);
int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int);
int sock_no_mmap(struct file *file, struct socket *sock,
struct vm_area_struct *vma);
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
+ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
/*
* Functions to fill in entries in struct proto_ops when a protocol
@@ -2361,6 +2368,16 @@ bool sk_net_capable(const struct sock *sk, int cap);
void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
+/* Account for the struct sk_buff overhead when deriving these values,
+ * since that overhead is not constant across platforms. This keeps
+ * socket queueing behavior and performance independent of such
+ * differences.
+ */
+#define _SK_MEM_PACKETS 256
+#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
+#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
+
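
For illustration of the arithmetic only (actual sizes vary by architecture
and config): if SKB_TRUESIZE(256) works out to 256 bytes of payload plus
roughly 256 bytes of cache-aligned struct sk_buff and 320 bytes of aligned
struct skb_shared_info, then SK_WMEM_MAX = SK_RMEM_MAX = 832 * 256 =
212992 bytes, i.e. about 208 KiB per socket by default.
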
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
diff --git a/include/net/strparser.h b/include/net/strparser.h
index 0c28ad9..7dc131d 100644
--- a/include/net/strparser.h
+++ b/include/net/strparser.h
@@ -18,26 +18,26 @@
#define STRP_STATS_INCR(stat) ((stat)++)
struct strp_stats {
- unsigned long long rx_msgs;
- unsigned long long rx_bytes;
- unsigned int rx_mem_fail;
- unsigned int rx_need_more_hdr;
- unsigned int rx_msg_too_big;
- unsigned int rx_msg_timeouts;
- unsigned int rx_bad_hdr_len;
+ unsigned long long msgs;
+ unsigned long long bytes;
+ unsigned int mem_fail;
+ unsigned int need_more_hdr;
+ unsigned int msg_too_big;
+ unsigned int msg_timeouts;
+ unsigned int bad_hdr_len;
};
struct strp_aggr_stats {
- unsigned long long rx_msgs;
- unsigned long long rx_bytes;
- unsigned int rx_mem_fail;
- unsigned int rx_need_more_hdr;
- unsigned int rx_msg_too_big;
- unsigned int rx_msg_timeouts;
- unsigned int rx_bad_hdr_len;
- unsigned int rx_aborts;
- unsigned int rx_interrupted;
- unsigned int rx_unrecov_intr;
+ unsigned long long msgs;
+ unsigned long long bytes;
+ unsigned int mem_fail;
+ unsigned int need_more_hdr;
+ unsigned int msg_too_big;
+ unsigned int msg_timeouts;
+ unsigned int bad_hdr_len;
+ unsigned int aborts;
+ unsigned int interrupted;
+ unsigned int unrecov_intr;
};
struct strparser;
@@ -48,16 +48,18 @@ struct strp_callbacks {
void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
int (*read_sock_done)(struct strparser *strp, int err);
void (*abort_parser)(struct strparser *strp, int err);
+ void (*lock)(struct strparser *strp);
+ void (*unlock)(struct strparser *strp);
};
-struct strp_rx_msg {
+struct strp_msg {
int full_len;
int offset;
};
-static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb)
+static inline struct strp_msg *strp_msg(struct sk_buff *skb)
{
- return (struct strp_rx_msg *)((void *)skb->cb +
+ return (struct strp_msg *)((void *)skb->cb +
offsetof(struct qdisc_skb_cb, data));
}
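
A minimal sketch of a rcv_msg callback using the renamed helper; the
function name is hypothetical, and the skb is assumed to be consumed by the
callback, as the strparser contract requires:

	static void my_rcv_msg(struct strparser *strp, struct sk_buff *skb)
	{
		struct strp_msg *msg = strp_msg(skb);

		pr_debug("msg of %d bytes at offset %d\n",
			 msg->full_len, msg->offset);
		kfree_skb(skb);	/* consume; a real user would queue it */
	}
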
@@ -65,18 +67,18 @@ static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb)
struct strparser {
struct sock *sk;
- u32 rx_stopped : 1;
- u32 rx_paused : 1;
- u32 rx_aborted : 1;
- u32 rx_interrupted : 1;
- u32 rx_unrecov_intr : 1;
-
- struct sk_buff **rx_skb_nextp;
- struct timer_list rx_msg_timer;
- struct sk_buff *rx_skb_head;
- unsigned int rx_need_bytes;
- struct delayed_work rx_delayed_work;
- struct work_struct rx_work;
+ u32 stopped : 1;
+ u32 paused : 1;
+ u32 aborted : 1;
+ u32 interrupted : 1;
+ u32 unrecov_intr : 1;
+
+ struct sk_buff **skb_nextp;
+ struct timer_list msg_timer;
+ struct sk_buff *skb_head;
+ unsigned int need_bytes;
+ struct delayed_work delayed_work;
+ struct work_struct work;
struct strp_stats stats;
struct strp_callbacks cb;
};
@@ -84,7 +86,7 @@ struct strparser {
/* Must be called with lock held for attached socket */
static inline void strp_pause(struct strparser *strp)
{
- strp->rx_paused = 1;
+ strp->paused = 1;
}
/* May be called without holding lock for attached socket */
@@ -97,37 +99,37 @@ static inline void save_strp_stats(struct strparser *strp,
#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += \
strp->stats._stat)
- SAVE_PSOCK_STATS(rx_msgs);
- SAVE_PSOCK_STATS(rx_bytes);
- SAVE_PSOCK_STATS(rx_mem_fail);
- SAVE_PSOCK_STATS(rx_need_more_hdr);
- SAVE_PSOCK_STATS(rx_msg_too_big);
- SAVE_PSOCK_STATS(rx_msg_timeouts);
- SAVE_PSOCK_STATS(rx_bad_hdr_len);
+ SAVE_PSOCK_STATS(msgs);
+ SAVE_PSOCK_STATS(bytes);
+ SAVE_PSOCK_STATS(mem_fail);
+ SAVE_PSOCK_STATS(need_more_hdr);
+ SAVE_PSOCK_STATS(msg_too_big);
+ SAVE_PSOCK_STATS(msg_timeouts);
+ SAVE_PSOCK_STATS(bad_hdr_len);
#undef SAVE_PSOCK_STATS
- if (strp->rx_aborted)
- agg_stats->rx_aborts++;
- if (strp->rx_interrupted)
- agg_stats->rx_interrupted++;
- if (strp->rx_unrecov_intr)
- agg_stats->rx_unrecov_intr++;
+ if (strp->aborted)
+ agg_stats->aborts++;
+ if (strp->interrupted)
+ agg_stats->interrupted++;
+ if (strp->unrecov_intr)
+ agg_stats->unrecov_intr++;
}
static inline void aggregate_strp_stats(struct strp_aggr_stats *stats,
struct strp_aggr_stats *agg_stats)
{
#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat)
- SAVE_PSOCK_STATS(rx_msgs);
- SAVE_PSOCK_STATS(rx_bytes);
- SAVE_PSOCK_STATS(rx_mem_fail);
- SAVE_PSOCK_STATS(rx_need_more_hdr);
- SAVE_PSOCK_STATS(rx_msg_too_big);
- SAVE_PSOCK_STATS(rx_msg_timeouts);
- SAVE_PSOCK_STATS(rx_bad_hdr_len);
- SAVE_PSOCK_STATS(rx_aborts);
- SAVE_PSOCK_STATS(rx_interrupted);
- SAVE_PSOCK_STATS(rx_unrecov_intr);
+ SAVE_PSOCK_STATS(msgs);
+ SAVE_PSOCK_STATS(bytes);
+ SAVE_PSOCK_STATS(mem_fail);
+ SAVE_PSOCK_STATS(need_more_hdr);
+ SAVE_PSOCK_STATS(msg_too_big);
+ SAVE_PSOCK_STATS(msg_timeouts);
+ SAVE_PSOCK_STATS(bad_hdr_len);
+ SAVE_PSOCK_STATS(aborts);
+ SAVE_PSOCK_STATS(interrupted);
+ SAVE_PSOCK_STATS(unrecov_intr);
#undef SAVE_PSOCK_STATS
}
@@ -135,8 +137,11 @@ static inline void aggregate_strp_stats(struct strp_aggr_stats *stats,
void strp_done(struct strparser *strp);
void strp_stop(struct strparser *strp);
void strp_check_rcv(struct strparser *strp);
-int strp_init(struct strparser *strp, struct sock *csk,
- struct strp_callbacks *cb);
+int strp_init(struct strparser *strp, struct sock *sk,
+ const struct strp_callbacks *cb);
void strp_data_ready(struct strparser *strp);
+int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
+ unsigned int orig_offset, size_t orig_len,
+ size_t max_msg_size, long timeo);
#endif /* __NET_STRPARSER_H_ */
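
With the const-qualified callback table, a consumer can now keep its
callbacks in read-only data. A hedged setup sketch, reusing the
hypothetical my_rcv_msg above and assuming a my_parse_msg callback and a
caller-owned my_strp exist:

	static const struct strp_callbacks my_cbs = {
		.parse_msg = my_parse_msg,
		.rcv_msg   = my_rcv_msg,
	};

	/* sk is a connected TCP socket attached to this strparser */
	err = strp_init(&my_strp, sk, &my_cbs);
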
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 8ae9e3b..d767b79 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -74,7 +74,6 @@ struct switchdev_attr {
enum switchdev_obj_id {
SWITCHDEV_OBJ_ID_UNDEFINED,
SWITCHDEV_OBJ_ID_PORT_VLAN,
- SWITCHDEV_OBJ_ID_PORT_FDB,
SWITCHDEV_OBJ_ID_PORT_MDB,
};
@@ -97,17 +96,6 @@ struct switchdev_obj_port_vlan {
#define SWITCHDEV_OBJ_PORT_VLAN(obj) \
container_of(obj, struct switchdev_obj_port_vlan, obj)
-/* SWITCHDEV_OBJ_ID_PORT_FDB */
-struct switchdev_obj_port_fdb {
- struct switchdev_obj obj;
- unsigned char addr[ETH_ALEN];
- u16 vid;
- u16 ndm_state;
-};
-
-#define SWITCHDEV_OBJ_PORT_FDB(obj) \
- container_of(obj, struct switchdev_obj_port_fdb, obj)
-
/* SWITCHDEV_OBJ_ID_PORT_MDB */
struct switchdev_obj_port_mdb {
struct switchdev_obj obj;
@@ -135,8 +123,6 @@ typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
* @switchdev_port_obj_add: Add an object to port (see switchdev_obj_*).
*
* @switchdev_port_obj_del: Delete an object from port (see switchdev_obj_*).
- *
- * @switchdev_port_obj_dump: Dump port objects (see switchdev_obj_*).
*/
struct switchdev_ops {
int (*switchdev_port_attr_get)(struct net_device *dev,
@@ -149,9 +135,6 @@ struct switchdev_ops {
struct switchdev_trans *trans);
int (*switchdev_port_obj_del)(struct net_device *dev,
const struct switchdev_obj *obj);
- int (*switchdev_port_obj_dump)(struct net_device *dev,
- struct switchdev_obj *obj,
- switchdev_obj_dump_cb_t *cb);
};
enum switchdev_notifier_type {
@@ -189,28 +172,10 @@ int switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj);
int switchdev_port_obj_del(struct net_device *dev,
const struct switchdev_obj *obj);
-int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
- switchdev_obj_dump_cb_t *cb);
int register_switchdev_notifier(struct notifier_block *nb);
int unregister_switchdev_notifier(struct notifier_block *nb);
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info);
-int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
- struct net_device *dev, u32 filter_mask,
- int nlflags);
-int switchdev_port_bridge_setlink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags);
-int switchdev_port_bridge_dellink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags);
-int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev, const unsigned char *addr,
- u16 vid, u16 nlm_flags);
-int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev, const unsigned char *addr,
- u16 vid);
-int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
- struct net_device *dev,
- struct net_device *filter_dev, int *idx);
void switchdev_port_fwd_mark_set(struct net_device *dev,
struct net_device *group_dev,
bool joining);
@@ -249,13 +214,6 @@ static inline int switchdev_port_obj_del(struct net_device *dev,
return -EOPNOTSUPP;
}
-static inline int switchdev_port_obj_dump(struct net_device *dev,
- const struct switchdev_obj *obj,
- switchdev_obj_dump_cb_t *cb)
-{
- return -EOPNOTSUPP;
-}
-
static inline int register_switchdev_notifier(struct notifier_block *nb)
{
return 0;
@@ -273,51 +231,6 @@ static inline int call_switchdev_notifiers(unsigned long val,
return NOTIFY_DONE;
}
-static inline int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid,
- u32 seq, struct net_device *dev,
- u32 filter_mask, int nlflags)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int switchdev_port_bridge_setlink(struct net_device *dev,
- struct nlmsghdr *nlh,
- u16 flags)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int switchdev_port_bridge_dellink(struct net_device *dev,
- struct nlmsghdr *nlh,
- u16 flags)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr,
- u16 vid, u16 nlm_flags)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int switchdev_port_fdb_dump(struct sk_buff *skb,
- struct netlink_callback *cb,
- struct net_device *dev,
- struct net_device *filter_dev,
- int *idx)
-{
- return *idx;
-}
-
static inline bool switchdev_port_same_parent_id(struct net_device *a,
struct net_device *b)
{
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index d576374..41afe1c 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -15,7 +15,8 @@ struct tcf_gact {
};
#define to_gact(a) ((struct tcf_gact *)a)
-static inline bool __is_tcf_gact_act(const struct tc_action *a, int act)
+static inline bool __is_tcf_gact_act(const struct tc_action *a, int act,
+ bool is_ext)
{
#ifdef CONFIG_NET_CLS_ACT
struct tcf_gact *gact;
@@ -24,7 +25,8 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act)
return false;
gact = to_gact(a);
- if (gact->tcf_action == act)
+ if ((!is_ext && gact->tcf_action == act) ||
+ (is_ext && TC_ACT_EXT_CMP(gact->tcf_action, act)))
return true;
#endif
@@ -33,12 +35,22 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act)
static inline bool is_tcf_gact_shot(const struct tc_action *a)
{
- return __is_tcf_gact_act(a, TC_ACT_SHOT);
+ return __is_tcf_gact_act(a, TC_ACT_SHOT, false);
}
static inline bool is_tcf_gact_trap(const struct tc_action *a)
{
- return __is_tcf_gact_act(a, TC_ACT_TRAP);
+ return __is_tcf_gact_act(a, TC_ACT_TRAP, false);
+}
+
+static inline bool is_tcf_gact_goto_chain(const struct tc_action *a)
+{
+ return __is_tcf_gact_act(a, TC_ACT_GOTO_CHAIN, true);
+}
+
+static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a)
+{
+ return a->goto_chain->index;
}
#endif /* __NET_TC_GACT_H */
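
A hedged sketch of how a driver's offload path might use the two new
helpers when inspecting a filter's actions ('act' iteration elided):

	if (is_tcf_gact_goto_chain(act)) {
		u32 chain = tcf_gact_goto_chain_index(act);

		/* program the hardware to continue lookup in 'chain' */
	}
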
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f642a39..b510f28 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#endif
#define TCP_RTO_MAX ((unsigned)(120*HZ))
#define TCP_RTO_MIN ((unsigned)(HZ/5))
+#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */
#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */
#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now
* used as a fallback RTO for the
@@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
* for local resources.
*/
-#define TCP_REO_TIMEOUT_MIN (2000) /* Min RACK reordering timeout in usec */
-
#define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */
#define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */
#define TCP_KEEPALIVE_INTVL (75*HZ)
@@ -257,7 +256,6 @@ extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_frto;
-extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
@@ -352,8 +350,11 @@ int tcp_v4_rcv(struct sk_buff *skb);
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
+int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags);
ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
void tcp_release_cb(struct sock *sk);
@@ -363,7 +364,7 @@ void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct tcphdr *th, unsigned int len);
+ const struct tcphdr *th);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
void tcp_twsk_destructor(struct sock *sk);
@@ -796,6 +797,12 @@ struct tcp_skb_cb {
u16 tcp_gso_segs;
u16 tcp_gso_size;
};
+
+ /* Used to stash the receive timestamp while this skb is in the
+ * out of order queue, as skb->tstamp is overwritten by the
+ * rbnode.
+ */
+ ktime_t swtstamp;
};
__u8 tcp_flags; /* TCP header flags. (tcp[13]) */
@@ -812,7 +819,8 @@ struct tcp_skb_cb {
__u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
__u8 txstamp_ack:1, /* Record TX timestamp for ack? */
eor:1, /* Is skb MSG_EOR marked? */
- unused:6;
+ has_rxtstamp:1, /* SKB has a RX timestamp */
+ unused:5;
__u32 ack_seq; /* Sequence number ACK'd */
union {
struct {
@@ -849,6 +857,16 @@ static inline int tcp_v6_iif(const struct sk_buff *skb)
return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
}
+
+/* TCP_SKB_CB reference means this can not be used from early demux */
+static inline int tcp_v6_sdif(const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags))
+ return TCP_SKB_CB(skb)->header.h6.iif;
+#endif
+ return 0;
+}
#endif
/* TCP_SKB_CB reference means this can not be used from early demux */
@@ -862,6 +880,16 @@ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
return false;
}
+/* TCP_SKB_CB reference means this can not be used from early demux */
+static inline int tcp_v4_sdif(struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
+ return TCP_SKB_CB(skb)->header.h4.iif;
+#endif
+ return 0;
+}
+
/* Due to TSO, an SKB can be composed of multiple actual
* packets. To keep these tracked properly, we use this.
*/
@@ -1243,17 +1271,6 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb)
__tcp_checksum_complete(skb);
}
-/* Prequeue for VJ style copy to user, combined with checksumming. */
-
-static inline void tcp_prequeue_init(struct tcp_sock *tp)
-{
- tp->ucopy.task = NULL;
- tp->ucopy.len = 0;
- tp->ucopy.memory = 0;
- skb_queue_head_init(&tp->ucopy.prequeue);
-}
-
-bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
int tcp_filter(struct sock *sk, struct sk_buff *skb);
@@ -1545,8 +1562,7 @@ int tcp_fastopen_reset_cipher(void *key, unsigned int len);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct tcp_fastopen_cookie *foc,
- struct dst_entry *dst);
+ struct tcp_fastopen_cookie *foc);
void tcp_fastopen_init_key_once(bool publish);
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie);
@@ -1927,7 +1943,8 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk)
/*
* Save and compile IPv4 options, return a pointer to it
*/
-static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
+static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net,
+ struct sk_buff *skb)
{
const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
struct ip_options_rcu *dopt = NULL;
@@ -1936,7 +1953,7 @@ static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
int opt_size = sizeof(*dopt) + opt->optlen;
dopt = kmalloc(opt_size, GFP_ATOMIC);
- if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) {
+ if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) {
kfree(dopt);
dopt = NULL;
}
diff --git a/include/net/tso.h b/include/net/tso.h
index b7be852..9a56c39 100644
--- a/include/net/tso.h
+++ b/include/net/tso.h
@@ -3,6 +3,8 @@
#include <net/ip.h>
+#define TSO_HEADER_SIZE 128
+
struct tso_t {
int next_frag_idx;
void *data;
diff --git a/include/net/tun_proto.h b/include/net/tun_proto.h
new file mode 100644
index 0000000..2ea3deb
--- /dev/null
+++ b/include/net/tun_proto.h
@@ -0,0 +1,49 @@
+#ifndef __NET_TUN_PROTO_H
+#define __NET_TUN_PROTO_H
+
+#include <linux/kernel.h>
+
+/* One byte protocol values as defined by VXLAN-GPE and NSH. These will
+ * hopefully get a shared IANA registry.
+ */
+#define TUN_P_IPV4 0x01
+#define TUN_P_IPV6 0x02
+#define TUN_P_ETHERNET 0x03
+#define TUN_P_NSH 0x04
+#define TUN_P_MPLS_UC 0x05
+
+static inline __be16 tun_p_to_eth_p(u8 proto)
+{
+ switch (proto) {
+ case TUN_P_IPV4:
+ return htons(ETH_P_IP);
+ case TUN_P_IPV6:
+ return htons(ETH_P_IPV6);
+ case TUN_P_ETHERNET:
+ return htons(ETH_P_TEB);
+ case TUN_P_NSH:
+ return htons(ETH_P_NSH);
+ case TUN_P_MPLS_UC:
+ return htons(ETH_P_MPLS_UC);
+ }
+ return 0;
+}
+
+static inline u8 tun_p_from_eth_p(__be16 proto)
+{
+ switch (proto) {
+ case htons(ETH_P_IP):
+ return TUN_P_IPV4;
+ case htons(ETH_P_IPV6):
+ return TUN_P_IPV6;
+ case htons(ETH_P_TEB):
+ return TUN_P_ETHERNET;
+ case htons(ETH_P_NSH):
+ return TUN_P_NSH;
+ case htons(ETH_P_MPLS_UC):
+ return TUN_P_MPLS_UC;
+ }
+ return 0;
+}
+
+#endif
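
A usage sketch for the shared helpers, assuming a hypothetical receive
path that has already pulled the one-byte next-protocol value 'np' out of
a VXLAN-GPE or NSH header:

	static int set_inner_protocol(struct sk_buff *skb, u8 np)
	{
		__be16 proto = tun_p_to_eth_p(np);

		if (!proto)	/* value outside the shared registry */
			return -EPFNOSUPPORT;
		skb->protocol = proto;
		return 0;
	}
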
diff --git a/include/net/udp.h b/include/net/udp.h
index 626c2d8..12dfbfe 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -287,7 +287,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif);
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport, int dif,
+ __be32 daddr, __be16 dport, int dif, int sdif,
struct udp_table *tbl, struct sk_buff *skb);
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport);
@@ -298,7 +298,7 @@ struct sock *udp6_lib_lookup(struct net *net,
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
- int dif, struct udp_table *tbl,
+ int dif, int sdif, struct udp_table *tbl,
struct sk_buff *skb);
struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport);
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 02c5be0..10cce0d 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -115,6 +115,8 @@ struct udp_tunnel_info {
/* Notify network devices of offloadable types */
void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
unsigned short type);
+void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
+ unsigned short type);
void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type);
void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type);
@@ -124,6 +126,12 @@ static inline void udp_tunnel_get_rx_info(struct net_device *dev)
call_netdevice_notifiers(NETDEV_UDP_TUNNEL_PUSH_INFO, dev);
}
+static inline void udp_tunnel_drop_rx_info(struct net_device *dev)
+{
+ ASSERT_RTNL();
+ call_netdevice_notifiers(NETDEV_UDP_TUNNEL_DROP_INFO, dev);
+}
+
/* Transmit the skb using UDP encapsulation. */
void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl,
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 3f430e3..4e3876d 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -168,12 +168,6 @@ reserved_flags2:2;
#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \
cpu_to_be32(0xff))
-/* VXLAN-GPE header Next Protocol. */
-#define VXLAN_GPE_NP_IPV4 0x01
-#define VXLAN_GPE_NP_IPV6 0x02
-#define VXLAN_GPE_NP_ETHERNET 0x03
-#define VXLAN_GPE_NP_NSH 0x04
-
struct vxlan_metadata {
u32 gbp;
};
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c0916ab..f002a2c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -43,6 +43,8 @@
MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap))
#define MODULE_ALIAS_XFRM_TYPE(family, proto) \
MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto))
+#define MODULE_ALIAS_XFRM_OFFLOAD_TYPE(family, proto) \
+ MODULE_ALIAS("xfrm-offload-" __stringify(family) "-" __stringify(proto))
#ifdef CONFIG_XFRM_STATISTICS
#define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field)
@@ -163,6 +165,7 @@ struct xfrm_state {
int header_len;
int trailer_len;
u32 extra_flags;
+ u32 output_mark;
} props;
struct xfrm_lifetime_cfg lft;
@@ -296,10 +299,12 @@ struct xfrm_policy_afinfo {
struct dst_entry *(*dst_lookup)(struct net *net,
int tos, int oif,
const xfrm_address_t *saddr,
- const xfrm_address_t *daddr);
+ const xfrm_address_t *daddr,
+ u32 mark);
int (*get_saddr)(struct net *net, int oif,
xfrm_address_t *saddr,
- xfrm_address_t *daddr);
+ xfrm_address_t *daddr,
+ u32 mark);
void (*decode_session)(struct sk_buff *skb,
struct flowi *fl,
int reverse);
@@ -317,6 +322,7 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
void km_policy_notify(struct xfrm_policy *xp, int dir,
const struct km_event *c);
+void xfrm_policy_cache_flush(void);
void km_state_notify(struct xfrm_state *x, const struct km_event *c);
struct xfrm_tmpl;
@@ -563,7 +569,6 @@ struct xfrm_policy {
refcount_t refcnt;
struct timer_list timer;
- struct flow_cache_object flo;
atomic_t genid;
u32 priority;
u32 index;
@@ -978,7 +983,6 @@ struct xfrm_dst {
struct rt6_info rt6;
} u;
struct dst_entry *route;
- struct flow_cache_object flo;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
int num_pols, num_xfrms;
u32 xfrm_genid;
@@ -1015,6 +1019,7 @@ struct xfrm_offload {
#define CRYPTO_FALLBACK 8
#define XFRM_GSO_SEGMENT 16
#define XFRM_GRO 32
+#define XFRM_ESP_NO_TRAILER 64
__u32 status;
#define CRYPTO_SUCCESS 1
@@ -1226,9 +1231,6 @@ static inline void xfrm_sk_free_policy(struct sock *sk)
}
}
-void xfrm_garbage_collect(struct net *net);
-void xfrm_garbage_collect_deferred(struct net *net);
-
#else
static inline void xfrm_sk_free_policy(struct sock *sk) {}
@@ -1263,9 +1265,6 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
{
return 1;
}
-static inline void xfrm_garbage_collect(struct net *net)
-{
-}
#endif
static __inline__
@@ -1565,7 +1564,7 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
int xfrm_init_replay(struct xfrm_state *x);
int xfrm_state_mtu(struct xfrm_state *x, int mtu);
-int __xfrm_init_state(struct xfrm_state *x, bool init_replay);
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
int xfrm_init_state(struct xfrm_state *x);
int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
@@ -1645,7 +1644,7 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr,
- int family);
+ int family, u32 mark);
struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp);
@@ -1863,6 +1862,20 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
struct xfrm_user_offload *xuo);
bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
+static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
+{
+ struct xfrm_state *x = dst->xfrm;
+
+ if (!x || !x->type_offload)
+ return false;
+
+ if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) &&
+ !dst->child->xfrm)
+ return true;
+
+ return false;
+}
+
static inline void xfrm_dev_state_delete(struct xfrm_state *x)
{
struct xfrm_state_offload *xso = &x->xso;
@@ -1905,6 +1918,11 @@ static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x
{
return false;
}
+
+static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
+{
+ return false;
+}
#endif
static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m)
diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h
new file mode 100644
index 0000000..1bee3e7
--- /dev/null
+++ b/include/trace/events/bridge.h
@@ -0,0 +1,129 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bridge
+
+#if !defined(_TRACE_BRIDGE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BRIDGE_H
+
+#include <linux/netdevice.h>
+#include <linux/tracepoint.h>
+
+#include "../../../net/bridge/br_private.h"
+
+TRACE_EVENT(br_fdb_add,
+
+ TP_PROTO(struct ndmsg *ndm, struct net_device *dev,
+ const unsigned char *addr, u16 vid, u16 nlh_flags),
+
+ TP_ARGS(ndm, dev, addr, vid, nlh_flags),
+
+ TP_STRUCT__entry(
+ __field(u8, ndm_flags)
+ __string(dev, dev->name)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __field(u16, nlh_flags)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev, dev->name);
+ memcpy(__entry->addr, addr, ETH_ALEN);
+ __entry->vid = vid;
+ __entry->nlh_flags = nlh_flags;
+ __entry->ndm_flags = ndm->ndm_flags;
+ ),
+
+ TP_printk("dev %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u nlh_flags %04x ndm_flags %02x",
+ __get_str(dev), __entry->addr[0], __entry->addr[1],
+ __entry->addr[2], __entry->addr[3], __entry->addr[4],
+ __entry->addr[5], __entry->vid,
+ __entry->nlh_flags, __entry->ndm_flags)
+);
+
+TRACE_EVENT(br_fdb_external_learn_add,
+
+ TP_PROTO(struct net_bridge *br, struct net_bridge_port *p,
+ const unsigned char *addr, u16 vid),
+
+ TP_ARGS(br, p, addr, vid),
+
+ TP_STRUCT__entry(
+ __string(br_dev, br->dev->name)
+ __string(dev, p ? p->dev->name : "null")
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ ),
+
+ TP_fast_assign(
+ __assign_str(br_dev, br->dev->name);
+ __assign_str(dev, p ? p->dev->name : "null");
+ memcpy(__entry->addr, addr, ETH_ALEN);
+ __entry->vid = vid;
+ ),
+
+ TP_printk("br_dev %s port %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u",
+ __get_str(br_dev), __get_str(dev), __entry->addr[0],
+ __entry->addr[1], __entry->addr[2], __entry->addr[3],
+ __entry->addr[4], __entry->addr[5], __entry->vid)
+);
+
+TRACE_EVENT(fdb_delete,
+
+ TP_PROTO(struct net_bridge *br, struct net_bridge_fdb_entry *f),
+
+ TP_ARGS(br, f),
+
+ TP_STRUCT__entry(
+ __string(br_dev, br->dev->name)
+ __string(dev, f->dst ? f->dst->dev->name : "null")
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ ),
+
+ TP_fast_assign(
+ __assign_str(br_dev, br->dev->name);
+ __assign_str(dev, f->dst ? f->dst->dev->name : "null");
+ memcpy(__entry->addr, f->addr.addr, ETH_ALEN);
+ __entry->vid = f->vlan_id;
+ ),
+
+ TP_printk("br_dev %s dev %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u",
+ __get_str(br_dev), __get_str(dev), __entry->addr[0],
+ __entry->addr[1], __entry->addr[2], __entry->addr[3],
+ __entry->addr[4], __entry->addr[5], __entry->vid)
+);
+
+TRACE_EVENT(br_fdb_update,
+
+ TP_PROTO(struct net_bridge *br, struct net_bridge_port *source,
+ const unsigned char *addr, u16 vid, bool added_by_user),
+
+ TP_ARGS(br, source, addr, vid, added_by_user),
+
+ TP_STRUCT__entry(
+ __string(br_dev, br->dev->name)
+ __string(dev, source->dev->name)
+ __array(unsigned char, addr, ETH_ALEN)
+ __field(u16, vid)
+ __field(bool, added_by_user)
+ ),
+
+ TP_fast_assign(
+ __assign_str(br_dev, br->dev->name);
+ __assign_str(dev, source->dev->name);
+ memcpy(__entry->addr, addr, ETH_ALEN);
+ __entry->vid = vid;
+ __entry->added_by_user = added_by_user;
+ ),
+
+ TP_printk("br_dev %s source %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u added_by_user %d",
+ __get_str(br_dev), __get_str(dev), __entry->addr[0],
+ __entry->addr[1], __entry->addr[2], __entry->addr[3],
+ __entry->addr[4], __entry->addr[5], __entry->vid,
+ __entry->added_by_user)
+);
+
+
+#endif /* _TRACE_BRIDGE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
new file mode 100644
index 0000000..60d0d8b
--- /dev/null
+++ b/include/trace/events/qdisc.h
@@ -0,0 +1,50 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM qdisc
+
+#if !defined(_TRACE_QDISC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_QDISC_H
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/tracepoint.h>
+#include <linux/ftrace.h>
+
+TRACE_EVENT(qdisc_dequeue,
+
+ TP_PROTO(struct Qdisc *qdisc, const struct netdev_queue *txq,
+ int packets, struct sk_buff *skb),
+
+ TP_ARGS(qdisc, txq, packets, skb),
+
+ TP_STRUCT__entry(
+ __field( struct Qdisc *, qdisc )
+ __field(const struct netdev_queue *, txq )
+ __field( int, packets )
+ __field( void *, skbaddr )
+ __field( int, ifindex )
+ __field( u32, handle )
+ __field( u32, parent )
+ __field( unsigned long, txq_state)
+ ),
+
+ /* skb==NULL indicates 0 packets were dequeued, even when packets==1 */
+ TP_fast_assign(
+ __entry->qdisc = qdisc;
+ __entry->txq = txq;
+ __entry->packets = skb ? packets : 0;
+ __entry->skbaddr = skb;
+ __entry->ifindex = txq->dev ? txq->dev->ifindex : 0;
+ __entry->handle = qdisc->handle;
+ __entry->parent = qdisc->parent;
+ __entry->txq_state = txq->state;
+ ),
+
+ TP_printk("dequeue ifindex=%d qdisc handle=0x%X parent=0x%X txq_state=0x%lX packets=%d skbaddr=%p",
+ __entry->ifindex, __entry->handle, __entry->parent,
+ __entry->txq_state, __entry->packets, __entry->skbaddr )
+);
+
+#endif /* _TRACE_QDISC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 1b61357..862575a 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -12,7 +12,8 @@
FN(ABORTED) \
FN(DROP) \
FN(PASS) \
- FN(TX)
+ FN(TX) \
+ FN(REDIRECT)
#define __XDP_ACT_TP_FN(x) \
TRACE_DEFINE_ENUM(XDP_##x);
@@ -30,24 +31,119 @@ TRACE_EVENT(xdp_exception,
TP_ARGS(dev, xdp, act),
TP_STRUCT__entry(
- __string(name, dev->name)
- __array(u8, prog_tag, 8)
+ __field(int, prog_id)
__field(u32, act)
+ __field(int, ifindex)
),
TP_fast_assign(
- BUILD_BUG_ON(sizeof(__entry->prog_tag) != sizeof(xdp->tag));
- memcpy(__entry->prog_tag, xdp->tag, sizeof(xdp->tag));
- __assign_str(name, dev->name);
- __entry->act = act;
+ __entry->prog_id = xdp->aux->id;
+ __entry->act = act;
+ __entry->ifindex = dev->ifindex;
),
- TP_printk("prog=%s device=%s action=%s",
- __print_hex_str(__entry->prog_tag, 8),
- __get_str(name),
- __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB))
+ TP_printk("prog_id=%d action=%s ifindex=%d",
+ __entry->prog_id,
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+ __entry->ifindex)
);
+DECLARE_EVENT_CLASS(xdp_redirect_template,
+
+ TP_PROTO(const struct net_device *dev,
+ const struct bpf_prog *xdp,
+ int to_ifindex, int err,
+ const struct bpf_map *map, u32 map_index),
+
+ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
+
+ TP_STRUCT__entry(
+ __field(int, prog_id)
+ __field(u32, act)
+ __field(int, ifindex)
+ __field(int, err)
+ __field(int, to_ifindex)
+ __field(u32, map_id)
+ __field(int, map_index)
+ ),
+
+ TP_fast_assign(
+ __entry->prog_id = xdp->aux->id;
+ __entry->act = XDP_REDIRECT;
+ __entry->ifindex = dev->ifindex;
+ __entry->err = err;
+ __entry->to_ifindex = to_ifindex;
+ __entry->map_id = map ? map->id : 0;
+ __entry->map_index = map_index;
+ ),
+
+ TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d",
+ __entry->prog_id,
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+ __entry->ifindex, __entry->to_ifindex,
+ __entry->err)
+);
+
+DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
+ TP_PROTO(const struct net_device *dev,
+ const struct bpf_prog *xdp,
+ int to_ifindex, int err,
+ const struct bpf_map *map, u32 map_index),
+ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
+);
+
+DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
+ TP_PROTO(const struct net_device *dev,
+ const struct bpf_prog *xdp,
+ int to_ifindex, int err,
+ const struct bpf_map *map, u32 map_index),
+ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index)
+);
+
+#define _trace_xdp_redirect(dev, xdp, to) \
+ trace_xdp_redirect(dev, xdp, to, 0, NULL, 0);
+
+#define _trace_xdp_redirect_err(dev, xdp, to, err) \
+ trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0);
+
+DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map,
+ TP_PROTO(const struct net_device *dev,
+ const struct bpf_prog *xdp,
+ int to_ifindex, int err,
+ const struct bpf_map *map, u32 map_index),
+ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
+ TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
+ " map_id=%d map_index=%d",
+ __entry->prog_id,
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+ __entry->ifindex, __entry->to_ifindex,
+ __entry->err,
+ __entry->map_id, __entry->map_index)
+);
+
+DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
+ TP_PROTO(const struct net_device *dev,
+ const struct bpf_prog *xdp,
+ int to_ifindex, int err,
+ const struct bpf_map *map, u32 map_index),
+ TP_ARGS(dev, xdp, to_ifindex, err, map, map_index),
+ TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
+ " map_id=%d map_index=%d",
+ __entry->prog_id,
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+ __entry->ifindex, __entry->to_ifindex,
+ __entry->err,
+ __entry->map_id, __entry->map_index)
+);
+
+#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
+ trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0, \
+ 0, map, idx);
+
+#define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \
+ trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0, \
+ err, map, idx);
+
#endif /* _TRACE_XDP_H */
#include <trace/define_trace.h>
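
A sketch of how the wrapper macros might be used on the redirect path (the variables dev, xdp_prog, fwd, map, index and err are illustrative); the _err variants keep failure codes out of the hot success events:

	if (map) {
		if (err)
			_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
		else
			_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
	} else {
		if (err)
			_trace_xdp_redirect_err(dev, xdp_prog, index, err);
		else
			_trace_xdp_redirect(dev, xdp_prog, index);
	}
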
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 9861be8..e47c9e4 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -104,4 +104,6 @@
#define SO_PEERGROUPS 59
+#define SO_ZEROCOPY 60
+
#endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e99e3e6..ba848b7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -30,9 +30,14 @@
#define BPF_FROM_LE BPF_TO_LE
#define BPF_FROM_BE BPF_TO_BE
+/* jmp encodings */
#define BPF_JNE 0x50 /* jump != */
+#define BPF_JLT 0xa0 /* LT is unsigned, '<' */
+#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */
#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
+#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
#define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */
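
A sketch of the new encodings in a raw instruction stream, using the insn macros from include/linux/filter.h:

	struct bpf_insn prog[] = {
		BPF_MOV64_IMM(BPF_REG_0, 7),
		/* unsigned: skip one insn when R0 < 10 */
		BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 10, 1),
		BPF_MOV64_IMM(BPF_REG_0, 0),
		/* signed: skip one insn when R0 <= -1 */
		BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, -1, 1),
		BPF_MOV64_IMM(BPF_REG_0, 1),
		BPF_EXIT_INSN(),
	};
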
@@ -104,6 +109,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_LPM_TRIE,
BPF_MAP_TYPE_ARRAY_OF_MAPS,
BPF_MAP_TYPE_HASH_OF_MAPS,
+ BPF_MAP_TYPE_DEVMAP,
+ BPF_MAP_TYPE_SOCKMAP,
};
enum bpf_prog_type {
@@ -121,6 +128,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_OUT,
BPF_PROG_TYPE_LWT_XMIT,
BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_SKB,
};
enum bpf_attach_type {
@@ -128,6 +136,8 @@ enum bpf_attach_type {
BPF_CGROUP_INET_EGRESS,
BPF_CGROUP_INET_SOCK_CREATE,
BPF_CGROUP_SOCK_OPS,
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
@@ -153,6 +163,7 @@ enum bpf_attach_type {
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
#define BPF_EXIST 2 /* update existing element */
+/* flags for BPF_MAP_CREATE command */
#define BPF_F_NO_PREALLOC (1U << 0)
/* Instead of having one common LRU list in the
* BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
@@ -161,6 +172,8 @@ enum bpf_attach_type {
* across different LRU lists.
*/
#define BPF_F_NO_COMMON_LRU (1U << 1)
+/* Specify numa node during map creation */
+#define BPF_F_NUMA_NODE (1U << 2)
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
@@ -168,8 +181,13 @@ union bpf_attr {
__u32 key_size; /* size of key in bytes */
__u32 value_size; /* size of value in bytes */
__u32 max_entries; /* max number of entries in a map */
- __u32 map_flags; /* prealloc or not */
+ __u32 map_flags; /* BPF_MAP_CREATE related
+ * flags defined above.
+ */
__u32 inner_map_fd; /* fd pointing to the inner map */
+ __u32 numa_node; /* numa node (effective only if
+ * BPF_F_NUMA_NODE is set).
+ */
};
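
A userspace sketch of requesting NUMA-local map memory with the new attribute (direct bpf(2) use; error handling omitted):

	#include <linux/bpf.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int create_numa_map(void)
	{
		union bpf_attr attr = {
			.map_type    = BPF_MAP_TYPE_HASH,
			.key_size    = sizeof(__u32),
			.value_size  = sizeof(__u64),
			.max_entries = 1024,
			.map_flags   = BPF_F_NUMA_NODE,	/* numa_node below takes effect */
			.numa_node   = 0,		/* allocate on NUMA node 0 */
		};

		return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	}
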
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -344,9 +362,20 @@ union bpf_attr {
* int bpf_redirect(ifindex, flags)
* redirect to another netdev
* @ifindex: ifindex of the net device
- * @flags: bit 0 - if set, redirect to ingress instead of egress
- * other bits - reserved
- * Return: TC_ACT_REDIRECT
+ * @flags:
+ * cls_bpf:
+ * bit 0 - if set, redirect to ingress instead of egress
+ * other bits - reserved
+ * xdp_bpf:
+ * all bits - reserved
+ * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
+ *         xdp_bpf: XDP_REDIRECT on success or XDP_ABORTED on error
+ * int bpf_redirect_map(map, key, flags)
+ * redirect to endpoint in map
+ * @map: pointer to dev map
+ * @key: index in map to lookup
+ *     @flags: reserved for future use
+ *     Return: XDP_REDIRECT on success or XDP_ABORTED on error
*
* u32 bpf_get_route_realm(skb)
* retrieve a dst's tclassid
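
A sketch of an XDP program using the new helper; the devmap name tx_port and the fixed index are illustrative, and userspace is assumed to populate the map with egress ifindexes:

	struct bpf_map_def SEC("maps") tx_port = {
		.type        = BPF_MAP_TYPE_DEVMAP,
		.key_size    = sizeof(int),
		.value_size  = sizeof(int),
		.max_entries = 64,
	};

	SEC("xdp")
	int xdp_redirect_prog(struct xdp_md *ctx)
	{
		/* redirect to the net device stored at index 0 of tx_port */
		return bpf_redirect_map(&tx_port, 0, 0);
	}
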
@@ -539,6 +568,20 @@ union bpf_attr {
* @mode: operation mode (enum bpf_adj_room_mode)
* @flags: reserved for future use
* Return: 0 on success or negative error code
+ *
+ * int bpf_sk_redirect_map(map, key, flags)
+ * Redirect skb to a sock in map using key as a lookup key for the
+ * sock in map.
+ * @map: pointer to sockmap
+ * @key: key to lookup sock in map
+ * @flags: reserved for future use
+ * Return: SK_REDIRECT
+ *
+ * int bpf_sock_map_update(skops, map, key, flags)
+ * @skops: pointer to bpf_sock_ops
+ * @map: pointer to sockmap to update
+ * @key: key to insert/update sock in map
+ *	@flags: same flags as map update elem
+ *	Return: 0 on success or negative error
+ */
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -591,7 +634,10 @@ union bpf_attr {
FN(get_socket_uid), \
FN(set_hash), \
FN(setsockopt), \
- FN(skb_adjust_room),
+ FN(skb_adjust_room), \
+ FN(redirect_map), \
+ FN(sk_redirect_map), \
+ FN(sock_map_update), \
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
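
A sketch of the two sockmap helpers working together; the map, section names and callback choice are illustrative. A BPF_PROG_TYPE_SK_SKB verdict program redirects parsed messages, while a sock_ops program inserts established sockets into the map:

	struct bpf_map_def SEC("maps") sock_map = {
		.type        = BPF_MAP_TYPE_SOCKMAP,
		.key_size    = sizeof(int),
		.value_size  = sizeof(int),
		.max_entries = 20,
	};

	SEC("sk_skb")
	int prog_verdict(struct __sk_buff *skb)
	{
		/* redirect every parsed message to the sock at index 0 */
		return bpf_sk_redirect_map(&sock_map, 0, 0);
	}

	SEC("sockops")
	int prog_sockops(struct bpf_sock_ops *skops)
	{
		int key = 0;

		if (skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
			bpf_sock_map_update(skops, &sock_map, &key, BPF_NOEXIST);
		return 0;
	}
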
@@ -668,6 +714,15 @@ struct __sk_buff {
__u32 data;
__u32 data_end;
__u32 napi_id;
+
+	/* Accessed by BPF_PROG_TYPE_SK_SKB programs */
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+	__u32 local_port;	/* Stored in host byte order */
};
struct bpf_tunnel_key {
@@ -703,6 +758,8 @@ struct bpf_sock {
__u32 family;
__u32 type;
__u32 protocol;
+ __u32 mark;
+ __u32 priority;
};
#define XDP_PACKET_HEADROOM 256
@@ -717,6 +774,7 @@ enum xdp_action {
XDP_DROP,
XDP_PASS,
XDP_TX,
+ XDP_REDIRECT,
};
/* user accessible metadata for XDP packet hook
@@ -727,6 +785,12 @@ struct xdp_md {
__u32 data_end;
};
+enum sk_action {
+ SK_ABORTED = 0,
+ SK_DROP,
+ SK_REDIRECT,
+};
+
#define BPF_TAG_SIZE 8
struct bpf_prog_info {
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index b0e807a..0cbca96 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -226,4 +226,22 @@ enum devlink_dpipe_action_type {
DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY,
};
+enum devlink_dpipe_field_ethernet_id {
+ DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
+};
+
+enum devlink_dpipe_field_ipv4_id {
+ DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
+};
+
+enum devlink_dpipe_field_ipv6_id {
+ DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
+};
+
+enum devlink_dpipe_header_id {
+ DEVLINK_DPIPE_HEADER_ETHERNET,
+ DEVLINK_DPIPE_HEADER_IPV4,
+ DEVLINK_DPIPE_HEADER_IPV6,
+};
+
#endif /* _UAPI_LINUX_DEVLINK_H_ */
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index 07bdce1..78fdf52 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -18,10 +18,13 @@ struct sock_extended_err {
#define SO_EE_ORIGIN_ICMP 2
#define SO_EE_ORIGIN_ICMP6 3
#define SO_EE_ORIGIN_TXSTATUS 4
+#define SO_EE_ORIGIN_ZEROCOPY 5
#define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
#define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1))
+#define SO_EE_CODE_ZEROCOPY_COPIED 1
+
/**
* struct scm_timestamping - timestamps exposed through cmsg
*
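
A userspace sketch of the zerocopy flow these definitions support, assuming the MSG_ZEROCOPY send flag from the same series; fd, buf and len are illustrative, and buf must stay untouched until the completion arrives:

	int one = 1;

	setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
	send(fd, buf, len, MSG_ZEROCOPY);

	/* Completions arrive on the socket error queue as a
	 * sock_extended_err with ee_origin == SO_EE_ORIGIN_ZEROCOPY;
	 * ee_code carries SO_EE_CODE_ZEROCOPY_COPIED when the kernel
	 * had to fall back to copying the data.
	 */
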
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 7d4a594..9c041da 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1238,6 +1238,47 @@ struct ethtool_per_queue_op {
char data[];
};
+/**
+ * struct ethtool_fecparam - Ethernet Forward Error Correction (FEC) parameters
+ * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
+ * @active_fec: FEC mode which is active on the port
+ * @fec: Bitmask of supported/configured FEC modes
+ * @reserved: Reserved for future extensions, e.g. an FEC bypass feature.
+ */
+struct ethtool_fecparam {
+	__u32	cmd;
+	__u32	active_fec;
+	/* bitmask of supported/configured FEC modes */
+	__u32	fec;
+	__u32	reserved;
+};
+
+/**
+ * enum ethtool_fec_config_bits - flag definitions for FEC configuration
+ * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported
+ * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver
+ * @ETHTOOL_FEC_OFF: No FEC Mode
+ * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Correction mode
+ * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Correction mode
+ */
+enum ethtool_fec_config_bits {
+ ETHTOOL_FEC_NONE_BIT,
+ ETHTOOL_FEC_AUTO_BIT,
+ ETHTOOL_FEC_OFF_BIT,
+ ETHTOOL_FEC_RS_BIT,
+ ETHTOOL_FEC_BASER_BIT,
+};
+
+#define ETHTOOL_FEC_NONE (1 << ETHTOOL_FEC_NONE_BIT)
+#define ETHTOOL_FEC_AUTO (1 << ETHTOOL_FEC_AUTO_BIT)
+#define ETHTOOL_FEC_OFF (1 << ETHTOOL_FEC_OFF_BIT)
+#define ETHTOOL_FEC_RS (1 << ETHTOOL_FEC_RS_BIT)
+#define ETHTOOL_FEC_BASER (1 << ETHTOOL_FEC_BASER_BIT)
+
/* CMDs currently supported */
#define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings.
* Please use ETHTOOL_GLINKSETTINGS
@@ -1330,6 +1371,8 @@ struct ethtool_per_queue_op {
#define ETHTOOL_SLINKSETTINGS 0x0000004d /* Set ethtool_link_settings */
#define ETHTOOL_PHY_GTUNABLE 0x0000004e /* Get PHY tunable configuration */
#define ETHTOOL_PHY_STUNABLE 0x0000004f /* Set PHY tunable configuration */
+#define ETHTOOL_GFECPARAM 0x00000050 /* Get FEC settings */
+#define ETHTOOL_SFECPARAM 0x00000051 /* Set FEC settings */
/* compatibility with older code */
#define SPARC_ETH_GSET ETHTOOL_GSET
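
A userspace sketch of querying FEC parameters with the new command; the device name is illustrative and sock_fd is assumed to be an open AF_INET socket:

	struct ethtool_fecparam fec = { .cmd = ETHTOOL_GFECPARAM };
	struct ifreq ifr = {0};

	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&fec;
	if (ioctl(sock_fd, SIOCETHTOOL, &ifr) == 0)
		printf("active fec 0x%x, supported/configured 0x%x\n",
		       fec.active_fec, fec.fec);
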
@@ -1387,6 +1430,9 @@ enum ethtool_link_mode_bit_indices {
ETHTOOL_LINK_MODE_2500baseT_Full_BIT = 47,
ETHTOOL_LINK_MODE_5000baseT_Full_BIT = 48,
+ ETHTOOL_LINK_MODE_FEC_NONE_BIT = 49,
+ ETHTOOL_LINK_MODE_FEC_RS_BIT = 50,
+ ETHTOOL_LINK_MODE_FEC_BASER_BIT = 51,
/* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
* 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
@@ -1395,7 +1441,7 @@ enum ethtool_link_mode_bit_indices {
*/
__ETHTOOL_LINK_MODE_LAST
- = ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+ = ETHTOOL_LINK_MODE_FEC_BASER_BIT,
};
#define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \
diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h
index cf73510..a2a6356 100644
--- a/include/uapi/linux/if_arp.h
+++ b/include/uapi/linux/if_arp.h
@@ -59,6 +59,7 @@
#define ARPHRD_LAPB 516 /* LAPB */
#define ARPHRD_DDCMP 517 /* Digital's DDCMP protocol */
#define ARPHRD_RAWHDLC 518 /* Raw HDLC */
+#define ARPHRD_RAWIP 519 /* Raw IP */
#define ARPHRD_TUNNEL 768 /* IPIP tunnel */
#define ARPHRD_TUNNEL6 769 /* IP6IP6 tunnel */
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 5bc9bfd..9037065 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -66,6 +66,7 @@
#define ETH_P_ATALK 0x809B /* Appletalk DDP */
#define ETH_P_AARP 0x80F3 /* Appletalk AARP */
#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */
+#define ETH_P_ERSPAN 0x88BE /* ERSPAN type II */
#define ETH_P_IPX 0x8137 /* IPX over DIX */
#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */
#define ETH_P_PAUSE 0x8808 /* IEEE Pause frames. See 802.3 31B */
@@ -98,11 +99,13 @@
#define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */
#define ETH_P_80221 0x8917 /* IEEE 802.21 Media Independent Handover Protocol */
#define ETH_P_HSR 0x892F /* IEC 62439-3 HSRv1 */
+#define ETH_P_NSH 0x894F /* Network Service Header */
#define ETH_P_LOOPBACK 0x9000 /* Ethernet loopback packet, per IEEE 802.3 */
#define ETH_P_QINQ1 0x9100 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_QINQ2 0x9200 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_QINQ3 0x9300 /* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_IFE 0xED3E /* ForCES inter-FE LFB type */
#define ETH_P_AF_IUCV 0xFBFB /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
#define ETH_P_802_3_MIN 0x0600 /* If the value in the ethernet type is less than this value
@@ -137,6 +140,9 @@
#define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */
#define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */
#define ETH_P_XDSA 0x00F8 /* Multiplexed DSA protocol */
+#define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and
+ * aggregation protocol
+ */
/*
* This is an Ethernet frame header.
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 6792d19..2e52088 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -134,6 +134,7 @@ enum {
IFLA_GRE_COLLECT_METADATA,
IFLA_GRE_IGNORE_DF,
IFLA_GRE_FWMARK,
+ IFLA_GRE_ERSPAN_INDEX,
__IFLA_GRE_MAX,
};
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index bbe2010..f52ff62 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -142,6 +142,8 @@ enum {
INET_DIAG_PAD,
INET_DIAG_MARK,
INET_DIAG_BBRINFO,
+ INET_DIAG_CLASS_ID,
+ INET_DIAG_MD5SIG,
__INET_DIAG_MAX,
};
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index 92724cba..7fdd19c 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -11,6 +11,7 @@ enum lwtunnel_encap_types {
LWTUNNEL_ENCAP_IP6,
LWTUNNEL_ENCAP_SEG6,
LWTUNNEL_ENCAP_BPF,
+ LWTUNNEL_ENCAP_SEG6_LOCAL,
__LWTUNNEL_ENCAP_MAX,
};
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 683f6f8..871afa4 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1,10 +1,11 @@
#ifndef _LINUX_NF_TABLES_H
#define _LINUX_NF_TABLES_H
-#define NFT_TABLE_MAXNAMELEN 32
-#define NFT_CHAIN_MAXNAMELEN 32
-#define NFT_SET_MAXNAMELEN 32
-#define NFT_OBJ_MAXNAMELEN 32
+#define NFT_NAME_MAXLEN 256
+#define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN
+#define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN
+#define NFT_SET_MAXNAMELEN NFT_NAME_MAXLEN
+#define NFT_OBJ_MAXNAMELEN NFT_NAME_MAXLEN
#define NFT_USERDATA_MAXLEN 256
/**
@@ -731,7 +732,8 @@ enum nft_exthdr_op {
* @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
* @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
* @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32)
- * @NFTA_EXTHDR_OP: option match type (NLA_U8)
+ * @NFTA_EXTHDR_OP: option match type (NLA_U32)
+ * @NFTA_EXTHDR_SREG: source register (NLA_U32)
*/
enum nft_exthdr_attributes {
NFTA_EXTHDR_UNSPEC,
@@ -741,6 +743,7 @@ enum nft_exthdr_attributes {
NFTA_EXTHDR_LEN,
NFTA_EXTHDR_FLAGS,
NFTA_EXTHDR_OP,
+ NFTA_EXTHDR_SREG,
__NFTA_EXTHDR_MAX
};
#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1)
@@ -808,11 +811,13 @@ enum nft_meta_keys {
* @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid)
* @NFT_RT_NEXTHOP4: routing nexthop for IPv4
* @NFT_RT_NEXTHOP6: routing nexthop for IPv6
+ * @NFT_RT_TCPMSS: fetch the current path TCP MSS
*/
enum nft_rt_keys {
NFT_RT_CLASSID,
NFT_RT_NEXTHOP4,
NFT_RT_NEXTHOP6,
+ NFT_RT_TCPMSS,
};
/**
@@ -1221,6 +1226,8 @@ enum nft_objref_attributes {
enum nft_gen_attributes {
NFTA_GEN_UNSPEC,
NFTA_GEN_ID,
+ NFTA_GEN_PROC_PID,
+ NFTA_GEN_PROC_NAME,
__NFTA_GEN_MAX
};
#define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1)
@@ -1275,7 +1282,8 @@ enum nft_ct_helper_attributes {
#define NFT_OBJECT_COUNTER 1
#define NFT_OBJECT_QUOTA 2
#define NFT_OBJECT_CT_HELPER 3
-#define __NFT_OBJECT_MAX 4
+#define NFT_OBJECT_LIMIT 4
+#define __NFT_OBJECT_MAX 5
#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1)
/**
diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h
index 79da349..aa98573 100644
--- a/include/uapi/linux/netfilter/xt_hashlimit.h
+++ b/include/uapi/linux/netfilter/xt_hashlimit.h
@@ -19,12 +19,13 @@
struct xt_hashlimit_htable;
enum {
- XT_HASHLIMIT_HASH_DIP = 1 << 0,
- XT_HASHLIMIT_HASH_DPT = 1 << 1,
- XT_HASHLIMIT_HASH_SIP = 1 << 2,
- XT_HASHLIMIT_HASH_SPT = 1 << 3,
- XT_HASHLIMIT_INVERT = 1 << 4,
- XT_HASHLIMIT_BYTES = 1 << 5,
+ XT_HASHLIMIT_HASH_DIP = 1 << 0,
+ XT_HASHLIMIT_HASH_DPT = 1 << 1,
+ XT_HASHLIMIT_HASH_SIP = 1 << 2,
+ XT_HASHLIMIT_HASH_SPT = 1 << 3,
+ XT_HASHLIMIT_INVERT = 1 << 4,
+ XT_HASHLIMIT_BYTES = 1 << 5,
+ XT_HASHLIMIT_RATE_MATCH = 1 << 6,
};
struct hashlimit_cfg {
@@ -79,6 +80,21 @@ struct hashlimit_cfg2 {
__u8 srcmask, dstmask;
};
+struct hashlimit_cfg3 {
+ __u64 avg; /* Average secs between packets * scale */
+ __u64 burst; /* Period multiplier for upper limit. */
+ __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */
+
+ /* user specified */
+ __u32 size; /* how many buckets */
+ __u32 max; /* max number of entries */
+ __u32 gc_interval; /* gc interval */
+ __u32 expire; /* when do entries expire? */
+
+ __u32 interval;
+ __u8 srcmask, dstmask;
+};
+
struct xt_hashlimit_mtinfo1 {
char name[IFNAMSIZ];
struct hashlimit_cfg1 cfg;
@@ -95,4 +111,12 @@ struct xt_hashlimit_mtinfo2 {
struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
};
+struct xt_hashlimit_mtinfo3 {
+ char name[NAME_MAX];
+ struct hashlimit_cfg3 cfg;
+
+ /* Used internally by the kernel */
+ struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
+};
+
#endif /* _UAPI_XT_HASHLIMIT_H */
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index f86127a..e8af60a 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -69,6 +69,9 @@ struct nlmsghdr {
#define NLM_F_CREATE 0x400 /* Create, if it does not exist */
#define NLM_F_APPEND 0x800 /* Add to end of list */
+/* Modifiers to DELETE request */
+#define NLM_F_NONREC 0x100 /* Do not delete recursively */
+
/* Flags for ACK message */
#define NLM_F_CAPPED 0x100 /* request was capped */
#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */
@@ -226,5 +229,22 @@ struct nlattr {
#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr)))
+/* Generic 32-bit bitflags attribute content sent to the kernel.
+ *
+ * The value is a bitmap of the flag values being set.
+ * The selector is a bitmask of which of those flags are valid.
+ *
+ * Examples:
+ *  value = 0x0, and selector = 0x1
+ *  selects bit 0 and sets its value to 0.
+ *
+ *  value = 0x2, and selector = 0x2
+ *  selects bit 1 and sets its value to 1.
+ */
+struct nla_bitfield32 {
+ __u32 value;
+ __u32 selector;
+};
#endif /* _UAPI__LINUX_NETLINK_H */
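
A small worked example of the semantics described above:

	/* Select bits 0 and 1: bit 0 is set to 1, bit 1 is set to 0.
	 * Bits outside the selector are left untouched.
	 */
	struct nla_bitfield32 flags = {
		.value    = 0x1,
		.selector = 0x3,
	};
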
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index d148505..dab7dad 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -683,10 +683,29 @@ struct tcamsg {
unsigned char tca__pad1;
unsigned short tca__pad2;
};
+
+enum {
+ TCA_ROOT_UNSPEC,
+ TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+#define TCAA_MAX TCA_ROOT_TAB
+ TCA_ROOT_FLAGS,
+ TCA_ROOT_COUNT,
+ TCA_ROOT_TIME_DELTA, /* in msecs */
+ __TCA_ROOT_MAX,
+#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
-#define TCA_ACT_TAB 1 /* attr type must be >=1 */
-#define TCAA_MAX 1
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_FLAG_LARGE_DUMP_ON: user->kernel request to return more than
+ * TCA_ACT_MAX_PRIO actions in a dump. Every dump response then carries
+ * the number of actions being dumped in TCA_ROOT_COUNT for the user
+ * app's consumption.
+ */
+#define TCA_FLAG_LARGE_DUMP_ON (1 << 0)
/* New extended info filters for IFLA_EXT_MASK */
#define RTEXT_FILTER_VF (1 << 0)
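
A sketch of requesting a large action dump; TCA_ROOT_FLAGS carries a struct nla_bitfield32, and addattr_l() stands in for an iproute2-style attribute helper (req is an assumed request buffer):

	struct nla_bitfield32 flags = {
		.value    = TCA_FLAG_LARGE_DUMP_ON,
		.selector = TCA_FLAG_LARGE_DUMP_ON,
	};

	addattr_l(&req.n, sizeof(req), TCA_ROOT_FLAGS, &flags, sizeof(flags));
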
diff --git a/include/linux/rxrpc.h b/include/uapi/linux/rxrpc.h
index 7343f71..9656aad 100644
--- a/include/linux/rxrpc.h
+++ b/include/uapi/linux/rxrpc.h
@@ -1,17 +1,18 @@
-/* AF_RXRPC parameters
+/* Types and definitions for AF_RXRPC.
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
+ * modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
+ * 2 of the Licence, or (at your option) any later version.
*/
-#ifndef _LINUX_RXRPC_H
-#define _LINUX_RXRPC_H
+#ifndef _UAPI_LINUX_RXRPC_H
+#define _UAPI_LINUX_RXRPC_H
+#include <linux/types.h>
#include <linux/in.h>
#include <linux/in6.h>
@@ -76,4 +77,48 @@ enum rxrpc_cmsg_type {
#define RXRPC_SECURITY_RXGK 4 /* gssapi-based */
#define RXRPC_SECURITY_RXK5 5 /* kerberos 5 */
-#endif /* _LINUX_RXRPC_H */
+/*
+ * RxRPC-level abort codes
+ */
+#define RX_CALL_DEAD -1 /* call/conn has been inactive and is shut down */
+#define RX_INVALID_OPERATION -2 /* invalid operation requested / attempted */
+#define RX_CALL_TIMEOUT -3 /* call timeout exceeded */
+#define RX_EOF -4 /* unexpected end of data on read op */
+#define RX_PROTOCOL_ERROR -5 /* low-level protocol error */
+#define RX_USER_ABORT -6 /* generic user abort */
+#define RX_ADDRINUSE -7 /* UDP port in use */
+#define RX_DEBUGI_BADTYPE -8 /* bad debugging packet type */
+
+/*
+ * (un)marshalling abort codes (rxgen)
+ */
+#define RXGEN_CC_MARSHAL -450
+#define RXGEN_CC_UNMARSHAL -451
+#define RXGEN_SS_MARSHAL -452
+#define RXGEN_SS_UNMARSHAL -453
+#define RXGEN_DECODE -454
+#define RXGEN_OPCODE -455
+#define RXGEN_SS_XDRFREE -456
+#define RXGEN_CC_XDRFREE -457
+
+/*
+ * Rx kerberos security abort codes
+ * - unfortunately we have no generalised security abort codes to say things
+ * like "unsupported security", so we have to use these instead and hope the
+ * other side understands
+ */
+#define RXKADINCONSISTENCY 19270400 /* security module structure inconsistent */
+#define RXKADPACKETSHORT 19270401 /* packet too short for security challenge */
+#define RXKADLEVELFAIL 19270402 /* security level negotiation failed */
+#define RXKADTICKETLEN 19270403 /* ticket length too short or too long */
+#define RXKADOUTOFSEQUENCE 19270404 /* packet had bad sequence number */
+#define RXKADNOAUTH 19270405 /* caller not authorised */
+#define RXKADBADKEY 19270406 /* illegal key: bad parity or weak */
+#define RXKADBADTICKET 19270407 /* security object was passed a bad ticket */
+#define RXKADUNKNOWNKEY 19270408 /* ticket contained unknown key version number */
+#define RXKADEXPIRED 19270409 /* authentication expired */
+#define RXKADSEALEDINCON 19270410 /* sealed data inconsistent */
+#define RXKADDATALEN 19270411 /* user data too long */
+#define RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */
+
+#endif /* _UAPI_LINUX_RXRPC_H */
diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h
index b6e5a0a..b23df9f 100644
--- a/include/uapi/linux/seg6_iptunnel.h
+++ b/include/uapi/linux/seg6_iptunnel.h
@@ -33,16 +33,26 @@ struct seg6_iptunnel_encap {
enum {
SEG6_IPTUN_MODE_INLINE,
SEG6_IPTUN_MODE_ENCAP,
+ SEG6_IPTUN_MODE_L2ENCAP,
};
#ifdef __KERNEL__
static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
{
- int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP);
-
- return ((tuninfo->srh->hdrlen + 1) << 3) +
- (encap * sizeof(struct ipv6hdr));
+ int head = 0;
+
+ switch (tuninfo->mode) {
+ case SEG6_IPTUN_MODE_INLINE:
+ break;
+ case SEG6_IPTUN_MODE_ENCAP:
+ head = sizeof(struct ipv6hdr);
+ break;
+ case SEG6_IPTUN_MODE_L2ENCAP:
+ return 0;
+ }
+
+ return ((tuninfo->srh->hdrlen + 1) << 3) + head;
}
#endif
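
Worked example: with srh->hdrlen == 2 the SRH itself takes (2 + 1) * 8 = 24 bytes, so INLINE mode reserves 24 bytes of headroom, ENCAP adds an outer IPv6 header for 24 + 40 = 64 bytes, and L2ENCAP returns 0 from this helper.
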
diff --git a/include/uapi/linux/seg6_local.h b/include/uapi/linux/seg6_local.h
new file mode 100644
index 0000000..ef2d8c3
--- /dev/null
+++ b/include/uapi/linux/seg6_local.h
@@ -0,0 +1,68 @@
+/*
+ * SR-IPv6 implementation
+ *
+ * Author:
+ * David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_LOCAL_H
+#define _UAPI_LINUX_SEG6_LOCAL_H
+
+#include <linux/seg6.h>
+
+enum {
+ SEG6_LOCAL_UNSPEC,
+ SEG6_LOCAL_ACTION,
+ SEG6_LOCAL_SRH,
+ SEG6_LOCAL_TABLE,
+ SEG6_LOCAL_NH4,
+ SEG6_LOCAL_NH6,
+ SEG6_LOCAL_IIF,
+ SEG6_LOCAL_OIF,
+ __SEG6_LOCAL_MAX,
+};
+#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
+
+enum {
+ SEG6_LOCAL_ACTION_UNSPEC = 0,
+ /* node segment */
+ SEG6_LOCAL_ACTION_END = 1,
+ /* adjacency segment (IPv6 cross-connect) */
+ SEG6_LOCAL_ACTION_END_X = 2,
+ /* lookup of next seg NH in table */
+ SEG6_LOCAL_ACTION_END_T = 3,
+ /* decap and L2 cross-connect */
+ SEG6_LOCAL_ACTION_END_DX2 = 4,
+ /* decap and IPv6 cross-connect */
+ SEG6_LOCAL_ACTION_END_DX6 = 5,
+ /* decap and IPv4 cross-connect */
+ SEG6_LOCAL_ACTION_END_DX4 = 6,
+ /* decap and lookup of DA in v6 table */
+ SEG6_LOCAL_ACTION_END_DT6 = 7,
+ /* decap and lookup of DA in v4 table */
+ SEG6_LOCAL_ACTION_END_DT4 = 8,
+ /* binding segment with insertion */
+ SEG6_LOCAL_ACTION_END_B6 = 9,
+ /* binding segment with encapsulation */
+ SEG6_LOCAL_ACTION_END_B6_ENCAP = 10,
+ /* binding segment with MPLS encap */
+ SEG6_LOCAL_ACTION_END_BM = 11,
+ /* lookup last seg in table */
+ SEG6_LOCAL_ACTION_END_S = 12,
+ /* forward to SR-unaware VNF with static proxy */
+ SEG6_LOCAL_ACTION_END_AS = 13,
+ /* forward to SR-unaware VNF with masquerading */
+ SEG6_LOCAL_ACTION_END_AM = 14,
+
+ __SEG6_LOCAL_ACTION_MAX,
+};
+
+#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)
+
+#endif
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index d856932..758f12b 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -184,12 +184,7 @@ enum
LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */
LINUX_MIB_LISTENOVERFLOWS, /* ListenOverflows */
LINUX_MIB_LISTENDROPS, /* ListenDrops */
- LINUX_MIB_TCPPREQUEUED, /* TCPPrequeued */
- LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, /* TCPDirectCopyFromBacklog */
- LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, /* TCPDirectCopyFromPrequeue */
- LINUX_MIB_TCPPREQUEUEDROPPED, /* TCPPrequeueDropped */
LINUX_MIB_TCPHPHITS, /* TCPHPHits */
- LINUX_MIB_TCPHPHITSTOUSER, /* TCPHPHitsToUser */
LINUX_MIB_TCPPUREACKS, /* TCPPureAcks */
LINUX_MIB_TCPHPACKS, /* TCPHPAcks */
LINUX_MIB_TCPRENORECOVERY, /* TCPRenoRecovery */
@@ -208,14 +203,12 @@ enum
LINUX_MIB_TCPSACKFAILURES, /* TCPSackFailures */
LINUX_MIB_TCPLOSSFAILURES, /* TCPLossFailures */
LINUX_MIB_TCPFASTRETRANS, /* TCPFastRetrans */
- LINUX_MIB_TCPFORWARDRETRANS, /* TCPForwardRetrans */
LINUX_MIB_TCPSLOWSTARTRETRANS, /* TCPSlowStartRetrans */
LINUX_MIB_TCPTIMEOUTS, /* TCPTimeouts */
LINUX_MIB_TCPLOSSPROBES, /* TCPLossProbes */
LINUX_MIB_TCPLOSSPROBERECOVERY, /* TCPLossProbeRecovery */
LINUX_MIB_TCPRENORECOVERYFAIL, /* TCPRenoRecoveryFail */
LINUX_MIB_TCPSACKRECOVERYFAIL, /* TCPSackRecoveryFail */
- LINUX_MIB_TCPSCHEDULERFAILED, /* TCPSchedulerFailed */
LINUX_MIB_TCPRCVCOLLAPSED, /* TCPRcvCollapsed */
LINUX_MIB_TCPDSACKOLDSENT, /* TCPDSACKOldSent */
LINUX_MIB_TCPDSACKOFOSENT, /* TCPDSACKOfoSent */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index a5507c9..15c25ec 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -231,6 +231,14 @@ enum {
TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */
TCP_NLA_DATA_SEGS_OUT, /* Data pkts sent including retransmission */
TCP_NLA_TOTAL_RETRANS, /* Data pkts retransmitted */
+ TCP_NLA_PACING_RATE, /* Pacing rate in bytes per second */
+ TCP_NLA_DELIVERY_RATE, /* Delivery rate in bytes per second */
+ TCP_NLA_SND_CWND, /* Sending congestion window */
+ TCP_NLA_REORDERING, /* Reordering metric */
+ TCP_NLA_MIN_RTT, /* minimum RTT */
+ TCP_NLA_RECUR_RETRANS, /* Recurring retransmits for the current pkt */
+	TCP_NLA_DELIVERY_RATE_APP_LMT,	/* delivery rate application limited? */
};
/* for TCP_MD5SIG socket option */
@@ -248,4 +256,13 @@ struct tcp_md5sig {
__u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */
};
+/* INET_DIAG_MD5SIG */
+struct tcp_diag_md5sig {
+ __u8 tcpm_family;
+ __u8 tcpm_prefixlen;
+ __u16 tcpm_keylen;
+ __be32 tcpm_addr[4];
+ __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN];
+};
+
#endif /* _UAPI_LINUX_TCP_H */
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 2b384ff..5fe7370 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -304,6 +304,7 @@ enum xfrm_attr_type_t {
XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */
XFRMA_PAD,
XFRMA_OFFLOAD_DEV, /* struct xfrm_state_offload */
+ XFRMA_OUTPUT_MARK, /* __u32 */
__XFRMA_MAX
#define XFRMA_MAX (__XFRMA_MAX - 1)
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e1e5e65..897daa0 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,7 +1,13 @@
obj-y := core.o
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
+ifeq ($(CONFIG_NET),y)
+obj-$(CONFIG_BPF_SYSCALL) += devmap.o
+ifeq ($(CONFIG_STREAM_PARSER),y)
+obj-$(CONFIG_BPF_SYSCALL) += sockmap.o
+endif
+endif
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index d771a38..98c0f00 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -49,13 +49,15 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
+ int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_array *array;
u64 array_size;
u32 elem_size;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
- attr->value_size == 0 || attr->map_flags)
+ attr->value_size == 0 || attr->map_flags & ~BPF_F_NUMA_NODE ||
+ (percpu && numa_node != NUMA_NO_NODE))
return ERR_PTR(-EINVAL);
if (attr->value_size > KMALLOC_MAX_SIZE)
@@ -77,7 +79,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
/* allocate all map elements and zero-initialize them */
- array = bpf_map_area_alloc(array_size);
+ array = bpf_map_area_alloc(array_size, numa_node);
if (!array)
return ERR_PTR(-ENOMEM);
@@ -87,6 +89,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array->map.value_size = attr->value_size;
array->map.max_entries = attr->max_entries;
array->map.map_flags = attr->map_flags;
+ array->map.numa_node = numa_node;
array->elem_size = elem_size;
if (!percpu)
@@ -603,6 +606,31 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
return READ_ONCE(*inner_map);
}
+static u32 array_of_map_gen_lookup(struct bpf_map *map,
+ struct bpf_insn *insn_buf)
+{
+ u32 elem_size = round_up(map->value_size, 8);
+ struct bpf_insn *insn = insn_buf;
+ const int ret = BPF_REG_0;
+ const int map_ptr = BPF_REG_1;
+ const int index = BPF_REG_2;
+
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
+ *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
+ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+ if (is_power_of_2(elem_size))
+ *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
+ else
+ *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
+ *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
+ *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *insn++ = BPF_MOV64_IMM(ret, 0);
+
+ return insn - insn_buf;
+}
+
const struct bpf_map_ops array_of_maps_map_ops = {
.map_alloc = array_of_map_alloc,
.map_free = array_of_map_free,
@@ -612,4 +640,5 @@ const struct bpf_map_ops array_of_maps_map_ops = {
.map_fd_get_ptr = bpf_map_fd_get_ptr,
.map_fd_put_ptr = bpf_map_fd_put_ptr,
.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
+ .map_gen_lookup = array_of_map_gen_lookup,
};
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
index 5c35a98..7d4f89b 100644
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -69,7 +69,8 @@ static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
/* ref is an approximation on access frequency. It does not
* have to be very accurate. Hence, no protection is used.
*/
- node->ref = 1;
+ if (!node->ref)
+ node->ref = 1;
}
int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ad5f559..917cc04 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -595,9 +595,13 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP | BPF_JSET | BPF_K:
/* Accommodate for extra offset in case of a backjump. */
off = from->off;
@@ -833,12 +837,20 @@ static unsigned int ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn,
[BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
[BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
[BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
+ [BPF_JMP | BPF_JLT | BPF_X] = &&JMP_JLT_X,
+ [BPF_JMP | BPF_JLT | BPF_K] = &&JMP_JLT_K,
[BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
[BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
+ [BPF_JMP | BPF_JLE | BPF_X] = &&JMP_JLE_X,
+ [BPF_JMP | BPF_JLE | BPF_K] = &&JMP_JLE_K,
[BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
[BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
+ [BPF_JMP | BPF_JSLT | BPF_X] = &&JMP_JSLT_X,
+ [BPF_JMP | BPF_JSLT | BPF_K] = &&JMP_JSLT_K,
[BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
[BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
+ [BPF_JMP | BPF_JSLE | BPF_X] = &&JMP_JSLE_X,
+ [BPF_JMP | BPF_JSLE | BPF_K] = &&JMP_JSLE_K,
[BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
[BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
/* Program return */
@@ -1073,6 +1085,18 @@ out:
CONT_JMP;
}
CONT;
+ JMP_JLT_X:
+ if (DST < SRC) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JLT_K:
+ if (DST < IMM) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
JMP_JGE_X:
if (DST >= SRC) {
insn += insn->off;
@@ -1085,6 +1109,18 @@ out:
CONT_JMP;
}
CONT;
+ JMP_JLE_X:
+ if (DST <= SRC) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JLE_K:
+ if (DST <= IMM) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
JMP_JSGT_X:
if (((s64) DST) > ((s64) SRC)) {
insn += insn->off;
@@ -1097,6 +1133,18 @@ out:
CONT_JMP;
}
CONT;
+ JMP_JSLT_X:
+ if (((s64) DST) < ((s64) SRC)) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JSLT_K:
+ if (((s64) DST) < ((s64) IMM)) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
JMP_JSGE_X:
if (((s64) DST) >= ((s64) SRC)) {
insn += insn->off;
@@ -1109,6 +1157,18 @@ out:
CONT_JMP;
}
CONT;
+ JMP_JSLE_X:
+ if (((s64) DST) <= ((s64) SRC)) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
+ JMP_JSLE_K:
+ if (((s64) DST) <= ((s64) IMM)) {
+ insn += insn->off;
+ CONT_JMP;
+ }
+ CONT;
JMP_JSET_X:
if (DST & SRC) {
insn += insn->off;
@@ -1378,6 +1438,7 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
+const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
new file mode 100644
index 0000000..ecf9f99
--- /dev/null
+++ b/kernel/bpf/devmap.c
@@ -0,0 +1,409 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+/* A devmap's primary use is as a backing map for the XDP BPF helper call
+ * bpf_redirect_map(). Because XDP is mostly concerned with performance we
+ * spent some effort to ensure the datapath with redirect maps does not use
+ * any locking. This is a quick note on the details.
+ *
+ * There are three possible paths into the devmap control plane: bpf
+ * syscalls, bpf programs, and driver-side xmit/flush operations. A bpf
+ * syscall will invoke an update, delete, or lookup operation. To ensure
+ * updates and deletes appear atomic from the datapath side, xchg() is used
+ * to modify the netdev_map array. Because the datapath does a lookup into
+ * the netdev_map array (read-only) from an RCU critical section, we use
+ * call_rcu() to wait for an RCU grace period before freeing the old data
+ * structures. This ensures the datapath always has a valid copy. However,
+ * the datapath does a "flush" operation that pushes any pending packets in
+ * the driver outside the RCU critical section. Each bpf_dtab_netdev tracks
+ * these pending operations using an atomic per-cpu bitmap. The
+ * bpf_dtab_netdev object will not be destroyed until all bits are cleared,
+ * indicating that outstanding flush operations have completed.
+ *
+ * BPF syscalls may race with BPF program calls on any of the update, delete
+ * or lookup operations. As noted above, the xchg() operation also keeps the
+ * netdev_map consistent in this case. From the devmap side, BPF programs
+ * calling into these operations are the same as multiple user space threads
+ * making system calls.
+ *
+ * Finally, any of the above may race with a netdev_unregister notifier. The
+ * unregister notifier must search the map structure for entries that
+ * contain a reference to the net device being removed and drop them. This
+ * is a two-step process: (a) dereference the bpf_dtab_netdev object in
+ * netdev_map and (b) check whether its ifindex is the same as that of the
+ * net_device being removed. When removing the dev, a cmpxchg() is used to
+ * ensure the correct dev is removed; in the case of a concurrent update or
+ * delete operation it is possible that the initially referenced dev is no
+ * longer in the map. As the notifier hook walks the map we know that new
+ * dev references cannot be added by the user because core infrastructure
+ * ensures dev_get_by_index() calls will fail at this point.
+ */
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+struct bpf_dtab_netdev {
+ struct net_device *dev;
+ struct bpf_dtab *dtab;
+ unsigned int bit;
+ struct rcu_head rcu;
+};
+
+struct bpf_dtab {
+ struct bpf_map map;
+ struct bpf_dtab_netdev **netdev_map;
+ unsigned long __percpu *flush_needed;
+ struct list_head list;
+};
+
+static DEFINE_SPINLOCK(dev_map_lock);
+static LIST_HEAD(dev_map_list);
+
+static u64 dev_map_bitmap_size(const union bpf_attr *attr)
+{
+ return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
+}
+
+static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_dtab *dtab;
+ u64 cost;
+ int err;
+
+ /* check sanity of attributes */
+ if (attr->max_entries == 0 || attr->key_size != 4 ||
+ attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
+ return ERR_PTR(-EINVAL);
+
+ dtab = kzalloc(sizeof(*dtab), GFP_USER);
+ if (!dtab)
+ return ERR_PTR(-ENOMEM);
+
+ /* mandatory map attributes */
+ dtab->map.map_type = attr->map_type;
+ dtab->map.key_size = attr->key_size;
+ dtab->map.value_size = attr->value_size;
+ dtab->map.max_entries = attr->max_entries;
+ dtab->map.map_flags = attr->map_flags;
+ dtab->map.numa_node = bpf_map_attr_numa_node(attr);
+
+ /* make sure page count doesn't overflow */
+ cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
+ cost += dev_map_bitmap_size(attr) * num_possible_cpus();
+ if (cost >= U32_MAX - PAGE_SIZE)
+ goto free_dtab;
+
+ dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+ /* if map size is larger than memlock limit, reject it early */
+ err = bpf_map_precharge_memlock(dtab->map.pages);
+ if (err)
+ goto free_dtab;
+
+ /* A per cpu bitfield with a bit per possible net device */
+ dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
+ __alignof__(unsigned long));
+ if (!dtab->flush_needed)
+ goto free_dtab;
+
+ dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
+ sizeof(struct bpf_dtab_netdev *),
+ dtab->map.numa_node);
+ if (!dtab->netdev_map)
+ goto free_dtab;
+
+ spin_lock(&dev_map_lock);
+ list_add_tail_rcu(&dtab->list, &dev_map_list);
+ spin_unlock(&dev_map_lock);
+
+ return &dtab->map;
+free_dtab:
+ free_percpu(dtab->flush_needed);
+ kfree(dtab);
+ return ERR_PTR(-ENOMEM);
+}
+
+static void dev_map_free(struct bpf_map *map)
+{
+ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+ int i, cpu;
+
+ /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+ * so the programs (can be more than one that used this map) were
+ * disconnected from events. Wait for outstanding critical sections in
+ * these programs to complete. The rcu critical section only guarantees
+ * no further reads against netdev_map. It does __not__ ensure pending
+ * flush operations (if any) are complete.
+ */
+
+ spin_lock(&dev_map_lock);
+ list_del_rcu(&dtab->list);
+ spin_unlock(&dev_map_lock);
+
+ synchronize_rcu();
+
+ /* To ensure all pending flush operations have completed wait for flush
+ * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
+ * Because the above synchronize_rcu() ensures the map is disconnected
+ * from the program we can assume no new bits will be set.
+ */
+ for_each_online_cpu(cpu) {
+ unsigned long *bitmap = per_cpu_ptr(dtab->flush_needed, cpu);
+
+ while (!bitmap_empty(bitmap, dtab->map.max_entries))
+ cpu_relax();
+ }
+
+ for (i = 0; i < dtab->map.max_entries; i++) {
+ struct bpf_dtab_netdev *dev;
+
+ dev = dtab->netdev_map[i];
+ if (!dev)
+ continue;
+
+ dev_put(dev->dev);
+ kfree(dev);
+ }
+
+ free_percpu(dtab->flush_needed);
+ bpf_map_area_free(dtab->netdev_map);
+ kfree(dtab);
+}
+
+static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+ u32 index = key ? *(u32 *)key : U32_MAX;
+ u32 *next = next_key;
+
+ if (index >= dtab->map.max_entries) {
+ *next = 0;
+ return 0;
+ }
+
+ if (index == dtab->map.max_entries - 1)
+ return -ENOENT;
+ *next = index + 1;
+ return 0;
+}
+
+void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
+{
+ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+ unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+
+ __set_bit(bit, bitmap);
+}
+
+/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
+ * from the driver before returning from its napi->poll() routine. The poll()
+ * routine is called either from busy_poll context or net_rx_action signaled
+ * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
+ * net device can be torn down. On devmap tear down we ensure the ctx bitmap
+ * is zeroed before completing to ensure all flush operations have completed.
+ */
+void __dev_map_flush(struct bpf_map *map)
+{
+ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+ unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed);
+ u32 bit;
+
+ for_each_set_bit(bit, bitmap, map->max_entries) {
+ struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
+ struct net_device *netdev;
+
+ /* This is possible if the dev entry is removed by user space
+ * between xdp redirect and flush op.
+ */
+ if (unlikely(!dev))
+ continue;
+
+ __clear_bit(bit, bitmap);
+ netdev = dev->dev;
+ if (likely(netdev->netdev_ops->ndo_xdp_flush))
+ netdev->netdev_ops->ndo_xdp_flush(netdev);
+ }
+}
+
+/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
+ * update happens in parallel here, a dev_put won't happen until after reading the
+ * ifindex.
+ */
+struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+ struct bpf_dtab_netdev *dev;
+
+ if (key >= map->max_entries)
+ return NULL;
+
+ dev = READ_ONCE(dtab->netdev_map[key]);
+ return dev ? dev->dev : NULL;
+}
+
+static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
+{
+ struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key);
+
+ return dev ? &dev->ifindex : NULL;
+}
+
+static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
+{
+ if (dev->dev->netdev_ops->ndo_xdp_flush) {
+ struct net_device *fl = dev->dev;
+ unsigned long *bitmap;
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
+ __clear_bit(dev->bit, bitmap);
+
+ fl->netdev_ops->ndo_xdp_flush(dev->dev);
+ }
+ }
+}
+
+static void __dev_map_entry_free(struct rcu_head *rcu)
+{
+ struct bpf_dtab_netdev *dev;
+
+ dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
+ dev_map_flush_old(dev);
+ dev_put(dev->dev);
+ kfree(dev);
+}
+
+static int dev_map_delete_elem(struct bpf_map *map, void *key)
+{
+ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+ struct bpf_dtab_netdev *old_dev;
+ int k = *(u32 *)key;
+
+ if (k >= map->max_entries)
+ return -EINVAL;
+
+	/* Use call_rcu() here to ensure any rcu critical sections have
+	 * completed; this does not guarantee a flush has happened yet,
+	 * because the driver-side rcu_read_lock/unlock only protects the
+	 * running XDP program. For pending flush operations, the dev and
+	 * ctx are stored in another per-cpu map. Additionally, the driver
+	 * tear down ensures all soft irqs are complete before the net
+	 * device is removed once its reference count drops to zero.
+	 */
+ old_dev = xchg(&dtab->netdev_map[k], NULL);
+ if (old_dev)
+ call_rcu(&old_dev->rcu, __dev_map_entry_free);
+ return 0;
+}
+
+static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
+{
+ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+ struct net *net = current->nsproxy->net_ns;
+ struct bpf_dtab_netdev *dev, *old_dev;
+ u32 i = *(u32 *)key;
+ u32 ifindex = *(u32 *)value;
+
+ if (unlikely(map_flags > BPF_EXIST))
+ return -EINVAL;
+ if (unlikely(i >= dtab->map.max_entries))
+ return -E2BIG;
+ if (unlikely(map_flags == BPF_NOEXIST))
+ return -EEXIST;
+
+ if (!ifindex) {
+ dev = NULL;
+ } else {
+ dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
+ map->numa_node);
+ if (!dev)
+ return -ENOMEM;
+
+ dev->dev = dev_get_by_index(net, ifindex);
+ if (!dev->dev) {
+ kfree(dev);
+ return -EINVAL;
+ }
+
+ dev->bit = i;
+ dev->dtab = dtab;
+ }
+
+	/* Use call_rcu() here to ensure rcu critical sections have completed,
+	 * remembering that the driver-side flush operation will happen before
+	 * the net device is removed.
+ */
+ old_dev = xchg(&dtab->netdev_map[i], dev);
+ if (old_dev)
+ call_rcu(&old_dev->rcu, __dev_map_entry_free);
+
+ return 0;
+}
+
+const struct bpf_map_ops dev_map_ops = {
+ .map_alloc = dev_map_alloc,
+ .map_free = dev_map_free,
+ .map_get_next_key = dev_map_get_next_key,
+ .map_lookup_elem = dev_map_lookup_elem,
+ .map_update_elem = dev_map_update_elem,
+ .map_delete_elem = dev_map_delete_elem,
+};
+
+static int dev_map_notification(struct notifier_block *notifier,
+ ulong event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct bpf_dtab *dtab;
+ int i;
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ /* This rcu_read_lock/unlock pair is needed because
+ * dev_map_list is an RCU list AND to ensure a delete
+ * operation does not free a netdev_map entry while we
+ * are comparing it against the netdev being unregistered.
+ */
+ rcu_read_lock();
+ list_for_each_entry_rcu(dtab, &dev_map_list, list) {
+ for (i = 0; i < dtab->map.max_entries; i++) {
+ struct bpf_dtab_netdev *dev, *odev;
+
+ dev = READ_ONCE(dtab->netdev_map[i]);
+ if (!dev ||
+ dev->dev->ifindex != netdev->ifindex)
+ continue;
+ odev = cmpxchg(&dtab->netdev_map[i], dev, NULL);
+ if (dev == odev)
+ call_rcu(&dev->rcu,
+ __dev_map_entry_free);
+ }
+ }
+ rcu_read_unlock();
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block dev_map_notifier = {
+ .notifier_call = dev_map_notification,
+};
+
+static int __init dev_map_init(void)
+{
+ register_netdevice_notifier(&dev_map_notifier);
+ return 0;
+}
+
+subsys_initcall(dev_map_init);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d11c818..431126f 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -18,6 +18,9 @@
#include "bpf_lru_list.h"
#include "map_in_map.h"
+#define HTAB_CREATE_FLAG_MASK \
+ (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE)
+
struct bucket {
struct hlist_nulls_head head;
raw_spinlock_t lock;
@@ -138,7 +141,8 @@ static int prealloc_init(struct bpf_htab *htab)
if (!htab_is_percpu(htab) && !htab_is_lru(htab))
num_entries += num_possible_cpus();
- htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries);
+ htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries,
+ htab->map.numa_node);
if (!htab->elems)
return -ENOMEM;
@@ -233,6 +237,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
*/
bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+ int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_htab *htab;
int err, i;
u64 cost;
@@ -248,7 +253,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
*/
return ERR_PTR(-EPERM);
- if (attr->map_flags & ~(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU))
+ if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
/* reserved bits should not be used */
return ERR_PTR(-EINVAL);
@@ -258,6 +263,9 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
if (lru && !prealloc)
return ERR_PTR(-ENOTSUPP);
+ if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
+ return ERR_PTR(-EINVAL);
+
htab = kzalloc(sizeof(*htab), GFP_USER);
if (!htab)
return ERR_PTR(-ENOMEM);
@@ -268,6 +276,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
htab->map.value_size = attr->value_size;
htab->map.max_entries = attr->max_entries;
htab->map.map_flags = attr->map_flags;
+ htab->map.numa_node = numa_node;
/* check sanity of attributes.
* value_size == 0 may be allowed in the future to use map as a set
@@ -346,7 +355,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
err = -ENOMEM;
htab->buckets = bpf_map_area_alloc(htab->n_buckets *
- sizeof(struct bucket));
+ sizeof(struct bucket),
+ htab->map.numa_node);
if (!htab->buckets)
goto free_htab;
@@ -504,6 +514,29 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
return NULL;
}
+static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
+ struct bpf_insn *insn_buf)
+{
+ struct bpf_insn *insn = insn_buf;
+ const int ret = BPF_REG_0;
+ const int ref_reg = BPF_REG_1;
+
+ *insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
+ *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
+ offsetof(struct htab_elem, lru_node) +
+ offsetof(struct bpf_lru_node, ref));
+ *insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1);
+ *insn++ = BPF_ST_MEM(BPF_B, ret,
+ offsetof(struct htab_elem, lru_node) +
+ offsetof(struct bpf_lru_node, ref),
+ 1);
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
+ offsetof(struct htab_elem, key) +
+ round_up(map->key_size, 8));
+ return insn - insn_buf;
+}
+
/* It is called from the bpf_lru_list when the LRU needs to delete
* older elements from the htab.
*/
@@ -704,7 +737,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
atomic_dec(&htab->count);
return ERR_PTR(-E2BIG);
}
- l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+ l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
+ htab->map.numa_node);
if (!l_new)
return ERR_PTR(-ENOMEM);
}
@@ -1126,6 +1160,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_lookup_elem = htab_lru_map_lookup_elem,
.map_update_elem = htab_lru_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem,
+ .map_gen_lookup = htab_lru_map_gen_lookup,
};
/* Called from eBPF program */
@@ -1315,6 +1350,22 @@ static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
return READ_ONCE(*inner_map);
}
+static u32 htab_of_map_gen_lookup(struct bpf_map *map,
+ struct bpf_insn *insn_buf)
+{
+ struct bpf_insn *insn = insn_buf;
+ const int ret = BPF_REG_0;
+
+ *insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
+ offsetof(struct htab_elem, key) +
+ round_up(map->key_size, 8));
+ *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
+
+ return insn - insn_buf;
+}
+
static void htab_of_map_free(struct bpf_map *map)
{
bpf_map_meta_free(map->inner_map_meta);
@@ -1330,4 +1381,5 @@ const struct bpf_map_ops htab_of_maps_map_ops = {
.map_fd_get_ptr = bpf_map_fd_get_ptr,
.map_fd_put_ptr = bpf_map_fd_put_ptr,
.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
+ .map_gen_lookup = htab_of_map_gen_lookup,
};
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index b09185f..1b76784 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -244,7 +244,8 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
if (value)
size += trie->map.value_size;
- node = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
+ node = kmalloc_node(size, GFP_ATOMIC | __GFP_NOWARN,
+ trie->map.numa_node);
if (!node)
return NULL;
@@ -405,6 +406,8 @@ static int trie_delete_elem(struct bpf_map *map, void *key)
#define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX)
#define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN)
+#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE)
+
static struct bpf_map *trie_alloc(union bpf_attr *attr)
{
struct lpm_trie *trie;
@@ -416,7 +419,8 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 ||
- attr->map_flags != BPF_F_NO_PREALLOC ||
+ !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+ attr->map_flags & ~LPM_CREATE_FLAG_MASK ||
attr->key_size < LPM_KEY_SIZE_MIN ||
attr->key_size > LPM_KEY_SIZE_MAX ||
attr->value_size < LPM_VAL_SIZE_MIN ||
@@ -433,6 +437,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
trie->map.value_size = attr->value_size;
trie->map.max_entries = attr->max_entries;
trie->map.map_flags = attr->map_flags;
+ trie->map.numa_node = bpf_map_attr_numa_node(attr);
trie->data_size = attr->key_size -
offsetof(struct bpf_lpm_trie_key, data);
trie->max_prefixlen = trie->data_size * 8;
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
new file mode 100644
index 0000000..f6ffde9
--- /dev/null
+++ b/kernel/bpf/sockmap.c
@@ -0,0 +1,873 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+/* A BPF sock_map is used to store sock objects. This is primarily used
+ * for doing socket redirect with BPF helper routines.
+ *
+ * A sock map may have BPF programs attached to it; currently a program
+ * used to parse packets and a program to provide a verdict and redirect
+ * decision on the packet are supported. Any programs attached to a sock
+ * map are inherited by sock objects when they are added to the map. If
+ * no BPF programs are attached the sock object may only be used for sock
+ * redirect.
+ *
+ * A sock object may be in multiple maps, but can only inherit a single
+ * parse or verdict program. If adding a sock object to a map would
+ * result in having multiple parsing programs, the update will return
+ * an EBUSY error.
+ *
+ * For reference, this map is similar to the devmap used in the XDP
+ * context; reviewing the two together may be useful. For an example,
+ * please review ./samples/bpf/sockmap/.
+ */
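+
+/* A minimal userspace sketch, for illustration only: create a sockmap
+ * and attach previously loaded BPF_PROG_TYPE_SK_SKB programs to it.
+ * It assumes libbpf-style wrappers; parse_fd and verdict_fd are
+ * hypothetical program fds, not part of this file:
+ *
+ * map_fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, 4, 4, 20, 0);
+ * bpf_prog_attach(parse_fd, map_fd, BPF_SK_SKB_STREAM_PARSER, 0);
+ * bpf_prog_attach(verdict_fd, map_fd, BPF_SK_SKB_STREAM_VERDICT, 0);
+ */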
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <linux/filter.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/kernel.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/workqueue.h>
+#include <linux/list.h>
+#include <net/strparser.h>
+
+struct bpf_stab {
+ struct bpf_map map;
+ struct sock **sock_map;
+ struct bpf_prog *bpf_parse;
+ struct bpf_prog *bpf_verdict;
+};
+
+enum smap_psock_state {
+ SMAP_TX_RUNNING,
+};
+
+struct smap_psock_map_entry {
+ struct list_head list;
+ struct sock **entry;
+};
+
+struct smap_psock {
+ struct rcu_head rcu;
+ /* refcnt is used inside sk_callback_lock */
+ u32 refcnt;
+
+ /* datapath variables */
+ struct sk_buff_head rxqueue;
+ bool strp_enabled;
+
+ /* datapath error path cache across tx work invocations */
+ int save_rem;
+ int save_off;
+ struct sk_buff *save_skb;
+
+ struct strparser strp;
+ struct bpf_prog *bpf_parse;
+ struct bpf_prog *bpf_verdict;
+ struct list_head maps;
+
+ /* Back reference used when sock callbacks trigger sockmap operations */
+ struct sock *sock;
+ unsigned long state;
+
+ struct work_struct tx_work;
+ struct work_struct gc_work;
+
+ void (*save_data_ready)(struct sock *sk);
+ void (*save_write_space)(struct sock *sk);
+ void (*save_state_change)(struct sock *sk);
+};
+
+static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
+{
+ return rcu_dereference_sk_user_data(sk);
+}
+
+static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
+{
+ struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict);
+ int rc;
+
+ if (unlikely(!prog))
+ return SK_DROP;
+
+ skb_orphan(skb);
+ skb->sk = psock->sock;
+ bpf_compute_data_end(skb);
+ rc = (*prog->bpf_func)(skb, prog->insnsi);
+ skb->sk = NULL;
+
+ return rc;
+}
+
+static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
+{
+ struct sock *sk;
+ int rc;
+
+ /* Because we use per-cpu values to feed the sock redirect input from
+ * the BPF program into the do_sk_redirect_map() call, we need to
+ * ensure we are not preempted. The RCU read lock is not sufficient in
+ * this case with CONFIG_PREEMPT_RCU enabled, so we must be explicit here.
+ */
+ preempt_disable();
+ rc = smap_verdict_func(psock, skb);
+ switch (rc) {
+ case SK_REDIRECT:
+ sk = do_sk_redirect_map();
+ preempt_enable();
+ if (likely(sk)) {
+ struct smap_psock *peer = smap_psock_sk(sk);
+
+ if (likely(peer &&
+ test_bit(SMAP_TX_RUNNING, &peer->state) &&
+ !sock_flag(sk, SOCK_DEAD) &&
+ sock_writeable(sk))) {
+ skb_set_owner_w(skb, sk);
+ skb_queue_tail(&peer->rxqueue, skb);
+ schedule_work(&peer->tx_work);
+ break;
+ }
+ }
+ /* Fall through and free skb otherwise */
+ case SK_DROP:
+ default:
+ if (rc != SK_REDIRECT)
+ preempt_enable();
+ kfree_skb(skb);
+ }
+}
+
+static void smap_report_sk_error(struct smap_psock *psock, int err)
+{
+ struct sock *sk = psock->sock;
+
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+}
+
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+
+/* Called with lock_sock(sk) held */
+static void smap_state_change(struct sock *sk)
+{
+ struct smap_psock_map_entry *e, *tmp;
+ struct smap_psock *psock;
+ struct socket_wq *wq;
+ struct sock *osk;
+
+ rcu_read_lock();
+
+ /* Allowing transitions into the established and syn_recv states
+ * allows sockets to be bound to a smap object early, before the
+ * connection is established.
+ */
+ switch (sk->sk_state) {
+ case TCP_SYN_SENT:
+ case TCP_SYN_RECV:
+ case TCP_ESTABLISHED:
+ break;
+ case TCP_CLOSE_WAIT:
+ case TCP_CLOSING:
+ case TCP_LAST_ACK:
+ case TCP_FIN_WAIT1:
+ case TCP_FIN_WAIT2:
+ case TCP_LISTEN:
+ break;
+ case TCP_CLOSE:
+ /* Only release if the map entry is in fact the sock in
+ * question. There is a case where the operator deletes
+ * the sock from the map, but the TCP sock is closed before
+ * the psock is detached. Use cmpxchg to verify correct
+ * sock is removed.
+ */
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock))
+ break;
+ write_lock_bh(&sk->sk_callback_lock);
+ list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+ osk = cmpxchg(e->entry, sk, NULL);
+ if (osk == sk) {
+ list_del(&e->list);
+ smap_release_sock(psock, sk);
+ }
+ }
+ write_unlock_bh(&sk->sk_callback_lock);
+ break;
+ default:
+ psock = smap_psock_sk(sk);
+ if (unlikely(!psock))
+ break;
+ smap_report_sk_error(psock, EPIPE);
+ break;
+ }
+
+ wq = rcu_dereference(sk->sk_wq);
+ if (skwq_has_sleeper(wq))
+ wake_up_interruptible_all(&wq->wait);
+ rcu_read_unlock();
+}
+
+static void smap_read_sock_strparser(struct strparser *strp,
+ struct sk_buff *skb)
+{
+ struct smap_psock *psock;
+
+ rcu_read_lock();
+ psock = container_of(strp, struct smap_psock, strp);
+ smap_do_verdict(psock, skb);
+ rcu_read_unlock();
+}
+
+/* Called with lock held on socket */
+static void smap_data_ready(struct sock *sk)
+{
+ struct smap_psock *psock;
+
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (likely(psock)) {
+ write_lock_bh(&sk->sk_callback_lock);
+ strp_data_ready(&psock->strp);
+ write_unlock_bh(&sk->sk_callback_lock);
+ }
+ rcu_read_unlock();
+}
+
+static void smap_tx_work(struct work_struct *w)
+{
+ struct smap_psock *psock;
+ struct sk_buff *skb;
+ int rem, off, n;
+
+ psock = container_of(w, struct smap_psock, tx_work);
+
+ /* lock sock to avoid losing sk_socket at some point during the loop */
+ lock_sock(psock->sock);
+ if (psock->save_skb) {
+ skb = psock->save_skb;
+ rem = psock->save_rem;
+ off = psock->save_off;
+ psock->save_skb = NULL;
+ goto start;
+ }
+
+ while ((skb = skb_dequeue(&psock->rxqueue))) {
+ rem = skb->len;
+ off = 0;
+start:
+ do {
+ if (likely(psock->sock->sk_socket))
+ n = skb_send_sock_locked(psock->sock,
+ skb, off, rem);
+ else
+ n = -EINVAL;
+ if (n <= 0) {
+ if (n == -EAGAIN) {
+ /* Retry when space is available */
+ psock->save_skb = skb;
+ psock->save_rem = rem;
+ psock->save_off = off;
+ goto out;
+ }
+ /* Hard errors break pipe and stop xmit */
+ smap_report_sk_error(psock, n ? -n : EPIPE);
+ clear_bit(SMAP_TX_RUNNING, &psock->state);
+ kfree_skb(skb);
+ goto out;
+ }
+ rem -= n;
+ off += n;
+ } while (rem);
+ kfree_skb(skb);
+ }
+out:
+ release_sock(psock->sock);
+}
+
+static void smap_write_space(struct sock *sk)
+{
+ struct smap_psock *psock;
+
+ rcu_read_lock();
+ psock = smap_psock_sk(sk);
+ if (likely(psock && test_bit(SMAP_TX_RUNNING, &psock->state)))
+ schedule_work(&psock->tx_work);
+ rcu_read_unlock();
+}
+
+static void smap_stop_sock(struct smap_psock *psock, struct sock *sk)
+{
+ if (!psock->strp_enabled)
+ return;
+ sk->sk_data_ready = psock->save_data_ready;
+ sk->sk_write_space = psock->save_write_space;
+ sk->sk_state_change = psock->save_state_change;
+ psock->save_data_ready = NULL;
+ psock->save_write_space = NULL;
+ psock->save_state_change = NULL;
+ strp_stop(&psock->strp);
+ psock->strp_enabled = false;
+}
+
+static void smap_destroy_psock(struct rcu_head *rcu)
+{
+ struct smap_psock *psock = container_of(rcu,
+ struct smap_psock, rcu);
+
+ /* Now that a grace period has passed there is no longer
+ * any reference to this sock in the sockmap so we can
+ * destroy the psock, strparser, and bpf programs. But,
+ * because we use workqueue sync operations, we cannot
+ * do it in RCU context.
+ */
+ schedule_work(&psock->gc_work);
+}
+
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
+{
+ psock->refcnt--;
+ if (psock->refcnt)
+ return;
+
+ smap_stop_sock(psock, sock);
+ clear_bit(SMAP_TX_RUNNING, &psock->state);
+ rcu_assign_sk_user_data(sock, NULL);
+ call_rcu_sched(&psock->rcu, smap_destroy_psock);
+}
+
+static int smap_parse_func_strparser(struct strparser *strp,
+ struct sk_buff *skb)
+{
+ struct smap_psock *psock;
+ struct bpf_prog *prog;
+ int rc;
+
+ rcu_read_lock();
+ psock = container_of(strp, struct smap_psock, strp);
+ prog = READ_ONCE(psock->bpf_parse);
+
+ if (unlikely(!prog)) {
+ rcu_read_unlock();
+ return skb->len;
+ }
+
+ /* Attach the socket for the bpf program to use if needed. We can do
+ * this because strparser clones the skb before handing it to an upper
+ * layer, meaning skb_orphan() has been called. We NULL sk on the
+ * way out to ensure we don't trigger a BUG_ON in skb/sk operations
+ * later and because we are not charging the memory of this skb to
+ * any socket yet.
+ */
+ skb->sk = psock->sock;
+ bpf_compute_data_end(skb);
+ rc = (*prog->bpf_func)(skb, prog->insnsi);
+ skb->sk = NULL;
+ rcu_read_unlock();
+ return rc;
+}
+
+static int smap_read_sock_done(struct strparser *strp, int err)
+{
+ return err;
+}
+
+static int smap_init_sock(struct smap_psock *psock,
+ struct sock *sk)
+{
+ static const struct strp_callbacks cb = {
+ .rcv_msg = smap_read_sock_strparser,
+ .parse_msg = smap_parse_func_strparser,
+ .read_sock_done = smap_read_sock_done,
+ };
+
+ return strp_init(&psock->strp, sk, &cb);
+}
+
+static void smap_init_progs(struct smap_psock *psock,
+ struct bpf_stab *stab,
+ struct bpf_prog *verdict,
+ struct bpf_prog *parse)
+{
+ struct bpf_prog *orig_parse, *orig_verdict;
+
+ orig_parse = xchg(&psock->bpf_parse, parse);
+ orig_verdict = xchg(&psock->bpf_verdict, verdict);
+
+ if (orig_verdict)
+ bpf_prog_put(orig_verdict);
+ if (orig_parse)
+ bpf_prog_put(orig_parse);
+}
+
+static void smap_start_sock(struct smap_psock *psock, struct sock *sk)
+{
+ if (sk->sk_data_ready == smap_data_ready)
+ return;
+ psock->save_data_ready = sk->sk_data_ready;
+ psock->save_write_space = sk->sk_write_space;
+ psock->save_state_change = sk->sk_state_change;
+ sk->sk_data_ready = smap_data_ready;
+ sk->sk_write_space = smap_write_space;
+ sk->sk_state_change = smap_state_change;
+ psock->strp_enabled = true;
+}
+
+static void sock_map_remove_complete(struct bpf_stab *stab)
+{
+ bpf_map_area_free(stab->sock_map);
+ kfree(stab);
+}
+
+static void smap_gc_work(struct work_struct *w)
+{
+ struct smap_psock_map_entry *e, *tmp;
+ struct smap_psock *psock;
+
+ psock = container_of(w, struct smap_psock, gc_work);
+
+ /* no callback lock needed because we already detached sockmap ops */
+ if (psock->strp_enabled)
+ strp_done(&psock->strp);
+
+ cancel_work_sync(&psock->tx_work);
+ __skb_queue_purge(&psock->rxqueue);
+
+ /* At this point all strparser and xmit work must be complete */
+ if (psock->bpf_parse)
+ bpf_prog_put(psock->bpf_parse);
+ if (psock->bpf_verdict)
+ bpf_prog_put(psock->bpf_verdict);
+
+ list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+ list_del(&e->list);
+ kfree(e);
+ }
+
+ sock_put(psock->sock);
+ kfree(psock);
+}
+
+static struct smap_psock *smap_init_psock(struct sock *sock,
+ struct bpf_stab *stab)
+{
+ struct smap_psock *psock;
+
+ psock = kzalloc_node(sizeof(struct smap_psock),
+ GFP_ATOMIC | __GFP_NOWARN,
+ stab->map.numa_node);
+ if (!psock)
+ return ERR_PTR(-ENOMEM);
+
+ psock->sock = sock;
+ skb_queue_head_init(&psock->rxqueue);
+ INIT_WORK(&psock->tx_work, smap_tx_work);
+ INIT_WORK(&psock->gc_work, smap_gc_work);
+ INIT_LIST_HEAD(&psock->maps);
+ psock->refcnt = 1;
+
+ rcu_assign_sk_user_data(sock, psock);
+ sock_hold(sock);
+ return psock;
+}
+
+static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_stab *stab;
+ int err = -EINVAL;
+ u64 cost;
+
+ /* check sanity of attributes */
+ if (attr->max_entries == 0 || attr->key_size != 4 ||
+ attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
+ return ERR_PTR(-EINVAL);
+
+ if (attr->value_size > KMALLOC_MAX_SIZE)
+ return ERR_PTR(-E2BIG);
+
+ stab = kzalloc(sizeof(*stab), GFP_USER);
+ if (!stab)
+ return ERR_PTR(-ENOMEM);
+
+ /* mandatory map attributes */
+ stab->map.map_type = attr->map_type;
+ stab->map.key_size = attr->key_size;
+ stab->map.value_size = attr->value_size;
+ stab->map.max_entries = attr->max_entries;
+ stab->map.map_flags = attr->map_flags;
+ stab->map.numa_node = bpf_map_attr_numa_node(attr);
+
+ /* make sure page count doesn't overflow */
+ cost = (u64) stab->map.max_entries * sizeof(struct sock *);
+ if (cost >= U32_MAX - PAGE_SIZE)
+ goto free_stab;
+
+ stab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+ /* if map size is larger than memlock limit, reject it early */
+ err = bpf_map_precharge_memlock(stab->map.pages);
+ if (err)
+ goto free_stab;
+
+ err = -ENOMEM;
+ stab->sock_map = bpf_map_area_alloc(stab->map.max_entries *
+ sizeof(struct sock *),
+ stab->map.numa_node);
+ if (!stab->sock_map)
+ goto free_stab;
+
+ return &stab->map;
+free_stab:
+ kfree(stab);
+ return ERR_PTR(err);
+}
+
+static void smap_list_remove(struct smap_psock *psock, struct sock **entry)
+{
+ struct smap_psock_map_entry *e, *tmp;
+
+ list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+ if (e->entry == entry) {
+ list_del(&e->list);
+ break;
+ }
+ }
+}
+
+static void sock_map_free(struct bpf_map *map)
+{
+ struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+ int i;
+
+ synchronize_rcu();
+
+ /* At this point no update, lookup or delete operations can happen.
+ * However, be aware we can still get socket state event updates and
+ * data ready callbacks that reference the psock from sk_user_data.
+ * Also, psock worker threads are still in-flight. So smap_release_sock
+ * will only free the psock after cancel_sync on the worker threads
+ * and a grace period expires, to ensure the psock is really safe to
+ * remove.
+ */
+ rcu_read_lock();
+ for (i = 0; i < stab->map.max_entries; i++) {
+ struct smap_psock *psock;
+ struct sock *sock;
+
+ sock = xchg(&stab->sock_map[i], NULL);
+ if (!sock)
+ continue;
+
+ write_lock_bh(&sock->sk_callback_lock);
+ psock = smap_psock_sk(sock);
+ smap_list_remove(psock, &stab->sock_map[i]);
+ smap_release_sock(psock, sock);
+ write_unlock_bh(&sock->sk_callback_lock);
+ }
+ rcu_read_unlock();
+
+ if (stab->bpf_verdict)
+ bpf_prog_put(stab->bpf_verdict);
+ if (stab->bpf_parse)
+ bpf_prog_put(stab->bpf_parse);
+
+ sock_map_remove_complete(stab);
+}
+
+static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+ struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+ u32 i = key ? *(u32 *)key : U32_MAX;
+ u32 *next = (u32 *)next_key;
+
+ if (i >= stab->map.max_entries) {
+ *next = 0;
+ return 0;
+ }
+
+ if (i == stab->map.max_entries - 1)
+ return -ENOENT;
+
+ *next = i + 1;
+ return 0;
+}
+
+struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+ struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+
+ if (key >= map->max_entries)
+ return NULL;
+
+ return READ_ONCE(stab->sock_map[key]);
+}
+
+static int sock_map_delete_elem(struct bpf_map *map, void *key)
+{
+ struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+ struct smap_psock *psock;
+ int k = *(u32 *)key;
+ struct sock *sock;
+
+ if (k >= map->max_entries)
+ return -EINVAL;
+
+ sock = xchg(&stab->sock_map[k], NULL);
+ if (!sock)
+ return -EINVAL;
+
+ write_lock_bh(&sock->sk_callback_lock);
+ psock = smap_psock_sk(sock);
+ if (!psock)
+ goto out;
+
+ if (psock->bpf_parse)
+ smap_stop_sock(psock, sock);
+ smap_list_remove(psock, &stab->sock_map[k]);
+ smap_release_sock(psock, sock);
+out:
+ write_unlock_bh(&sock->sk_callback_lock);
+ return 0;
+}
+
+/* Locking notes: Concurrent updates, deletes, and lookups are allowed and are
+ * done inside rcu critical sections. This ensures on updates that the psock
+ * will not be released via smap_release_sock() until concurrent updates/deletes
+ * complete. All operations operate on sock_map using cmpxchg and xchg
+ * operations to ensure we do not get stale references. Any reads into the
+ * map must be done with READ_ONCE() because of this.
+ *
+ * A psock is destroyed via call_rcu and after any worker threads are cancelled
+ * and synced, so we are certain all references from the update/lookup/delete
+ * operations as well as references in the data path are no longer in use.
+ *
+ * Psocks may exist in multiple maps, but only a single set of parse/verdict
+ * programs may be inherited from the maps it belongs to. A reference count
+ * is kept with the total number of references to the psock from all maps. The
+ * psock will not be released until this reaches zero. The psock and sock
+ * user data use the sk_callback_lock to protect critical data structures
+ * from concurrent access. This allows us to avoid two updates modifying
+ * the user data in sock at the same time; the lock is required anyway
+ * for modifying callbacks, we simply increase its scope slightly.
+ *
+ * Rules to follow,
+ * - psock must always be read inside RCU critical section
+ * - sk_user_data must only be modified inside sk_callback_lock and read
+ * inside RCU critical section.
+ * - psock->maps list must only be read & modified inside sk_callback_lock
+ * - sock_map must use READ_ONCE and (cmp)xchg operations
+ * - BPF verdict/parse programs must use READ_ONCE and xchg operations
+ */
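+
+/* For illustration, a minimal sketch of a safe psock read that follows
+ * the rules above (hypothetical caller; smap_psock_sk() is defined in
+ * this file):
+ *
+ * rcu_read_lock();
+ * psock = smap_psock_sk(sk);
+ * if (psock)
+ * ... psock remains valid until rcu_read_unlock() ...
+ * rcu_read_unlock();
+ */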
+static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
+ struct bpf_map *map,
+ void *key, u64 flags)
+{
+ struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+ struct smap_psock_map_entry *e = NULL;
+ struct bpf_prog *verdict, *parse;
+ struct sock *osock, *sock;
+ struct smap_psock *psock;
+ u32 i = *(u32 *)key;
+ int err;
+
+ if (unlikely(flags > BPF_EXIST))
+ return -EINVAL;
+
+ if (unlikely(i >= stab->map.max_entries))
+ return -E2BIG;
+
+ sock = READ_ONCE(stab->sock_map[i]);
+ if (flags == BPF_EXIST && !sock)
+ return -ENOENT;
+ else if (flags == BPF_NOEXIST && sock)
+ return -EEXIST;
+
+ sock = skops->sk;
+
+ /* 1. If the sock map has BPF programs, those will be inherited by the
+ * sock being added. If the sock is already attached to BPF programs,
+ * this results in an error.
+ */
+ verdict = READ_ONCE(stab->bpf_verdict);
+ parse = READ_ONCE(stab->bpf_parse);
+
+ if (parse && verdict) {
+ /* bpf prog refcnt may be zero if a concurrent attach operation
+ * removes the program after the above READ_ONCE() but before
+ * we increment the refcnt. If this is the case abort with an
+ * error.
+ */
+ verdict = bpf_prog_inc_not_zero(stab->bpf_verdict);
+ if (IS_ERR(verdict))
+ return PTR_ERR(verdict);
+
+ parse = bpf_prog_inc_not_zero(stab->bpf_parse);
+ if (IS_ERR(parse)) {
+ bpf_prog_put(verdict);
+ return PTR_ERR(parse);
+ }
+ }
+
+ write_lock_bh(&sock->sk_callback_lock);
+ psock = smap_psock_sk(sock);
+
+ /* 2. Do not allow inheriting programs if a psock exists and has
+ * already inherited programs. This would create confusion about
+ * which parser/verdict program is running. If no psock exists,
+ * create one. We are inside sk_callback_lock to ensure a concurrent
+ * create doesn't update the user data.
+ */
+ if (psock) {
+ if (READ_ONCE(psock->bpf_parse) && parse) {
+ err = -EBUSY;
+ goto out_progs;
+ }
+ psock->refcnt++;
+ } else {
+ psock = smap_init_psock(sock, stab);
+ if (IS_ERR(psock)) {
+ err = PTR_ERR(psock);
+ goto out_progs;
+ }
+
+ set_bit(SMAP_TX_RUNNING, &psock->state);
+ }
+
+ e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
+ if (!e) {
+ err = -ENOMEM;
+ goto out_progs;
+ }
+ e->entry = &stab->sock_map[i];
+
+ /* 3. At this point we have a reference to a valid psock that is
+ * running. Attach any BPF programs needed.
+ */
+ if (parse && verdict && !psock->strp_enabled) {
+ err = smap_init_sock(psock, sock);
+ if (err)
+ goto out_free;
+ smap_init_progs(psock, stab, verdict, parse);
+ smap_start_sock(psock, sock);
+ }
+
+ /* 4. Place psock in sockmap for use and stop any programs on
+ * the old sock, assuming it's not the same sock we are replacing
+ * it with. Because we can only have a single set of programs, if
+ * old_sock has a strp we can stop it.
+ */
+ list_add_tail(&e->list, &psock->maps);
+ write_unlock_bh(&sock->sk_callback_lock);
+
+ osock = xchg(&stab->sock_map[i], sock);
+ if (osock) {
+ struct smap_psock *opsock = smap_psock_sk(osock);
+
+ write_lock_bh(&osock->sk_callback_lock);
+ if (osock != sock && parse)
+ smap_stop_sock(opsock, osock);
+ smap_list_remove(opsock, &stab->sock_map[i]);
+ smap_release_sock(opsock, osock);
+ write_unlock_bh(&osock->sk_callback_lock);
+ }
+ return 0;
+out_free:
+ smap_release_sock(psock, sock);
+out_progs:
+ if (verdict)
+ bpf_prog_put(verdict);
+ if (parse)
+ bpf_prog_put(parse);
+ write_unlock_bh(&sock->sk_callback_lock);
+ kfree(e);
+ return err;
+}
+
+int sock_map_attach_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
+{
+ struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+ struct bpf_prog *orig;
+
+ if (unlikely(map->map_type != BPF_MAP_TYPE_SOCKMAP))
+ return -EINVAL;
+
+ switch (type) {
+ case BPF_SK_SKB_STREAM_PARSER:
+ orig = xchg(&stab->bpf_parse, prog);
+ break;
+ case BPF_SK_SKB_STREAM_VERDICT:
+ orig = xchg(&stab->bpf_verdict, prog);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (orig)
+ bpf_prog_put(orig);
+
+ return 0;
+}
+
+static void *sock_map_lookup(struct bpf_map *map, void *key)
+{
+ return NULL;
+}
+
+static int sock_map_update_elem(struct bpf_map *map,
+ void *key, void *value, u64 flags)
+{
+ struct bpf_sock_ops_kern skops;
+ u32 fd = *(u32 *)value;
+ struct socket *socket;
+ int err;
+
+ socket = sockfd_lookup(fd, &err);
+ if (!socket)
+ return err;
+
+ skops.sk = socket->sk;
+ if (!skops.sk) {
+ fput(socket->file);
+ return -EINVAL;
+ }
+
+ err = sock_map_ctx_update_elem(&skops, map, key, flags);
+ fput(socket->file);
+ return err;
+}
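+
+/* For illustration, a hypothetical userspace sketch that reaches the
+ * update path above; the key is the slot index and the value is a TCP
+ * socket fd (map_fd and sock_fd are assumed to exist):
+ *
+ * u32 idx = 0;
+ * err = bpf_map_update_elem(map_fd, &idx, &sock_fd, BPF_ANY);
+ */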
+
+const struct bpf_map_ops sock_map_ops = {
+ .map_alloc = sock_map_alloc,
+ .map_free = sock_map_free,
+ .map_lookup_elem = sock_map_lookup,
+ .map_get_next_key = sock_map_get_next_key,
+ .map_update_elem = sock_map_update_elem,
+ .map_delete_elem = sock_map_delete_elem,
+};
+
+BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
+ struct bpf_map *, map, void *, key, u64, flags)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
+}
+
+const struct bpf_func_proto bpf_sock_map_update_proto = {
+ .func = bpf_sock_map_update,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_PTR_TO_MAP_KEY,
+ .arg4_type = ARG_ANYTHING,
+};
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 31147d7..135be43 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -31,7 +31,8 @@ static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
int err;
- smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries);
+ smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries,
+ smap->map.numa_node);
if (!smap->elems)
return -ENOMEM;
@@ -59,7 +60,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
- if (attr->map_flags)
+ if (attr->map_flags & ~BPF_F_NUMA_NODE)
return ERR_PTR(-EINVAL);
/* check sanity of attributes */
@@ -75,7 +76,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-E2BIG);
- smap = bpf_map_area_alloc(cost);
+ smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
if (!smap)
return ERR_PTR(-ENOMEM);
@@ -91,6 +92,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
smap->map.map_flags = attr->map_flags;
smap->n_buckets = n_buckets;
smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+ smap->map.numa_node = bpf_map_attr_numa_node(attr);
err = bpf_map_precharge_memlock(smap->map.pages);
if (err)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6c772ad..70ad8e2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -48,6 +48,47 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
#undef BPF_MAP_TYPE
};
+/*
+ * If we're handed a bigger struct than we know of, ensure all the unknown bits
+ * are 0 - i.e. new user-space does not rely on any kernel feature extensions
+ * we don't know about yet.
+ *
+ * There is a ToCToU between this function call and the following
+ * copy_from_user() call. However, this is not a concern since this
+ * function is only meant to future-proof the interface.
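+ *
+ * For example (hypothetical sizes): if expected_size is 112 bytes and a
+ * newer userspace passes actual_size = 120, the call succeeds only if
+ * the trailing 8 bytes at uaddr are all zero; otherwise it returns
+ * -E2BIG.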
+ */
+static int check_uarg_tail_zero(void __user *uaddr,
+ size_t expected_size,
+ size_t actual_size)
+{
+ unsigned char __user *addr;
+ unsigned char __user *end;
+ unsigned char val;
+ int err;
+
+ if (unlikely(actual_size > PAGE_SIZE)) /* silly large */
+ return -E2BIG;
+
+ if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
+ return -EFAULT;
+
+ if (actual_size <= expected_size)
+ return 0;
+
+ addr = uaddr + expected_size;
+ end = uaddr + actual_size;
+
+ for (; addr < end; addr++) {
+ err = get_user(val, addr);
+ if (err)
+ return err;
+ if (val)
+ return -E2BIG;
+ }
+
+ return 0;
+}
+
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
struct bpf_map *map;
@@ -64,7 +105,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
return map;
}
-void *bpf_map_area_alloc(size_t size)
+void *bpf_map_area_alloc(size_t size, int numa_node)
{
/* We definitely need __GFP_NORETRY, so OOM killer doesn't
* trigger under memory pressure as we really just want to
@@ -74,12 +115,13 @@ void *bpf_map_area_alloc(size_t size)
void *area;
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- area = kmalloc(size, GFP_USER | flags);
+ area = kmalloc_node(size, GFP_USER | flags, numa_node);
if (area != NULL)
return area;
}
- return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
+ return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
+ __builtin_return_address(0));
}
void bpf_map_area_free(void *area)
@@ -268,10 +310,11 @@ int bpf_map_new_fd(struct bpf_map *map)
offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
sizeof(attr->CMD##_LAST_FIELD)) != NULL
-#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
+#define BPF_MAP_CREATE_LAST_FIELD numa_node
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
+ int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_map *map;
int err;
@@ -279,6 +322,11 @@ static int map_create(union bpf_attr *attr)
if (err)
return -EINVAL;
+ if (numa_node != NUMA_NO_NODE &&
+ ((unsigned int)numa_node >= nr_node_ids ||
+ !node_online(numa_node)))
+ return -EINVAL;
+
/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
map = find_and_alloc_map(attr);
if (IS_ERR(map))
@@ -870,7 +918,7 @@ struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
EXPORT_SYMBOL_GPL(bpf_prog_inc);
/* prog_idr_lock should have been held */
-static struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
+struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
int refold;
@@ -886,6 +934,7 @@ static struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
return prog;
}
+EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
@@ -1047,6 +1096,36 @@ static int bpf_obj_get(const union bpf_attr *attr)
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
+static int sockmap_get_from_fd(const union bpf_attr *attr)
+{
+ int ufd = attr->target_fd;
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ struct fd f;
+ int err;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ prog = bpf_prog_get_type(attr->attach_bpf_fd, BPF_PROG_TYPE_SK_SKB);
+ if (IS_ERR(prog)) {
+ fdput(f);
+ return PTR_ERR(prog);
+ }
+
+ err = sock_map_attach_prog(map, prog, attr->attach_type);
+ if (err) {
+ fdput(f);
+ bpf_prog_put(prog);
+ return err;
+ }
+
+ fdput(f);
+ return 0;
+}
+
static int bpf_prog_attach(const union bpf_attr *attr)
{
enum bpf_prog_type ptype;
@@ -1074,6 +1153,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_CGROUP_SOCK_OPS:
ptype = BPF_PROG_TYPE_SOCK_OPS;
break;
+ case BPF_SK_SKB_STREAM_PARSER:
+ case BPF_SK_SKB_STREAM_VERDICT:
+ return sockmap_get_from_fd(attr);
default:
return -EINVAL;
}
@@ -1246,32 +1328,6 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
return fd;
}
-static int check_uarg_tail_zero(void __user *uaddr,
- size_t expected_size,
- size_t actual_size)
-{
- unsigned char __user *addr;
- unsigned char __user *end;
- unsigned char val;
- int err;
-
- if (actual_size <= expected_size)
- return 0;
-
- addr = uaddr + expected_size;
- end = uaddr + actual_size;
-
- for (; addr < end; addr++) {
- err = get_user(val, addr);
- if (err)
- return err;
- if (val)
- return -E2BIG;
- }
-
- return 0;
-}
-
static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
@@ -1393,17 +1449,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
return -EPERM;
- if (!access_ok(VERIFY_READ, uattr, 1))
- return -EFAULT;
-
- if (size > PAGE_SIZE) /* silly large */
- return -E2BIG;
-
- /* If we're handed a bigger struct than we know of,
- * ensure all the unknown bits are 0 - i.e. new
- * user-space does not rely on any kernel feature
- * extensions we dont know about yet.
- */
err = check_uarg_tail_zero(uattr, sizeof(attr), size);
if (err)
return err;
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
new file mode 100644
index 0000000..1f4bf68
--- /dev/null
+++ b/kernel/bpf/tnum.c
@@ -0,0 +1,180 @@
+/* tnum: tracked (or tristate) numbers
+ *
+ * A tnum tracks knowledge about the bits of a value. Each bit can be either
+ * known (0 or 1), or unknown (x). Arithmetic operations on tnums will
+ * propagate the unknown bits such that the tnum result represents all the
+ * possible results for possible values of the operands.
+ */
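+
+/* For example (illustrative): a tnum with value = 0x4 and mask = 0x3
+ * represents the set {4, 5, 6, 7}: bit 2 is known 1, bits 0-1 are
+ * unknown, and all higher bits are known 0.
+ */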
+#include <linux/kernel.h>
+#include <linux/tnum.h>
+
+#define TNUM(_v, _m) (struct tnum){.value = _v, .mask = _m}
+/* A completely unknown value */
+const struct tnum tnum_unknown = { .value = 0, .mask = -1 };
+
+struct tnum tnum_const(u64 value)
+{
+ return TNUM(value, 0);
+}
+
+struct tnum tnum_range(u64 min, u64 max)
+{
+ u64 chi = min ^ max, delta;
+ u8 bits = fls64(chi);
+
+ /* special case, needed because 1ULL << 64 is undefined */
+ if (bits > 63)
+ return tnum_unknown;
+ /* e.g. if chi = 4, bits = 3, delta = (1<<3) - 1 = 7.
+ * if chi = 0, bits = 0, delta = (1<<0) - 1 = 0, so we return
+ * constant min (since min == max).
+ */
+ delta = (1ULL << bits) - 1;
+ return TNUM(min & ~delta, delta);
+}
+
+struct tnum tnum_lshift(struct tnum a, u8 shift)
+{
+ return TNUM(a.value << shift, a.mask << shift);
+}
+
+struct tnum tnum_rshift(struct tnum a, u8 shift)
+{
+ return TNUM(a.value >> shift, a.mask >> shift);
+}
+
+struct tnum tnum_add(struct tnum a, struct tnum b)
+{
+ u64 sm, sv, sigma, chi, mu;
+
+ sm = a.mask + b.mask;
+ sv = a.value + b.value;
+ sigma = sm + sv;
+ chi = sigma ^ sv;
+ mu = chi | a.mask | b.mask;
+ return TNUM(sv & ~mu, mu);
+}
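+
+/* Worked example (illustrative): adding tnum_const(1) to TNUM(0, 0x3),
+ * i.e. 1 + {0, 1, 2, 3}, gives sv = 1, sm = 0x3, sigma = 0x4, chi = 0x5
+ * and mu = 0x7, i.e. TNUM(0, 0x7): a carry out of the two unknown bits
+ * makes bit 2 unknown as well.
+ */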
+
+struct tnum tnum_sub(struct tnum a, struct tnum b)
+{
+ u64 dv, alpha, beta, chi, mu;
+
+ dv = a.value - b.value;
+ alpha = dv + a.mask;
+ beta = dv - b.mask;
+ chi = alpha ^ beta;
+ mu = chi | a.mask | b.mask;
+ return TNUM(dv & ~mu, mu);
+}
+
+struct tnum tnum_and(struct tnum a, struct tnum b)
+{
+ u64 alpha, beta, v;
+
+ alpha = a.value | a.mask;
+ beta = b.value | b.mask;
+ v = a.value & b.value;
+ return TNUM(v, alpha & beta & ~v);
+}
+
+struct tnum tnum_or(struct tnum a, struct tnum b)
+{
+ u64 v, mu;
+
+ v = a.value | b.value;
+ mu = a.mask | b.mask;
+ return TNUM(v, mu & ~v);
+}
+
+struct tnum tnum_xor(struct tnum a, struct tnum b)
+{
+ u64 v, mu;
+
+ v = a.value ^ b.value;
+ mu = a.mask | b.mask;
+ return TNUM(v & ~mu, mu);
+}
+
+/* half-multiply add: acc += (unknown * mask * value).
+ * An intermediate step in the multiply algorithm.
+ */
+static struct tnum hma(struct tnum acc, u64 value, u64 mask)
+{
+ while (mask) {
+ if (mask & 1)
+ acc = tnum_add(acc, TNUM(0, value));
+ mask >>= 1;
+ value <<= 1;
+ }
+ return acc;
+}
+
+struct tnum tnum_mul(struct tnum a, struct tnum b)
+{
+ struct tnum acc;
+ u64 pi;
+
+ pi = a.value * b.value;
+ acc = hma(TNUM(pi, 0), a.mask, b.mask | b.value);
+ return hma(acc, b.mask, a.value);
+}
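+
+/* Worked example (illustrative): tnum_mul(tnum_const(3), TNUM(0, 1)),
+ * i.e. 3 * {0, 1}, yields TNUM(0, 0x3) = {0, 1, 2, 3}, a conservative
+ * superset of the exact result set {0, 3}.
+ */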
+
+/* Note that if a and b disagree - i.e. one has a 'known 1' where the other has
+ * a 'known 0' - this will return a 'known 1' for that bit.
+ */
+struct tnum tnum_intersect(struct tnum a, struct tnum b)
+{
+ u64 v, mu;
+
+ v = a.value | b.value;
+ mu = a.mask & b.mask;
+ return TNUM(v & ~mu, mu);
+}
+
+struct tnum tnum_cast(struct tnum a, u8 size)
+{
+ a.value &= (1ULL << (size * 8)) - 1;
+ a.mask &= (1ULL << (size * 8)) - 1;
+ return a;
+}
+
+bool tnum_is_aligned(struct tnum a, u64 size)
+{
+ if (!size)
+ return true;
+ return !((a.value | a.mask) & (size - 1));
+}
+
+bool tnum_in(struct tnum a, struct tnum b)
+{
+ if (b.mask & ~a.mask)
+ return false;
+ b.value &= ~a.mask;
+ return a.value == b.value;
+}
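+
+/* e.g. (illustrative): with a = TNUM(0x4, 0x3), representing {4..7},
+ * tnum_in(a, tnum_const(5)) is true while tnum_in(a, tnum_const(8))
+ * is false.
+ */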
+
+int tnum_strn(char *str, size_t size, struct tnum a)
+{
+ return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask);
+}
+EXPORT_SYMBOL_GPL(tnum_strn);
+
+int tnum_sbin(char *str, size_t size, struct tnum a)
+{
+ size_t n;
+
+ for (n = 64; n; n--) {
+ if (n < size) {
+ if (a.mask & 1)
+ str[n - 1] = 'x';
+ else if (a.value & 1)
+ str[n - 1] = '1';
+ else
+ str[n - 1] = '0';
+ }
+ a.mask >>= 1;
+ a.value >>= 1;
+ }
+ str[min(size - 1, (size_t)64)] = 0;
+ return 64;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 664d939..d690c7d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -61,12 +61,12 @@
* (and -20 constant is saved for further stack bounds checking).
* Meaning that this reg is a pointer to stack plus known immediate constant.
*
- * Most of the time the registers have UNKNOWN_VALUE type, which
+ * Most of the time the registers have SCALAR_VALUE type, which
* means the register has some value, but it's not a valid pointer.
- * (like pointer plus pointer becomes UNKNOWN_VALUE type)
+ * (like pointer plus pointer becomes SCALAR_VALUE type)
*
* When verifier sees load or store instructions the type of base register
- * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer
+ * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
* types recognized by check_mem_access() function.
*
* PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
@@ -140,7 +140,7 @@ struct bpf_verifier_stack_elem {
struct bpf_verifier_stack_elem *next;
};
-#define BPF_COMPLEXITY_LIMIT_INSNS 98304
+#define BPF_COMPLEXITY_LIMIT_INSNS 131072
#define BPF_COMPLEXITY_LIMIT_STACK 1024
#define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
@@ -180,15 +180,12 @@ static __printf(1, 2) void verbose(const char *fmt, ...)
/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
[NOT_INIT] = "?",
- [UNKNOWN_VALUE] = "inv",
+ [SCALAR_VALUE] = "inv",
[PTR_TO_CTX] = "ctx",
[CONST_PTR_TO_MAP] = "map_ptr",
[PTR_TO_MAP_VALUE] = "map_value",
[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
- [PTR_TO_MAP_VALUE_ADJ] = "map_value_adj",
- [FRAME_PTR] = "fp",
[PTR_TO_STACK] = "fp",
- [CONST_IMM] = "imm",
[PTR_TO_PACKET] = "pkt",
[PTR_TO_PACKET_END] = "pkt_end",
};
@@ -221,32 +218,52 @@ static void print_verifier_state(struct bpf_verifier_state *state)
if (t == NOT_INIT)
continue;
verbose(" R%d=%s", i, reg_type_str[t]);
- if (t == CONST_IMM || t == PTR_TO_STACK)
- verbose("%lld", reg->imm);
- else if (t == PTR_TO_PACKET)
- verbose("(id=%d,off=%d,r=%d)",
- reg->id, reg->off, reg->range);
- else if (t == UNKNOWN_VALUE && reg->imm)
- verbose("%lld", reg->imm);
- else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
- t == PTR_TO_MAP_VALUE_OR_NULL ||
- t == PTR_TO_MAP_VALUE_ADJ)
- verbose("(ks=%d,vs=%d,id=%u)",
- reg->map_ptr->key_size,
- reg->map_ptr->value_size,
- reg->id);
- if (reg->min_value != BPF_REGISTER_MIN_RANGE)
- verbose(",min_value=%lld",
- (long long)reg->min_value);
- if (reg->max_value != BPF_REGISTER_MAX_RANGE)
- verbose(",max_value=%llu",
- (unsigned long long)reg->max_value);
- if (reg->min_align)
- verbose(",min_align=%u", reg->min_align);
- if (reg->aux_off)
- verbose(",aux_off=%u", reg->aux_off);
- if (reg->aux_off_align)
- verbose(",aux_off_align=%u", reg->aux_off_align);
+ if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
+ tnum_is_const(reg->var_off)) {
+ /* reg->off should be 0 for SCALAR_VALUE */
+ verbose("%lld", reg->var_off.value + reg->off);
+ } else {
+ verbose("(id=%d", reg->id);
+ if (t != SCALAR_VALUE)
+ verbose(",off=%d", reg->off);
+ if (t == PTR_TO_PACKET)
+ verbose(",r=%d", reg->range);
+ else if (t == CONST_PTR_TO_MAP ||
+ t == PTR_TO_MAP_VALUE ||
+ t == PTR_TO_MAP_VALUE_OR_NULL)
+ verbose(",ks=%d,vs=%d",
+ reg->map_ptr->key_size,
+ reg->map_ptr->value_size);
+ if (tnum_is_const(reg->var_off)) {
+ /* Typically an immediate SCALAR_VALUE, but
+ * could be a pointer whose offset is too big
+ * for reg->off
+ */
+ verbose(",imm=%llx", reg->var_off.value);
+ } else {
+ if (reg->smin_value != reg->umin_value &&
+ reg->smin_value != S64_MIN)
+ verbose(",smin_value=%lld",
+ (long long)reg->smin_value);
+ if (reg->smax_value != reg->umax_value &&
+ reg->smax_value != S64_MAX)
+ verbose(",smax_value=%lld",
+ (long long)reg->smax_value);
+ if (reg->umin_value != 0)
+ verbose(",umin_value=%llu",
+ (unsigned long long)reg->umin_value);
+ if (reg->umax_value != U64_MAX)
+ verbose(",umax_value=%llu",
+ (unsigned long long)reg->umax_value);
+ if (!tnum_is_unknown(reg->var_off)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(",var_off=%s", tn_buf);
+ }
+ }
+ verbose(")");
+ }
}
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] == STACK_SPILL)
@@ -295,11 +312,15 @@ static const char *const bpf_jmp_string[16] = {
[BPF_JA >> 4] = "jmp",
[BPF_JEQ >> 4] = "==",
[BPF_JGT >> 4] = ">",
+ [BPF_JLT >> 4] = "<",
[BPF_JGE >> 4] = ">=",
+ [BPF_JLE >> 4] = "<=",
[BPF_JSET >> 4] = "&",
[BPF_JNE >> 4] = "!=",
[BPF_JSGT >> 4] = "s>",
+ [BPF_JSLT >> 4] = "s<",
[BPF_JSGE >> 4] = "s>=",
+ [BPF_JSLE >> 4] = "s<=",
[BPF_CALL >> 4] = "call",
[BPF_EXIT >> 4] = "exit",
};
@@ -463,56 +484,163 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
+static void __mark_reg_not_init(struct bpf_reg_state *reg);
+
+/* Mark the unknown part of a register (variable offset or scalar value) as
+ * known to have the value @imm.
+ */
+static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
- BUG_ON(regno >= MAX_BPF_REG);
+ reg->id = 0;
+ reg->var_off = tnum_const(imm);
+ reg->smin_value = (s64)imm;
+ reg->smax_value = (s64)imm;
+ reg->umin_value = imm;
+ reg->umax_value = imm;
+}
- memset(&regs[regno], 0, sizeof(regs[regno]));
- regs[regno].type = NOT_INIT;
- regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
- regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+/* Mark the 'variable offset' part of a register as zero. This should be
+ * used only on registers holding a pointer type.
+ */
+static void __mark_reg_known_zero(struct bpf_reg_state *reg)
+{
+ __mark_reg_known(reg, 0);
}
-static void init_reg_state(struct bpf_reg_state *regs)
+static void mark_reg_known_zero(struct bpf_reg_state *regs, u32 regno)
{
- int i;
+ if (WARN_ON(regno >= MAX_BPF_REG)) {
+ verbose("mark_reg_known_zero(regs, %u)\n", regno);
+ /* Something bad happened, let's kill all regs */
+ for (regno = 0; regno < MAX_BPF_REG; regno++)
+ __mark_reg_not_init(regs + regno);
+ return;
+ }
+ __mark_reg_known_zero(regs + regno);
+}
- for (i = 0; i < MAX_BPF_REG; i++)
- mark_reg_not_init(regs, i);
+/* Attempts to improve min/max values based on var_off information */
+static void __update_reg_bounds(struct bpf_reg_state *reg)
+{
+ /* min signed is max(sign bit) | min(other bits) */
+ reg->smin_value = max_t(s64, reg->smin_value,
+ reg->var_off.value | (reg->var_off.mask & S64_MIN));
+ /* max signed is min(sign bit) | max(other bits) */
+ reg->smax_value = min_t(s64, reg->smax_value,
+ reg->var_off.value | (reg->var_off.mask & S64_MAX));
+ reg->umin_value = max(reg->umin_value, reg->var_off.value);
+ reg->umax_value = min(reg->umax_value,
+ reg->var_off.value | reg->var_off.mask);
+}
- /* frame pointer */
- regs[BPF_REG_FP].type = FRAME_PTR;
+/* Uses signed min/max values to inform unsigned, and vice-versa */
+static void __reg_deduce_bounds(struct bpf_reg_state *reg)
+{
+ /* Learn sign from signed bounds.
+ * If we cannot cross the sign boundary, then signed and unsigned bounds
+ * are the same, so combine. This works even in the negative case, e.g.
+ * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
+ */
+ if (reg->smin_value >= 0 || reg->smax_value < 0) {
+ reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
+ reg->umin_value);
+ reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
+ reg->umax_value);
+ return;
+ }
+ /* Learn sign from unsigned bounds. Signed bounds cross the sign
+ * boundary, so we must be careful.
+ */
+ if ((s64)reg->umax_value >= 0) {
+ /* Positive. We can't learn anything from the smin, but smax
+ * is positive, hence safe.
+ */
+ reg->smin_value = reg->umin_value;
+ reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
+ reg->umax_value);
+ } else if ((s64)reg->umin_value < 0) {
+ /* Negative. We can't learn anything from the smax, but smin
+ * is negative, hence safe.
+ */
+ reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
+ reg->umin_value);
+ reg->smax_value = reg->umax_value;
+ }
+}
- /* 1st arg to a function */
- regs[BPF_REG_1].type = PTR_TO_CTX;
+/* Attempts to improve var_off based on unsigned min/max information */
+static void __reg_bound_offset(struct bpf_reg_state *reg)
+{
+ reg->var_off = tnum_intersect(reg->var_off,
+ tnum_range(reg->umin_value,
+ reg->umax_value));
}
-static void __mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
+/* Reset the min/max bounds of a register */
+static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
- regs[regno].type = UNKNOWN_VALUE;
- regs[regno].id = 0;
- regs[regno].imm = 0;
+ reg->smin_value = S64_MIN;
+ reg->smax_value = S64_MAX;
+ reg->umin_value = 0;
+ reg->umax_value = U64_MAX;
}
-static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
+/* Mark a register as having a completely unknown (scalar) value. */
+static void __mark_reg_unknown(struct bpf_reg_state *reg)
{
- BUG_ON(regno >= MAX_BPF_REG);
- __mark_reg_unknown_value(regs, regno);
+ reg->type = SCALAR_VALUE;
+ reg->id = 0;
+ reg->off = 0;
+ reg->var_off = tnum_unknown;
+ __mark_reg_unbounded(reg);
}
-static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
+static void mark_reg_unknown(struct bpf_reg_state *regs, u32 regno)
{
- regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
- regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
- regs[regno].value_from_signed = false;
- regs[regno].min_align = 0;
+ if (WARN_ON(regno >= MAX_BPF_REG)) {
+ verbose("mark_reg_unknown(regs, %u)\n", regno);
+ /* Something bad happened, let's kill all regs */
+ for (regno = 0; regno < MAX_BPF_REG; regno++)
+ __mark_reg_not_init(regs + regno);
+ return;
+ }
+ __mark_reg_unknown(regs + regno);
}
-static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs,
- u32 regno)
+static void __mark_reg_not_init(struct bpf_reg_state *reg)
+{
+ __mark_reg_unknown(reg);
+ reg->type = NOT_INIT;
+}
+
+static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
{
- mark_reg_unknown_value(regs, regno);
- reset_reg_range_values(regs, regno);
+ if (WARN_ON(regno >= MAX_BPF_REG)) {
+ verbose("mark_reg_not_init(regs, %u)\n", regno);
+ /* Something bad happened, let's kill all regs */
+ for (regno = 0; regno < MAX_BPF_REG; regno++)
+ __mark_reg_not_init(regs + regno);
+ return;
+ }
+ __mark_reg_not_init(regs + regno);
+}
+
+static void init_reg_state(struct bpf_reg_state *regs)
+{
+ int i;
+
+ for (i = 0; i < MAX_BPF_REG; i++) {
+ mark_reg_not_init(regs, i);
+ regs[i].live = REG_LIVE_NONE;
+ }
+
+ /* frame pointer */
+ regs[BPF_REG_FP].type = PTR_TO_STACK;
+ mark_reg_known_zero(regs, BPF_REG_FP);
+
+ /* 1st arg to a function */
+ regs[BPF_REG_1].type = PTR_TO_CTX;
+ mark_reg_known_zero(regs, BPF_REG_1);
}
enum reg_arg_type {
@@ -521,9 +649,26 @@ enum reg_arg_type {
DST_OP_NO_MARK /* same as above, check only, don't mark */
};
-static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
+static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno)
+{
+ struct bpf_verifier_state *parent = state->parent;
+
+ while (parent) {
+ /* if read wasn't screened by an earlier write ... */
+ if (state->regs[regno].live & REG_LIVE_WRITTEN)
+ break;
+ /* ... then we depend on parent's value */
+ parent->regs[regno].live |= REG_LIVE_READ;
+ state = parent;
+ parent = state->parent;
+ }
+}
+
+static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
enum reg_arg_type t)
{
+ struct bpf_reg_state *regs = env->cur_state.regs;
+
if (regno >= MAX_BPF_REG) {
verbose("R%d is invalid\n", regno);
return -EINVAL;
@@ -535,14 +680,16 @@ static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
verbose("R%d !read_ok\n", regno);
return -EACCES;
}
+ mark_reg_read(&env->cur_state, regno);
} else {
/* check whether register used as dest operand can be written to */
if (regno == BPF_REG_FP) {
verbose("frame pointer is read only\n");
return -EACCES;
}
+ regs[regno].live |= REG_LIVE_WRITTEN;
if (t == DST_OP)
- mark_reg_unknown_value(regs, regno);
+ mark_reg_unknown(regs, regno);
}
return 0;
}
@@ -552,12 +699,10 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
switch (type) {
case PTR_TO_MAP_VALUE:
case PTR_TO_MAP_VALUE_OR_NULL:
- case PTR_TO_MAP_VALUE_ADJ:
case PTR_TO_STACK:
case PTR_TO_CTX:
case PTR_TO_PACKET:
case PTR_TO_PACKET_END:
- case FRAME_PTR:
case CONST_PTR_TO_MAP:
return true;
default:
@@ -571,7 +716,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
static int check_stack_write(struct bpf_verifier_state *state, int off,
int size, int value_regno)
{
- int i;
+ int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
* so it's aligned access and [off, off + size) are within stack limits
*/
@@ -586,15 +731,14 @@ static int check_stack_write(struct bpf_verifier_state *state, int off,
}
/* save register state */
- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
- state->regs[value_regno];
+ state->spilled_regs[spi] = state->regs[value_regno];
+ state->spilled_regs[spi].live |= REG_LIVE_WRITTEN;
for (i = 0; i < BPF_REG_SIZE; i++)
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
} else {
/* regular write of data into stack */
- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
- (struct bpf_reg_state) {};
+ state->spilled_regs[spi] = (struct bpf_reg_state) {};
for (i = 0; i < size; i++)
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
@@ -602,11 +746,26 @@ static int check_stack_write(struct bpf_verifier_state *state, int off,
return 0;
}
+static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slot)
+{
+ struct bpf_verifier_state *parent = state->parent;
+
+ while (parent) {
+ /* if read wasn't screened by an earlier write ... */
+ if (state->spilled_regs[slot].live & REG_LIVE_WRITTEN)
+ break;
+ /* ... then we depend on parent's value */
+ parent->spilled_regs[slot].live |= REG_LIVE_READ;
+ state = parent;
+ parent = state->parent;
+ }
+}
+
static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
int value_regno)
{
u8 *slot_type;
- int i;
+ int i, spi;
slot_type = &state->stack_slot_type[MAX_BPF_STACK + off];
@@ -622,10 +781,13 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
}
}
- if (value_regno >= 0)
+ spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
+
+ if (value_regno >= 0) {
/* restore register state from stack */
- state->regs[value_regno] =
- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE];
+ state->regs[value_regno] = state->spilled_regs[spi];
+ mark_stack_slot_read(state, spi);
+ }
return 0;
} else {
for (i = 0; i < size; i++) {
@@ -637,14 +799,13 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
}
if (value_regno >= 0)
/* have read misc data from the stack */
- mark_reg_unknown_value_and_range(state->regs,
- value_regno);
+ mark_reg_unknown(state->regs, value_regno);
return 0;
}
}
/* check read/write into map element returned by bpf_map_lookup_elem() */
-static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
+static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
int size)
{
struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
@@ -657,49 +818,50 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
return 0;
}
-/* check read/write into an adjusted map element */
-static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno,
+/* check read/write into a map element with possible variable offset */
+static int check_map_access(struct bpf_verifier_env *env, u32 regno,
int off, int size)
{
struct bpf_verifier_state *state = &env->cur_state;
struct bpf_reg_state *reg = &state->regs[regno];
int err;
- /* We adjusted the register to this map value, so we
- * need to change off and size to min_value and max_value
- * respectively to make sure our theoretical access will be
- * safe.
+ /* We may have adjusted the register to this map value, so we
+ * need to try adding each of min_value and max_value to off
+ * to make sure our theoretical access will be safe.
*/
if (log_level)
print_verifier_state(state);
- env->varlen_map_value_access = true;
/* The minimum value is only important with signed
* comparisons where we can't assume the floor of a
* value is 0. If we are using signed variables for our
* index'es we need to make sure that whatever we use
* will have a set floor within our range.
*/
- if (reg->min_value < 0) {
+ if (reg->smin_value < 0) {
verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
regno);
return -EACCES;
}
- err = check_map_access(env, regno, reg->min_value + off, size);
+ err = __check_map_access(env, regno, reg->smin_value + off, size);
if (err) {
- verbose("R%d min value is outside of the array range\n",
- regno);
+ verbose("R%d min value is outside of the array range\n", regno);
return err;
}
- /* If we haven't set a max value then we need to bail
- * since we can't be sure we won't do bad things.
+ /* If we haven't set a max value then we need to bail since we can't be
+ * sure we won't do bad things.
+ * If reg->umax_value + off could overflow, treat that as unbounded too.
*/
- if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
+ if (reg->umax_value >= BPF_MAX_VAR_OFF) {
verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n",
regno);
return -EACCES;
}
- return check_map_access(env, regno, reg->max_value + off, size);
+ err = __check_map_access(env, regno, reg->umax_value + off, size);
+ if (err)
+ verbose("R%d max value is outside of the array range\n", regno);
+ return err;
}
#define MAX_PACKET_OFF 0xffff
@@ -719,6 +881,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
case BPF_PROG_TYPE_SCHED_ACT:
case BPF_PROG_TYPE_XDP:
case BPF_PROG_TYPE_LWT_XMIT:
+ case BPF_PROG_TYPE_SK_SKB:
if (meta)
return meta->pkt_access;
@@ -729,14 +892,13 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
}
}
-static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
- int size)
+static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
+ int off, int size)
{
struct bpf_reg_state *regs = env->cur_state.regs;
struct bpf_reg_state *reg = &regs[regno];
- off += reg->off;
- if (off < 0 || size <= 0 || off + size > reg->range) {
+ if (off < 0 || size <= 0 || (u64)off + size > reg->range) {
verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
off, size, regno, reg->id, reg->off, reg->range);
return -EACCES;
@@ -744,7 +906,35 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
return 0;
}
-/* check access to 'struct bpf_context' fields */
+static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
+ int size)
+{
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *reg = &regs[regno];
+ int err;
+
+ /* We may have added a variable offset to the packet pointer; but any
+ * reg->range we have comes after that. We are only checking the fixed
+ * offset.
+ */
+
+ /* We don't allow negative numbers, because we aren't tracking enough
+ * detail to prove they're safe.
+ */
+ if (reg->smin_value < 0) {
+ verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+ regno);
+ return -EACCES;
+ }
+ err = __check_packet_access(env, regno, off, size);
+ if (err) {
+ verbose("R%d offset is outside of the packet\n", regno);
+ return err;
+ }
+ return err;
+}
+
+/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
enum bpf_access_type t, enum bpf_reg_type *reg_type)
{
@@ -784,13 +974,7 @@ static bool __is_pointer_value(bool allow_ptr_leaks,
if (allow_ptr_leaks)
return false;
- switch (reg->type) {
- case UNKNOWN_VALUE:
- case CONST_IMM:
- return false;
- default:
- return true;
- }
+ return reg->type != SCALAR_VALUE;
}
static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
@@ -801,23 +985,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
int off, int size, bool strict)
{
+ struct tnum reg_off;
int ip_align;
- int reg_off;
/* Byte size accesses are always allowed. */
if (!strict || size == 1)
return 0;
- reg_off = reg->off;
- if (reg->id) {
- if (reg->aux_off_align % size) {
- verbose("Packet access is only %u byte aligned, %d byte access not allowed\n",
- reg->aux_off_align, size);
- return -EACCES;
- }
- reg_off += reg->aux_off;
- }
-
/* For platforms that do not have a Kconfig enabling
* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
* NET_IP_ALIGN is universally set to '2'. And on platforms
@@ -827,20 +1001,37 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
* unconditional IP align value of '2'.
*/
ip_align = 2;
- if ((ip_align + reg_off + off) % size != 0) {
- verbose("misaligned packet access off %d+%d+%d size %d\n",
- ip_align, reg_off, off, size);
+
+ reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
+ if (!tnum_is_aligned(reg_off, size)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose("misaligned packet access off %d+%s+%d+%d size %d\n",
+ ip_align, tn_buf, reg->off, off, size);
return -EACCES;
}
return 0;
}
-static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
- int size, bool strict)
+static int check_generic_ptr_alignment(const struct bpf_reg_state *reg,
+ const char *pointer_desc,
+ int off, int size, bool strict)
{
- if (strict && size != 1) {
- verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
+ struct tnum reg_off;
+
+ /* Byte size accesses are always allowed. */
+ if (!strict || size == 1)
+ return 0;
+
+ reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
+ if (!tnum_is_aligned(reg_off, size)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose("misaligned %saccess off %s+%d+%d size %d\n",
+ pointer_desc, tn_buf, reg->off, off, size);
return -EACCES;
}
@@ -852,21 +1043,25 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
int off, int size)
{
bool strict = env->strict_alignment;
+ const char *pointer_desc = "";
switch (reg->type) {
case PTR_TO_PACKET:
+ /* special case, because of NET_IP_ALIGN */
return check_pkt_ptr_alignment(reg, off, size, strict);
- case PTR_TO_MAP_VALUE_ADJ:
- return check_val_ptr_alignment(reg, size, strict);
+ case PTR_TO_MAP_VALUE:
+ pointer_desc = "value ";
+ break;
+ case PTR_TO_CTX:
+ pointer_desc = "context ";
+ break;
+ case PTR_TO_STACK:
+ pointer_desc = "stack ";
+ break;
default:
- if (off % size != 0) {
- verbose("misaligned access off %d size %d\n",
- off, size);
- return -EACCES;
- }
-
- return 0;
+ break;
}
+ return check_generic_ptr_alignment(reg, pointer_desc, off, size, strict);
}
/* check whether memory at (regno + off) is accessible for t = (read | write)
@@ -883,52 +1078,79 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
struct bpf_reg_state *reg = &state->regs[regno];
int size, err = 0;
- if (reg->type == PTR_TO_STACK)
- off += reg->imm;
-
size = bpf_size_to_bytes(bpf_size);
if (size < 0)
return size;
+ /* alignment checks will add in reg->off themselves */
err = check_ptr_alignment(env, reg, off, size);
if (err)
return err;
- if (reg->type == PTR_TO_MAP_VALUE ||
- reg->type == PTR_TO_MAP_VALUE_ADJ) {
+ /* for access checks, reg->off is just part of off */
+ off += reg->off;
+
+ if (reg->type == PTR_TO_MAP_VALUE) {
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose("R%d leaks addr into map\n", value_regno);
return -EACCES;
}
- if (reg->type == PTR_TO_MAP_VALUE_ADJ)
- err = check_map_access_adj(env, regno, off, size);
- else
- err = check_map_access(env, regno, off, size);
+ err = check_map_access(env, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
- mark_reg_unknown_value_and_range(state->regs,
- value_regno);
+ mark_reg_unknown(state->regs, value_regno);
} else if (reg->type == PTR_TO_CTX) {
- enum bpf_reg_type reg_type = UNKNOWN_VALUE;
+ enum bpf_reg_type reg_type = SCALAR_VALUE;
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose("R%d leaks addr into ctx\n", value_regno);
return -EACCES;
}
+ /* ctx accesses must be at a fixed offset, so that we can
+ * determine what type of data is returned.
+ */
+ if (!tnum_is_const(reg->var_off)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose("variable ctx access var_off=%s off=%d size=%d",
+ tn_buf, off, size);
+ return -EACCES;
+ }
+ off += reg->var_off.value;
err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
if (!err && t == BPF_READ && value_regno >= 0) {
- mark_reg_unknown_value_and_range(state->regs,
- value_regno);
- /* note that reg.[id|off|range] == 0 */
+ /* ctx access returns either a scalar, or a
+ * PTR_TO_PACKET[_END]. In the latter case, we know
+ * the offset is zero.
+ */
+ if (reg_type == SCALAR_VALUE)
+ mark_reg_unknown(state->regs, value_regno);
+ else
+ mark_reg_known_zero(state->regs, value_regno);
+ state->regs[value_regno].id = 0;
+ state->regs[value_regno].off = 0;
+ state->regs[value_regno].range = 0;
state->regs[value_regno].type = reg_type;
- state->regs[value_regno].aux_off = 0;
- state->regs[value_regno].aux_off_align = 0;
}
- } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
+ } else if (reg->type == PTR_TO_STACK) {
+ /* stack accesses must be at a fixed offset, so that we can
+ * determine what type of data was stored there.
+ * See check_stack_read().
+ */
+ if (!tnum_is_const(reg->var_off)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose("variable stack access var_off=%s off=%d size=%d",
+ tn_buf, off, size);
+ return -EACCES;
+ }
+ off += reg->var_off.value;
if (off >= 0 || off < -MAX_BPF_STACK) {
verbose("invalid stack off=%d size=%d\n", off, size);
return -EACCES;
@@ -948,7 +1170,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
} else {
err = check_stack_read(state, off, size, value_regno);
}
- } else if (state->regs[regno].type == PTR_TO_PACKET) {
+ } else if (reg->type == PTR_TO_PACKET) {
if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
verbose("cannot write into packet\n");
return -EACCES;
@@ -960,28 +1182,25 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
}
err = check_packet_access(env, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
- mark_reg_unknown_value_and_range(state->regs,
- value_regno);
+ mark_reg_unknown(state->regs, value_regno);
} else {
verbose("R%d invalid mem access '%s'\n",
regno, reg_type_str[reg->type]);
return -EACCES;
}
- if (!err && size <= 2 && value_regno >= 0 && env->allow_ptr_leaks &&
- state->regs[value_regno].type == UNKNOWN_VALUE) {
- /* 1 or 2 byte load zero-extends, determine the number of
- * zero upper bits. Not doing it for 4 byte load, since
- * such values cannot be added to ptr_to_packet anyway.
- */
- state->regs[value_regno].imm = 64 - size * 8;
+ if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
+ state->regs[value_regno].type == SCALAR_VALUE) {
+ /* b/h/w load zero-extends, mark upper bits as known 0 */
+ state->regs[value_regno].var_off = tnum_cast(
+ state->regs[value_regno].var_off, size);
+ __update_reg_bounds(&state->regs[value_regno]);
}
return err;
}
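
As a sketch of the zero-extension step above, using the tnum fields sketched earlier: after a 2-byte (BPF_H) load into a scalar,

	struct tnum var = { .value = 0, .mask = ~0ULL };	/* fully unknown */

	var = tnum_cast(var, 2);	/* keep only the low 16 bits */
	/* now var = { .value = 0, .mask = 0xffff }: the upper 48 bits are
	 * known zero, so __update_reg_bounds() can tighten the bounds to
	 * umin_value = 0, umax_value = 0xffff.
	 */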
static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
{
- struct bpf_reg_state *regs = env->cur_state.regs;
int err;
if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
@@ -991,12 +1210,12 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
}
/* check src1 operand */
- err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+ err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
return err;
/* check src2 operand */
- err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+ err = check_reg_arg(env, insn->dst_reg, SRC_OP);
if (err)
return err;
@@ -1016,9 +1235,17 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
BPF_SIZE(insn->code), BPF_WRITE, -1);
}
+/* Does this register contain a constant zero? */
+static bool register_is_null(struct bpf_reg_state reg)
+{
+ return reg.type == SCALAR_VALUE && tnum_equals_const(reg.var_off, 0);
+}
+
/* when register 'regno' is passed into function that will read 'access_size'
* bytes from that pointer, make sure that it's within stack boundary
- * and all elements of stack are initialized
+ * and all elements of stack are initialized.
+ * Unlike most pointer bounds-checking functions, this one doesn't take an
+ * 'off' argument, so it has to add in reg->off itself.
*/
static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
int access_size, bool zero_size_allowed,
@@ -1029,9 +1256,9 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
int off, i;
if (regs[regno].type != PTR_TO_STACK) {
+ /* Allow zero-byte read from NULL, regardless of pointer type */
if (zero_size_allowed && access_size == 0 &&
- regs[regno].type == CONST_IMM &&
- regs[regno].imm == 0)
+ register_is_null(regs[regno]))
return 0;
verbose("R%d type=%s expected=%s\n", regno,
@@ -1040,7 +1267,15 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
return -EACCES;
}
- off = regs[regno].imm;
+ /* Only allow fixed-offset stack reads */
+ if (!tnum_is_const(regs[regno].var_off)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
+ verbose("invalid variable stack read R%d var_off=%s\n",
+ regno, tn_buf);
+ }
+ off = regs[regno].off + regs[regno].var_off.value;
if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
access_size <= 0) {
verbose("invalid stack type R%d off=%d access_size=%d\n",
@@ -1071,16 +1306,14 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
int access_size, bool zero_size_allowed,
struct bpf_call_arg_meta *meta)
{
- struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
- switch (regs[regno].type) {
+ switch (reg->type) {
case PTR_TO_PACKET:
- return check_packet_access(env, regno, 0, access_size);
+ return check_packet_access(env, regno, reg->off, access_size);
case PTR_TO_MAP_VALUE:
- return check_map_access(env, regno, 0, access_size);
- case PTR_TO_MAP_VALUE_ADJ:
- return check_map_access_adj(env, regno, 0, access_size);
- default: /* const_imm|ptr_to_stack or invalid ptr */
+ return check_map_access(env, regno, reg->off, access_size);
+ default: /* scalar_value|ptr_to_stack or invalid ptr */
return check_stack_boundary(env, regno, access_size,
zero_size_allowed, meta);
}
@@ -1097,10 +1330,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
if (arg_type == ARG_DONTCARE)
return 0;
- if (type == NOT_INIT) {
- verbose("R%d !read_ok\n", regno);
- return -EACCES;
- }
+ err = check_reg_arg(env, regno, SRC_OP);
+ if (err)
+ return err;
if (arg_type == ARG_ANYTHING) {
if (is_pointer_value(env, regno)) {
@@ -1123,11 +1355,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
goto err_type;
} else if (arg_type == ARG_CONST_SIZE ||
arg_type == ARG_CONST_SIZE_OR_ZERO) {
- expected_type = CONST_IMM;
- /* One exception. Allow UNKNOWN_VALUE registers when the
- * boundaries are known and don't cause unsafe memory accesses
- */
- if (type != UNKNOWN_VALUE && type != expected_type)
+ expected_type = SCALAR_VALUE;
+ if (type != expected_type)
goto err_type;
} else if (arg_type == ARG_CONST_MAP_PTR) {
expected_type = CONST_PTR_TO_MAP;
@@ -1141,13 +1370,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
arg_type == ARG_PTR_TO_UNINIT_MEM) {
expected_type = PTR_TO_STACK;
/* One exception here. In case function allows for NULL to be
- * passed in as argument, it's a CONST_IMM type. Final test
+ * passed in as argument, it's a SCALAR_VALUE type. Final test
* happens during stack boundary checking.
*/
- if (type == CONST_IMM && reg->imm == 0)
+ if (register_is_null(*reg))
/* final test in check_stack_boundary() */;
else if (type != PTR_TO_PACKET && type != PTR_TO_MAP_VALUE &&
- type != PTR_TO_MAP_VALUE_ADJ && type != expected_type)
+ type != expected_type)
goto err_type;
meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
} else {
@@ -1173,7 +1402,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
}
if (type == PTR_TO_PACKET)
- err = check_packet_access(env, regno, 0,
+ err = check_packet_access(env, regno, reg->off,
meta->map_ptr->key_size);
else
err = check_stack_boundary(env, regno,
@@ -1189,7 +1418,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
}
if (type == PTR_TO_PACKET)
- err = check_packet_access(env, regno, 0,
+ err = check_packet_access(env, regno, reg->off,
meta->map_ptr->value_size);
else
err = check_stack_boundary(env, regno,
@@ -1209,10 +1438,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
}
- /* If the register is UNKNOWN_VALUE, the access check happens
- * using its boundaries. Otherwise, just use its imm
+ /* The register is SCALAR_VALUE; the access check
+ * happens using its boundaries.
*/
- if (type == UNKNOWN_VALUE) {
+
+ if (!tnum_is_const(reg->var_off))
/* For unprivileged variable accesses, disable raw
* mode so that the program is required to
* initialize all the memory that the helper could
@@ -1220,35 +1450,28 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
*/
meta = NULL;
- if (reg->min_value < 0) {
- verbose("R%d min value is negative, either use unsigned or 'var &= const'\n",
- regno);
- return -EACCES;
- }
-
- if (reg->min_value == 0) {
- err = check_helper_mem_access(env, regno - 1, 0,
- zero_size_allowed,
- meta);
- if (err)
- return err;
- }
+ if (reg->smin_value < 0) {
+ verbose("R%d min value is negative, either use unsigned or 'var &= const'\n",
+ regno);
+ return -EACCES;
+ }
- if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
- verbose("R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
- regno);
- return -EACCES;
- }
- err = check_helper_mem_access(env, regno - 1,
- reg->max_value,
- zero_size_allowed, meta);
+ if (reg->umin_value == 0) {
+ err = check_helper_mem_access(env, regno - 1, 0,
+ zero_size_allowed,
+ meta);
if (err)
return err;
- } else {
- /* register is CONST_IMM */
- err = check_helper_mem_access(env, regno - 1, reg->imm,
- zero_size_allowed, meta);
}
+
+ if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
+ verbose("R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+ regno);
+ return -EACCES;
+ }
+ err = check_helper_mem_access(env, regno - 1,
+ reg->umax_value,
+ zero_size_allowed, meta);
}
return err;
@@ -1283,10 +1506,25 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
func_id != BPF_FUNC_current_task_under_cgroup)
goto error;
break;
+ /* devmap returns a pointer to a live net_device ifindex that we cannot
+ * allow to be modified from the BPF side. So do not allow lookup of
+ * elements for now.
+ */
+ case BPF_MAP_TYPE_DEVMAP:
+ if (func_id != BPF_FUNC_redirect_map)
+ goto error;
+ break;
case BPF_MAP_TYPE_ARRAY_OF_MAPS:
case BPF_MAP_TYPE_HASH_OF_MAPS:
if (func_id != BPF_FUNC_map_lookup_elem)
goto error;
+ break;
+ case BPF_MAP_TYPE_SOCKMAP:
+ if (func_id != BPF_FUNC_sk_redirect_map &&
+ func_id != BPF_FUNC_sock_map_update &&
+ func_id != BPF_FUNC_map_delete_elem)
+ goto error;
+ break;
default:
break;
}
@@ -1311,6 +1549,18 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
goto error;
break;
+ case BPF_FUNC_redirect_map:
+ if (map->map_type != BPF_MAP_TYPE_DEVMAP)
+ goto error;
+ break;
+ case BPF_FUNC_sk_redirect_map:
+ if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
+ goto error;
+ break;
+ case BPF_FUNC_sock_map_update:
+ if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
+ goto error;
+ break;
default:
break;
}
@@ -1340,6 +1590,9 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
return count > 1 ? -EINVAL : 0;
}
+/* Packet data might have moved, any old PTR_TO_PACKET[_END] are now invalid,
+ * so turn them into unknown SCALAR_VALUE.
+ */
static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *state = &env->cur_state;
@@ -1349,7 +1602,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].type == PTR_TO_PACKET ||
regs[i].type == PTR_TO_PACKET_END)
- mark_reg_unknown_value(regs, i);
+ mark_reg_unknown(regs, i);
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] != STACK_SPILL)
@@ -1358,8 +1611,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
if (reg->type != PTR_TO_PACKET &&
reg->type != PTR_TO_PACKET_END)
continue;
- __mark_reg_unknown_value(state->spilled_regs,
- i / BPF_REG_SIZE);
+ __mark_reg_unknown(reg);
}
}
@@ -1434,19 +1686,24 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
}
/* reset caller saved regs */
- for (i = 0; i < CALLER_SAVED_REGS; i++)
+ for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(regs, caller_saved[i]);
+ check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
+ }
- /* update return register */
+ /* update return register (already marked as written above) */
if (fn->ret_type == RET_INTEGER) {
- regs[BPF_REG_0].type = UNKNOWN_VALUE;
+ /* sets type to SCALAR_VALUE */
+ mark_reg_unknown(regs, BPF_REG_0);
} else if (fn->ret_type == RET_VOID) {
regs[BPF_REG_0].type = NOT_INIT;
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
struct bpf_insn_aux_data *insn_aux;
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
- regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0;
+ /* There is no offset yet applied, variable or fixed */
+ mark_reg_known_zero(regs, BPF_REG_0);
+ regs[BPF_REG_0].off = 0;
/* remember map_ptr, so that check_map_access()
* can check 'value_size' boundary of memory access
* to map element returned from bpf_map_lookup_elem()
@@ -1477,494 +1734,551 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
return 0;
}
-static int check_packet_ptr_add(struct bpf_verifier_env *env,
- struct bpf_insn *insn)
+static void coerce_reg_to_32(struct bpf_reg_state *reg)
{
- struct bpf_reg_state *regs = env->cur_state.regs;
- struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
- struct bpf_reg_state *src_reg = &regs[insn->src_reg];
- struct bpf_reg_state tmp_reg;
- s32 imm;
-
- if (BPF_SRC(insn->code) == BPF_K) {
- /* pkt_ptr += imm */
- imm = insn->imm;
-
-add_imm:
- if (imm < 0) {
- verbose("addition of negative constant to packet pointer is not allowed\n");
- return -EACCES;
- }
- if (imm >= MAX_PACKET_OFF ||
- imm + dst_reg->off >= MAX_PACKET_OFF) {
- verbose("constant %d is too large to add to packet pointer\n",
- imm);
- return -EACCES;
- }
- /* a constant was added to pkt_ptr.
- * Remember it while keeping the same 'id'
- */
- dst_reg->off += imm;
- } else {
- bool had_id;
-
- if (src_reg->type == PTR_TO_PACKET) {
- /* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */
- tmp_reg = *dst_reg; /* save r7 state */
- *dst_reg = *src_reg; /* copy pkt_ptr state r6 into r7 */
- src_reg = &tmp_reg; /* pretend it's src_reg state */
- /* if the checks below reject it, the copy won't matter,
- * since we're rejecting the whole program. If all ok,
- * then imm22 state will be added to r7
- * and r7 will be pkt(id=0,off=22,r=62) while
- * r6 will stay as pkt(id=0,off=0,r=62)
- */
- }
+ /* clear high 32 bits */
+ reg->var_off = tnum_cast(reg->var_off, 4);
+ /* Update bounds */
+ __update_reg_bounds(reg);
+}
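
For example, a hypothetical fragment showing what coerce_reg_to_32() achieves on a register assumed to start fully unknown (bounds [0, U64_MAX]): a 32-bit ALU op operates on the low halves and zero-extends the result, so truncating the tracking first keeps it sound.

	/* before: reg->var_off = {value = 0, mask = ~0ULL} */
	reg->var_off = tnum_cast(reg->var_off, 4);
	/* -> {value = 0, mask = 0xffffffff}: high 32 bits known zero */
	__update_reg_bounds(reg);
	/* -> umax_value capped at 0xffffffff (umin_value stays 0) */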
- if (src_reg->type == CONST_IMM) {
- /* pkt_ptr += reg where reg is known constant */
- imm = src_reg->imm;
- goto add_imm;
- }
- /* disallow pkt_ptr += reg
- * if reg is not unknown_value with guaranteed zero upper bits
- * otherwise pkt_ptr may overflow and addition will become
- * subtraction which is not allowed
- */
- if (src_reg->type != UNKNOWN_VALUE) {
- verbose("cannot add '%s' to ptr_to_packet\n",
- reg_type_str[src_reg->type]);
- return -EACCES;
- }
- if (src_reg->imm < 48) {
- verbose("cannot add integer value with %lld upper zero bits to ptr_to_packet\n",
- src_reg->imm);
- return -EACCES;
- }
+static bool signed_add_overflows(s64 a, s64 b)
+{
+ /* Do the add in u64, where overflow is well-defined */
+ s64 res = (s64)((u64)a + (u64)b);
- had_id = (dst_reg->id != 0);
+ if (b < 0)
+ return res > a;
+ return res < a;
+}
- /* dst_reg stays as pkt_ptr type and since some positive
- * integer value was added to the pointer, increment its 'id'
- */
- dst_reg->id = ++env->id_gen;
-
- /* something was added to pkt_ptr, set range to zero */
- dst_reg->aux_off += dst_reg->off;
- dst_reg->off = 0;
- dst_reg->range = 0;
- if (had_id)
- dst_reg->aux_off_align = min(dst_reg->aux_off_align,
- src_reg->min_align);
- else
- dst_reg->aux_off_align = src_reg->min_align;
- }
- return 0;
+static bool signed_sub_overflows(s64 a, s64 b)
+{
+ /* Do the sub in u64, where overflow is well-defined */
+ s64 res = (s64)((u64)a - (u64)b);
+
+ if (b < 0)
+ return res < a;
+ return res > a;
}
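
Casting through u64 makes the wraparound well-defined (unsigned arithmetic wraps in C; signed overflow is undefined), and converting back recovers the wrapped bit pattern on the two's-complement machines the kernel targets. A hypothetical userspace self-check of the same predicate:

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* same logic as above, with fixed-width stdint types */
	static bool signed_add_overflows(int64_t a, int64_t b)
	{
		int64_t res = (int64_t)((uint64_t)a + (uint64_t)b);

		if (b < 0)
			return res > a;
		return res < a;
	}

	int main(void)
	{
		assert(!signed_add_overflows(INT64_MAX - 1, 1));
		assert(signed_add_overflows(INT64_MAX, 1));	/* wraps negative */
		assert(signed_add_overflows(INT64_MIN, -1));	/* wraps positive */
		return 0;
	}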
-static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
+/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
+ * Caller should also handle BPF_MOV case separately.
+ * If we return -EACCES, caller may want to try again treating pointer as a
+ * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
+ */
+static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+ struct bpf_insn *insn,
+ const struct bpf_reg_state *ptr_reg,
+ const struct bpf_reg_state *off_reg)
{
- struct bpf_reg_state *regs = env->cur_state.regs;
- struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+ struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+ bool known = tnum_is_const(off_reg->var_off);
+ s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
+ smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
+ u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
+ umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
u8 opcode = BPF_OP(insn->code);
- s64 imm_log2;
+ u32 dst = insn->dst_reg;
- /* for type == UNKNOWN_VALUE:
- * imm > 0 -> number of zero upper bits
- * imm == 0 -> don't track which is the same as all bits can be non-zero
- */
+ dst_reg = &regs[dst];
- if (BPF_SRC(insn->code) == BPF_X) {
- struct bpf_reg_state *src_reg = &regs[insn->src_reg];
-
- if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
- dst_reg->imm && opcode == BPF_ADD) {
- /* dreg += sreg
- * where both have zero upper bits. Adding them
- * can only result making one more bit non-zero
- * in the larger value.
- * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47)
- * 0xffff (imm=48) + 0xffff = 0x1fffe (imm=47)
- */
- dst_reg->imm = min(dst_reg->imm, src_reg->imm);
- dst_reg->imm--;
- return 0;
- }
- if (src_reg->type == CONST_IMM && src_reg->imm > 0 &&
- dst_reg->imm && opcode == BPF_ADD) {
- /* dreg += sreg
- * where dreg has zero upper bits and sreg is const.
- * Adding them can only result making one more bit
- * non-zero in the larger value.
- */
- imm_log2 = __ilog2_u64((long long)src_reg->imm);
- dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
- dst_reg->imm--;
- return 0;
- }
- /* all other cases non supported yet, just mark dst_reg */
- dst_reg->imm = 0;
- return 0;
+ if (WARN_ON_ONCE(known && (smin_val != smax_val))) {
+ print_verifier_state(&env->cur_state);
+ verbose("verifier internal error: known but bad sbounds\n");
+ return -EINVAL;
+ }
+ if (WARN_ON_ONCE(known && (umin_val != umax_val))) {
+ print_verifier_state(&env->cur_state);
+ verbose("verifier internal error: known but bad ubounds\n");
+ return -EINVAL;
}
- /* sign extend 32-bit imm into 64-bit to make sure that
- * negative values occupy bit 63. Note ilog2() would have
- * been incorrect, since sizeof(insn->imm) == 4
- */
- imm_log2 = __ilog2_u64((long long)insn->imm);
-
- if (dst_reg->imm && opcode == BPF_LSH) {
- /* reg <<= imm
- * if reg was a result of 2 byte load, then its imm == 48
- * which means that upper 48 bits are zero and shifting this reg
- * left by 4 would mean that upper 44 bits are still zero
- */
- dst_reg->imm -= insn->imm;
- } else if (dst_reg->imm && opcode == BPF_MUL) {
- /* reg *= imm
- * if multiplying by 14 subtract 4
- * This is conservative calculation of upper zero bits.
- * It's not trying to special case insn->imm == 1 or 0 cases
- */
- dst_reg->imm -= imm_log2 + 1;
- } else if (opcode == BPF_AND) {
- /* reg &= imm */
- dst_reg->imm = 63 - imm_log2;
- } else if (dst_reg->imm && opcode == BPF_ADD) {
- /* reg += imm */
- dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
- dst_reg->imm--;
- } else if (opcode == BPF_RSH) {
- /* reg >>= imm
- * which means that after right shift, upper bits will be zero
- * note that verifier already checked that
- * 0 <= imm < 64 for shift insn
- */
- dst_reg->imm += insn->imm;
- if (unlikely(dst_reg->imm > 64))
- /* some dumb code did:
- * r2 = *(u32 *)mem;
- * r2 >>= 32;
- * and all bits are zero now */
- dst_reg->imm = 64;
- } else {
- /* all other alu ops, means that we don't know what will
- * happen to the value, mark it with unknown number of zero bits
- */
- dst_reg->imm = 0;
+ if (BPF_CLASS(insn->code) != BPF_ALU64) {
+ /* 32-bit ALU ops on pointers produce (meaningless) scalars */
+ if (!env->allow_ptr_leaks)
+ verbose("R%d 32-bit pointer arithmetic prohibited\n",
+ dst);
+ return -EACCES;
}
- if (dst_reg->imm < 0) {
- /* all 64 bits of the register can contain non-zero bits
- * and such value cannot be added to ptr_to_packet, since it
- * may overflow, mark it as unknown to avoid further eval
- */
- dst_reg->imm = 0;
+ if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
+ if (!env->allow_ptr_leaks)
+ verbose("R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
+ dst);
+ return -EACCES;
+ }
+ if (ptr_reg->type == CONST_PTR_TO_MAP) {
+ if (!env->allow_ptr_leaks)
+ verbose("R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
+ dst);
+ return -EACCES;
+ }
+ if (ptr_reg->type == PTR_TO_PACKET_END) {
+ if (!env->allow_ptr_leaks)
+ verbose("R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
+ dst);
+ return -EACCES;
}
- return 0;
-}
-static int evaluate_reg_imm_alu_unknown(struct bpf_verifier_env *env,
- struct bpf_insn *insn)
-{
- struct bpf_reg_state *regs = env->cur_state.regs;
- struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
- struct bpf_reg_state *src_reg = &regs[insn->src_reg];
- u8 opcode = BPF_OP(insn->code);
- s64 imm_log2 = __ilog2_u64((long long)dst_reg->imm);
-
- /* BPF_X code with src_reg->type UNKNOWN_VALUE here. */
- if (src_reg->imm > 0 && dst_reg->imm) {
- switch (opcode) {
- case BPF_ADD:
- /* dreg += sreg
- * where both have zero upper bits. Adding them
- * can only result making one more bit non-zero
- * in the larger value.
- * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47)
- * 0xffff (imm=48) + 0xffff = 0x1fffe (imm=47)
- */
- dst_reg->imm = min(src_reg->imm, 63 - imm_log2);
- dst_reg->imm--;
- break;
- case BPF_AND:
- /* dreg &= sreg
- * AND can not extend zero bits only shrink
- * Ex. 0x00..00ffffff
- * & 0x0f..ffffffff
- * ----------------
- * 0x00..00ffffff
- */
- dst_reg->imm = max(src_reg->imm, 63 - imm_log2);
+ /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
+ * The id may be overwritten later if we create a new variable offset.
+ */
+ dst_reg->type = ptr_reg->type;
+ dst_reg->id = ptr_reg->id;
+
+ switch (opcode) {
+ case BPF_ADD:
+ /* We can take a fixed offset as long as it doesn't overflow
+ * the s32 'off' field
+ */
+ if (known && (ptr_reg->off + smin_val ==
+ (s64)(s32)(ptr_reg->off + smin_val))) {
+ /* pointer += K. Accumulate it into fixed offset */
+ dst_reg->smin_value = smin_ptr;
+ dst_reg->smax_value = smax_ptr;
+ dst_reg->umin_value = umin_ptr;
+ dst_reg->umax_value = umax_ptr;
+ dst_reg->var_off = ptr_reg->var_off;
+ dst_reg->off = ptr_reg->off + smin_val;
+ dst_reg->range = ptr_reg->range;
break;
- case BPF_OR:
- /* dreg |= sreg
- * OR can only extend zero bits
- * Ex. 0x00..00ffffff
- * | 0x0f..ffffffff
- * ----------------
- * 0x0f..00ffffff
- */
- dst_reg->imm = min(src_reg->imm, 63 - imm_log2);
+ }
+ /* A new variable offset is created. Note that off_reg->off
+ * == 0, since it's a scalar.
+ * dst_reg gets the pointer type and since some positive
+ * integer value was added to the pointer, give it a new 'id'
+ * if it's a PTR_TO_PACKET.
+ * this creates a new 'base' pointer, off_reg (variable) gets
+ * added into the variable offset, and we copy the fixed offset
+ * from ptr_reg.
+ */
+ if (signed_add_overflows(smin_ptr, smin_val) ||
+ signed_add_overflows(smax_ptr, smax_val)) {
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
+ } else {
+ dst_reg->smin_value = smin_ptr + smin_val;
+ dst_reg->smax_value = smax_ptr + smax_val;
+ }
+ if (umin_ptr + umin_val < umin_ptr ||
+ umax_ptr + umax_val < umax_ptr) {
+ dst_reg->umin_value = 0;
+ dst_reg->umax_value = U64_MAX;
+ } else {
+ dst_reg->umin_value = umin_ptr + umin_val;
+ dst_reg->umax_value = umax_ptr + umax_val;
+ }
+ dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
+ dst_reg->off = ptr_reg->off;
+ if (ptr_reg->type == PTR_TO_PACKET) {
+ dst_reg->id = ++env->id_gen;
+ /* something was added to pkt_ptr, set range to zero */
+ dst_reg->range = 0;
+ }
+ break;
+ case BPF_SUB:
+ if (dst_reg == off_reg) {
+ /* scalar -= pointer. Creates an unknown scalar */
+ if (!env->allow_ptr_leaks)
+ verbose("R%d tried to subtract pointer from scalar\n",
+ dst);
+ return -EACCES;
+ }
+ /* We don't allow subtraction from FP, because (according to the
+ * test_verifier.c test "invalid fp arithmetic") JITs might not
+ * be able to deal with it.
+ */
+ if (ptr_reg->type == PTR_TO_STACK) {
+ if (!env->allow_ptr_leaks)
+ verbose("R%d subtraction from stack pointer prohibited\n",
+ dst);
+ return -EACCES;
+ }
+ if (known && (ptr_reg->off - smin_val ==
+ (s64)(s32)(ptr_reg->off - smin_val))) {
+ /* pointer -= K. Subtract it from fixed offset */
+ dst_reg->smin_value = smin_ptr;
+ dst_reg->smax_value = smax_ptr;
+ dst_reg->umin_value = umin_ptr;
+ dst_reg->umax_value = umax_ptr;
+ dst_reg->var_off = ptr_reg->var_off;
+ dst_reg->id = ptr_reg->id;
+ dst_reg->off = ptr_reg->off - smin_val;
+ dst_reg->range = ptr_reg->range;
break;
- case BPF_SUB:
- case BPF_MUL:
- case BPF_RSH:
- case BPF_LSH:
- /* These may be flushed out later */
- default:
- mark_reg_unknown_value(regs, insn->dst_reg);
}
- } else {
- mark_reg_unknown_value(regs, insn->dst_reg);
+ /* A new variable offset is created. If the subtrahend is known
+ * nonnegative, then any reg->range we had before is still good.
+ */
+ if (signed_sub_overflows(smin_ptr, smax_val) ||
+ signed_sub_overflows(smax_ptr, smin_val)) {
+ /* Overflow possible, we know nothing */
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
+ } else {
+ dst_reg->smin_value = smin_ptr - smax_val;
+ dst_reg->smax_value = smax_ptr - smin_val;
+ }
+ if (umin_ptr < umax_val) {
+ /* Overflow possible, we know nothing */
+ dst_reg->umin_value = 0;
+ dst_reg->umax_value = U64_MAX;
+ } else {
+ /* Cannot overflow (as long as bounds are consistent) */
+ dst_reg->umin_value = umin_ptr - umax_val;
+ dst_reg->umax_value = umax_ptr - umin_val;
+ }
+ dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
+ dst_reg->off = ptr_reg->off;
+ if (ptr_reg->type == PTR_TO_PACKET) {
+ dst_reg->id = ++env->id_gen;
+ /* if the subtrahend may be negative, pkt_ptr may have advanced; drop the range */
+ if (smin_val < 0)
+ dst_reg->range = 0;
+ }
+ break;
+ case BPF_AND:
+ case BPF_OR:
+ case BPF_XOR:
+ /* bitwise ops on pointers are troublesome, prohibit for now.
+ * (However, in principle we could allow some cases, e.g.
+ * ptr &= ~3 which would reduce min_value by 3.)
+ */
+ if (!env->allow_ptr_leaks)
+ verbose("R%d bitwise operator %s on pointer prohibited\n",
+ dst, bpf_alu_string[opcode >> 4]);
+ return -EACCES;
+ default:
+ /* other operators (e.g. MUL,LSH) produce non-pointer results */
+ if (!env->allow_ptr_leaks)
+ verbose("R%d pointer arithmetic with %s operator prohibited\n",
+ dst, bpf_alu_string[opcode >> 4]);
+ return -EACCES;
}
- dst_reg->type = UNKNOWN_VALUE;
+ __update_reg_bounds(dst_reg);
+ __reg_deduce_bounds(dst_reg);
+ __reg_bound_offset(dst_reg);
return 0;
}
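
In BPF C terms, the two BPF_ADD paths above look roughly like this (hypothetical sketch; 'data' is a PTR_TO_PACKET register, 'var' a bounded scalar):

	void *a = data + 14;	/* pointer += K: same id, off becomes 14,
				 * any reg->range is carried along
				 */
	void *b = data + var;	/* pointer += scalar: fresh id, the scalar's
				 * bounds/var_off are folded in, and range is
				 * reset to 0 until a new pkt_end comparison
				 */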
-static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
- struct bpf_insn *insn)
+static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
+ struct bpf_insn *insn,
+ struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state src_reg)
{
struct bpf_reg_state *regs = env->cur_state.regs;
- struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
- struct bpf_reg_state *src_reg = &regs[insn->src_reg];
u8 opcode = BPF_OP(insn->code);
- u64 dst_imm = dst_reg->imm;
-
- if (BPF_SRC(insn->code) == BPF_X && src_reg->type == UNKNOWN_VALUE)
- return evaluate_reg_imm_alu_unknown(env, insn);
-
- /* dst_reg->type == CONST_IMM here. Simulate execution of insns
- * containing ALU ops. Don't care about overflow or negative
- * values, just add/sub/... them; registers are in u64.
- */
- if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K) {
- dst_imm += insn->imm;
- } else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X &&
- src_reg->type == CONST_IMM) {
- dst_imm += src_reg->imm;
- } else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_K) {
- dst_imm -= insn->imm;
- } else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_X &&
- src_reg->type == CONST_IMM) {
- dst_imm -= src_reg->imm;
- } else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_K) {
- dst_imm *= insn->imm;
- } else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_X &&
- src_reg->type == CONST_IMM) {
- dst_imm *= src_reg->imm;
- } else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_K) {
- dst_imm |= insn->imm;
- } else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_X &&
- src_reg->type == CONST_IMM) {
- dst_imm |= src_reg->imm;
- } else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_K) {
- dst_imm &= insn->imm;
- } else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_X &&
- src_reg->type == CONST_IMM) {
- dst_imm &= src_reg->imm;
- } else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_K) {
- dst_imm >>= insn->imm;
- } else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_X &&
- src_reg->type == CONST_IMM) {
- dst_imm >>= src_reg->imm;
- } else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_K) {
- dst_imm <<= insn->imm;
- } else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_X &&
- src_reg->type == CONST_IMM) {
- dst_imm <<= src_reg->imm;
- } else {
- mark_reg_unknown_value(regs, insn->dst_reg);
- goto out;
- }
-
- dst_reg->imm = dst_imm;
-out:
- return 0;
-}
-
-static void check_reg_overflow(struct bpf_reg_state *reg)
-{
- if (reg->max_value > BPF_REGISTER_MAX_RANGE)
- reg->max_value = BPF_REGISTER_MAX_RANGE;
- if (reg->min_value < BPF_REGISTER_MIN_RANGE ||
- reg->min_value > BPF_REGISTER_MAX_RANGE)
- reg->min_value = BPF_REGISTER_MIN_RANGE;
-}
-
-static u32 calc_align(u32 imm)
-{
- if (!imm)
- return 1U << 31;
- return imm - ((imm - 1) & imm);
-}
-
-static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
- struct bpf_insn *insn)
-{
- struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
- s64 min_val = BPF_REGISTER_MIN_RANGE;
- u64 max_val = BPF_REGISTER_MAX_RANGE;
- u8 opcode = BPF_OP(insn->code);
- u32 dst_align, src_align;
-
- dst_reg = &regs[insn->dst_reg];
- src_align = 0;
- if (BPF_SRC(insn->code) == BPF_X) {
- check_reg_overflow(&regs[insn->src_reg]);
- min_val = regs[insn->src_reg].min_value;
- max_val = regs[insn->src_reg].max_value;
-
- /* If the source register is a random pointer then the
- * min_value/max_value values represent the range of the known
- * accesses into that value, not the actual min/max value of the
- * register itself. In this case we have to reset the reg range
- * values so we know it is not safe to look at.
- */
- if (regs[insn->src_reg].type != CONST_IMM &&
- regs[insn->src_reg].type != UNKNOWN_VALUE) {
- min_val = BPF_REGISTER_MIN_RANGE;
- max_val = BPF_REGISTER_MAX_RANGE;
- src_align = 0;
- } else {
- src_align = regs[insn->src_reg].min_align;
- }
- } else if (insn->imm < BPF_REGISTER_MAX_RANGE &&
- (s64)insn->imm > BPF_REGISTER_MIN_RANGE) {
- min_val = max_val = insn->imm;
- src_align = calc_align(insn->imm);
- }
-
- dst_align = dst_reg->min_align;
-
- /* We don't know anything about what was done to this register, mark it
- * as unknown. Also, if both derived bounds came from signed/unsigned
- * mixed compares and one side is unbounded, we cannot really do anything
- * with them as boundaries cannot be trusted. Thus, arithmetic of two
- * regs of such kind will get invalidated bounds on the dst side.
- */
- if ((min_val == BPF_REGISTER_MIN_RANGE &&
- max_val == BPF_REGISTER_MAX_RANGE) ||
- (BPF_SRC(insn->code) == BPF_X &&
- ((min_val != BPF_REGISTER_MIN_RANGE &&
- max_val == BPF_REGISTER_MAX_RANGE) ||
- (min_val == BPF_REGISTER_MIN_RANGE &&
- max_val != BPF_REGISTER_MAX_RANGE) ||
- (dst_reg->min_value != BPF_REGISTER_MIN_RANGE &&
- dst_reg->max_value == BPF_REGISTER_MAX_RANGE) ||
- (dst_reg->min_value == BPF_REGISTER_MIN_RANGE &&
- dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) &&
- regs[insn->dst_reg].value_from_signed !=
- regs[insn->src_reg].value_from_signed)) {
- reset_reg_range_values(regs, insn->dst_reg);
- return;
- }
-
- /* If one of our values was at the end of our ranges then we can't just
- * do our normal operations to the register, we need to set the values
- * to the min/max since they are undefined.
- */
- if (opcode != BPF_SUB) {
- if (min_val == BPF_REGISTER_MIN_RANGE)
- dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
- if (max_val == BPF_REGISTER_MAX_RANGE)
- dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+ bool src_known, dst_known;
+ s64 smin_val, smax_val;
+ u64 umin_val, umax_val;
+
+ if (BPF_CLASS(insn->code) != BPF_ALU64) {
+ /* 32-bit ALU ops are (32,32)->64 */
+ coerce_reg_to_32(dst_reg);
+ coerce_reg_to_32(&src_reg);
}
+ smin_val = src_reg.smin_value;
+ smax_val = src_reg.smax_value;
+ umin_val = src_reg.umin_value;
+ umax_val = src_reg.umax_value;
+ src_known = tnum_is_const(src_reg.var_off);
+ dst_known = tnum_is_const(dst_reg->var_off);
switch (opcode) {
case BPF_ADD:
- if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
- dst_reg->min_value += min_val;
- if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
- dst_reg->max_value += max_val;
- dst_reg->min_align = min(src_align, dst_align);
+ if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
+ signed_add_overflows(dst_reg->smax_value, smax_val)) {
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
+ } else {
+ dst_reg->smin_value += smin_val;
+ dst_reg->smax_value += smax_val;
+ }
+ if (dst_reg->umin_value + umin_val < umin_val ||
+ dst_reg->umax_value + umax_val < umax_val) {
+ dst_reg->umin_value = 0;
+ dst_reg->umax_value = U64_MAX;
+ } else {
+ dst_reg->umin_value += umin_val;
+ dst_reg->umax_value += umax_val;
+ }
+ dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
break;
case BPF_SUB:
- /* If one of our values was at the end of our ranges, then the
- * _opposite_ value in the dst_reg goes to the end of our range.
- */
- if (min_val == BPF_REGISTER_MIN_RANGE)
- dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
- if (max_val == BPF_REGISTER_MAX_RANGE)
- dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
- if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
- dst_reg->min_value -= max_val;
- if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
- dst_reg->max_value -= min_val;
- dst_reg->min_align = min(src_align, dst_align);
+ if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
+ signed_sub_overflows(dst_reg->smax_value, smin_val)) {
+ /* Overflow possible, we know nothing */
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
+ } else {
+ dst_reg->smin_value -= smax_val;
+ dst_reg->smax_value -= smin_val;
+ }
+ if (dst_reg->umin_value < umax_val) {
+ /* Overflow possible, we know nothing */
+ dst_reg->umin_value = 0;
+ dst_reg->umax_value = U64_MAX;
+ } else {
+ /* Cannot overflow (as long as bounds are consistent) */
+ dst_reg->umin_value -= umax_val;
+ dst_reg->umax_value -= umin_val;
+ }
+ dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
break;
case BPF_MUL:
- if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
- dst_reg->min_value *= min_val;
- if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
- dst_reg->max_value *= max_val;
- dst_reg->min_align = max(src_align, dst_align);
+ dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
+ if (smin_val < 0 || dst_reg->smin_value < 0) {
+ /* Ain't nobody got time to multiply that sign */
+ __mark_reg_unbounded(dst_reg);
+ __update_reg_bounds(dst_reg);
+ break;
+ }
+ /* Both values are positive, so we can work with unsigned and
+ * copy the result to signed (unless it exceeds S64_MAX).
+ */
+ if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
+ /* Potential overflow, we know nothing */
+ __mark_reg_unbounded(dst_reg);
+ /* (except what we can learn from the var_off) */
+ __update_reg_bounds(dst_reg);
+ break;
+ }
+ dst_reg->umin_value *= umin_val;
+ dst_reg->umax_value *= umax_val;
+ if (dst_reg->umax_value > S64_MAX) {
+ /* Overflow possible, we know nothing */
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
+ } else {
+ dst_reg->smin_value = dst_reg->umin_value;
+ dst_reg->smax_value = dst_reg->umax_value;
+ }
break;
case BPF_AND:
- /* Disallow AND'ing of negative numbers, ain't nobody got time
- * for that. Otherwise the minimum is 0 and the max is the max
- * value we could AND against.
+ if (src_known && dst_known) {
+ __mark_reg_known(dst_reg, dst_reg->var_off.value &
+ src_reg.var_off.value);
+ break;
+ }
+ /* We get our minimum from the var_off, since that's inherently
+ * bitwise. Our maximum is the minimum of the operands' maxima.
*/
- if (min_val < 0)
- dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
- else
- dst_reg->min_value = 0;
- dst_reg->max_value = max_val;
- dst_reg->min_align = max(src_align, dst_align);
+ dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
+ dst_reg->umin_value = dst_reg->var_off.value;
+ dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
+ if (dst_reg->smin_value < 0 || smin_val < 0) {
+ /* Lose signed bounds when ANDing negative numbers,
+ * ain't nobody got time for that.
+ */
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
+ } else {
+ /* ANDing two positives gives a positive, so safe to
+ * cast result into s64.
+ */
+ dst_reg->smin_value = dst_reg->umin_value;
+ dst_reg->smax_value = dst_reg->umax_value;
+ }
+ /* We may learn something more from the var_off */
+ __update_reg_bounds(dst_reg);
break;
- case BPF_LSH:
- /* Gotta have special overflow logic here, if we're shifting
- * more than MAX_RANGE then just assume we have an invalid
- * range.
+ case BPF_OR:
+ if (src_known && dst_known) {
+ __mark_reg_known(dst_reg, dst_reg->var_off.value |
+ src_reg.var_off.value);
+ break;
+ }
+ /* We get our maximum from the var_off, and our minimum is the
+ * maximum of the operands' minima
*/
- if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) {
- dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
- dst_reg->min_align = 1;
+ dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
+ dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
+ dst_reg->umax_value = dst_reg->var_off.value |
+ dst_reg->var_off.mask;
+ if (dst_reg->smin_value < 0 || smin_val < 0) {
+ /* Lose signed bounds when ORing negative numbers,
+ * ain't nobody got time for that.
+ */
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
} else {
- if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
- dst_reg->min_value <<= min_val;
- if (!dst_reg->min_align)
- dst_reg->min_align = 1;
- dst_reg->min_align <<= min_val;
- }
- if (max_val > ilog2(BPF_REGISTER_MAX_RANGE))
- dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
- else if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
- dst_reg->max_value <<= max_val;
+ /* ORing two positives gives a positive, so safe to
+ * cast result into s64.
+ */
+ dst_reg->smin_value = dst_reg->umin_value;
+ dst_reg->smax_value = dst_reg->umax_value;
+ }
+ /* We may learn something more from the var_off */
+ __update_reg_bounds(dst_reg);
break;
- case BPF_RSH:
- /* RSH by a negative number is undefined, and the BPF_RSH is an
- * unsigned shift, so make the appropriate casts.
+ case BPF_LSH:
+ if (umax_val > 63) {
+ /* Shifts greater than 63 are undefined. This includes
+ * shifts by a negative number.
+ */
+ mark_reg_unknown(regs, insn->dst_reg);
+ break;
+ }
+ /* We lose all sign bit information (except what we can pick
+ * up from var_off)
*/
- if (min_val < 0 || dst_reg->min_value < 0) {
- dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
+ /* If we might shift our top bit out, then we know nothing */
+ if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
+ dst_reg->umin_value = 0;
+ dst_reg->umax_value = U64_MAX;
} else {
- dst_reg->min_value =
- (u64)(dst_reg->min_value) >> min_val;
+ dst_reg->umin_value <<= umin_val;
+ dst_reg->umax_value <<= umax_val;
+ }
+ if (src_known)
+ dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
+ else
+ dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
+ /* We may learn something more from the var_off */
+ __update_reg_bounds(dst_reg);
+ break;
+ case BPF_RSH:
+ if (umax_val > 63) {
+ /* Shifts greater than 63 are undefined. This includes
+ * shifts by a negative number.
+ */
+ mark_reg_unknown(regs, insn->dst_reg);
+ break;
}
- if (min_val < 0) {
- dst_reg->min_align = 1;
+ /* BPF_RSH is an unsigned shift, so make the appropriate casts */
+ if (dst_reg->smin_value < 0) {
+ if (umin_val) {
+ /* Sign bit will be cleared */
+ dst_reg->smin_value = 0;
+ } else {
+ /* Lost sign bit information */
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
+ }
} else {
- dst_reg->min_align >>= (u64) min_val;
- if (!dst_reg->min_align)
- dst_reg->min_align = 1;
+ dst_reg->smin_value =
+ (u64)(dst_reg->smin_value) >> umax_val;
}
- if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
- dst_reg->max_value >>= max_val;
+ if (src_known)
+ dst_reg->var_off = tnum_rshift(dst_reg->var_off,
+ umin_val);
+ else
+ dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
+ dst_reg->umin_value >>= umax_val;
+ dst_reg->umax_value >>= umin_val;
+ /* We may learn something more from the var_off */
+ __update_reg_bounds(dst_reg);
break;
default:
- reset_reg_range_values(regs, insn->dst_reg);
+ mark_reg_unknown(regs, insn->dst_reg);
break;
}
- check_reg_overflow(dst_reg);
+ __reg_deduce_bounds(dst_reg);
+ __reg_bound_offset(dst_reg);
+ return 0;
+}
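
A worked example of the BPF_AND case above: with dst_reg known to lie in [0, 1000] (var_off otherwise unknown) and src a constant 0xff,

	var_off    = tnum_and(dst, const 0xff)	->  {value = 0, mask = 0xff}
	umin_value = var_off.value		->  0
	umax_value = min(1000, 0xff)		->  255

and since both operands are nonnegative, smin/smax are then copied from the unsigned bounds.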
+
+/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
+ * and var_off.
+ */
+static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
+{
+ struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg, *src_reg;
+ struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
+ u8 opcode = BPF_OP(insn->code);
+ int rc;
+
+ dst_reg = &regs[insn->dst_reg];
+ src_reg = NULL;
+ if (dst_reg->type != SCALAR_VALUE)
+ ptr_reg = dst_reg;
+ if (BPF_SRC(insn->code) == BPF_X) {
+ src_reg = &regs[insn->src_reg];
+ if (src_reg->type != SCALAR_VALUE) {
+ if (dst_reg->type != SCALAR_VALUE) {
+ /* Combining two pointers by any ALU op yields
+ * an arbitrary scalar.
+ */
+ if (!env->allow_ptr_leaks) {
+ verbose("R%d pointer %s pointer prohibited\n",
+ insn->dst_reg,
+ bpf_alu_string[opcode >> 4]);
+ return -EACCES;
+ }
+ mark_reg_unknown(regs, insn->dst_reg);
+ return 0;
+ } else {
+ /* scalar += pointer
+ * This is legal, but we have to reverse our
+ * src/dest handling in computing the range
+ */
+ rc = adjust_ptr_min_max_vals(env, insn,
+ src_reg, dst_reg);
+ if (rc == -EACCES && env->allow_ptr_leaks) {
+ /* scalar += unknown scalar */
+ __mark_reg_unknown(&off_reg);
+ return adjust_scalar_min_max_vals(
+ env, insn,
+ dst_reg, off_reg);
+ }
+ return rc;
+ }
+ } else if (ptr_reg) {
+ /* pointer += scalar */
+ rc = adjust_ptr_min_max_vals(env, insn,
+ dst_reg, src_reg);
+ if (rc == -EACCES && env->allow_ptr_leaks) {
+ /* unknown scalar += scalar */
+ __mark_reg_unknown(dst_reg);
+ return adjust_scalar_min_max_vals(
+ env, insn, dst_reg, *src_reg);
+ }
+ return rc;
+ }
+ } else {
+ /* Pretend the src is a reg with a known value, since we only
+ * need to be able to read from this state.
+ */
+ off_reg.type = SCALAR_VALUE;
+ __mark_reg_known(&off_reg, insn->imm);
+ src_reg = &off_reg;
+ if (ptr_reg) { /* pointer += K */
+ rc = adjust_ptr_min_max_vals(env, insn,
+ ptr_reg, src_reg);
+ if (rc == -EACCES && env->allow_ptr_leaks) {
+ /* unknown scalar += K */
+ __mark_reg_unknown(dst_reg);
+ return adjust_scalar_min_max_vals(
+ env, insn, dst_reg, off_reg);
+ }
+ return rc;
+ }
+ }
+
+ /* Got here implies adding two SCALAR_VALUEs */
+ if (WARN_ON_ONCE(ptr_reg)) {
+ print_verifier_state(&env->cur_state);
+ verbose("verifier internal error: unexpected ptr_reg\n");
+ return -EINVAL;
+ }
+ if (WARN_ON(!src_reg)) {
+ print_verifier_state(&env->cur_state);
+ verbose("verifier internal error: no src_reg\n");
+ return -EINVAL;
+ }
+ return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
}
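
The operand swapping above means both orderings of a pointer/scalar add reach adjust_ptr_min_max_vals() with ptr_reg set to the pointer; a hypothetical instruction-level sketch:

	r1 = r7;	/* r7 = pkt, so r1 = pkt too */
	r1 += r2;	/* pointer += scalar: (dst, src) passed as-is */

	r2 += r7;	/* scalar += pointer: operands are swapped before the
			 * call, and r2 (the dst) comes out pkt-typed
			 */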
/* check validity of 32-bit and 64-bit arithmetic operations */
static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+ struct bpf_reg_state *regs = env->cur_state.regs;
u8 opcode = BPF_OP(insn->code);
int err;
@@ -1985,7 +2299,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* check src operand */
- err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+ err = check_reg_arg(env, insn->dst_reg, SRC_OP);
if (err)
return err;
@@ -1996,7 +2310,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* check dest operand */
- err = check_reg_arg(regs, insn->dst_reg, DST_OP);
+ err = check_reg_arg(env, insn->dst_reg, DST_OP);
if (err)
return err;
@@ -2009,7 +2323,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* check src operand */
- err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+ err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
return err;
} else {
@@ -2020,15 +2334,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* check dest operand */
- err = check_reg_arg(regs, insn->dst_reg, DST_OP);
+ err = check_reg_arg(env, insn->dst_reg, DST_OP);
if (err)
return err;
- /* we are setting our register to something new, we need to
- * reset its range values.
- */
- reset_reg_range_values(regs, insn->dst_reg);
-
if (BPF_SRC(insn->code) == BPF_X) {
if (BPF_CLASS(insn->code) == BPF_ALU64) {
/* case: R1 = R2
@@ -2036,24 +2345,24 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
*/
regs[insn->dst_reg] = regs[insn->src_reg];
} else {
+ /* R1 = (u32) R2 */
if (is_pointer_value(env, insn->src_reg)) {
verbose("R%d partial copy of pointer\n",
insn->src_reg);
return -EACCES;
}
- mark_reg_unknown_value(regs, insn->dst_reg);
+ mark_reg_unknown(regs, insn->dst_reg);
+ /* high 32 bits are known zero. */
+ regs[insn->dst_reg].var_off = tnum_cast(
+ regs[insn->dst_reg].var_off, 4);
+ __update_reg_bounds(&regs[insn->dst_reg]);
}
} else {
/* case: R = imm
* remember the value we stored into this reg
*/
- regs[insn->dst_reg].type = CONST_IMM;
- regs[insn->dst_reg].imm = insn->imm;
- regs[insn->dst_reg].id = 0;
- regs[insn->dst_reg].max_value = insn->imm;
- regs[insn->dst_reg].min_value = insn->imm;
- regs[insn->dst_reg].min_align = calc_align(insn->imm);
- regs[insn->dst_reg].value_from_signed = false;
+ regs[insn->dst_reg].type = SCALAR_VALUE;
+ __mark_reg_known(regs + insn->dst_reg, insn->imm);
}
} else if (opcode > BPF_END) {
@@ -2068,7 +2377,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}
/* check src1 operand */
- err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+ err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
return err;
} else {
@@ -2079,7 +2388,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* check src2 operand */
- err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+ err = check_reg_arg(env, insn->dst_reg, SRC_OP);
if (err)
return err;
@@ -2100,72 +2409,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* check dest operand */
- err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK);
+ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
if (err)
return err;
- dst_reg = &regs[insn->dst_reg];
-
- /* first we want to adjust our ranges. */
- adjust_reg_min_max_vals(env, insn);
-
- /* pattern match 'bpf_add Rx, imm' instruction */
- if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
- dst_reg->type == FRAME_PTR && BPF_SRC(insn->code) == BPF_K) {
- dst_reg->type = PTR_TO_STACK;
- dst_reg->imm = insn->imm;
- return 0;
- } else if (opcode == BPF_ADD &&
- BPF_CLASS(insn->code) == BPF_ALU64 &&
- dst_reg->type == PTR_TO_STACK &&
- ((BPF_SRC(insn->code) == BPF_X &&
- regs[insn->src_reg].type == CONST_IMM) ||
- BPF_SRC(insn->code) == BPF_K)) {
- if (BPF_SRC(insn->code) == BPF_X)
- dst_reg->imm += regs[insn->src_reg].imm;
- else
- dst_reg->imm += insn->imm;
- return 0;
- } else if (opcode == BPF_ADD &&
- BPF_CLASS(insn->code) == BPF_ALU64 &&
- (dst_reg->type == PTR_TO_PACKET ||
- (BPF_SRC(insn->code) == BPF_X &&
- regs[insn->src_reg].type == PTR_TO_PACKET))) {
- /* ptr_to_packet += K|X */
- return check_packet_ptr_add(env, insn);
- } else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
- dst_reg->type == UNKNOWN_VALUE &&
- env->allow_ptr_leaks) {
- /* unknown += K|X */
- return evaluate_reg_alu(env, insn);
- } else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
- dst_reg->type == CONST_IMM &&
- env->allow_ptr_leaks) {
- /* reg_imm += K|X */
- return evaluate_reg_imm_alu(env, insn);
- } else if (is_pointer_value(env, insn->dst_reg)) {
- verbose("R%d pointer arithmetic prohibited\n",
- insn->dst_reg);
- return -EACCES;
- } else if (BPF_SRC(insn->code) == BPF_X &&
- is_pointer_value(env, insn->src_reg)) {
- verbose("R%d pointer arithmetic prohibited\n",
- insn->src_reg);
- return -EACCES;
- }
-
- /* If we did pointer math on a map value then just set it to our
- * PTR_TO_MAP_VALUE_ADJ type so we can deal with any stores or
- * loads to this register appropriately, otherwise just mark the
- * register as unknown.
- */
- if (env->allow_ptr_leaks &&
- BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD &&
- (dst_reg->type == PTR_TO_MAP_VALUE ||
- dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
- dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
- else
- mark_reg_unknown_value(regs, insn->dst_reg);
+ return adjust_reg_min_max_vals(env, insn);
}
return 0;
@@ -2177,27 +2425,48 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
struct bpf_reg_state *regs = state->regs, *reg;
int i;
- /* LLVM can generate two kind of checks:
+ if (dst_reg->off < 0)
+ /* This doesn't give us any range */
+ return;
+
+ if (dst_reg->umax_value > MAX_PACKET_OFF ||
+ dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
+ /* Risk of overflow. For instance, ptr + (1<<63) may be less
+ * than pkt_end, but that's because it's also less than pkt.
+ */
+ return;
+
+ /* LLVM can generate four kinds of checks:
*
- * Type 1:
+ * Type 1/2:
*
* r2 = r3;
* r2 += 8;
* if (r2 > pkt_end) goto <handle exception>
* <access okay>
*
+ * r2 = r3;
+ * r2 += 8;
+ * if (r2 < pkt_end) goto <access okay>
+ * <handle exception>
+ *
* Where:
* r2 == dst_reg, pkt_end == src_reg
* r2=pkt(id=n,off=8,r=0)
* r3=pkt(id=n,off=0,r=0)
*
- * Type 2:
+ * Type 3/4:
*
* r2 = r3;
* r2 += 8;
* if (pkt_end >= r2) goto <access okay>
* <handle exception>
*
+ * r2 = r3;
+ * r2 += 8;
+ * if (pkt_end <= r2) goto <handle exception>
+ * <access okay>
+ *
* Where:
* pkt_end == dst_reg, r2 == src_reg
* r2=pkt(id=n,off=8,r=0)
@@ -2207,193 +2476,247 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
* so that range of bytes [r3, r3 + 8) is safe to access.
*/
+ /* If our ids match, then we must have the same max_value. And we
+ * don't care about the other reg's fixed offset, since if it's too big
+ * the range won't allow anything.
+ * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
+ */
for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
/* keep the maximum range already checked */
- regs[i].range = max(regs[i].range, dst_reg->off);
+ regs[i].range = max_t(u16, regs[i].range, dst_reg->off);
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] != STACK_SPILL)
continue;
reg = &state->spilled_regs[i / BPF_REG_SIZE];
if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
- reg->range = max(reg->range, dst_reg->off);
+ reg->range = max_t(u16, reg->range, dst_reg->off);
}
}
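
A hypothetical BPF C fragment that compiles to the Type 1 shape above (struct __sk_buff and TC_ACT_* as in the uapi headers):

	#include <linux/bpf.h>
	#include <linux/if_ether.h>
	#include <linux/pkt_cls.h>

	static inline int example(struct __sk_buff *skb)
	{
		void *data     = (void *)(long)skb->data;
		void *data_end = (void *)(long)skb->data_end;
		struct ethhdr *eth = data;

		if (data + sizeof(*eth) > data_end)	/* if (r2 > pkt_end) */
			return TC_ACT_SHOT;		/* <handle exception> */
		return eth->h_proto;			/* <access okay>, r=14 */
	}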
/* Adjusts the register min/max values in the case that the dst_reg is the
* variable register that we are working on, and src_reg is a constant or we're
* simply doing a BPF_K check.
+ * In JEQ/JNE cases we also adjust the var_off values.
*/
static void reg_set_min_max(struct bpf_reg_state *true_reg,
struct bpf_reg_state *false_reg, u64 val,
u8 opcode)
{
- bool value_from_signed = true;
- bool is_range = true;
+ /* If the dst_reg is a pointer, we can't learn anything about its
+ * variable offset from the compare (unless src_reg were a pointer into
+ * the same object, but we don't bother with that).
+ * Since false_reg and true_reg have the same type by construction, we
+ * only need to check one of them for pointerness.
+ */
+ if (__is_pointer_value(false, false_reg))
+ return;
switch (opcode) {
case BPF_JEQ:
/* If this is false then we know nothing Jon Snow, but if it is
* true then we know for sure.
*/
- true_reg->max_value = true_reg->min_value = val;
- is_range = false;
+ __mark_reg_known(true_reg, val);
break;
case BPF_JNE:
/* If this is true we know nothing Jon Snow, but if it is false
* we know the value for sure;
*/
- false_reg->max_value = false_reg->min_value = val;
- is_range = false;
+ __mark_reg_known(false_reg, val);
break;
case BPF_JGT:
- value_from_signed = false;
- /* fallthrough */
+ false_reg->umax_value = min(false_reg->umax_value, val);
+ true_reg->umin_value = max(true_reg->umin_value, val + 1);
+ break;
case BPF_JSGT:
- if (true_reg->value_from_signed != value_from_signed)
- reset_reg_range_values(true_reg, 0);
- if (false_reg->value_from_signed != value_from_signed)
- reset_reg_range_values(false_reg, 0);
- if (opcode == BPF_JGT) {
- /* Unsigned comparison, the minimum value is 0. */
- false_reg->min_value = 0;
- }
- /* If this is false then we know the maximum val is val,
- * otherwise we know the min val is val+1.
- */
- false_reg->max_value = val;
- false_reg->value_from_signed = value_from_signed;
- true_reg->min_value = val + 1;
- true_reg->value_from_signed = value_from_signed;
+ false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
+ true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
+ break;
+ case BPF_JLT:
+ false_reg->umin_value = max(false_reg->umin_value, val);
+ true_reg->umax_value = min(true_reg->umax_value, val - 1);
+ break;
+ case BPF_JSLT:
+ false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
+ true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
break;
case BPF_JGE:
- value_from_signed = false;
- /* fallthrough */
+ false_reg->umax_value = min(false_reg->umax_value, val - 1);
+ true_reg->umin_value = max(true_reg->umin_value, val);
+ break;
case BPF_JSGE:
- if (true_reg->value_from_signed != value_from_signed)
- reset_reg_range_values(true_reg, 0);
- if (false_reg->value_from_signed != value_from_signed)
- reset_reg_range_values(false_reg, 0);
- if (opcode == BPF_JGE) {
- /* Unsigned comparison, the minimum value is 0. */
- false_reg->min_value = 0;
- }
- /* If this is false then we know the maximum value is val - 1,
- * otherwise we know the mimimum value is val.
- */
- false_reg->max_value = val - 1;
- false_reg->value_from_signed = value_from_signed;
- true_reg->min_value = val;
- true_reg->value_from_signed = value_from_signed;
+ false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
+ true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
+ break;
+ case BPF_JLE:
+ false_reg->umin_value = max(false_reg->umin_value, val + 1);
+ true_reg->umax_value = min(true_reg->umax_value, val);
+ break;
+ case BPF_JSLE:
+ false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
+ true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
break;
default:
break;
}
- check_reg_overflow(false_reg);
- check_reg_overflow(true_reg);
- if (is_range) {
- if (__is_pointer_value(false, false_reg))
- reset_reg_range_values(false_reg, 0);
- if (__is_pointer_value(false, true_reg))
- reset_reg_range_values(true_reg, 0);
- }
+ __reg_deduce_bounds(false_reg);
+ __reg_deduce_bounds(true_reg);
+ /* We might have learned some bits from the bounds. */
+ __reg_bound_offset(false_reg);
+ __reg_bound_offset(true_reg);
+ /* Intersecting with the old var_off might have improved our bounds
+ * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
+ * then new var_off is (0; 0x7f...fc) which improves our umax.
+ */
+ __update_reg_bounds(false_reg);
+ __update_reg_bounds(true_reg);
}
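
A userspace sketch of the BPF_JGT arm above, to make the narrowing
concrete (the struct is local to the example, not kernel API; a fresh
register starts with the full u64 range):

	#include <stdint.h>
	#include <stdio.h>

	struct bounds { uint64_t umin, umax; };

	/* "reg > val": the true edge raises umin, the false edge lowers umax */
	static void narrow_jgt(struct bounds *t, struct bounds *f, uint64_t val)
	{
		if (val + 1 > t->umin)
			t->umin = val + 1;	/* max(umin, val + 1) */
		if (val < f->umax)
			f->umax = val;		/* min(umax, val) */
	}

	int main(void)
	{
		struct bounds t = { 0, UINT64_MAX }, f = t;

		narrow_jgt(&t, &f, 100);
		printf("true  [%llu, 0x%llx]\n", (unsigned long long)t.umin,
		       (unsigned long long)t.umax);	/* [101, 0xffff...] */
		printf("false [%llu, %llu]\n", (unsigned long long)f.umin,
		       (unsigned long long)f.umax);	/* [0, 100] */
		return 0;
	}
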
-/* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg
- * is the variable reg.
+/* Same as above, but for the case that dst_reg holds a constant and src_reg is
+ * the variable reg.
*/
static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
struct bpf_reg_state *false_reg, u64 val,
u8 opcode)
{
- bool value_from_signed = true;
- bool is_range = true;
+ if (__is_pointer_value(false, false_reg))
+ return;
switch (opcode) {
case BPF_JEQ:
/* If this is false then we know nothing Jon Snow, but if it is
* true then we know for sure.
*/
- true_reg->max_value = true_reg->min_value = val;
- is_range = false;
+ __mark_reg_known(true_reg, val);
break;
case BPF_JNE:
/* If this is true we know nothing Jon Snow, but if it is false
* we know the value for sure.
*/
- false_reg->max_value = false_reg->min_value = val;
- is_range = false;
+ __mark_reg_known(false_reg, val);
break;
case BPF_JGT:
- value_from_signed = false;
- /* fallthrough */
+ true_reg->umax_value = min(true_reg->umax_value, val - 1);
+ false_reg->umin_value = max(false_reg->umin_value, val);
+ break;
case BPF_JSGT:
- if (true_reg->value_from_signed != value_from_signed)
- reset_reg_range_values(true_reg, 0);
- if (false_reg->value_from_signed != value_from_signed)
- reset_reg_range_values(false_reg, 0);
- if (opcode == BPF_JGT) {
- /* Unsigned comparison, the minimum value is 0. */
- true_reg->min_value = 0;
- }
- /*
- * If this is false, then the val is <= the register, if it is
- * true the register <= to the val.
- */
- false_reg->min_value = val;
- false_reg->value_from_signed = value_from_signed;
- true_reg->max_value = val - 1;
- true_reg->value_from_signed = value_from_signed;
+ true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
+ false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
+ break;
+ case BPF_JLT:
+ true_reg->umin_value = max(true_reg->umin_value, val + 1);
+ false_reg->umax_value = min(false_reg->umax_value, val);
+ break;
+ case BPF_JSLT:
+ true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
+ false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
break;
case BPF_JGE:
- value_from_signed = false;
- /* fallthrough */
+ true_reg->umax_value = min(true_reg->umax_value, val);
+ false_reg->umin_value = max(false_reg->umin_value, val + 1);
+ break;
case BPF_JSGE:
- if (true_reg->value_from_signed != value_from_signed)
- reset_reg_range_values(true_reg, 0);
- if (false_reg->value_from_signed != value_from_signed)
- reset_reg_range_values(false_reg, 0);
- if (opcode == BPF_JGE) {
- /* Unsigned comparison, the minimum value is 0. */
- true_reg->min_value = 0;
- }
- /* If this is false then constant < register, if it is true then
- * the register < constant.
- */
- false_reg->min_value = val + 1;
- false_reg->value_from_signed = value_from_signed;
- true_reg->max_value = val;
- true_reg->value_from_signed = value_from_signed;
+ true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
+ false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
+ break;
+ case BPF_JLE:
+ true_reg->umin_value = max(true_reg->umin_value, val);
+ false_reg->umax_value = min(false_reg->umax_value, val - 1);
+ break;
+ case BPF_JSLE:
+ true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
+ false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
break;
default:
break;
}
- check_reg_overflow(false_reg);
- check_reg_overflow(true_reg);
- if (is_range) {
- if (__is_pointer_value(false, false_reg))
- reset_reg_range_values(false_reg, 0);
- if (__is_pointer_value(false, true_reg))
- reset_reg_range_values(true_reg, 0);
+ __reg_deduce_bounds(false_reg);
+ __reg_deduce_bounds(true_reg);
+ /* We might have learned some bits from the bounds. */
+ __reg_bound_offset(false_reg);
+ __reg_bound_offset(true_reg);
+ /* Intersecting with the old var_off might have improved our bounds
+ * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
+ * then new var_off is (0; 0x7f...fc) which improves our umax.
+ */
+ __update_reg_bounds(false_reg);
+ __update_reg_bounds(true_reg);
+}
+
+/* Regs are known to be equal, so intersect their min/max/var_off */
+static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
+ struct bpf_reg_state *dst_reg)
+{
+ src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
+ dst_reg->umin_value);
+ src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
+ dst_reg->umax_value);
+ src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
+ dst_reg->smin_value);
+ src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
+ dst_reg->smax_value);
+ src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
+ dst_reg->var_off);
+ /* We might have learned new bounds from the var_off. */
+ __update_reg_bounds(src_reg);
+ __update_reg_bounds(dst_reg);
+ /* We might have learned something about the sign bit. */
+ __reg_deduce_bounds(src_reg);
+ __reg_deduce_bounds(dst_reg);
+ /* We might have learned some bits from the bounds. */
+ __reg_bound_offset(src_reg);
+ __reg_bound_offset(dst_reg);
+ /* Intersecting with the old var_off might have improved our bounds
+ * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
+ * then new var_off is (0; 0x7f...fc) which improves our umax.
+ */
+ __update_reg_bounds(src_reg);
+ __update_reg_bounds(dst_reg);
+}
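
tnum_intersect() above merges two known-bits views of the same value.
A userspace rendering of its logic (mirroring kernel/bpf/tnum.c from
this series; shown only to make the semantics concrete):

	#include <stdint.h>
	#include <stdio.h>

	/* a set mask bit means "this bit is unknown" */
	struct tnum { uint64_t value, mask; };

	static struct tnum tnum_intersect(struct tnum a, struct tnum b)
	{
		uint64_t v  = a.value | b.value;  /* known-set in either view */
		uint64_t mu = a.mask & b.mask;    /* unknown only if unknown in both */

		return (struct tnum){ v & ~mu, mu };
	}

	int main(void)
	{
		struct tnum a = { 0x10, 0x0f };   /* 0b1xxxx */
		struct tnum b = { 0x14, 0x03 };   /* 0b101xx */
		struct tnum r = tnum_intersect(a, b);

		/* prints value=0x14 mask=0x3: bits 3-2 were learned from b */
		printf("value=%#llx mask=%#llx\n",
		       (unsigned long long)r.value, (unsigned long long)r.mask);
		return 0;
	}
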
+
+static void reg_combine_min_max(struct bpf_reg_state *true_src,
+ struct bpf_reg_state *true_dst,
+ struct bpf_reg_state *false_src,
+ struct bpf_reg_state *false_dst,
+ u8 opcode)
+{
+ switch (opcode) {
+ case BPF_JEQ:
+ __reg_combine_min_max(true_src, true_dst);
+ break;
+ case BPF_JNE:
+ __reg_combine_min_max(false_src, false_dst);
+ break;
}
}
static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
- enum bpf_reg_type type)
+ bool is_null)
{
struct bpf_reg_state *reg = &regs[regno];
if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
- if (type == UNKNOWN_VALUE) {
- __mark_reg_unknown_value(regs, regno);
+ /* Old offset (both fixed and variable parts) should
+ * have been known-zero, because we don't allow pointer
+ * arithmetic on pointers that might be NULL.
+ */
+ if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
+ !tnum_equals_const(reg->var_off, 0) ||
+ reg->off)) {
+ __mark_reg_known_zero(reg);
+ reg->off = 0;
+ }
+ if (is_null) {
+ reg->type = SCALAR_VALUE;
} else if (reg->map_ptr->inner_map_meta) {
reg->type = CONST_PTR_TO_MAP;
reg->map_ptr = reg->map_ptr->inner_map_meta;
} else {
- reg->type = type;
+ reg->type = PTR_TO_MAP_VALUE;
}
/* We don't need id from this point onwards anymore, thus we
* should better reset it, so that state pruning has chances
@@ -2407,19 +2730,19 @@ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
* be folded together at some point.
*/
static void mark_map_regs(struct bpf_verifier_state *state, u32 regno,
- enum bpf_reg_type type)
+ bool is_null)
{
struct bpf_reg_state *regs = state->regs;
u32 id = regs[regno].id;
int i;
for (i = 0; i < MAX_BPF_REG; i++)
- mark_map_reg(regs, i, id, type);
+ mark_map_reg(regs, i, id, is_null);
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] != STACK_SPILL)
continue;
- mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, type);
+ mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, is_null);
}
}
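
These marks implement the familiar map-lookup NULL check. A sketch of
the source pattern being recognised (the map `counters` is
hypothetical; assumes the usual SEC()/bpf_map_lookup_elem() wrappers
from the sample headers):

	SEC("socket")
	int count(struct __sk_buff *skb)
	{
		__u32 key = 0;
		long *value;

		/* R0, and every copy sharing its id, is PTR_TO_MAP_VALUE_OR_NULL */
		value = bpf_map_lookup_elem(&counters, &key);
		if (!value)		/* JEQ #0: both branches run mark_map_regs() */
			return 0;	/* taken branch: reg becomes SCALAR_VALUE */
		(*value)++;		/* fall-through: reg is now PTR_TO_MAP_VALUE */
		return 0;
	}
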
@@ -2431,7 +2754,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
u8 opcode = BPF_OP(insn->code);
int err;
- if (opcode > BPF_EXIT) {
+ if (opcode > BPF_JSLE) {
verbose("invalid BPF_JMP opcode %x\n", opcode);
return -EINVAL;
}
@@ -2443,7 +2766,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
}
/* check src1 operand */
- err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+ err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
return err;
@@ -2460,7 +2783,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
}
/* check src2 operand */
- err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+ err = check_reg_arg(env, insn->dst_reg, SRC_OP);
if (err)
return err;
@@ -2469,7 +2792,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
/* detect if R == 0 where R was initialized to zero earlier */
if (BPF_SRC(insn->code) == BPF_K &&
(opcode == BPF_JEQ || opcode == BPF_JNE) &&
- dst_reg->type == CONST_IMM && dst_reg->imm == insn->imm) {
+ dst_reg->type == SCALAR_VALUE &&
+ tnum_equals_const(dst_reg->var_off, insn->imm)) {
if (opcode == BPF_JEQ) {
/* if (imm == imm) goto pc+off;
* only follow the goto, ignore fall-through
@@ -2491,17 +2815,30 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
/* detect if we are comparing against a constant value so we can adjust
* our min/max values for our dst register.
+ * this is only legit if both are scalars (or pointers to the same
+ * object, I suppose, but we don't support that right now), because
+ * otherwise the different base pointers mean the offsets aren't
+ * comparable.
*/
if (BPF_SRC(insn->code) == BPF_X) {
- if (regs[insn->src_reg].type == CONST_IMM)
- reg_set_min_max(&other_branch->regs[insn->dst_reg],
- dst_reg, regs[insn->src_reg].imm,
- opcode);
- else if (dst_reg->type == CONST_IMM)
- reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
- &regs[insn->src_reg], dst_reg->imm,
- opcode);
- } else {
+ if (dst_reg->type == SCALAR_VALUE &&
+ regs[insn->src_reg].type == SCALAR_VALUE) {
+ if (tnum_is_const(regs[insn->src_reg].var_off))
+ reg_set_min_max(&other_branch->regs[insn->dst_reg],
+ dst_reg, regs[insn->src_reg].var_off.value,
+ opcode);
+ else if (tnum_is_const(dst_reg->var_off))
+ reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
+ &regs[insn->src_reg],
+ dst_reg->var_off.value, opcode);
+ else if (opcode == BPF_JEQ || opcode == BPF_JNE)
+ /* Comparing for equality, we can combine knowledge */
+ reg_combine_min_max(&other_branch->regs[insn->src_reg],
+ &other_branch->regs[insn->dst_reg],
+ &regs[insn->src_reg],
+ &regs[insn->dst_reg], opcode);
+ }
+ } else if (dst_reg->type == SCALAR_VALUE) {
reg_set_min_max(&other_branch->regs[insn->dst_reg],
dst_reg, insn->imm, opcode);
}
@@ -2513,18 +2850,24 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
/* Mark all identical map registers in each branch as either
* safe or unknown depending R == 0 or R != 0 conditional.
*/
- mark_map_regs(this_branch, insn->dst_reg,
- opcode == BPF_JEQ ? PTR_TO_MAP_VALUE : UNKNOWN_VALUE);
- mark_map_regs(other_branch, insn->dst_reg,
- opcode == BPF_JEQ ? UNKNOWN_VALUE : PTR_TO_MAP_VALUE);
+ mark_map_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
+ mark_map_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
find_good_pkt_pointers(this_branch, dst_reg);
+ } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
+ dst_reg->type == PTR_TO_PACKET &&
+ regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+ find_good_pkt_pointers(other_branch, dst_reg);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
dst_reg->type == PTR_TO_PACKET_END &&
regs[insn->src_reg].type == PTR_TO_PACKET) {
find_good_pkt_pointers(other_branch, &regs[insn->src_reg]);
+ } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
+ dst_reg->type == PTR_TO_PACKET_END &&
+ regs[insn->src_reg].type == PTR_TO_PACKET) {
+ find_good_pkt_pointers(this_branch, &regs[insn->src_reg]);
} else if (is_pointer_value(env, insn->dst_reg)) {
verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
return -EACCES;
@@ -2557,16 +2900,15 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}
- err = check_reg_arg(regs, insn->dst_reg, DST_OP);
+ err = check_reg_arg(env, insn->dst_reg, DST_OP);
if (err)
return err;
if (insn->src_reg == 0) {
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
- regs[insn->dst_reg].type = CONST_IMM;
- regs[insn->dst_reg].imm = imm;
- regs[insn->dst_reg].id = 0;
+ regs[insn->dst_reg].type = SCALAR_VALUE;
+ __mark_reg_known(&regs[insn->dst_reg], imm);
return 0;
}
@@ -2624,7 +2966,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* check whether implicit source operand (register R6) is readable */
- err = check_reg_arg(regs, BPF_REG_6, SRC_OP);
+ err = check_reg_arg(env, BPF_REG_6, SRC_OP);
if (err)
return err;
@@ -2635,19 +2977,22 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (mode == BPF_IND) {
/* check explicit source operand */
- err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+ err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
return err;
}
/* reset caller saved regs to unreadable */
- for (i = 0; i < CALLER_SAVED_REGS; i++)
+ for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(regs, caller_saved[i]);
+ check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
+ }
/* mark destination R0 register as readable, since it contains
- * the value fetched from the packet
+ * the value fetched from the packet.
+ * Already marked as written above.
*/
- regs[BPF_REG_0].type = UNKNOWN_VALUE;
+ mark_reg_unknown(regs, BPF_REG_0);
return 0;
}
@@ -2850,57 +3195,144 @@ err_free:
return ret;
}
-/* the following conditions reduce the number of explored insns
- * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
+/* check %cur's range satisfies %old's */
+static bool range_within(struct bpf_reg_state *old,
+ struct bpf_reg_state *cur)
+{
+ return old->umin_value <= cur->umin_value &&
+ old->umax_value >= cur->umax_value &&
+ old->smin_value <= cur->smin_value &&
+ old->smax_value >= cur->smax_value;
+}
+
+/* Maximum number of register states that can exist at once */
+#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
+struct idpair {
+ u32 old;
+ u32 cur;
+};
+
+/* If in the old state two registers had the same id, then they need to have
+ * the same id in the new state as well. But that id could be different from
+ * the old state, so we need to track the mapping from old to new ids.
+ * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
+ * regs with old id 5 must also have new id 9 for the new state to be safe. But
+ * regs with a different old id could still have new id 9, we don't care about
+ * that.
+ * So we look through our idmap to see if this old id has been seen before. If
+ * so, we require the new id to match; otherwise, we add the id pair to the map.
*/
-static bool compare_ptrs_to_packet(struct bpf_verifier_env *env,
- struct bpf_reg_state *old,
- struct bpf_reg_state *cur)
+static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
{
- if (old->id != cur->id)
- return false;
+ unsigned int i;
+
+ for (i = 0; i < ID_MAP_SIZE; i++) {
+ if (!idmap[i].old) {
+ /* Reached an empty slot; haven't seen this id before */
+ idmap[i].old = old_id;
+ idmap[i].cur = cur_id;
+ return true;
+ }
+ if (idmap[i].old == old_id)
+ return idmap[i].cur == cur_id;
+ }
+ /* We ran out of idmap slots, which should be impossible */
+ WARN_ON_ONCE(1);
+ return false;
+}
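
A userspace sketch of the discipline check_ids() enforces, with a
shrunken table (the kernel sizes it at ID_MAP_SIZE so it can never
fill up):

	#include <stdbool.h>
	#include <stdio.h>
	#include <string.h>

	struct idpair { unsigned int old, cur; };

	static bool check_ids(unsigned int old_id, unsigned int cur_id,
			      struct idpair *idmap, int n)
	{
		for (int i = 0; i < n; i++) {
			if (!idmap[i].old) {		/* first sighting: record it */
				idmap[i].old = old_id;
				idmap[i].cur = cur_id;
				return true;
			}
			if (idmap[i].old == old_id)	/* seen before: must match */
				return idmap[i].cur == cur_id;
		}
		return false;				/* table full */
	}

	int main(void)
	{
		struct idpair map[4];

		memset(map, 0, sizeof(map));
		printf("%d\n", check_ids(5, 9, map, 4));	/* 1: records 5 -> 9 */
		printf("%d\n", check_ids(5, 9, map, 4));	/* 1: consistent */
		printf("%d\n", check_ids(5, 7, map, 4));	/* 0: 5 already -> 9 */
		printf("%d\n", check_ids(3, 9, map, 4));	/* 1: other old ids may share 9 */
		return 0;
	}
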
- /* old ptr_to_packet is more conservative, since it allows smaller
- * range. Ex:
- * old(off=0,r=10) is equal to cur(off=0,r=20), because
- * old(off=0,r=10) means that with range=10 the verifier proceeded
- * further and found no issues with the program. Now we're in the same
- * spot with cur(off=0,r=20), so we're safe too, since anything further
- * will only be looking at most 10 bytes after this pointer.
- */
- if (old->off == cur->off && old->range < cur->range)
+/* Returns true if (rold safe implies rcur safe) */
+static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
+ struct idpair *idmap)
+{
+ if (!(rold->live & REG_LIVE_READ))
+ /* explored state didn't use this */
return true;
- /* old(off=20,r=10) is equal to cur(off=22,re=22 or 5 or 0)
- * since both cannot be used for packet access and safe(old)
- * pointer has smaller off that could be used for further
- * 'if (ptr > data_end)' check
- * Ex:
- * old(off=20,r=10) and cur(off=22,r=22) and cur(off=22,r=0) mean
- * that we cannot access the packet.
- * The safe range is:
- * [ptr, ptr + range - off)
- * so whenever off >=range, it means no safe bytes from this pointer.
- * When comparing old->off <= cur->off, it means that older code
- * went with smaller offset and that offset was later
- * used to figure out the safe range after 'if (ptr > data_end)' check
- * Say, 'old' state was explored like:
- * ... R3(off=0, r=0)
- * R4 = R3 + 20
- * ... now R4(off=20,r=0) <-- here
- * if (R4 > data_end)
- * ... R4(off=20,r=20), R3(off=0,r=20) and R3 can be used to access.
- * ... the code further went all the way to bpf_exit.
- * Now the 'cur' state at the mark 'here' has R4(off=30,r=0).
- * old_R4(off=20,r=0) equal to cur_R4(off=30,r=0), since if the verifier
- * goes further, such cur_R4 will give larger safe packet range after
- * 'if (R4 > data_end)' and all further insn were already good with r=20,
- * so they will be good with r=30 and we can prune the search.
- */
- if (!env->strict_alignment && old->off <= cur->off &&
- old->off >= old->range && cur->off >= cur->range)
+ if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, live)) == 0)
return true;
+ if (rold->type == NOT_INIT)
+ /* explored state can't have used this */
+ return true;
+ if (rcur->type == NOT_INIT)
+ return false;
+ switch (rold->type) {
+ case SCALAR_VALUE:
+ if (rcur->type == SCALAR_VALUE) {
+ /* new val must satisfy old val knowledge */
+ return range_within(rold, rcur) &&
+ tnum_in(rold->var_off, rcur->var_off);
+ } else {
+ /* if we knew anything about the old value, we're not
+ * equal, because we can't know anything about the
+ * scalar value of the pointer in the new value.
+ */
+ return rold->umin_value == 0 &&
+ rold->umax_value == U64_MAX &&
+ rold->smin_value == S64_MIN &&
+ rold->smax_value == S64_MAX &&
+ tnum_is_unknown(rold->var_off);
+ }
+ case PTR_TO_MAP_VALUE:
+ /* If the new min/max/var_off satisfy the old ones and
+ * everything else matches, we are OK.
+ * We don't care about the 'id' value, because nothing
+ * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
+ */
+ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+ range_within(rold, rcur) &&
+ tnum_in(rold->var_off, rcur->var_off);
+ case PTR_TO_MAP_VALUE_OR_NULL:
+ /* a PTR_TO_MAP_VALUE could be safe to use as a
+ * PTR_TO_MAP_VALUE_OR_NULL into the same map.
+ * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
+ * checked, doing so could have affected others with the same
+ * id, and we can't check for that because we lost the id when
+ * we converted to a PTR_TO_MAP_VALUE.
+ */
+ if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
+ return false;
+ if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
+ return false;
+ /* Check our ids match any regs they're supposed to */
+ return check_ids(rold->id, rcur->id, idmap);
+ case PTR_TO_PACKET:
+ if (rcur->type != PTR_TO_PACKET)
+ return false;
+ /* We must have at least as much range as the old ptr
+ * did, so that any accesses which were safe before are
+ * still safe. This is true even if old range < old off,
+ * since someone could have accessed through (ptr - k), or
+ * even done ptr -= k in a register, to get a safe access.
+ */
+ if (rold->range > rcur->range)
+ return false;
+ /* If the offsets don't match, we can't trust our alignment;
+ * nor can we be sure that we won't fall out of range.
+ */
+ if (rold->off != rcur->off)
+ return false;
+ /* id relations must be preserved */
+ if (rold->id && !check_ids(rold->id, rcur->id, idmap))
+ return false;
+ /* new val must satisfy old val knowledge */
+ return range_within(rold, rcur) &&
+ tnum_in(rold->var_off, rcur->var_off);
+ case PTR_TO_CTX:
+ case CONST_PTR_TO_MAP:
+ case PTR_TO_STACK:
+ case PTR_TO_PACKET_END:
+ /* Only valid matches are exact, which memcmp() above
+ * would have accepted
+ */
+ default:
+ /* Don't know what's going on, just say it's not safe */
+ return false;
+ }
+
+ /* Shouldn't get here; if we do, say it's not safe */
+ WARN_ON_ONCE(1);
return false;
}
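
The SCALAR_VALUE case leans on tnum_in() to ask whether every value
rcur might hold is also possible under rold's known bits. A userspace
rendering of that helper (mirroring kernel/bpf/tnum.c from this
series), plus the containment it implies:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	struct tnum { uint64_t value, mask; };	/* set mask bit => unknown */

	/* true iff the set of values b describes is inside a's set */
	static bool tnum_in(struct tnum a, struct tnum b)
	{
		if (b.mask & ~a.mask)	/* b unsure where a is sure */
			return false;
		b.value &= ~a.mask;	/* compare only bits a knows */
		return a.value == b.value;
	}

	int main(void)
	{
		struct tnum old = { 0x00, 0xff };	/* any value 0x00..0xff */
		struct tnum cur = { 0x10, 0x0f };	/* 0x10..0x1f */

		printf("%d\n", tnum_in(old, cur));	/* 1: subset, prune OK */
		printf("%d\n", tnum_in(cur, old));	/* 0: old is wider */
		return 0;
	}
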
@@ -2934,44 +3366,18 @@ static bool states_equal(struct bpf_verifier_env *env,
struct bpf_verifier_state *old,
struct bpf_verifier_state *cur)
{
- bool varlen_map_access = env->varlen_map_value_access;
- struct bpf_reg_state *rold, *rcur;
+ struct idpair *idmap;
+ bool ret = false;
int i;
- for (i = 0; i < MAX_BPF_REG; i++) {
- rold = &old->regs[i];
- rcur = &cur->regs[i];
-
- if (memcmp(rold, rcur, sizeof(*rold)) == 0)
- continue;
-
- /* If the ranges were not the same, but everything else was and
- * we didn't do a variable access into a map then we are a-ok.
- */
- if (!varlen_map_access &&
- memcmp(rold, rcur, offsetofend(struct bpf_reg_state, id)) == 0)
- continue;
-
- /* If we didn't map access then again we don't care about the
- * mismatched range values and it's ok if our old type was
- * UNKNOWN and we didn't go to a NOT_INIT'ed reg.
- */
- if (rold->type == NOT_INIT ||
- (!varlen_map_access && rold->type == UNKNOWN_VALUE &&
- rcur->type != NOT_INIT))
- continue;
-
- /* Don't care about the reg->id in this case. */
- if (rold->type == PTR_TO_MAP_VALUE_OR_NULL &&
- rcur->type == PTR_TO_MAP_VALUE_OR_NULL &&
- rold->map_ptr == rcur->map_ptr)
- continue;
-
- if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
- compare_ptrs_to_packet(env, rold, rcur))
- continue;
-
+ idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
+ /* If we failed to allocate the idmap, just say it's not safe */
+ if (!idmap)
return false;
+
+ for (i = 0; i < MAX_BPF_REG; i++) {
+ if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
+ goto out_free;
}
for (i = 0; i < MAX_BPF_STACK; i++) {
@@ -2983,35 +3389,104 @@ static bool states_equal(struct bpf_verifier_env *env,
* this verifier states are not equivalent,
* return false to continue verification of this path
*/
- return false;
+ goto out_free;
if (i % BPF_REG_SIZE)
continue;
if (old->stack_slot_type[i] != STACK_SPILL)
continue;
- if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE],
- &cur->spilled_regs[i / BPF_REG_SIZE],
- sizeof(old->spilled_regs[0])))
- /* when explored and current stack slot types are
- * the same, check that stored pointers types
+ if (!regsafe(&old->spilled_regs[i / BPF_REG_SIZE],
+ &cur->spilled_regs[i / BPF_REG_SIZE],
+ idmap))
+ /* when explored and current stack slots are both storing
+ * spilled registers, check that stored pointer types
* are the same as well.
* Ex: explored safe path could have stored
- * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8}
+ * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
* but current path has stored:
- * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16}
+ * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
* such verifier states are not equivalent.
* return false to continue verification of this path
*/
- return false;
+ goto out_free;
else
continue;
}
- return true;
+ ret = true;
+out_free:
+ kfree(idmap);
+ return ret;
+}
+
+/* A write screens off any subsequent reads; but write marks come from the
+ * straight-line code between a state and its parent. When we arrive at a
+ * jump target (in the first iteration of the propagate_liveness() loop),
+ * we didn't arrive by the straight-line code, so read marks in state must
+ * propagate to parent regardless of state's write marks.
+ */
+static bool do_propagate_liveness(const struct bpf_verifier_state *state,
+ struct bpf_verifier_state *parent)
+{
+ bool writes = parent == state->parent; /* Observe write marks */
+ bool touched = false; /* any changes made? */
+ int i;
+
+ if (!parent)
+ return touched;
+ /* Propagate read liveness of registers... */
+ BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
+ /* We don't need to worry about FP liveness because it's read-only */
+ for (i = 0; i < BPF_REG_FP; i++) {
+ if (parent->regs[i].live & REG_LIVE_READ)
+ continue;
+ if (writes && (state->regs[i].live & REG_LIVE_WRITTEN))
+ continue;
+ if (state->regs[i].live & REG_LIVE_READ) {
+ parent->regs[i].live |= REG_LIVE_READ;
+ touched = true;
+ }
+ }
+ /* ... and stack slots */
+ for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++) {
+ if (parent->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL)
+ continue;
+ if (state->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL)
+ continue;
+ if (parent->spilled_regs[i].live & REG_LIVE_READ)
+ continue;
+ if (writes && (state->spilled_regs[i].live & REG_LIVE_WRITTEN))
+ continue;
+ if (state->spilled_regs[i].live & REG_LIVE_READ) {
+ parent->spilled_regs[i].live |= REG_LIVE_READ;
+ touched = true;
+ }
+ }
+ return touched;
+}
+
+/* "parent" is "a state from which we reach the current state", but initially
+ * it is not the state->parent (i.e. "the state whose straight-line code leads
+ * to the current state"), instead it is the state that happened to arrive at
+ * a (prunable) equivalent of the current state. See comment above
+ * do_propagate_liveness() for consequences of this.
+ * This function is just a more efficient way of calling mark_reg_read() or
+ * mark_stack_slot_read() on each reg in "parent" that is read in "state",
+ * though it requires that parent != state->parent in the call arguments.
+ */
+static void propagate_liveness(const struct bpf_verifier_state *state,
+ struct bpf_verifier_state *parent)
+{
+ while (do_propagate_liveness(state, parent)) {
+ /* Something changed, so we need to feed those changes onward */
+ state = parent;
+ parent = state->parent;
+ }
}
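
A userspace model of one pass of the loop above, registers only, to
show the screening rule (the REG_LIVE_* values here are stand-ins, not
the kernel's enum):

	#include <stdbool.h>
	#include <stdio.h>

	#define REG_LIVE_READ		1
	#define REG_LIVE_WRITTEN	2

	struct st { int live[4]; struct st *parent; };

	static bool propagate_once(struct st *state, struct st *parent, bool writes)
	{
		bool touched = false;

		for (int i = 0; i < 4; i++) {
			if (parent->live[i] & REG_LIVE_READ)
				continue;
			if (writes && (state->live[i] & REG_LIVE_WRITTEN))
				continue;	/* the write screens off this read */
			if (state->live[i] & REG_LIVE_READ) {
				parent->live[i] |= REG_LIVE_READ;
				touched = true;
			}
		}
		return touched;
	}

	int main(void)
	{
		struct st parent = { { 0 }, NULL };
		struct st cur = { { 0 }, &parent };

		cur.live[1] = REG_LIVE_READ;			/* r1 read */
		cur.live[2] = REG_LIVE_WRITTEN | REG_LIVE_READ;	/* r2 written, then read */
		propagate_once(&cur, &parent, cur.parent == &parent);
		/* prints "r1:1 r2:0" -- r2's read never reaches the parent */
		printf("r1:%d r2:%d\n",
		       parent.live[1] & REG_LIVE_READ,
		       parent.live[2] & REG_LIVE_READ);
		return 0;
	}
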
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
struct bpf_verifier_state_list *new_sl;
struct bpf_verifier_state_list *sl;
+ int i;
sl = env->explored_states[insn_idx];
if (!sl)
@@ -3021,11 +3496,20 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
return 0;
while (sl != STATE_LIST_MARK) {
- if (states_equal(env, &sl->state, &env->cur_state))
+ if (states_equal(env, &sl->state, &env->cur_state)) {
/* reached equivalent register/stack state,
- * prune the search
+ * prune the search.
+ * Registers read by the continuation are read by us.
+ * If we have any write marks in env->cur_state, they
+ * will prevent corresponding reads in the continuation
+ * from reaching our parent (an explored_state). Our
+ * own state will get the read marks recorded, but
+ * they'll be immediately forgotten as we're pruning
+ * this state and will pop a new one.
*/
+ propagate_liveness(&sl->state, &env->cur_state);
return 1;
+ }
sl = sl->next;
}
@@ -3043,6 +3527,19 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state));
new_sl->next = env->explored_states[insn_idx];
env->explored_states[insn_idx] = new_sl;
+ /* connect new state to parentage chain */
+ env->cur_state.parent = &new_sl->state;
+ /* clear write marks in current state: the writes we did are not writes
+ * our child did, so they don't screen off its reads from us.
+ * (There are no read marks in current state, because reads always mark
+ * their parent and current state never has children yet. Only
+ * explored_states can get read marks.)
+ */
+ for (i = 0; i < BPF_REG_FP; i++)
+ env->cur_state.regs[i].live = REG_LIVE_NONE;
+ for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++)
+ if (env->cur_state.stack_slot_type[i * BPF_REG_SIZE] == STACK_SPILL)
+ env->cur_state.spilled_regs[i].live = REG_LIVE_NONE;
return 0;
}
@@ -3066,8 +3563,8 @@ static int do_check(struct bpf_verifier_env *env)
bool do_print_state = false;
init_reg_state(regs);
+ state->parent = NULL;
insn_idx = 0;
- env->varlen_map_value_access = false;
for (;;) {
struct bpf_insn *insn;
u8 class;
@@ -3136,11 +3633,11 @@ static int do_check(struct bpf_verifier_env *env)
/* check for reserved fields is already done */
/* check src operand */
- err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+ err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
return err;
- err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK);
+ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
if (err)
return err;
@@ -3190,11 +3687,11 @@ static int do_check(struct bpf_verifier_env *env)
}
/* check src1 operand */
- err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+ err = check_reg_arg(env, insn->src_reg, SRC_OP);
if (err)
return err;
/* check src2 operand */
- err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+ err = check_reg_arg(env, insn->dst_reg, SRC_OP);
if (err)
return err;
@@ -3225,7 +3722,7 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
/* check src operand */
- err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
+ err = check_reg_arg(env, insn->dst_reg, SRC_OP);
if (err)
return err;
@@ -3279,7 +3776,7 @@ static int do_check(struct bpf_verifier_env *env)
* of bpf_exit, which means that program wrote
* something into it earlier
*/
- err = check_reg_arg(regs, BPF_REG_0, SRC_OP);
+ err = check_reg_arg(env, BPF_REG_0, SRC_OP);
if (err)
return err;
@@ -3319,7 +3816,6 @@ process_bpf_exit:
verbose("invalid BPF_LD mode\n");
return -EINVAL;
}
- reset_reg_range_values(regs, insn->dst_reg);
} else {
verbose("unknown insn class %d\n", class);
return -EINVAL;
@@ -3678,7 +4174,11 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
continue;
}
- if (ebpf_jit_enabled() && insn->imm == BPF_FUNC_map_lookup_elem) {
+ /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
+ * handlers are currently limited to 64 bit only.
+ */
+ if (ebpf_jit_enabled() && BITS_PER_LONG == 64 &&
+ insn->imm == BPF_FUNC_map_lookup_elem) {
map_ptr = env->insn_aux_data[i + delta].map_ptr;
if (map_ptr == BPF_MAP_PTR_POISON ||
!map_ptr->ops->map_gen_lookup)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 294f192..fb415e3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8134,7 +8134,7 @@ static void perf_event_free_bpf_handler(struct perf_event *event)
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
- bool is_kprobe, is_tracepoint;
+ bool is_kprobe, is_tracepoint, is_syscall_tp;
struct bpf_prog *prog;
if (event->attr.type != PERF_TYPE_TRACEPOINT)
@@ -8145,7 +8145,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
- if (!is_kprobe && !is_tracepoint)
+ is_syscall_tp = is_syscall_trace_event(event->tp_event);
+ if (!is_kprobe && !is_tracepoint && !is_syscall_tp)
/* bpf programs can only be attached to u/kprobe or tracepoint */
return -EINVAL;
@@ -8154,13 +8155,14 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
return PTR_ERR(prog);
if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
- (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
+ (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
+ (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
/* valid fd, but invalid bpf program type */
bpf_prog_put(prog);
return -EINVAL;
}
- if (is_tracepoint) {
+ if (is_tracepoint || is_syscall_tp) {
int off = trace_event_get_offsets(event->tp_event);
if (prog->aux->max_ctx_offset > off) {
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 74d9a86..9c4eef2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -559,11 +559,29 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
+static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs,
+ struct syscall_metadata *sys_data,
+ struct syscall_trace_enter *rec)
+{
+ struct syscall_tp_t {
+ unsigned long long regs;
+ unsigned long syscall_nr;
+ unsigned long args[sys_data->nb_args];
+ } param;
+ int i;
+
+ *(struct pt_regs **)&param = regs;
+ param.syscall_nr = rec->nr;
+ for (i = 0; i < sys_data->nb_args; i++)
+ param.args[i] = rec->args[i];
+ return trace_call_bpf(prog, &param);
+}
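
The on-stack param built here is the context that a
BPF_PROG_TYPE_TRACEPOINT program attached to a sys_enter_* event will
read. A sketch of the matching program-side view (fixing the array at
six slots purely for illustration; reads beyond nb_args are rejected
through the max_ctx_offset check in perf_event_set_bpf_prog()):

	struct sys_enter_ctx {
		unsigned long long regs;	/* the struct pt_regs *, stored first */
		unsigned long syscall_nr;	/* rec->nr */
		unsigned long args[6];		/* rec->args[0..nb_args-1] */
	};
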
+
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
struct hlist_head *head;
+ struct bpf_prog *prog;
int syscall_nr;
int rctx;
int size;
@@ -578,8 +596,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
if (!sys_data)
return;
+ prog = READ_ONCE(sys_data->enter_event->prog);
head = this_cpu_ptr(sys_data->enter_event->perf_events);
- if (hlist_empty(head))
+ if (!prog && hlist_empty(head))
return;
/* get the size after alignment with the u32 buffer size field */
@@ -594,6 +613,13 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
rec->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);
+
+ if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) ||
+ hlist_empty(head)) {
+ perf_swevent_put_recursion_context(rctx);
+ return;
+ }
+
perf_trace_buf_submit(rec, size, rctx,
sys_data->enter_event->event.type, 1, regs,
head, NULL, NULL);
@@ -633,11 +659,26 @@ static void perf_sysenter_disable(struct trace_event_call *call)
mutex_unlock(&syscall_trace_lock);
}
+static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs,
+ struct syscall_trace_exit *rec)
+{
+ struct syscall_tp_t {
+ unsigned long long regs;
+ unsigned long syscall_nr;
+ unsigned long ret;
+ } param;
+
+ *(struct pt_regs **)&param = regs;
+ param.syscall_nr = rec->nr;
+ param.ret = rec->ret;
+ return trace_call_bpf(prog, &param);
+}
+
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
struct syscall_metadata *sys_data;
struct syscall_trace_exit *rec;
struct hlist_head *head;
+ struct bpf_prog *prog;
int syscall_nr;
int rctx;
int size;
@@ -652,8 +693,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
if (!sys_data)
return;
+ prog = READ_ONCE(sys_data->exit_event->prog);
head = this_cpu_ptr(sys_data->exit_event->perf_events);
- if (hlist_empty(head))
+ if (!prog && hlist_empty(head))
return;
/* We can probably do that at build time */
@@ -666,6 +708,13 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
+
+ if ((prog && !perf_call_bpf_exit(prog, regs, rec)) ||
+ hlist_empty(head)) {
+ perf_swevent_put_recursion_context(rctx);
+ return;
+ }
+
perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
1, regs, head, NULL, NULL);
}
diff --git a/lib/idr.c b/lib/idr.c
index b13682b..082778c 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -7,45 +7,32 @@
DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
static DEFINE_SPINLOCK(simple_ida_lock);
-/**
- * idr_alloc - allocate an id
- * @idr: idr handle
- * @ptr: pointer to be associated with the new id
- * @start: the minimum id (inclusive)
- * @end: the maximum id (exclusive)
- * @gfp: memory allocation flags
- *
- * Allocates an unused ID in the range [start, end). Returns -ENOSPC
- * if there are no unused IDs in that range.
- *
- * Note that @end is treated as max when <= 0. This is to always allow
- * using @start + N as @end as long as N is inside integer range.
- *
- * Simultaneous modifications to the @idr are not allowed and should be
- * prevented by the user, usually with a lock. idr_alloc() may be called
- * concurrently with read-only accesses to the @idr, such as idr_find() and
- * idr_for_each_entry().
- */
-int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp)
+int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index,
+ unsigned long start, unsigned long end, gfp_t gfp,
+ bool ext)
{
- void __rcu **slot;
struct radix_tree_iter iter;
+ void __rcu **slot;
- if (WARN_ON_ONCE(start < 0))
- return -EINVAL;
if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
return -EINVAL;
radix_tree_iter_init(&iter, start);
- slot = idr_get_free(&idr->idr_rt, &iter, gfp, end);
+ if (ext)
+ slot = idr_get_free_ext(&idr->idr_rt, &iter, gfp, end);
+ else
+ slot = idr_get_free(&idr->idr_rt, &iter, gfp, end);
if (IS_ERR(slot))
return PTR_ERR(slot);
radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr);
radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE);
- return iter.index;
+
+ if (index)
+ *index = iter.index;
+ return 0;
}
-EXPORT_SYMBOL_GPL(idr_alloc);
+EXPORT_SYMBOL_GPL(idr_alloc_cmn);
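
With the core factored out this way, the int-returning idr_alloc() can
live in the header as a thin wrapper. A sketch of what the (not shown
here) include/linux/idr.h side presumably looks like:

	static inline int idr_alloc(struct idr *idr, void *ptr, int start,
				    int end, gfp_t gfp)
	{
		unsigned long id;
		int ret;

		if (WARN_ON_ONCE(start < 0))
			return -EINVAL;
		ret = idr_alloc_cmn(idr, ptr, &id, start, end, gfp, false);
		return ret ? ret : (int)id;
	}
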
/**
* idr_alloc_cyclic - allocate new idr entry in a cyclical fashion
@@ -134,6 +121,20 @@ void *idr_get_next(struct idr *idr, int *nextid)
}
EXPORT_SYMBOL(idr_get_next);
+void *idr_get_next_ext(struct idr *idr, unsigned long *nextid)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
+ if (!slot)
+ return NULL;
+
+ *nextid = iter.index;
+ return rcu_dereference_raw(*slot);
+}
+EXPORT_SYMBOL(idr_get_next_ext);
+
/**
* idr_replace - replace pointer for given id
* @idr: idr handle
@@ -150,12 +151,19 @@ EXPORT_SYMBOL(idr_get_next);
*/
void *idr_replace(struct idr *idr, void *ptr, int id)
{
+ if (WARN_ON_ONCE(id < 0))
+ return ERR_PTR(-EINVAL);
+
+ return idr_replace_ext(idr, ptr, id);
+}
+EXPORT_SYMBOL(idr_replace);
+
+void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id)
+{
struct radix_tree_node *node;
void __rcu **slot = NULL;
void *entry;
- if (WARN_ON_ONCE(id < 0))
- return ERR_PTR(-EINVAL);
if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
return ERR_PTR(-EINVAL);
@@ -167,7 +175,7 @@ void *idr_replace(struct idr *idr, void *ptr, int id)
return entry;
}
-EXPORT_SYMBOL(idr_replace);
+EXPORT_SYMBOL(idr_replace_ext);
/**
* DOC: IDA description
diff --git a/lib/nlattr.c b/lib/nlattr.c
index fb52435..927c2f1 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -27,6 +27,30 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
[NLA_S64] = sizeof(s64),
};
+static int validate_nla_bitfield32(const struct nlattr *nla,
+ u32 *valid_flags_allowed)
+{
+ const struct nla_bitfield32 *bf = nla_data(nla);
+ u32 *valid_flags_mask = valid_flags_allowed;
+
+ if (!valid_flags_allowed)
+ return -EINVAL;
+
+ /* disallow invalid bit selector */
+ if (bf->selector & ~*valid_flags_mask)
+ return -EINVAL;
+
+ /* disallow invalid bit values */
+ if (bf->value & ~*valid_flags_mask)
+ return -EINVAL;
+
+ /* disallow valid bit values that are not selected */
+ if (bf->value & ~bf->selector)
+ return -EINVAL;
+
+ return 0;
+}
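
A userspace rendering of the three checks above, against a
hypothetical valid_flags_mask of 0x7 (the struct mirrors the uapi
struct nla_bitfield32 added by this series):

	#include <stdint.h>
	#include <stdio.h>

	struct bf32 { uint32_t value, selector; };

	static int validate(struct bf32 bf, uint32_t mask)
	{
		if (bf.selector & ~mask)	/* selects a bit outside the mask */
			return -1;
		if (bf.value & ~mask)		/* sets a bit outside the mask */
			return -1;
		if (bf.value & ~bf.selector)	/* sets a bit it didn't select */
			return -1;
		return 0;
	}

	int main(void)
	{
		printf("%d\n", validate((struct bf32){ 0x1, 0x3 }, 0x7));	/*  0 */
		printf("%d\n", validate((struct bf32){ 0x8, 0x8 }, 0x7));	/* -1 */
		printf("%d\n", validate((struct bf32){ 0x2, 0x1 }, 0x7));	/* -1 */
		return 0;
	}
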
+
static int validate_nla(const struct nlattr *nla, int maxtype,
const struct nla_policy *policy)
{
@@ -46,6 +70,12 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
return -ERANGE;
break;
+ case NLA_BITFIELD32:
+ if (attrlen != sizeof(struct nla_bitfield32))
+ return -ERANGE;
+
+ return validate_nla_bitfield32(nla, pt->validation_data);
+
case NLA_NUL_STRING:
if (pt->len)
minlen = min_t(int, attrlen, pt->len + 1);
@@ -272,6 +302,30 @@ size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
EXPORT_SYMBOL(nla_strlcpy);
/**
+ * nla_strdup - Copy string attribute payload into a newly allocated buffer
+ * @nla: attribute to copy the string from
+ * @flags: the type of memory to allocate (see kmalloc).
+ *
+ * Returns a pointer to the allocated buffer or NULL on error.
+ */
+char *nla_strdup(const struct nlattr *nla, gfp_t flags)
+{
+ size_t srclen = nla_len(nla);
+ char *src = nla_data(nla), *dst;
+
+ if (srclen > 0 && src[srclen - 1] == '\0')
+ srclen--;
+
+ dst = kmalloc(srclen + 1, flags);
+ if (dst != NULL) {
+ memcpy(dst, src, srclen);
+ dst[srclen] = '\0';
+ }
+ return dst;
+}
+EXPORT_SYMBOL(nla_strdup);
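
The subtlety in nla_strdup() is that a string attribute's payload may
or may not carry its trailing NUL; the helper normalises to exactly
one. A userspace analog of that trimming logic:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	static char *payload_strdup(const char *src, size_t srclen)
	{
		char *dst;

		if (srclen > 0 && src[srclen - 1] == '\0')
			srclen--;	/* drop an existing terminator */
		dst = malloc(srclen + 1);
		if (dst) {
			memcpy(dst, src, srclen);
			dst[srclen] = '\0';
		}
		return dst;
	}

	int main(void)
	{
		char raw[] = { 'e', 't', 'h', '0', '\0' };
		char *s = payload_strdup(raw, sizeof(raw));

		printf("%s (len %zu)\n", s, strlen(s));	/* eth0 (len 4) */
		free(s);
		return 0;
	}
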
+
+/**
* nla_memcpy - Copy a netlink attribute into another memory area
* @dest: where to copy to
* @src: netlink attribute to copy from
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 3527eb3..9717e2a 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2138,13 +2138,13 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
}
EXPORT_SYMBOL(ida_pre_get);
-void __rcu **idr_get_free(struct radix_tree_root *root,
- struct radix_tree_iter *iter, gfp_t gfp, int end)
+void __rcu **idr_get_free_cmn(struct radix_tree_root *root,
+ struct radix_tree_iter *iter, gfp_t gfp,
+ unsigned long max)
{
struct radix_tree_node *node = NULL, *child;
void __rcu **slot = (void __rcu **)&root->rnode;
unsigned long maxindex, start = iter->next_index;
- unsigned long max = end > 0 ? end - 1 : INT_MAX;
unsigned int shift, offset = 0;
grow:
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index d9d5a41..aa8812a 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -952,6 +952,32 @@ static struct bpf_test tests[] = {
{ { 2, 0 }, { 3, 1 }, { 4, MAX_K } },
},
{
+ "JGE (jt 0), test 1",
+ .u.insns = {
+ BPF_STMT(BPF_LDX | BPF_LEN, 0),
+ BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2),
+ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 1),
+ BPF_STMT(BPF_RET | BPF_K, 1),
+ BPF_STMT(BPF_RET | BPF_K, MAX_K)
+ },
+ CLASSIC,
+ { 4, 4, 4, 3, 3 },
+ { { 2, 0 }, { 3, 1 }, { 4, 1 } },
+ },
+ {
+ "JGE (jt 0), test 2",
+ .u.insns = {
+ BPF_STMT(BPF_LDX | BPF_LEN, 0),
+ BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 2),
+ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 1),
+ BPF_STMT(BPF_RET | BPF_K, 1),
+ BPF_STMT(BPF_RET | BPF_K, MAX_K)
+ },
+ CLASSIC,
+ { 4, 4, 5, 3, 3 },
+ { { 4, 1 }, { 5, 1 }, { 6, MAX_K } },
+ },
+ {
"JGE",
.u.insns = {
BPF_STMT(BPF_LDX | BPF_LEN, 0),
@@ -4492,6 +4518,35 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ /* BPF_JMP | BPF_JSLT | BPF_K */
+ {
+ "JMP_JSLT_K: Signed jump: if (-2 < -1) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 0xfffffffffffffffeLL),
+ BPF_JMP_IMM(BPF_JSLT, R1, -1, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JMP_JSLT_K: Signed jump: if (-1 < -1) return 0",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_LD_IMM64(R1, 0xffffffffffffffffLL),
+ BPF_JMP_IMM(BPF_JSLT, R1, -1, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
/* BPF_JMP | BPF_JSGT | BPF_K */
{
"JMP_JSGT_K: Signed jump: if (-1 > -2) return 1",
@@ -4521,6 +4576,73 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ /* BPF_JMP | BPF_JSLE | BPF_K */
+ {
+ "JMP_JSLE_K: Signed jump: if (-2 <= -1) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 0xfffffffffffffffeLL),
+ BPF_JMP_IMM(BPF_JSLE, R1, -1, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JMP_JSLE_K: Signed jump: if (-1 <= -1) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 0xffffffffffffffffLL),
+ BPF_JMP_IMM(BPF_JSLE, R1, -1, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JMP_JSLE_K: Signed jump: value walk 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 3),
+ BPF_JMP_IMM(BPF_JSLE, R1, 0, 6),
+ BPF_ALU64_IMM(BPF_SUB, R1, 1),
+ BPF_JMP_IMM(BPF_JSLE, R1, 0, 4),
+ BPF_ALU64_IMM(BPF_SUB, R1, 1),
+ BPF_JMP_IMM(BPF_JSLE, R1, 0, 2),
+ BPF_ALU64_IMM(BPF_SUB, R1, 1),
+ BPF_JMP_IMM(BPF_JSLE, R1, 0, 1),
+ BPF_EXIT_INSN(), /* bad exit */
+ BPF_ALU32_IMM(BPF_MOV, R0, 1), /* good exit */
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JMP_JSLE_K: Signed jump: value walk 2",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 3),
+ BPF_JMP_IMM(BPF_JSLE, R1, 0, 4),
+ BPF_ALU64_IMM(BPF_SUB, R1, 2),
+ BPF_JMP_IMM(BPF_JSLE, R1, 0, 2),
+ BPF_ALU64_IMM(BPF_SUB, R1, 2),
+ BPF_JMP_IMM(BPF_JSLE, R1, 0, 1),
+ BPF_EXIT_INSN(), /* bad exit */
+ BPF_ALU32_IMM(BPF_MOV, R0, 1), /* good exit */
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
/* BPF_JMP | BPF_JSGE | BPF_K */
{
"JMP_JSGE_K: Signed jump: if (-1 >= -2) return 1",
@@ -4617,6 +4739,35 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ /* BPF_JMP | BPF_JLT | BPF_K */
+ {
+ "JMP_JLT_K: if (2 < 3) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 2),
+ BPF_JMP_IMM(BPF_JLT, R1, 3, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JMP_JGT_K: Unsigned jump: if (1 < -1) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 1),
+ BPF_JMP_IMM(BPF_JLT, R1, -1, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
/* BPF_JMP | BPF_JGE | BPF_K */
{
"JMP_JGE_K: if (3 >= 2) return 1",
@@ -4632,6 +4783,21 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ /* BPF_JMP | BPF_JLE | BPF_K */
+ {
+ "JMP_JLE_K: if (2 <= 3) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 2),
+ BPF_JMP_IMM(BPF_JLE, R1, 3, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
/* BPF_JMP | BPF_JGT | BPF_K jump backwards */
{
"JMP_JGT_K: if (3 > 2) return 1 (jump backwards)",
@@ -4662,6 +4828,36 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ /* BPF_JMP | BPF_JLT | BPF_K jump backwards */
+ {
+ "JMP_JGT_K: if (2 < 3) return 1 (jump backwards)",
+ .u.insns_int = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2), /* goto start */
+ BPF_ALU32_IMM(BPF_MOV, R0, 1), /* out: */
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0), /* start: */
+ BPF_LD_IMM64(R1, 2), /* note: this takes 2 insns */
+ BPF_JMP_IMM(BPF_JLT, R1, 3, -6), /* goto out */
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JMP_JLE_K: if (3 <= 3) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 3),
+ BPF_JMP_IMM(BPF_JLE, R1, 3, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
/* BPF_JMP | BPF_JNE | BPF_K */
{
"JMP_JNE_K: if (3 != 2) return 1",
@@ -4752,6 +4948,37 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ /* BPF_JMP | BPF_JSLT | BPF_X */
+ {
+ "JMP_JSLT_X: Signed jump: if (-2 < -1) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, -1),
+ BPF_LD_IMM64(R2, -2),
+ BPF_JMP_REG(BPF_JSLT, R2, R1, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JMP_JSLT_X: Signed jump: if (-1 < -1) return 0",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_LD_IMM64(R1, -1),
+ BPF_LD_IMM64(R2, -1),
+ BPF_JMP_REG(BPF_JSLT, R1, R2, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
/* BPF_JMP | BPF_JSGE | BPF_X */
{
"JMP_JSGE_X: Signed jump: if (-1 >= -2) return 1",
@@ -4783,6 +5010,37 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ /* BPF_JMP | BPF_JSLE | BPF_X */
+ {
+ "JMP_JSLE_X: Signed jump: if (-2 <= -1) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, -1),
+ BPF_LD_IMM64(R2, -2),
+ BPF_JMP_REG(BPF_JSLE, R2, R1, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JMP_JSLE_X: Signed jump: if (-1 <= -1) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, -1),
+ BPF_LD_IMM64(R2, -1),
+ BPF_JMP_REG(BPF_JSLE, R1, R2, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
/* BPF_JMP | BPF_JGT | BPF_X */
{
"JMP_JGT_X: if (3 > 2) return 1",
@@ -4814,6 +5072,37 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ /* BPF_JMP | BPF_JLT | BPF_X */
+ {
+ "JMP_JLT_X: if (2 < 3) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 3),
+ BPF_LD_IMM64(R2, 2),
+ BPF_JMP_REG(BPF_JLT, R2, R1, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JMP_JLT_X: Unsigned jump: if (1 < -1) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, -1),
+ BPF_LD_IMM64(R2, 1),
+ BPF_JMP_REG(BPF_JLT, R2, R1, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
/* BPF_JMP | BPF_JGE | BPF_X */
{
"JMP_JGE_X: if (3 >= 2) return 1",
@@ -4845,6 +5134,37 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ /* BPF_JMP | BPF_JLE | BPF_X */
+ {
+ "JMP_JLE_X: if (2 <= 3) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 3),
+ BPF_LD_IMM64(R2, 2),
+ BPF_JMP_REG(BPF_JLE, R2, R1, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "JMP_JLE_X: if (3 <= 3) return 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 3),
+ BPF_LD_IMM64(R2, 3),
+ BPF_JMP_REG(BPF_JLE, R1, R2, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
{
/* Mainly testing JIT + imm64 here. */
"JMP_JGE_X: ldimm64 test 1",
@@ -4890,6 +5210,50 @@ static struct bpf_test tests[] = {
{ },
{ { 0, 1 } },
},
+ {
+ "JMP_JLE_X: ldimm64 test 1",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 3),
+ BPF_LD_IMM64(R2, 2),
+ BPF_JMP_REG(BPF_JLE, R2, R1, 2),
+ BPF_LD_IMM64(R0, 0xffffffffffffffffULL),
+ BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeULL),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xeeeeeeeeU } },
+ },
+ {
+ "JMP_JLE_X: ldimm64 test 2",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_LD_IMM64(R1, 3),
+ BPF_LD_IMM64(R2, 2),
+ BPF_JMP_REG(BPF_JLE, R2, R1, 0),
+ BPF_LD_IMM64(R0, 0xffffffffffffffffULL),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffffffffU } },
+ },
+ {
+ "JMP_JLE_X: ldimm64 test 3",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_LD_IMM64(R1, 3),
+ BPF_LD_IMM64(R2, 2),
+ BPF_JMP_REG(BPF_JLE, R2, R1, 4),
+ BPF_LD_IMM64(R0, 0xffffffffffffffffULL),
+ BPF_LD_IMM64(R0, 0xeeeeeeeeeeeeeeeeULL),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
/* BPF_JMP | BPF_JNE | BPF_X */
{
"JMP_JNE_X: if (3 != 2) return 1",
diff --git a/net/Kconfig b/net/Kconfig
index 7d57ef3..9dba271 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -166,13 +166,6 @@ menuconfig NETFILTER
if NETFILTER
-config NETFILTER_DEBUG
- bool "Network packet filtering debugging"
- depends on NETFILTER
- help
- You can say Y here if you want to get additional messages useful in
- debugging the netfilter code.
-
config NETFILTER_ADVANCED
bool "Advanced netfilter configuration"
depends on NETFILTER
@@ -235,6 +228,7 @@ source "net/openvswitch/Kconfig"
source "net/vmw_vsock/Kconfig"
source "net/netlink/Kconfig"
source "net/mpls/Kconfig"
+source "net/nsh/Kconfig"
source "net/hsr/Kconfig"
source "net/switchdev/Kconfig"
source "net/l3mdev/Kconfig"
@@ -301,6 +295,18 @@ config BPF_JIT
/proc/sys/net/core/bpf_jit_harden (optional)
/proc/sys/net/core/bpf_jit_kallsyms (optional)
+config BPF_STREAM_PARSER
+ bool "enable BPF STREAM_PARSER"
+ depends on BPF_SYSCALL
+ select STREAM_PARSER
+ ---help---
+ Enabling this allows a stream parser to be used with
+ BPF_MAP_TYPE_SOCKMAP.
+
+ BPF_MAP_TYPE_SOCKMAP provides a map type to use with network sockets.
+ It can be used to enforce socket policy, implement socket redirects,
+ etc.
+
config NET_FLOW_LIMIT
bool
depends on RPS
@@ -364,7 +370,6 @@ endmenu
source "net/ax25/Kconfig"
source "net/can/Kconfig"
-source "net/irda/Kconfig"
source "net/bluetooth/Kconfig"
source "net/rxrpc/Kconfig"
source "net/kcm/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index bed80fa..ae2fe22 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -31,7 +31,6 @@ obj-$(CONFIG_NETROM) += netrom/
obj-$(CONFIG_ROSE) += rose/
obj-$(CONFIG_AX25) += ax25/
obj-$(CONFIG_CAN) += can/
-obj-$(CONFIG_IRDA) += irda/
obj-$(CONFIG_BT) += bluetooth/
obj-$(CONFIG_SUNRPC) += sunrpc/
obj-$(CONFIG_AF_RXRPC) += rxrpc/
@@ -76,6 +75,7 @@ obj-$(CONFIG_NET_IFE) += ife/
obj-$(CONFIG_OPENVSWITCH) += openvswitch/
obj-$(CONFIG_VSOCKETS) += vmw_vsock/
obj-$(CONFIG_MPLS) += mpls/
+obj-$(CONFIG_NET_NSH) += nsh/
obj-$(CONFIG_HSR) += hsr/
ifneq ($(CONFIG_NET_SWITCHDEV),)
obj-y += switchdev/
diff --git a/net/atm/clip.c b/net/atm/clip.c
index f271a7b..65f706e 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -617,7 +617,7 @@ static void atmarpd_close(struct atm_vcc *vcc)
module_put(THIS_MODULE);
}
-static struct atmdev_ops atmarpd_dev_ops = {
+static const struct atmdev_ops atmarpd_dev_ops = {
.close = atmarpd_close
};
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 093fe87..a3d93a1 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -486,7 +486,7 @@ static void lec_atm_close(struct atm_vcc *vcc)
module_put(THIS_MODULE);
}
-static struct atmdev_ops lecdev_ops = {
+static const struct atmdev_ops lecdev_ops = {
.close = lec_atm_close,
.send = lec_atm_send
};
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 680a4b9..5677147 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -779,7 +779,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
netif_rx(new_skb);
}
-static struct atmdev_ops mpc_ops = { /* only send is required */
+static const struct atmdev_ops mpc_ops = { /* only send is required */
.close = mpoad_close,
.send = msg_from_mpoad
};
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 983c3a2..0a20f6e 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -217,7 +217,7 @@ static void sigd_close(struct atm_vcc *vcc)
read_unlock(&vcc_sklist_lock);
}
-static struct atmdev_ops sigd_dev_ops = {
+static const struct atmdev_ops sigd_dev_ops = {
.close = sigd_close,
.send = sigd_send
};
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a350117..83ba548 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -729,11 +729,9 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
const unsigned char *packet_buff,
int packet_len, bool direct_link)
{
- unsigned char *skb_buff;
unsigned long new_direct_link_flag;
- skb_buff = skb_put_data(forw_packet_aggr->skb, packet_buff,
- packet_len);
+ skb_put_data(forw_packet_aggr->skb, packet_buff, packet_len);
forw_packet_aggr->packet_len += packet_len;
forw_packet_aggr->num_packets++;
@@ -1281,7 +1279,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
batadv_ogm_packet->tq = combined_tq;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "bidirectional: orig = %-15pM neigh = %-15pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
+ "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n",
orig_node->orig, orig_neigh_node->orig, total_count,
neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_penalty,
batadv_ogm_packet->tq, if_incoming->net_dev->name,
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 1e3dc37..8be6173 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -137,7 +137,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
struct batadv_priv *bat_priv;
struct batadv_ogm2_packet *ogm_packet;
struct sk_buff *skb, *skb_tmp;
- unsigned char *ogm_buff, *pkt_buff;
+ unsigned char *ogm_buff;
int ogm_buff_len;
u16 tvlv_len = 0;
int ret;
@@ -166,7 +166,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
goto reschedule;
skb_reserve(skb, ETH_HLEN);
- pkt_buff = skb_put_data(skb, ogm_buff, ogm_buff_len);
+ skb_put_data(skb, ogm_buff, ogm_buff_len);
ogm_packet = (struct batadv_ogm2_packet *)skb->data;
ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno));
@@ -200,7 +200,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
type = "unknown";
}
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselve on %s surpressed: %s\n",
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselves on %s suppressed: %s\n",
hard_iface->net_dev->name, type);
batadv_hardif_put(hard_iface);
@@ -683,18 +683,18 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
ogm_throughput = ntohl(ogm_packet->throughput);
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, troughput %u, TTL %u, V %u, tvlv_len %u)\n",
+ "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, throughput %u, TTL %u, V %u, tvlv_len %u)\n",
ethhdr->h_source, if_incoming->net_dev->name,
if_incoming->net_dev->dev_addr, ogm_packet->orig,
ntohl(ogm_packet->seqno), ogm_throughput, ogm_packet->ttl,
ogm_packet->version, ntohs(ogm_packet->tvlv_len));
- /* If the troughput metric is 0, immediately drop the packet. No need to
- * create orig_node / neigh_node for an unusable route.
+ /* If the throughput metric is 0, immediately drop the packet. No need
+ * to create orig_node / neigh_node for an unusable route.
*/
if (ogm_throughput == 0) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Drop packet: originator packet with troughput metric of 0\n");
+ "Drop packet: originator packet with throughput metric of 0\n");
return;
}
@@ -762,7 +762,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
type = "unknown";
}
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s surpressed: %s\n",
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s suppressed: %s\n",
ogm_packet->orig, hard_iface->net_dev->name,
type);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 6930d6b..b6cfa78 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -834,7 +834,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset)
last_seen_msecs = last_seen_msecs % 60000;
last_seen_secs = last_seen_msecs / 1000;
- seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n",
+ seq_printf(seq, " * %15pI4 %pM %4i %6i:%02i\n",
&dat_entry->ip, dat_entry->mac_addr,
batadv_print_vid(dat_entry->vid),
last_seen_mins, last_seen_secs);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 2be8f1f..05cc763 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.2"
+#define BATADV_SOURCE_VERSION "2017.3"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index d239a9d..054a65e 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -911,7 +911,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
type = "unknown";
}
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s surpressed: %s\n",
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s suppressed: %s\n",
bcast_packet->orig,
hard_iface->net_dev->name, type);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index ab3b654..4e2576f 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -273,9 +273,6 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev,
struct lowpan_peer *peer)
{
const u8 *saddr;
- struct lowpan_btle_dev *dev;
-
- dev = lowpan_btle_dev(netdev);
saddr = peer->lladdr;
@@ -618,12 +615,8 @@ static void ifup(struct net_device *netdev)
static void ifdown(struct net_device *netdev)
{
- int err;
-
rtnl_lock();
- err = dev_close(netdev);
- if (err < 0)
- BT_INFO("iface %s cannot be closed (%d)", netdev->name, err);
+ dev_close(netdev);
rtnl_unlock();
}
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 68f951b..c18115d 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -45,6 +45,11 @@ config BT_BREDR
bool "Bluetooth Classic (BR/EDR) features"
depends on BT
default y
+ help
+ Bluetooth Classic includes support for Basic Rate (BR)
+ available with Bluetooth version 1.0b or later and support
+ for Enhanced Data Rate (EDR) available with Bluetooth
+ version 2.0 or later.
source "net/bluetooth/rfcomm/Kconfig"
@@ -58,11 +63,18 @@ config BT_HS
bool "Bluetooth High Speed (HS) features"
depends on BT_BREDR
default y
+ help
+ Bluetooth High Speed includes support for off-loading
+ Bluetooth connections via the 802.11 (Wi-Fi) physical layer
+ available with Bluetooth version 3.0 or later.
config BT_LE
bool "Bluetooth Low Energy (LE) features"
depends on BT
default y
+ help
+ Bluetooth Low Energy includes support for the low-energy
+ physical layer available with Bluetooth version 4.0 or later.
config BT_6LOWPAN
tristate "Bluetooth 6LoWPAN support"
@@ -114,4 +126,14 @@ config BT_DEBUGFS
Provide extensive information about internal Bluetooth states
in debugfs.
+config BT_LEGACY_IOCTL
+ bool "Enable legacy ioctl interfaces"
+ depends on BT && BT_BREDR
+ default y
+ help
+ Enable support for legacy ioctl interfaces. This is only needed
+ for old and deprecated applications using direct ioctl calls for
+ controller management. Since Linux 3.4 all configuration and
+ setup is done via the mgmt interface, so this is no longer needed.
+
source "drivers/bluetooth/Kconfig"
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 65d734c..0bad296 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -878,6 +878,7 @@ static int hci_sock_release(struct socket *sock)
return 0;
}
+#ifdef CONFIG_BT_LEGACY_IOCTL
static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
{
bdaddr_t bdaddr;
@@ -1049,6 +1050,7 @@ done:
release_sock(sk);
return err;
}
+#endif
static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
int addr_len)
@@ -1969,7 +1971,11 @@ static const struct proto_ops hci_sock_ops = {
.getname = hci_sock_getname,
.sendmsg = hci_sock_sendmsg,
.recvmsg = hci_sock_recvmsg,
+#ifdef CONFIG_BT_LEGACY_IOCTL
.ioctl = hci_sock_ioctl,
+#else
+ .ioctl = sock_no_ioctl,
+#endif
.poll = datagram_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index ca7a35e..aa300f3 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -13,7 +13,7 @@ static void bt_link_release(struct device *dev)
kfree(conn);
}
-static struct device_type bt_link = {
+static const struct device_type bt_link = {
.name = "link",
.release = bt_link_release,
};
@@ -86,7 +86,7 @@ static void bt_host_release(struct device *dev)
module_put(THIS_MODULE);
}
-static struct device_type bt_host = {
+static const struct device_type bt_host = {
.name = "host",
.release = bt_host_release,
};
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index aa4cf64..63e65d9 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -30,10 +30,10 @@
#include <net/bluetooth/bluetooth.h>
-void baswap(bdaddr_t *dst, bdaddr_t *src)
+void baswap(bdaddr_t *dst, const bdaddr_t *src)
{
- unsigned char *d = (unsigned char *) dst;
- unsigned char *s = (unsigned char *) src;
+ const unsigned char *s = (const unsigned char *)src;
+ unsigned char *d = (unsigned char *)dst;
unsigned int i;
for (i = 0; i < 6; i++)
diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c
index ee92c92..34a1227 100644
--- a/net/bluetooth/selftest.c
+++ b/net/bluetooth/selftest.c
@@ -164,7 +164,7 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
ret = -EINVAL;
out:
- kfree(dhkey_a);
+ kfree(tmp);
return ret;
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 5a7be3b..f6b6a92 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -53,9 +53,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
brstats->tx_bytes += skb->len;
u64_stats_update_end(&brstats->syncp);
-#ifdef CONFIG_NET_SWITCHDEV
- skb->offload_fwd_mark = 0;
-#endif
+ br_switchdev_frame_unmark(skb);
BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index a5e4a73..4ea5c8b 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -25,6 +25,7 @@
#include <asm/unaligned.h>
#include <linux/if_vlan.h>
#include <net/switchdev.h>
+#include <trace/events/bridge.h>
#include "br_private.h"
static struct kmem_cache *br_fdb_cache __read_mostly;
@@ -169,29 +170,13 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
}
}
-static void fdb_del_external_learn(struct net_bridge_fdb_entry *f)
-{
- struct switchdev_obj_port_fdb fdb = {
- .obj = {
- .orig_dev = f->dst->dev,
- .id = SWITCHDEV_OBJ_ID_PORT_FDB,
- .flags = SWITCHDEV_F_DEFER,
- },
- .vid = f->vlan_id,
- };
-
- ether_addr_copy(fdb.addr, f->addr.addr);
- switchdev_port_obj_del(f->dst->dev, &fdb.obj);
-}
-
static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
{
+ trace_fdb_delete(br, f);
+
if (f->is_static)
fdb_del_hw_addr(br, f->addr.addr);
- if (f->added_by_external_learn)
- fdb_del_external_learn(f);
-
hlist_del_init_rcu(&f->hlist);
fdb_notify(br, f, RTM_DELNEIGH);
call_rcu(&f->rcu, fdb_rcu_free);
@@ -598,8 +583,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
fdb->updated = now;
if (unlikely(added_by_user))
fdb->added_by_user = 1;
- if (unlikely(fdb_modified))
+ if (unlikely(fdb_modified)) {
+ trace_br_fdb_update(br, source, addr, vid, added_by_user);
fdb_notify(br, fdb, RTM_NEWNEIGH);
+ }
}
} else {
spin_lock(&br->hash_lock);
@@ -608,6 +595,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
if (fdb) {
if (unlikely(added_by_user))
fdb->added_by_user = 1;
+ trace_br_fdb_update(br, source, addr, vid, added_by_user);
fdb_notify(br, fdb, RTM_NEWNEIGH);
}
}
@@ -888,6 +876,8 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_bridge *br = NULL;
int err = 0;
+ trace_br_fdb_add(ndm, dev, addr, vid, nlh_flags);
+
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
return -EINVAL;
@@ -1084,6 +1074,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
bool modified = false;
int err = 0;
+ trace_br_fdb_external_learn_add(br, p, addr, vid);
+
spin_lock_bh(&br->hash_lock);
head = &br->hash[br_mac_hash(addr, vid)];
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index a0b11e7..ca01def 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -713,9 +713,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
void br_mdb_init(void)
{
- rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL);
- rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL);
- rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL);
+ rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0);
+ rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0);
}
void br_mdb_uninit(void)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 2261e51..c2eea1b 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -887,7 +887,7 @@ EXPORT_SYMBOL_GPL(br_netfilter_enable);
/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
* br_dev_queue_push_xmit is called afterwards */
-static struct nf_hook_ops br_nf_ops[] __read_mostly = {
+static const struct nf_hook_ops br_nf_ops[] = {
{
.hook = br_nf_pre_routing,
.pf = NFPROTO_BRIDGE,
@@ -985,22 +985,25 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
int (*okfn)(struct net *, struct sock *,
struct sk_buff *))
{
- struct nf_hook_entry *elem;
+ const struct nf_hook_entries *e;
struct nf_hook_state state;
+ struct nf_hook_ops **ops;
+ unsigned int i;
int ret;
- for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
- elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF;
- elem = rcu_dereference(elem->next))
- ;
-
- if (!elem)
+ e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
+ if (!e)
return okfn(net, sk, skb);
+ ops = nf_hook_entries_get_hook_ops(e);
+ for (i = 0; i < e->num_hook_entries &&
+ ops[i]->priority <= NF_BR_PRI_BRNF; i++)
+ ;
+
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
- ret = nf_hook_slow(skb, &state, elem);
+ ret = nf_hook_slow(skb, &state, e, i);
if (ret == 1)
ret = okfn(net, sk, skb);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index fd9ee73..e870cfc 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1091,6 +1091,11 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long mask);
void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb,
int type);
+
+static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
+{
+ skb->offload_fwd_mark = 0;
+}
#else
static inline int nbp_switchdev_mark_set(struct net_bridge_port *p)
{
@@ -1119,6 +1124,10 @@ static inline void
br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
{
}
+
+static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
+{
+}
#endif /* CONFIG_NET_SWITCHDEV */
#endif
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index d06968b..2b46c50 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -64,14 +64,14 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (NF_INVF(info, EBT_IP_DPORT,
dst < info->dport[0] ||
dst > info->dport[1]))
- return false;
+ return false;
}
if (info->bitmask & EBT_IP_SPORT) {
u32 src = ntohs(pptr->src);
if (NF_INVF(info, EBT_IP_SPORT,
src < info->sport[0] ||
src > info->sport[1]))
- return false;
+ return false;
}
}
return true;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 4617491..2a5a52a 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -89,7 +89,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (NF_INVF(info, EBT_IP6_SPORT,
src < info->sport[0] ||
src > info->sport[1]))
- return false;
+ return false;
}
if ((info->bitmask & EBT_IP6_ICMP6) &&
NF_INVF(info, EBT_IP6_ICMP6,
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index f22ef7c..45a00db 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -70,7 +70,7 @@ ebt_out_hook(void *priv, struct sk_buff *skb,
return ebt_do_table(skb, state, state->net->xt.frame_filter);
}
-static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
+static const struct nf_hook_ops ebt_ops_filter[] = {
{
.hook = ebt_in_hook,
.pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 2f7a4f3..57cd5bb 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -48,7 +48,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
return 0;
}
-static struct ebt_table frame_nat = {
+static const struct ebt_table frame_nat = {
.name = "nat",
.table = &initial_table,
.valid_hooks = NAT_VALID_HOOKS,
@@ -70,7 +70,7 @@ ebt_nat_out(void *priv, struct sk_buff *skb,
return ebt_do_table(skb, state, state->net->xt.frame_nat);
}
-static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
+static const struct nf_hook_ops ebt_ops_nat[] = {
{
.hook = ebt_nat_out,
.pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 9c6e619..83951f9 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -252,13 +252,11 @@ unsigned int ebt_do_table(struct sk_buff *skb,
}
if (verdict == EBT_RETURN) {
letsreturn:
-#ifdef CONFIG_NETFILTER_DEBUG
- if (sp == 0) {
- BUGPRINT("RETURN on base chain");
+ if (WARN(sp == 0, "RETURN on base chain")) {
/* act like this is EBT_CONTINUE */
goto letscontinue;
}
-#endif
+
sp--;
/* put all the local variables right */
i = cs[sp].n;
@@ -271,26 +269,24 @@ letsreturn:
}
if (verdict == EBT_CONTINUE)
goto letscontinue;
-#ifdef CONFIG_NETFILTER_DEBUG
- if (verdict < 0) {
- BUGPRINT("bogus standard verdict\n");
+
+ if (WARN(verdict < 0, "bogus standard verdict\n")) {
read_unlock_bh(&table->lock);
return NF_DROP;
}
-#endif
+
/* jump to a udc */
cs[sp].n = i + 1;
cs[sp].chaininfo = chaininfo;
cs[sp].e = ebt_next_entry(point);
i = 0;
chaininfo = (struct ebt_entries *) (base + verdict);
-#ifdef CONFIG_NETFILTER_DEBUG
- if (chaininfo->distinguisher) {
- BUGPRINT("jump to non-chain\n");
+
+ if (WARN(chaininfo->distinguisher, "jump to non-chain\n")) {
read_unlock_bh(&table->lock);
return NF_DROP;
}
-#endif
+
nentries = chaininfo->nentries;
point = (struct ebt_entry *)chaininfo->data;
counter_base = cb_base + chaininfo->counter_offset;
@@ -1069,15 +1065,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
#ifdef CONFIG_AUDIT
if (audit_enabled) {
- struct audit_buffer *ab;
-
- ab = audit_log_start(current->audit_context, GFP_KERNEL,
- AUDIT_NETFILTER_CFG);
- if (ab) {
- audit_log_format(ab, "table=%s family=%u entries=%u",
- repl->name, AF_BRIDGE, repl->nentries);
- audit_log_end(ab);
- }
+ audit_log(current->audit_context, GFP_KERNEL,
+ AUDIT_NETFILTER_CFG,
+ "table=%s family=%u entries=%u",
+ repl->name, AF_BRIDGE, repl->nentries);
}
#endif
return ret;
diff --git a/net/can/gw.c b/net/can/gw.c
index 29748d8..73a02af 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1031,15 +1031,15 @@ static __init int cgw_module_init(void)
notifier.notifier_call = cgw_notifier;
register_netdevice_notifier(&notifier);
- if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, NULL)) {
+ if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0)) {
unregister_netdevice_notifier(&notifier);
kmem_cache_destroy(cgw_cache);
return -ENOBUFS;
}
/* Only the first call to __rtnl_register can fail */
- __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, NULL);
- __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, NULL);
+ __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, 0);
+ __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, 0);
return 0;
}
diff --git a/net/core/Makefile b/net/core/Makefile
index 79f9479..56d771a 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,9 +9,9 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
- sock_diag.o dev_ioctl.o tso.o sock_reuseport.o
+ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
+ fib_notifier.o
-obj-$(CONFIG_XFRM) += flow.o
obj-y += net-sysfs.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 8c2f448..f7fb7e3 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -579,27 +579,12 @@ fault:
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
-/**
- * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
- * @skb: buffer to copy
- * @from: the source to copy from
- *
- * The function will first copy up to headlen, and then pin the userspace
- * pages and build frags through them.
- *
- * Returns 0, -EFAULT or -EMSGSIZE.
- */
-int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
+int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+ struct iov_iter *from, size_t length)
{
- int len = iov_iter_count(from);
- int copy = min_t(int, skb_headlen(skb), len);
- int frag = 0;
+ int frag = skb_shinfo(skb)->nr_frags;
- /* copy up to skb headlen */
- if (skb_copy_datagram_from_iter(skb, 0, from, copy))
- return -EFAULT;
-
- while (iov_iter_count(from)) {
+ while (length && iov_iter_count(from)) {
struct page *pages[MAX_SKB_FRAGS];
size_t start;
ssize_t copied;
@@ -609,18 +594,24 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE;
- copied = iov_iter_get_pages(from, pages, ~0U,
+ copied = iov_iter_get_pages(from, pages, length,
MAX_SKB_FRAGS - frag, &start);
if (copied < 0)
return -EFAULT;
iov_iter_advance(from, copied);
+ length -= copied;
truesize = PAGE_ALIGN(copied + start);
skb->data_len += copied;
skb->len += copied;
skb->truesize += truesize;
- refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+ if (sk && sk->sk_type == SOCK_STREAM) {
+ sk->sk_wmem_queued += truesize;
+ sk_mem_charge(sk, truesize);
+ } else {
+ refcount_add(truesize, &skb->sk->sk_wmem_alloc);
+ }
while (copied) {
int size = min_t(int, copied, PAGE_SIZE - start);
skb_fill_page_desc(skb, frag++, pages[n], start, size);
@@ -631,6 +622,28 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
}
return 0;
}
+EXPORT_SYMBOL(__zerocopy_sg_from_iter);
+
+/**
+ * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
+ * @skb: buffer to copy
+ * @from: the source to copy from
+ *
+ * The function will first copy up to headlen, and then pin the userspace
+ * pages and build frags through them.
+ *
+ * Returns 0, -EFAULT or -EMSGSIZE.
+ */
+int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
+{
+ int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));
+
+ /* copy up to skb headlen */
+ if (skb_copy_datagram_from_iter(skb, 0, from, copy))
+ return -EFAULT;
+
+ return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
+}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
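
Note that __zerocopy_sg_from_iter() now charges the pinned pages to sk_wmem_queued/sk_mem_charge() for stream sockets, which is what lets TCP use it for zerocopy transmission. A minimal sketch of the corresponding userspace side, assuming the SO_ZEROCOPY/MSG_ZEROCOPY constants introduced elsewhere in this series (the fallback values below mirror the asm-generic definitions):

#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY	60		/* asm-generic value from this series */
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY	0x4000000
#endif

/* Send one buffer with zerocopy on a connected TCP socket. */
static int send_zerocopy(int fd, const void *buf, size_t len)
{
	int one = 1;

	if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
		return -errno;

	/* Pages backing buf stay pinned until the completion
	 * notification arrives on the socket error queue.
	 */
	if (send(fd, buf, len, MSG_ZEROCOPY) != (ssize_t)len)
		return -errno;

	return 0;
}

A real sender must reap the completion notifications from the error queue before reusing the buffer; that part is omitted from this sketch.
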
diff --git a/net/core/dev.c b/net/core/dev.c
index 86b4b0a..6f845e4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -144,6 +144,7 @@
#include <linux/netfilter_ingress.h>
#include <linux/crash_dump.h>
#include <linux/sctp.h>
+#include <net/udp_tunnel.h>
#include "net-sysfs.h"
@@ -1413,7 +1414,7 @@ int dev_open(struct net_device *dev)
}
EXPORT_SYMBOL(dev_open);
-static int __dev_close_many(struct list_head *head)
+static void __dev_close_many(struct list_head *head)
{
struct net_device *dev;
@@ -1455,23 +1456,18 @@ static int __dev_close_many(struct list_head *head)
dev->flags &= ~IFF_UP;
netpoll_poll_enable(dev);
}
-
- return 0;
}
-static int __dev_close(struct net_device *dev)
+static void __dev_close(struct net_device *dev)
{
- int retval;
LIST_HEAD(single);
list_add(&dev->close_list, &single);
- retval = __dev_close_many(&single);
+ __dev_close_many(&single);
list_del(&single);
-
- return retval;
}
-int dev_close_many(struct list_head *head, bool unlink)
+void dev_close_many(struct list_head *head, bool unlink)
{
struct net_device *dev, *tmp;
@@ -1488,8 +1484,6 @@ int dev_close_many(struct list_head *head, bool unlink)
if (unlink)
list_del_init(&dev->close_list);
}
-
- return 0;
}
EXPORT_SYMBOL(dev_close_many);
@@ -1502,7 +1496,7 @@ EXPORT_SYMBOL(dev_close_many);
* is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
* chain.
*/
-int dev_close(struct net_device *dev)
+void dev_close(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
LIST_HEAD(single);
@@ -1511,7 +1505,6 @@ int dev_close(struct net_device *dev)
dev_close_many(&single, true);
list_del(&single);
}
- return 0;
}
EXPORT_SYMBOL(dev_close);
@@ -1860,7 +1853,7 @@ static inline int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
{
- if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+ if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
return -ENOMEM;
refcount_inc(&skb->users);
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@ -2738,8 +2731,7 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
if (tx_path)
- return skb->ip_summed != CHECKSUM_PARTIAL &&
- skb->ip_summed != CHECKSUM_UNNECESSARY;
+ return skb->ip_summed != CHECKSUM_PARTIAL;
return skb->ip_summed == CHECKSUM_NONE;
}
@@ -3865,6 +3857,121 @@ drop:
return NET_RX_DROP;
}
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
+{
+ struct xdp_buff xdp;
+ u32 act = XDP_DROP;
+ void *orig_data;
+ int hlen, off;
+ u32 mac_len;
+
+ /* Reinjected packets coming from act_mirred or similar should
+ * not get XDP generic processing.
+ */
+ if (skb_cloned(skb))
+ return XDP_PASS;
+
+ if (skb_linearize(skb))
+ goto do_drop;
+
+ /* The XDP program wants to see the packet starting at the MAC
+ * header.
+ */
+ mac_len = skb->data - skb_mac_header(skb);
+ hlen = skb_headlen(skb) + mac_len;
+ xdp.data = skb->data - mac_len;
+ xdp.data_end = xdp.data + hlen;
+ xdp.data_hard_start = skb->data - skb_headroom(skb);
+ orig_data = xdp.data;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ off = xdp.data - orig_data;
+ if (off > 0)
+ __skb_pull(skb, off);
+ else if (off < 0)
+ __skb_push(skb, -off);
+
+ switch (act) {
+ case XDP_REDIRECT:
+ case XDP_TX:
+ __skb_push(skb, mac_len);
+ /* fall through */
+ case XDP_PASS:
+ break;
+
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fall through */
+ case XDP_ABORTED:
+ trace_xdp_exception(skb->dev, xdp_prog, act);
+ /* fall through */
+ case XDP_DROP:
+ do_drop:
+ kfree_skb(skb);
+ break;
+ }
+
+ return act;
+}
+
+/* When doing generic XDP we have to bypass the qdisc layer and the
+ * network taps in order to match in-driver-XDP behavior.
+ */
+void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
+{
+ struct net_device *dev = skb->dev;
+ struct netdev_queue *txq;
+ bool free_skb = true;
+ int cpu, rc;
+
+ txq = netdev_pick_tx(dev, skb, NULL);
+ cpu = smp_processor_id();
+ HARD_TX_LOCK(dev, txq, cpu);
+ if (!netif_xmit_stopped(txq)) {
+ rc = netdev_start_xmit(skb, dev, txq, 0);
+ if (dev_xmit_complete(rc))
+ free_skb = false;
+ }
+ HARD_TX_UNLOCK(dev, txq);
+ if (free_skb) {
+ trace_xdp_exception(dev, xdp_prog, XDP_TX);
+ kfree_skb(skb);
+ }
+}
+EXPORT_SYMBOL_GPL(generic_xdp_tx);
+
+static struct static_key generic_xdp_needed __read_mostly;
+
+int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
+{
+ if (xdp_prog) {
+ u32 act = netif_receive_generic_xdp(skb, xdp_prog);
+ int err;
+
+ if (act != XDP_PASS) {
+ switch (act) {
+ case XDP_REDIRECT:
+ err = xdp_do_generic_redirect(skb->dev, skb,
+ xdp_prog);
+ if (err)
+ goto out_redir;
+ /* fallthru to submit skb */
+ case XDP_TX:
+ generic_xdp_tx(skb, xdp_prog);
+ break;
+ }
+ return XDP_DROP;
+ }
+ }
+ return XDP_PASS;
+out_redir:
+ kfree_skb(skb);
+ return XDP_DROP;
+}
+EXPORT_SYMBOL_GPL(do_xdp_generic);
+
static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
@@ -3872,6 +3979,19 @@ static int netif_rx_internal(struct sk_buff *skb)
net_timestamp_check(netdev_tstamp_prequeue, skb);
trace_netif_rx(skb);
+
+ if (static_key_false(&generic_xdp_needed)) {
+ int ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
+ skb);
+
+ /* Consider XDP consuming the packet a success from
+ * the netdev point of view; we do not want to count
+ * this as an error.
+ */
+ if (ret != XDP_PASS)
+ return NET_RX_SUCCESS;
+ }
+
#ifdef CONFIG_RPS
if (static_key_false(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4292,7 +4412,7 @@ skip_classify:
}
if (pt_prev) {
- if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+ if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
goto drop;
else
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@ -4338,8 +4458,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
return ret;
}
-static struct static_key generic_xdp_needed __read_mostly;
-
static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
{
struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
@@ -4373,89 +4491,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
return ret;
}
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
- struct bpf_prog *xdp_prog)
-{
- struct xdp_buff xdp;
- u32 act = XDP_DROP;
- void *orig_data;
- int hlen, off;
- u32 mac_len;
-
- /* Reinjected packets coming from act_mirred or similar should
- * not get XDP generic processing.
- */
- if (skb_cloned(skb))
- return XDP_PASS;
-
- if (skb_linearize(skb))
- goto do_drop;
-
- /* The XDP program wants to see the packet starting at the MAC
- * header.
- */
- mac_len = skb->data - skb_mac_header(skb);
- hlen = skb_headlen(skb) + mac_len;
- xdp.data = skb->data - mac_len;
- xdp.data_end = xdp.data + hlen;
- xdp.data_hard_start = skb->data - skb_headroom(skb);
- orig_data = xdp.data;
-
- act = bpf_prog_run_xdp(xdp_prog, &xdp);
-
- off = xdp.data - orig_data;
- if (off > 0)
- __skb_pull(skb, off);
- else if (off < 0)
- __skb_push(skb, -off);
-
- switch (act) {
- case XDP_TX:
- __skb_push(skb, mac_len);
- /* fall through */
- case XDP_PASS:
- break;
-
- default:
- bpf_warn_invalid_xdp_action(act);
- /* fall through */
- case XDP_ABORTED:
- trace_xdp_exception(skb->dev, xdp_prog, act);
- /* fall through */
- case XDP_DROP:
- do_drop:
- kfree_skb(skb);
- break;
- }
-
- return act;
-}
-
-/* When doing generic XDP we have to bypass the qdisc layer and the
- * network taps in order to match in-driver-XDP behavior.
- */
-static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
-{
- struct net_device *dev = skb->dev;
- struct netdev_queue *txq;
- bool free_skb = true;
- int cpu, rc;
-
- txq = netdev_pick_tx(dev, skb, NULL);
- cpu = smp_processor_id();
- HARD_TX_LOCK(dev, txq, cpu);
- if (!netif_xmit_stopped(txq)) {
- rc = netdev_start_xmit(skb, dev, txq, 0);
- if (dev_xmit_complete(rc))
- free_skb = false;
- }
- HARD_TX_UNLOCK(dev, txq);
- if (free_skb) {
- trace_xdp_exception(dev, xdp_prog, XDP_TX);
- kfree_skb(skb);
- }
-}
-
static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
@@ -4468,17 +4503,12 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
rcu_read_lock();
if (static_key_false(&generic_xdp_needed)) {
- struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+ int ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog),
+ skb);
- if (xdp_prog) {
- u32 act = netif_receive_generic_xdp(skb, xdp_prog);
-
- if (act != XDP_PASS) {
- rcu_read_unlock();
- if (act == XDP_TX)
- generic_xdp_tx(skb, xdp_prog);
- return NET_RX_DROP;
- }
+ if (ret != XDP_PASS) {
+ rcu_read_unlock();
+ return NET_RX_DROP;
}
}
@@ -6691,8 +6721,12 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
*/
ret = 0;
- if ((old_flags ^ flags) & IFF_UP)
- ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
+ if ((old_flags ^ flags) & IFF_UP) {
+ if (old_flags & IFF_UP)
+ __dev_close(dev);
+ else
+ ret = __dev_open(dev);
+ }
if ((flags ^ dev->gflags) & IFF_PROMISC) {
int inc = (flags & IFF_PROMISC) ? 1 : -1;
@@ -7237,24 +7271,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
features &= ~NETIF_F_GSO;
}
- /* UFO needs SG and checksumming */
- if (features & NETIF_F_UFO) {
- /* maybe split UFO into V4 and V6? */
- if (!(features & NETIF_F_HW_CSUM) &&
- ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
- (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
- netdev_dbg(dev,
- "Dropping NETIF_F_UFO since no checksum offload features.\n");
- features &= ~NETIF_F_UFO;
- }
-
- if (!(features & NETIF_F_SG)) {
- netdev_dbg(dev,
- "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
- features &= ~NETIF_F_UFO;
- }
- }
-
/* GSO partial features require GSO partial be set */
if ((features & dev->gso_partial_features) &&
!(features & NETIF_F_GSO_PARTIAL)) {
@@ -7315,8 +7331,27 @@ sync_lower:
netdev_for_each_lower_dev(dev, lower, iter)
netdev_sync_lower_features(dev, lower, features);
- if (!err)
+ if (!err) {
+ netdev_features_t diff = features ^ dev->features;
+
+ if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) {
+ /* udp_tunnel_{get,drop}_rx_info both need
+ * NETIF_F_RX_UDP_TUNNEL_PORT enabled on the
+ * device, or they won't do anything.
+ * Thus we need to update dev->features
+ * *before* calling udp_tunnel_get_rx_info,
+ * but *after* calling udp_tunnel_drop_rx_info.
+ */
+ if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
+ dev->features = features;
+ udp_tunnel_get_rx_info(dev);
+ } else {
+ udp_tunnel_drop_rx_info(dev);
+ }
+ }
+
dev->features = features;
+ }
return err < 0 ? 0 : 1;
}
@@ -7518,6 +7553,12 @@ int register_netdevice(struct net_device *dev)
*/
dev->hw_features |= NETIF_F_SOFT_FEATURES;
dev->features |= NETIF_F_SOFT_FEATURES;
+
+ if (dev->netdev_ops->ndo_udp_tunnel_add) {
+ dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
+ dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
+ }
+
dev->wanted_features = dev->features & dev->hw_features;
if (!(dev->flags & IFF_LOOPBACK))
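
With do_xdp_generic() called from both netif_rx_internal() and netif_receive_skb_internal(), any device can run an XDP program without native driver support, at the cost of operating on skbs (cloned packets are passed through untouched and the skb is linearized first). A minimal program such a hook would run; the SEC() macro is the usual samples-style boilerplate, and attaching in generic mode is a loader concern (iproute2 exposes it as xdpgeneric) rather than kernel ABI:

#include <linux/bpf.h>

#define SEC(NAME) __attribute__((section(NAME), used))

SEC("xdp")
int xdp_drop_runts(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	/* netif_receive_generic_xdp() points xdp.data at the MAC
	 * header, so a full 14-byte Ethernet header must fit.
	 */
	if (data + 14 > data_end)
		return XDP_DROP;

	return XDP_PASS;
}
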
diff --git a/net/core/devlink.c b/net/core/devlink.c
index a0adfc3..7d430c1 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -29,6 +29,57 @@
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
+static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
+ {
+ .name = "destination mac",
+ .id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC,
+ .bitwidth = 48,
+ },
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ethernet = {
+ .name = "ethernet",
+ .id = DEVLINK_DPIPE_HEADER_ETHERNET,
+ .fields = devlink_dpipe_fields_ethernet,
+ .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet),
+ .global = true,
+};
+EXPORT_SYMBOL(devlink_dpipe_header_ethernet);
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = {
+ {
+ .name = "destination ip",
+ .id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP,
+ .bitwidth = 32,
+ },
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv4 = {
+ .name = "ipv4",
+ .id = DEVLINK_DPIPE_HEADER_IPV4,
+ .fields = devlink_dpipe_fields_ipv4,
+ .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4),
+ .global = true,
+};
+EXPORT_SYMBOL(devlink_dpipe_header_ipv4);
+
+static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = {
+ {
+ .name = "destination ip",
+ .id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP,
+ .bitwidth = 128,
+ },
+};
+
+struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
+ .name = "ipv6",
+ .id = DEVLINK_DPIPE_HEADER_IPV6,
+ .fields = devlink_dpipe_fields_ipv6,
+ .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6),
+ .global = true,
+};
+EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
+
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
static LIST_HEAD(devlink_list);
@@ -1613,13 +1664,15 @@ static int devlink_dpipe_table_put(struct sk_buff *skb,
struct devlink_dpipe_table *table)
{
struct nlattr *table_attr;
+ u64 table_size;
+ table_size = table->table_ops->size_get(table->priv);
table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE);
if (!table_attr)
return -EMSGSIZE;
if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) ||
- nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size,
+ nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size,
DEVLINK_ATTR_PAD))
goto nla_put_failure;
if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
@@ -1960,6 +2013,28 @@ int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx)
}
EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close);
+void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry)
+{
+ unsigned int value_count, value_index;
+ struct devlink_dpipe_value *value;
+
+ value = entry->action_values;
+ value_count = entry->action_values_count;
+ for (value_index = 0; value_index < value_count; value_index++) {
+ kfree(value[value_index].value);
+ kfree(value[value_index].mask);
+ }
+
+ value = entry->match_values;
+ value_count = entry->match_values_count;
+ for (value_index = 0; value_index < value_count; value_index++) {
+ kfree(value[value_index].value);
+ kfree(value[value_index].mask);
+ }
+}
+EXPORT_SYMBOL(devlink_dpipe_entry_clear);
+
static int devlink_dpipe_entries_fill(struct genl_info *info,
enum devlink_command cmd, int flags,
struct devlink_dpipe_table *table)
@@ -2684,20 +2759,21 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled);
* @table_name: table name
* @table_ops: table ops
* @priv: priv
- * @size: size
* @counter_control_extern: external control for counters
*/
int devlink_dpipe_table_register(struct devlink *devlink,
const char *table_name,
struct devlink_dpipe_table_ops *table_ops,
- void *priv, u64 size,
- bool counter_control_extern)
+ void *priv, bool counter_control_extern)
{
struct devlink_dpipe_table *table;
if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name))
return -EEXIST;
+ if (WARN_ON(!table_ops->size_get))
+ return -EINVAL;
+
table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return -ENOMEM;
@@ -2705,7 +2781,6 @@ int devlink_dpipe_table_register(struct devlink *devlink,
table->name = table_name;
table->table_ops = table_ops;
table->priv = priv;
- table->size = size;
table->counter_control_extern = counter_control_extern;
mutex_lock(&devlink_mutex);
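
devlink_dpipe_table_register() now takes a mandatory size_get callback instead of a fixed size, so table capacity is queried at dump time. A sketch of how a driver might satisfy the new contract; the my_* names and the private structure are illustrative:

#include <net/devlink.h>

/* Hypothetical per-table driver state. */
struct my_table_priv {
	u64 max_entries;
};

/* Called on every dump instead of using a size fixed at
 * registration time, which is the point of this change.
 */
static u64 my_table_size_get(void *priv)
{
	struct my_table_priv *tp = priv;

	return tp->max_entries;
}

static struct devlink_dpipe_table_ops my_table_ops = {
	.size_get = my_table_size_get,
	/* .matches_dump, .actions_dump, .entries_dump and
	 * .counters_set_update omitted from this sketch.
	 */
};

Registration then becomes devlink_dpipe_table_register(devlink, "my_table", &my_table_ops, &my_priv, false), matching the new five-argument signature.
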
diff --git a/net/core/dst.c b/net/core/dst.c
index 00aa972..a6c47da 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -55,7 +55,7 @@ const struct dst_metrics dst_default_metrics = {
* We really want to avoid false sharing on this variable, and catch
* any writes on it.
*/
- .refcnt = ATOMIC_INIT(1),
+ .refcnt = REFCOUNT_INIT(1),
};
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
@@ -213,7 +213,7 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
unsigned long prev, new;
- atomic_set(&p->refcnt, 1);
+ refcount_set(&p->refcnt, 1);
memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
new = (unsigned long) p;
@@ -225,7 +225,7 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
if (prev & DST_METRICS_READ_ONLY)
p = NULL;
} else if (prev & DST_METRICS_REFCOUNTED) {
- if (atomic_dec_and_test(&old_p->refcnt))
+ if (refcount_dec_and_test(&old_p->refcnt))
kfree(old_p);
}
}
@@ -299,7 +299,8 @@ EXPORT_SYMBOL_GPL(metadata_dst_alloc);
void metadata_dst_free(struct metadata_dst *md_dst)
{
#ifdef CONFIG_DST_CACHE
- dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
+ if (md_dst->type == METADATA_IP_TUNNEL)
+ dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
#endif
kfree(md_dst);
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 674b6c9..6a582ae 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -76,7 +76,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_LRO_BIT] = "rx-lro",
[NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
- [NETIF_F_UFO_BIT] = "tx-udp-fragmentation",
[NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
[NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
[NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation",
@@ -106,6 +105,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_HW_TC_BIT] = "hw-tc-offload",
[NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
[NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
+ [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
};
static const char
@@ -299,9 +299,6 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
case ETHTOOL_GTSO:
case ETHTOOL_STSO:
return NETIF_F_ALL_TSO;
- case ETHTOOL_GUFO:
- case ETHTOOL_SUFO:
- return NETIF_F_UFO;
case ETHTOOL_GGSO:
case ETHTOOL_SGSO:
return NETIF_F_GSO;
@@ -2515,6 +2512,33 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
return ret;
}
+static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM };
+
+ if (!dev->ethtool_ops->get_fecparam)
+ return -EOPNOTSUPP;
+
+ dev->ethtool_ops->get_fecparam(dev, &fecparam);
+
+ if (copy_to_user(useraddr, &fecparam, sizeof(fecparam)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_fecparam fecparam;
+
+ if (!dev->ethtool_ops->set_fecparam)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&fecparam, useraddr, sizeof(fecparam)))
+ return -EFAULT;
+
+ return dev->ethtool_ops->set_fecparam(dev, &fecparam);
+}
+
/* The main entry point in this file. Called from net/core/dev_ioctl.c */
int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -2555,7 +2579,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GPHYSTATS:
case ETHTOOL_GTSO:
case ETHTOOL_GPERMADDR:
- case ETHTOOL_GUFO:
case ETHTOOL_GGSO:
case ETHTOOL_GGRO:
case ETHTOOL_GFLAGS:
@@ -2574,6 +2597,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GTUNABLE:
case ETHTOOL_PHY_GTUNABLE:
case ETHTOOL_GLINKSETTINGS:
+ case ETHTOOL_GFECPARAM:
break;
default:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -2723,7 +2747,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GRXCSUM:
case ETHTOOL_GSG:
case ETHTOOL_GTSO:
- case ETHTOOL_GUFO:
case ETHTOOL_GGSO:
case ETHTOOL_GGRO:
rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
@@ -2732,7 +2755,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXCSUM:
case ETHTOOL_SSG:
case ETHTOOL_STSO:
- case ETHTOOL_SUFO:
case ETHTOOL_SGSO:
case ETHTOOL_SGRO:
rc = ethtool_set_one_feature(dev, useraddr, ethcmd);
@@ -2785,6 +2807,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_PHY_STUNABLE:
rc = set_phy_tunable(dev, useraddr);
break;
+ case ETHTOOL_GFECPARAM:
+ rc = ethtool_get_fecparam(dev, useraddr);
+ break;
+ case ETHTOOL_SFECPARAM:
+ rc = ethtool_set_fecparam(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
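
ETHTOOL_GFECPARAM/ETHTOOL_SFECPARAM are plumbed through dev_ethtool() above; drivers opt in by implementing two new ethtool_ops callbacks. A sketch of the driver half for a hypothetical device that only supports RS FEC (all my_* names are illustrative):

#include <linux/ethtool.h>
#include <linux/netdevice.h>

static int my_get_fecparam(struct net_device *dev,
			   struct ethtool_fecparam *fec)
{
	/* Report supported and currently active FEC modes. */
	fec->fec = ETHTOOL_FEC_OFF | ETHTOOL_FEC_RS;
	fec->active_fec = ETHTOOL_FEC_RS;
	return 0;
}

static int my_set_fecparam(struct net_device *dev,
			   struct ethtool_fecparam *fec)
{
	if (fec->fec & ~(ETHTOOL_FEC_OFF | ETHTOOL_FEC_RS))
		return -EOPNOTSUPP;

	/* Program the MAC/PHY for the requested mode here. */
	return 0;
}

static const struct ethtool_ops my_ethtool_ops = {
	.get_fecparam = my_get_fecparam,
	.set_fecparam = my_set_fecparam,
};
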
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
new file mode 100644
index 0000000..4fc202d
--- /dev/null
+++ b/net/core/fib_notifier.c
@@ -0,0 +1,173 @@
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(fib_chain);
+
+int call_fib_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->net = net;
+ return nb->notifier_call(nb, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifier);
+
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->net = net;
+ return atomic_notifier_call_chain(&fib_chain, event_type, info);
+}
+EXPORT_SYMBOL(call_fib_notifiers);
+
+static unsigned int fib_seq_sum(void)
+{
+ struct fib_notifier_ops *ops;
+ unsigned int fib_seq = 0;
+ struct net *net;
+
+ rtnl_lock();
+ for_each_net(net) {
+ list_for_each_entry(ops, &net->fib_notifier_ops, list) {
+ if (!try_module_get(ops->owner))
+ continue;
+ fib_seq += ops->fib_seq_read(net);
+ module_put(ops->owner);
+ }
+ }
+ rtnl_unlock();
+
+ return fib_seq;
+}
+
+static int fib_net_dump(struct net *net, struct notifier_block *nb)
+{
+ struct fib_notifier_ops *ops;
+
+ list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+ int err;
+
+ if (!try_module_get(ops->owner))
+ continue;
+ err = ops->fib_dump(net, nb);
+ module_put(ops->owner);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb),
+ unsigned int fib_seq)
+{
+ atomic_notifier_chain_register(&fib_chain, nb);
+ if (fib_seq == fib_seq_sum())
+ return true;
+ atomic_notifier_chain_unregister(&fib_chain, nb);
+ if (cb)
+ cb(nb);
+ return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+ void (*cb)(struct notifier_block *nb))
+{
+ int retries = 0;
+ int err;
+
+ do {
+ unsigned int fib_seq = fib_seq_sum();
+ struct net *net;
+
+ rcu_read_lock();
+ for_each_net_rcu(net) {
+ err = fib_net_dump(net, nb);
+ if (err)
+ goto err_fib_net_dump;
+ }
+ rcu_read_unlock();
+
+ if (fib_dump_is_consistent(nb, cb, fib_seq))
+ return 0;
+ } while (++retries < FIB_DUMP_MAX_RETRIES);
+
+ return -EBUSY;
+
+err_fib_net_dump:
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(register_fib_notifier);
+
+int unregister_fib_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&fib_chain, nb);
+}
+EXPORT_SYMBOL(unregister_fib_notifier);
+
+static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
+ struct net *net)
+{
+ struct fib_notifier_ops *o;
+
+ list_for_each_entry(o, &net->fib_notifier_ops, list)
+ if (ops->family == o->family)
+ return -EEXIST;
+ list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
+ return 0;
+}
+
+struct fib_notifier_ops *
+fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net)
+{
+ struct fib_notifier_ops *ops;
+ int err;
+
+ ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
+ if (!ops)
+ return ERR_PTR(-ENOMEM);
+
+ err = __fib_notifier_ops_register(ops, net);
+ if (err)
+ goto err_register;
+
+ return ops;
+
+err_register:
+ kfree(ops);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(fib_notifier_ops_register);
+
+void fib_notifier_ops_unregister(struct fib_notifier_ops *ops)
+{
+ list_del_rcu(&ops->list);
+ kfree_rcu(ops, rcu);
+}
+EXPORT_SYMBOL(fib_notifier_ops_unregister);
+
+static int __net_init fib_notifier_net_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->fib_notifier_ops);
+ return 0;
+}
+
+static struct pernet_operations fib_notifier_net_ops = {
+ .init = fib_notifier_net_init,
+};
+
+static int __init fib_notifier_init(void)
+{
+ return register_pernet_subsys(&fib_notifier_net_ops);
+}
+
+subsys_initcall(fib_notifier_init);
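
A consumer of the new FIB notifier core registers a notifier_block plus a callback that discards a partial dump when register_fib_notifier()'s consistency check fails. A minimal sketch of such a consumer; the my_* names are illustrative:

#include <linux/init.h>
#include <linux/notifier.h>
#include <net/fib_notifier.h>

static int my_fib_event(struct notifier_block *nb, unsigned long event,
			void *ptr)
{
	struct fib_notifier_info *info = ptr;

	switch (event) {
	case FIB_EVENT_RULE_ADD:
	case FIB_EVENT_RULE_DEL:
		/* info->family identifies the address family whose
		 * rules changed.
		 */
		break;
	}

	return NOTIFY_DONE;
}

static struct notifier_block my_fib_nb = {
	.notifier_call = my_fib_event,
};

/* Called when the just-completed dump raced with FIB changes;
 * drop any state learned from it before the dump is retried.
 */
static void my_fib_dump_flush(struct notifier_block *nb)
{
}

static int __init my_fib_init(void)
{
	return register_fib_notifier(&my_fib_nb, my_fib_dump_flush);
}
device_initcall(my_fib_init);
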
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index fdcb1bc..9a6d97c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -299,6 +299,67 @@ out:
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);
+static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_rule *rule, int family)
+{
+ struct fib_rule_notifier_info info = {
+ .info.family = family,
+ .rule = rule,
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib_rule_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct fib_rule *rule,
+ struct fib_rules_ops *ops)
+{
+ struct fib_rule_notifier_info info = {
+ .info.family = ops->family,
+ .rule = rule,
+ };
+
+ ops->fib_rules_seq++;
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
+/* Called with rcu_read_lock() */
+int fib_rules_dump(struct net *net, struct notifier_block *nb, int family)
+{
+ struct fib_rules_ops *ops;
+ struct fib_rule *rule;
+
+ ops = lookup_rules_ops(net, family);
+ if (!ops)
+ return -EAFNOSUPPORT;
+ list_for_each_entry_rcu(rule, &ops->rules_list, list)
+ call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule,
+ family);
+ rules_ops_put(ops);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fib_rules_dump);
+
+unsigned int fib_rules_seq_read(struct net *net, int family)
+{
+ unsigned int fib_rules_seq;
+ struct fib_rules_ops *ops;
+
+ ASSERT_RTNL();
+
+ ops = lookup_rules_ops(net, family);
+ if (!ops)
+ return 0;
+ fib_rules_seq = ops->fib_rules_seq;
+ rules_ops_put(ops);
+
+ return fib_rules_seq;
+}
+EXPORT_SYMBOL_GPL(fib_rules_seq_read);
+
static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
struct fib_rules_ops *ops)
{
@@ -548,6 +609,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rule->tun_id)
ip_tunnel_need_metadata();
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -687,6 +749,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops);
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).portid);
fib_rule_put(rule);
@@ -963,9 +1026,9 @@ static struct pernet_operations fib_rules_net_ops = {
static int __init fib_rules_init(void)
{
int err;
- rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);
+ rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0);
err = register_pernet_subsys(&fib_rules_net_ops);
if (err < 0)
diff --git a/net/core/filter.c b/net/core/filter.c
index 1699749..5912c73 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -55,6 +55,7 @@
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
+#include <linux/bpf_trace.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -513,14 +514,27 @@ do_pass:
break;
}
- /* Convert JEQ into JNE when 'jump_true' is next insn. */
- if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
- insn->code = BPF_JMP | BPF_JNE | bpf_src;
+ /* Convert some jumps when 'jump_true' is next insn. */
+ if (fp->jt == 0) {
+ switch (BPF_OP(fp->code)) {
+ case BPF_JEQ:
+ insn->code = BPF_JMP | BPF_JNE | bpf_src;
+ break;
+ case BPF_JGT:
+ insn->code = BPF_JMP | BPF_JLE | bpf_src;
+ break;
+ case BPF_JGE:
+ insn->code = BPF_JMP | BPF_JLT | bpf_src;
+ break;
+ default:
+ goto jmp_rest;
+ }
+
target = i + fp->jf + 1;
BPF_EMIT_JMP;
break;
}
-
+jmp_rest:
/* Other jumps are mapped into two insns: Jxx and JA. */
target = i + fp->jt + 1;
insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
@@ -1778,6 +1792,8 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
struct redirect_info {
u32 ifindex;
u32 flags;
+ struct bpf_map *map;
+ struct bpf_map *map_to_flush;
};
static DEFINE_PER_CPU(struct redirect_info, redirect_info);
@@ -1791,6 +1807,7 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
ri->ifindex = ifindex;
ri->flags = flags;
+ ri->map = NULL;
return TC_ACT_REDIRECT;
}
@@ -1818,6 +1835,45 @@ static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+ if (unlikely(flags))
+ return SK_ABORTED;
+
+ ri->ifindex = key;
+ ri->flags = flags;
+ ri->map = map;
+
+ return SK_REDIRECT;
+}
+
+struct sock *do_sk_redirect_map(void)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct sock *sk = NULL;
+
+ if (ri->map) {
+ sk = __sock_map_lookup_elem(ri->map, ri->ifindex);
+
+ ri->ifindex = 0;
+ ri->map = NULL;
+ /* we do not clear flags for future lookup */
+ }
+
+ return sk;
+}
+
+static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
+ .func = bpf_sk_redirect_map,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
@@ -2024,8 +2080,8 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
return ret;
if (skb_is_gso(skb)) {
- /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to
- * be changed into SKB_GSO_TCPV6.
+ /* SKB_GSO_TCPV4 needs to be changed into
+ * SKB_GSO_TCPV6.
*/
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
@@ -2060,8 +2116,8 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
return ret;
if (skb_is_gso(skb)) {
- /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to
- * be changed into SKB_GSO_TCPV4.
+ /* SKB_GSO_TCPV6 needs to be changed into
+ * SKB_GSO_TCPV4.
*/
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
@@ -2412,6 +2468,180 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
+static int __bpf_tx_xdp(struct net_device *dev,
+ struct bpf_map *map,
+ struct xdp_buff *xdp,
+ u32 index)
+{
+ int err;
+
+ if (!dev->netdev_ops->ndo_xdp_xmit)
+ return -EOPNOTSUPP;
+
+ err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+ if (err)
+ return err;
+ if (map)
+ __dev_map_insert_ctx(map, index);
+ else
+ dev->netdev_ops->ndo_xdp_flush(dev);
+ return 0;
+}
+
+void xdp_do_flush_map(void)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct bpf_map *map = ri->map_to_flush;
+
+ ri->map_to_flush = NULL;
+ if (map)
+ __dev_map_flush(map);
+}
+EXPORT_SYMBOL_GPL(xdp_do_flush_map);
+
+static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct bpf_map *map = ri->map;
+ u32 index = ri->ifindex;
+ struct net_device *fwd;
+ int err;
+
+ ri->ifindex = 0;
+ ri->map = NULL;
+
+ fwd = __dev_map_lookup_elem(map, index);
+ if (!fwd) {
+ err = -EINVAL;
+ goto err;
+ }
+ if (ri->map_to_flush && ri->map_to_flush != map)
+ xdp_do_flush_map();
+
+ err = __bpf_tx_xdp(fwd, map, xdp, index);
+ if (unlikely(err))
+ goto err;
+
+ ri->map_to_flush = map;
+ _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+ return 0;
+err:
+ _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+ return err;
+}
+
+int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct net_device *fwd;
+ u32 index = ri->ifindex;
+ int err;
+
+ if (ri->map)
+ return xdp_do_redirect_map(dev, xdp, xdp_prog);
+
+ fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ ri->ifindex = 0;
+ if (unlikely(!fwd)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
+ if (unlikely(err))
+ goto err;
+
+ _trace_xdp_redirect(dev, xdp_prog, index);
+ return 0;
+err:
+ _trace_xdp_redirect_err(dev, xdp_prog, index, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(xdp_do_redirect);
+
+int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ u32 index = ri->ifindex;
+ struct net_device *fwd;
+ unsigned int len;
+ int err = 0;
+
+ fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ ri->ifindex = 0;
+ if (unlikely(!fwd)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (unlikely(!(fwd->flags & IFF_UP))) {
+ err = -ENETDOWN;
+ goto err;
+ }
+
+ len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
+ if (skb->len > len) {
+ err = -EMSGSIZE;
+ goto err;
+ }
+
+ skb->dev = fwd;
+ _trace_xdp_redirect(dev, xdp_prog, index);
+ return 0;
+err:
+ _trace_xdp_redirect_err(dev, xdp_prog, index, err);
+ return err;
+}
+EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
+
+BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+ if (unlikely(flags))
+ return XDP_ABORTED;
+
+ ri->ifindex = ifindex;
+ ri->flags = flags;
+
+ return XDP_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_xdp_redirect_proto = {
+ .func = bpf_xdp_redirect,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+ if (unlikely(flags))
+ return XDP_ABORTED;
+
+ ri->ifindex = ifindex;
+ ri->flags = flags;
+ ri->map = map;
+
+ return XDP_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
+ .func = bpf_xdp_redirect_map,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
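/* Illustrative sketch, not part of this patch: redirecting through a
 * DEVMAP lets __bpf_tx_xdp() mark the map entry and defer the actual
 * transmit doorbell to xdp_do_flush_map() at the end of the NAPI poll.
 * Map geometry and the key below are made-up examples.
 */
struct bpf_map_def SEC("maps") tx_port = {
        .type = BPF_MAP_TYPE_DEVMAP,
        .key_size = sizeof(int),
        .value_size = sizeof(int),
        .max_entries = 64,
};

SEC("xdp_redirect_map")
int xdp_redirect_map_prog(struct xdp_md *ctx)
{
        return bpf_redirect_map(&tx_port, 0 /* example key */, 0);
}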
bool bpf_helper_changes_pkt_data(void *func)
{
if (func == bpf_skb_vlan_push ||
@@ -2916,6 +3146,20 @@ bpf_base_func_proto(enum bpf_func_id func_id)
}
static const struct bpf_func_proto *
+sock_filter_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ /* inet and inet6 sockets are created in a process
+ * context, so there is always a valid uid/gid.
+ */
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -3007,6 +3251,10 @@ xdp_func_proto(enum bpf_func_id func_id)
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_xdp_adjust_head:
return &bpf_xdp_adjust_head_proto;
+ case BPF_FUNC_redirect:
+ return &bpf_xdp_redirect_proto;
+ case BPF_FUNC_redirect_map:
+ return &bpf_xdp_redirect_map_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -3045,6 +3293,32 @@ static const struct bpf_func_proto *
switch (func_id) {
case BPF_FUNC_setsockopt:
return &bpf_setsockopt_proto;
+ case BPF_FUNC_sock_map_update:
+ return &bpf_sock_map_update_proto;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+}
+
+static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_store_bytes:
+ return &bpf_skb_store_bytes_proto;
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
+ case BPF_FUNC_skb_pull_data:
+ return &bpf_skb_pull_data_proto;
+ case BPF_FUNC_skb_change_tail:
+ return &bpf_skb_change_tail_proto;
+ case BPF_FUNC_skb_change_head:
+ return &bpf_skb_change_head_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_cookie_proto;
+ case BPF_FUNC_get_socket_uid:
+ return &bpf_get_socket_uid_proto;
+ case BPF_FUNC_sk_redirect_map:
+ return &bpf_sk_redirect_map_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -3102,6 +3376,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (off + size > offsetofend(struct __sk_buff, cb[4]))
return false;
break;
+ case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
+ case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
+ case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
+ case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_end):
if (size != size_default)
@@ -3130,6 +3408,7 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_end):
+ case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -3151,6 +3430,7 @@ static bool lwt_is_valid_access(int off, int size,
{
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
+ case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -3184,6 +3464,8 @@ static bool sock_filter_is_valid_access(int off, int size,
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct bpf_sock, bound_dev_if):
+ case offsetof(struct bpf_sock, mark):
+ case offsetof(struct bpf_sock, priority):
break;
default:
return false;
@@ -3201,8 +3483,8 @@ static bool sock_filter_is_valid_access(int off, int size,
return true;
}
-static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
- const struct bpf_prog *prog)
+static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog, int drop_verdict)
{
struct bpf_insn *insn = insn_buf;
@@ -3229,7 +3511,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
* return TC_ACT_SHOT;
*/
*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
- *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT);
+ *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
*insn++ = BPF_EXIT_INSN();
/* restore: */
@@ -3240,6 +3522,12 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
return insn - insn_buf;
}
+static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
+}
+
static bool tc_cls_act_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
@@ -3264,6 +3552,8 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
+ case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ return false;
}
return bpf_skb_is_valid_access(off, size, type, info);
@@ -3336,6 +3626,41 @@ static bool sock_ops_is_valid_access(int off, int size,
return __is_valid_sock_ops_access(off, size);
}
+static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
+}
+
+static bool sk_skb_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, mark):
+ case bpf_ctx_range(struct __sk_buff, tc_index):
+ case bpf_ctx_range(struct __sk_buff, priority):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, tc_classid):
+ return false;
+ case bpf_ctx_range(struct __sk_buff, data):
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case bpf_ctx_range(struct __sk_buff, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ }
+
+ return bpf_skb_is_valid_access(off, size, type, info);
+}
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -3521,6 +3846,106 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
break;
+ case offsetof(struct __sk_buff, family):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct sock_common,
+ skc_family,
+ 2, target_size));
+ break;
+ case offsetof(struct __sk_buff, remote_ip4):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct sock_common,
+ skc_daddr,
+ 4, target_size));
+ break;
+ case offsetof(struct __sk_buff, local_ip4):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_rcv_saddr) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct sock_common,
+ skc_rcv_saddr,
+ 4, target_size));
+ break;
+ case offsetof(struct __sk_buff, remote_ip6[0]) ...
+ offsetof(struct __sk_buff, remote_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_v6_daddr.s6_addr32[0]) != 4);
+
+ off = si->off;
+ off -= offsetof(struct __sk_buff, remote_ip6[0]);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common,
+ skc_v6_daddr.s6_addr32[0]) +
+ off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+ case offsetof(struct __sk_buff, local_ip6[0]) ...
+ offsetof(struct __sk_buff, local_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0]) != 4);
+
+ off = si->off;
+ off -= offsetof(struct __sk_buff, local_ip6[0]);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common,
+ skc_v6_rcv_saddr.s6_addr32[0]) +
+ off);
+#else
+ *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+ break;
+
+ case offsetof(struct __sk_buff, remote_port):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct sock_common,
+ skc_dport,
+ 2, target_size));
+#ifndef __BIG_ENDIAN_BITFIELD
+ *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
+#endif
+ break;
+
+ case offsetof(struct __sk_buff, local_port):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_buff, sk));
+ *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct sock_common,
+ skc_num, 2, target_size));
+ break;
}
return insn - insn_buf;
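/* Illustrative sketch, not part of this patch: the new __sk_buff socket
 * fields as read from a sk_skb program. Per the conversions above,
 * remote_ip4/remote_port come out in network byte order while local_port
 * (skc_num) is host byte order. Port 22 is a made-up example.
 */
SEC("sk_skb")
int drop_local_ssh(struct __sk_buff *skb)
{
        if (skb->family == 2 /* AF_INET */ &&
            skb->local_port == 22 /* host byte order */)
                return SK_DROP;
        return SK_REDIRECT;     /* target chosen via bpf_sk_redirect_map(),
                                 * see the sockmap sketch further below */
}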
@@ -3545,6 +3970,28 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
offsetof(struct sock, sk_bound_dev_if));
break;
+ case offsetof(struct bpf_sock, mark):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ offsetof(struct sock, sk_mark));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ offsetof(struct sock, sk_mark));
+ break;
+
+ case offsetof(struct bpf_sock, priority):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ offsetof(struct sock, sk_priority));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+ offsetof(struct sock, sk_priority));
+ break;
+
case offsetof(struct bpf_sock, family):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
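/* Illustrative sketch, not part of this patch: with mark/priority now
 * writable and bpf_get_current_uid_gid() wired up via
 * sock_filter_func_proto(), a BPF_PROG_TYPE_CGROUP_SOCK program can tag
 * sockets at creation time. The uid, mark and priority values are
 * made-up examples.
 */
SEC("cgroup/sock")
int sock_create_tag(struct bpf_sock *sk)
{
        __u32 uid = (__u32)bpf_get_current_uid_gid();   /* low 32 bits: uid */

        if (uid == 1000) {
                sk->mark = 0xcafe;
                sk->priority = 6;
        }
        return 1;       /* 1 == allow the socket */
}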
@@ -3794,7 +4241,7 @@ const struct bpf_verifier_ops lwt_xmit_prog_ops = {
};
const struct bpf_verifier_ops cg_sock_prog_ops = {
- .get_func_proto = bpf_base_func_proto,
+ .get_func_proto = sock_filter_func_proto,
.is_valid_access = sock_filter_is_valid_access,
.convert_ctx_access = sock_filter_convert_ctx_access,
};
@@ -3805,6 +4252,13 @@ const struct bpf_verifier_ops sock_ops_prog_ops = {
.convert_ctx_access = sock_ops_convert_ctx_access,
};
+const struct bpf_verifier_ops sk_skb_prog_ops = {
+ .get_func_proto = sk_skb_func_proto,
+ .is_valid_access = sk_skb_is_valid_access,
+ .convert_ctx_access = bpf_convert_ctx_access,
+ .gen_prologue = sk_skb_prologue,
+};
+
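/* Illustrative sketch, not part of this patch: a sk_skb verdict program
 * steering traffic through a SOCKMAP. Map geometry and the key are
 * made-up; in this series bpf_sk_redirect_map() takes (map, key, flags)
 * and itself returns the SK_REDIRECT verdict.
 */
struct bpf_map_def SEC("maps") sock_map = {
        .type = BPF_MAP_TYPE_SOCKMAP,
        .key_size = sizeof(int),
        .value_size = sizeof(int),
        .max_entries = 16,
};

SEC("sk_skb")
int verdict_prog(struct __sk_buff *skb)
{
        /* steer every skb to the socket stored at slot 0 */
        return bpf_sk_redirect_map(&sock_map, 0, 0);
}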
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow.c b/net/core/flow.c
deleted file mode 100644
index f7f5d19..0000000
--- a/net/core/flow.c
+++ /dev/null
@@ -1,516 +0,0 @@
-/* flow.c: Generic flow cache.
- *
- * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
- * Copyright (C) 2003 David S. Miller (davem@redhat.com)
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/jhash.h>
-#include <linux/interrupt.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/completion.h>
-#include <linux/percpu.h>
-#include <linux/bitops.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/mutex.h>
-#include <net/flow.h>
-#include <linux/atomic.h>
-#include <linux/security.h>
-#include <net/net_namespace.h>
-
-struct flow_cache_entry {
- union {
- struct hlist_node hlist;
- struct list_head gc_list;
- } u;
- struct net *net;
- u16 family;
- u8 dir;
- u32 genid;
- struct flowi key;
- struct flow_cache_object *object;
-};
-
-struct flow_flush_info {
- struct flow_cache *cache;
- atomic_t cpuleft;
- struct completion completion;
-};
-
-static struct kmem_cache *flow_cachep __read_mostly;
-
-#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift)
-#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
-
-static void flow_cache_new_hashrnd(unsigned long arg)
-{
- struct flow_cache *fc = (void *) arg;
- int i;
-
- for_each_possible_cpu(i)
- per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
-
- fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
- add_timer(&fc->rnd_timer);
-}
-
-static int flow_entry_valid(struct flow_cache_entry *fle,
- struct netns_xfrm *xfrm)
-{
- if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
- return 0;
- if (fle->object && !fle->object->ops->check(fle->object))
- return 0;
- return 1;
-}
-
-static void flow_entry_kill(struct flow_cache_entry *fle,
- struct netns_xfrm *xfrm)
-{
- if (fle->object)
- fle->object->ops->delete(fle->object);
- kmem_cache_free(flow_cachep, fle);
-}
-
-static void flow_cache_gc_task(struct work_struct *work)
-{
- struct list_head gc_list;
- struct flow_cache_entry *fce, *n;
- struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
- flow_cache_gc_work);
-
- INIT_LIST_HEAD(&gc_list);
- spin_lock_bh(&xfrm->flow_cache_gc_lock);
- list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
- spin_unlock_bh(&xfrm->flow_cache_gc_lock);
-
- list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) {
- flow_entry_kill(fce, xfrm);
- atomic_dec(&xfrm->flow_cache_gc_count);
- }
-}
-
-static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
- unsigned int deleted,
- struct list_head *gc_list,
- struct netns_xfrm *xfrm)
-{
- if (deleted) {
- atomic_add(deleted, &xfrm->flow_cache_gc_count);
- fcp->hash_count -= deleted;
- spin_lock_bh(&xfrm->flow_cache_gc_lock);
- list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
- spin_unlock_bh(&xfrm->flow_cache_gc_lock);
- schedule_work(&xfrm->flow_cache_gc_work);
- }
-}
-
-static void __flow_cache_shrink(struct flow_cache *fc,
- struct flow_cache_percpu *fcp,
- unsigned int shrink_to)
-{
- struct flow_cache_entry *fle;
- struct hlist_node *tmp;
- LIST_HEAD(gc_list);
- unsigned int deleted = 0;
- struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
- flow_cache_global);
- unsigned int i;
-
- for (i = 0; i < flow_cache_hash_size(fc); i++) {
- unsigned int saved = 0;
-
- hlist_for_each_entry_safe(fle, tmp,
- &fcp->hash_table[i], u.hlist) {
- if (saved < shrink_to &&
- flow_entry_valid(fle, xfrm)) {
- saved++;
- } else {
- deleted++;
- hlist_del(&fle->u.hlist);
- list_add_tail(&fle->u.gc_list, &gc_list);
- }
- }
- }
-
- flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-}
-
-static void flow_cache_shrink(struct flow_cache *fc,
- struct flow_cache_percpu *fcp)
-{
- unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
-
- __flow_cache_shrink(fc, fcp, shrink_to);
-}
-
-static void flow_new_hash_rnd(struct flow_cache *fc,
- struct flow_cache_percpu *fcp)
-{
- get_random_bytes(&fcp->hash_rnd, sizeof(u32));
- fcp->hash_rnd_recalc = 0;
- __flow_cache_shrink(fc, fcp, 0);
-}
-
-static u32 flow_hash_code(struct flow_cache *fc,
- struct flow_cache_percpu *fcp,
- const struct flowi *key,
- unsigned int keysize)
-{
- const u32 *k = (const u32 *) key;
- const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
-
- return jhash2(k, length, fcp->hash_rnd)
- & (flow_cache_hash_size(fc) - 1);
-}
-
-/* I hear what you're saying, use memcmp. But memcmp cannot make
- * important assumptions that we can here, such as alignment.
- */
-static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
- unsigned int keysize)
-{
- const flow_compare_t *k1, *k1_lim, *k2;
-
- k1 = (const flow_compare_t *) key1;
- k1_lim = k1 + keysize;
-
- k2 = (const flow_compare_t *) key2;
-
- do {
- if (*k1++ != *k2++)
- return 1;
- } while (k1 < k1_lim);
-
- return 0;
-}
-
-struct flow_cache_object *
-flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
- flow_resolve_t resolver, void *ctx)
-{
- struct flow_cache *fc = &net->xfrm.flow_cache_global;
- struct flow_cache_percpu *fcp;
- struct flow_cache_entry *fle, *tfle;
- struct flow_cache_object *flo;
- unsigned int keysize;
- unsigned int hash;
-
- local_bh_disable();
- fcp = this_cpu_ptr(fc->percpu);
-
- fle = NULL;
- flo = NULL;
-
- keysize = flow_key_size(family);
- if (!keysize)
- goto nocache;
-
- /* Packet really early in init? Making flow_cache_init a
- * pre-smp initcall would solve this. --RR */
- if (!fcp->hash_table)
- goto nocache;
-
- if (fcp->hash_rnd_recalc)
- flow_new_hash_rnd(fc, fcp);
-
- hash = flow_hash_code(fc, fcp, key, keysize);
- hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) {
- if (tfle->net == net &&
- tfle->family == family &&
- tfle->dir == dir &&
- flow_key_compare(key, &tfle->key, keysize) == 0) {
- fle = tfle;
- break;
- }
- }
-
- if (unlikely(!fle)) {
- if (fcp->hash_count > fc->high_watermark)
- flow_cache_shrink(fc, fcp);
-
- if (atomic_read(&net->xfrm.flow_cache_gc_count) >
- 2 * num_online_cpus() * fc->high_watermark) {
- flo = ERR_PTR(-ENOBUFS);
- goto ret_object;
- }
-
- fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
- if (fle) {
- fle->net = net;
- fle->family = family;
- fle->dir = dir;
- memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
- fle->object = NULL;
- hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
- fcp->hash_count++;
- }
- } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
- flo = fle->object;
- if (!flo)
- goto ret_object;
- flo = flo->ops->get(flo);
- if (flo)
- goto ret_object;
- } else if (fle->object) {
- flo = fle->object;
- flo->ops->delete(flo);
- fle->object = NULL;
- }
-
-nocache:
- flo = NULL;
- if (fle) {
- flo = fle->object;
- fle->object = NULL;
- }
- flo = resolver(net, key, family, dir, flo, ctx);
- if (fle) {
- fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
- if (!IS_ERR(flo))
- fle->object = flo;
- else
- fle->genid--;
- } else {
- if (!IS_ERR_OR_NULL(flo))
- flo->ops->delete(flo);
- }
-ret_object:
- local_bh_enable();
- return flo;
-}
-EXPORT_SYMBOL(flow_cache_lookup);
-
-static void flow_cache_flush_tasklet(unsigned long data)
-{
- struct flow_flush_info *info = (void *)data;
- struct flow_cache *fc = info->cache;
- struct flow_cache_percpu *fcp;
- struct flow_cache_entry *fle;
- struct hlist_node *tmp;
- LIST_HEAD(gc_list);
- unsigned int deleted = 0;
- struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
- flow_cache_global);
- unsigned int i;
-
- fcp = this_cpu_ptr(fc->percpu);
- for (i = 0; i < flow_cache_hash_size(fc); i++) {
- hlist_for_each_entry_safe(fle, tmp,
- &fcp->hash_table[i], u.hlist) {
- if (flow_entry_valid(fle, xfrm))
- continue;
-
- deleted++;
- hlist_del(&fle->u.hlist);
- list_add_tail(&fle->u.gc_list, &gc_list);
- }
- }
-
- flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
-
- if (atomic_dec_and_test(&info->cpuleft))
- complete(&info->completion);
-}
-
-/*
- * Return whether a cpu needs flushing. Conservatively, we assume
- * the presence of any entries means the core may require flushing,
- * since the flow_cache_ops.check() function may assume it's running
- * on the same core as the per-cpu cache component.
- */
-static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
-{
- struct flow_cache_percpu *fcp;
- unsigned int i;
-
- fcp = per_cpu_ptr(fc->percpu, cpu);
- for (i = 0; i < flow_cache_hash_size(fc); i++)
- if (!hlist_empty(&fcp->hash_table[i]))
- return 0;
- return 1;
-}
-
-static void flow_cache_flush_per_cpu(void *data)
-{
- struct flow_flush_info *info = data;
- struct tasklet_struct *tasklet;
-
- tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet;
- tasklet->data = (unsigned long)info;
- tasklet_schedule(tasklet);
-}
-
-void flow_cache_flush(struct net *net)
-{
- struct flow_flush_info info;
- cpumask_var_t mask;
- int i, self;
-
- /* Track which cpus need flushing to avoid disturbing all cores. */
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
- return;
- cpumask_clear(mask);
-
- /* Don't want cpus going down or up during this. */
- get_online_cpus();
- mutex_lock(&net->xfrm.flow_flush_sem);
- info.cache = &net->xfrm.flow_cache_global;
- for_each_online_cpu(i)
- if (!flow_cache_percpu_empty(info.cache, i))
- cpumask_set_cpu(i, mask);
- atomic_set(&info.cpuleft, cpumask_weight(mask));
- if (atomic_read(&info.cpuleft) == 0)
- goto done;
-
- init_completion(&info.completion);
-
- local_bh_disable();
- self = cpumask_test_and_clear_cpu(smp_processor_id(), mask);
- on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0);
- if (self)
- flow_cache_flush_tasklet((unsigned long)&info);
- local_bh_enable();
-
- wait_for_completion(&info.completion);
-
-done:
- mutex_unlock(&net->xfrm.flow_flush_sem);
- put_online_cpus();
- free_cpumask_var(mask);
-}
-
-static void flow_cache_flush_task(struct work_struct *work)
-{
- struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
- flow_cache_flush_work);
- struct net *net = container_of(xfrm, struct net, xfrm);
-
- flow_cache_flush(net);
-}
-
-void flow_cache_flush_deferred(struct net *net)
-{
- schedule_work(&net->xfrm.flow_cache_flush_work);
-}
-
-static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
-{
- struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
- unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
-
- if (!fcp->hash_table) {
- fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
- if (!fcp->hash_table) {
- pr_err("NET: failed to allocate flow cache sz %u\n", sz);
- return -ENOMEM;
- }
- fcp->hash_rnd_recalc = 1;
- fcp->hash_count = 0;
- tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
- }
- return 0;
-}
-
-static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node)
-{
- struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
-
- return flow_cache_cpu_prepare(fc, cpu);
-}
-
-static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node)
-{
- struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node);
- struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
-
- __flow_cache_shrink(fc, fcp, 0);
- return 0;
-}
-
-int flow_cache_init(struct net *net)
-{
- int i;
- struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
- if (!flow_cachep)
- flow_cachep = kmem_cache_create("flow_cache",
- sizeof(struct flow_cache_entry),
- 0, SLAB_PANIC, NULL);
- spin_lock_init(&net->xfrm.flow_cache_gc_lock);
- INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
- INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
- INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
- mutex_init(&net->xfrm.flow_flush_sem);
- atomic_set(&net->xfrm.flow_cache_gc_count, 0);
-
- fc->hash_shift = 10;
- fc->low_watermark = 2 * flow_cache_hash_size(fc);
- fc->high_watermark = 4 * flow_cache_hash_size(fc);
-
- fc->percpu = alloc_percpu(struct flow_cache_percpu);
- if (!fc->percpu)
- return -ENOMEM;
-
- if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node))
- goto err;
-
- setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
- (unsigned long) fc);
- fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
- add_timer(&fc->rnd_timer);
-
- return 0;
-
-err:
- for_each_possible_cpu(i) {
- struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
- kfree(fcp->hash_table);
- fcp->hash_table = NULL;
- }
-
- free_percpu(fc->percpu);
- fc->percpu = NULL;
-
- return -ENOMEM;
-}
-EXPORT_SYMBOL(flow_cache_init);
-
-void flow_cache_fini(struct net *net)
-{
- int i;
- struct flow_cache *fc = &net->xfrm.flow_cache_global;
-
- del_timer_sync(&fc->rnd_timer);
-
- cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node);
-
- for_each_possible_cpu(i) {
- struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
- kfree(fcp->hash_table);
- fcp->hash_table = NULL;
- }
-
- free_percpu(fc->percpu);
- fc->percpu = NULL;
-}
-EXPORT_SYMBOL(flow_cache_fini);
-
-void __init flow_cache_hp_init(void)
-{
- int ret;
-
- ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE,
- "net/flow:prepare",
- flow_cache_cpu_up_prep,
- flow_cache_cpu_dead);
- WARN_ON(ret < 0);
-}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index fc5fc45..0a97737 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -4,6 +4,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
+#include <net/dsa.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/gre.h>
@@ -114,12 +115,6 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
-enum flow_dissect_ret {
- FLOW_DISSECT_RET_OUT_GOOD,
- FLOW_DISSECT_RET_OUT_BAD,
- FLOW_DISSECT_RET_OUT_PROTO_AGAIN,
-};
-
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
@@ -340,7 +335,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
return FLOW_DISSECT_RET_OUT_GOOD;
- return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
+ return FLOW_DISSECT_RET_PROTO_AGAIN;
}
static void
@@ -401,6 +396,18 @@ __skb_flow_dissect_ipv6(const struct sk_buff *skb,
key_ip->ttl = iph->hop_limit;
}
+/* Maximum number of protocol headers that can be parsed in
+ * __skb_flow_dissect
+ */
+#define MAX_FLOW_DISSECT_HDRS 15
+
+static bool skb_flow_dissect_allowed(int *num_hdrs)
+{
+ ++*num_hdrs;
+
+ return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
+}
+
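/* Illustrative, self-contained model of the new control flow, not part
 * of this patch: each PROTO_AGAIN/IPPROTO_AGAIN pass bumps a counter,
 * and once the cap is hit dissection stops with what it has gathered so
 * far instead of looping forever on a crafted header chain.
 */
static int toy_dissect(const int *proto_chain)
{
        int num_hdrs = 0, i = 0;

        while (proto_chain[i] != 0) {           /* 0 terminates the chain */
                if (!skb_flow_dissect_allowed(&num_hdrs))
                        break;                  /* cap hit: bail out as "good" */
                i++;                            /* PROTO_AGAIN: next header */
        }
        return i;                               /* headers actually dissected */
}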
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -430,7 +437,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
+ enum flow_dissect_ret fdret;
bool skip_vlan = false;
+ int num_hdrs = 0;
u8 ip_proto = 0;
bool ret;
@@ -440,6 +449,19 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
skb->vlan_proto : skb->protocol;
nhoff = skb_network_offset(skb);
hlen = skb_headlen(skb);
+#if IS_ENABLED(CONFIG_NET_DSA)
+ if (unlikely(skb->dev && netdev_uses_dsa(skb->dev))) {
+ const struct dsa_device_ops *ops;
+ int offset;
+
+ ops = skb->dev->dsa_ptr->tag_ops;
+ if (ops->flow_dissect &&
+ !ops->flow_dissect(skb, &proto, &offset)) {
+ hlen -= offset;
+ nhoff += offset;
+ }
+ }
+#endif
}
/* It is ensured by skb_flow_dissector_init() that control key will
@@ -468,14 +490,19 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
}
proto_again:
+ fdret = FLOW_DISSECT_RET_CONTINUE;
+
switch (proto) {
case htons(ETH_P_IP): {
const struct iphdr *iph;
struct iphdr _iph;
-ip:
+
iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
- if (!iph || iph->ihl < 5)
- goto out_bad;
+ if (!iph || iph->ihl < 5) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
nhoff += iph->ihl * 4;
ip_proto = iph->protocol;
@@ -495,19 +522,25 @@ ip:
key_control->flags |= FLOW_DIS_IS_FRAGMENT;
if (iph->frag_off & htons(IP_OFFSET)) {
- goto out_good;
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
} else {
key_control->flags |= FLOW_DIS_FIRST_FRAG;
- if (!(flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
- goto out_good;
+ if (!(flags &
+ FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
}
}
__skb_flow_dissect_ipv4(skb, flow_dissector,
target_container, data, iph);
- if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
- goto out_good;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
break;
}
@@ -515,10 +548,11 @@ ip:
const struct ipv6hdr *iph;
struct ipv6hdr _iph;
-ipv6:
iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
- if (!iph)
- goto out_bad;
+ if (!iph) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
ip_proto = iph->nexthdr;
nhoff += sizeof(struct ipv6hdr);
@@ -547,15 +581,17 @@ ipv6:
target_container);
key_tags->flow_label = ntohl(flow_label);
}
- if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
- goto out_good;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
}
__skb_flow_dissect_ipv6(skb, flow_dissector,
target_container, data, iph);
if (flags & FLOW_DISSECTOR_F_STOP_AT_L3)
- goto out_good;
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
break;
}
@@ -571,12 +607,17 @@ ipv6:
if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
data, hlen, &_vlan);
- if (!vlan)
- goto out_bad;
+ if (!vlan) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
proto = vlan->h_vlan_encapsulated_proto;
nhoff += sizeof(*vlan);
- if (skip_vlan)
- goto proto_again;
+ if (skip_vlan) {
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
+ }
}
skip_vlan = true;
@@ -599,7 +640,8 @@ ipv6:
}
}
- goto proto_again;
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
}
case htons(ETH_P_PPP_SES): {
struct {
@@ -607,18 +649,27 @@ ipv6:
__be16 proto;
} *hdr, _hdr;
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
- if (!hdr)
- goto out_bad;
+ if (!hdr) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
proto = hdr->proto;
nhoff += PPPOE_SES_HLEN;
switch (proto) {
case htons(PPP_IP):
- goto ip;
+ proto = htons(ETH_P_IP);
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
case htons(PPP_IPV6):
- goto ipv6;
+ proto = htons(ETH_P_IPV6);
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
default:
- goto out_bad;
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
}
+ break;
}
case htons(ETH_P_TIPC): {
struct {
@@ -626,8 +677,10 @@ ipv6:
__be32 srcnode;
} *hdr, _hdr;
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
- if (!hdr)
- goto out_bad;
+ if (!hdr) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
@@ -637,56 +690,64 @@ ipv6:
key_addrs->tipcaddrs.srcnode = hdr->srcnode;
key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
}
- goto out_good;
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
}
case htons(ETH_P_MPLS_UC):
case htons(ETH_P_MPLS_MC):
-mpls:
- switch (__skb_flow_dissect_mpls(skb, flow_dissector,
+ fdret = __skb_flow_dissect_mpls(skb, flow_dissector,
target_container, data,
- nhoff, hlen)) {
- case FLOW_DISSECT_RET_OUT_GOOD:
- goto out_good;
- case FLOW_DISSECT_RET_OUT_BAD:
- default:
- goto out_bad;
- }
+ nhoff, hlen);
+ break;
case htons(ETH_P_FCOE):
- if ((hlen - nhoff) < FCOE_HEADER_LEN)
- goto out_bad;
+ if ((hlen - nhoff) < FCOE_HEADER_LEN) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
nhoff += FCOE_HEADER_LEN;
- goto out_good;
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
case htons(ETH_P_ARP):
case htons(ETH_P_RARP):
- switch (__skb_flow_dissect_arp(skb, flow_dissector,
+ fdret = __skb_flow_dissect_arp(skb, flow_dissector,
target_container, data,
- nhoff, hlen)) {
- case FLOW_DISSECT_RET_OUT_GOOD:
- goto out_good;
- case FLOW_DISSECT_RET_OUT_BAD:
- default:
- goto out_bad;
- }
+ nhoff, hlen);
+ break;
+
+ default:
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
+
+ /* Process result of protocol dissection */
+ switch (fdret) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ goto out_good;
+ case FLOW_DISSECT_RET_PROTO_AGAIN:
+ if (skb_flow_dissect_allowed(&num_hdrs))
+ goto proto_again;
+ goto out_good;
+ case FLOW_DISSECT_RET_CONTINUE:
+ case FLOW_DISSECT_RET_IPPROTO_AGAIN:
+ break;
+ case FLOW_DISSECT_RET_OUT_BAD:
default:
goto out_bad;
}
ip_proto_again:
+ fdret = FLOW_DISSECT_RET_CONTINUE;
+
switch (ip_proto) {
case IPPROTO_GRE:
- switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector,
+ fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector,
target_container, data,
- &proto, &nhoff, &hlen, flags)) {
- case FLOW_DISSECT_RET_OUT_GOOD:
- goto out_good;
- case FLOW_DISSECT_RET_OUT_BAD:
- goto out_bad;
- case FLOW_DISSECT_RET_OUT_PROTO_AGAIN:
- goto proto_again;
- }
+ &proto, &nhoff, &hlen, flags);
+ break;
+
case NEXTHDR_HOP:
case NEXTHDR_ROUTING:
case NEXTHDR_DEST: {
@@ -697,13 +758,16 @@ ip_proto_again:
opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
data, hlen, &_opthdr);
- if (!opthdr)
- goto out_bad;
+ if (!opthdr) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
ip_proto = opthdr[0];
nhoff += (opthdr[1] + 1) << 3;
- goto ip_proto_again;
+ fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
+ break;
}
case NEXTHDR_FRAGMENT: {
struct frag_hdr _fh, *fh;
@@ -714,8 +778,10 @@ ip_proto_again:
fh = __skb_header_pointer(skb, nhoff, sizeof(_fh),
data, hlen, &_fh);
- if (!fh)
- goto out_bad;
+ if (!fh) {
+ fdret = FLOW_DISSECT_RET_OUT_BAD;
+ break;
+ }
key_control->flags |= FLOW_DIS_IS_FRAGMENT;
@@ -724,34 +790,50 @@ ip_proto_again:
if (!(fh->frag_off & htons(IP6_OFFSET))) {
key_control->flags |= FLOW_DIS_FIRST_FRAG;
- if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)
- goto ip_proto_again;
+ if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) {
+ fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
+ break;
+ }
}
- goto out_good;
+
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
}
case IPPROTO_IPIP:
proto = htons(ETH_P_IP);
key_control->flags |= FLOW_DIS_ENCAPSULATION;
- if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
- goto out_good;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
+
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
- goto ip;
case IPPROTO_IPV6:
proto = htons(ETH_P_IPV6);
key_control->flags |= FLOW_DIS_ENCAPSULATION;
- if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
- goto out_good;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
+ fdret = FLOW_DISSECT_RET_OUT_GOOD;
+ break;
+ }
+
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
+
- goto ipv6;
case IPPROTO_MPLS:
proto = htons(ETH_P_MPLS_UC);
- goto mpls;
+ fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+ break;
+
case IPPROTO_TCP:
__skb_flow_dissect_tcp(skb, flow_dissector, target_container,
data, nhoff, hlen);
break;
+
default:
break;
}
@@ -773,6 +855,24 @@ ip_proto_again:
key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
}
+ /* Process result of IP protocol dissection */
+ switch (fdret) {
+ case FLOW_DISSECT_RET_PROTO_AGAIN:
+ if (skb_flow_dissect_allowed(&num_hdrs))
+ goto proto_again;
+ break;
+ case FLOW_DISSECT_RET_IPPROTO_AGAIN:
+ if (skb_flow_dissect_allowed(&num_hdrs))
+ goto ip_proto_again;
+ break;
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ case FLOW_DISSECT_RET_CONTINUE:
+ break;
+ case FLOW_DISSECT_RET_OUT_BAD:
+ default:
+ goto out_bad;
+ }
+
out_good:
ret = true;
@@ -998,51 +1098,6 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
}
EXPORT_SYMBOL(skb_get_hash_perturb);
-__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
-{
- struct flow_keys keys;
-
- memset(&keys, 0, sizeof(keys));
-
- memcpy(&keys.addrs.v6addrs.src, &fl6->saddr,
- sizeof(keys.addrs.v6addrs.src));
- memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr,
- sizeof(keys.addrs.v6addrs.dst));
- keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- keys.ports.src = fl6->fl6_sport;
- keys.ports.dst = fl6->fl6_dport;
- keys.keyid.keyid = fl6->fl6_gre_key;
- keys.tags.flow_label = (__force u32)fl6->flowlabel;
- keys.basic.ip_proto = fl6->flowi6_proto;
-
- __skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
- flow_keys_have_l4(&keys));
-
- return skb->hash;
-}
-EXPORT_SYMBOL(__skb_get_hash_flowi6);
-
-__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
-{
- struct flow_keys keys;
-
- memset(&keys, 0, sizeof(keys));
-
- keys.addrs.v4addrs.src = fl4->saddr;
- keys.addrs.v4addrs.dst = fl4->daddr;
- keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- keys.ports.src = fl4->fl4_sport;
- keys.ports.dst = fl4->fl4_dport;
- keys.keyid.keyid = fl4->fl4_gre_key;
- keys.basic.ip_proto = fl4->flowi4_proto;
-
- __skb_set_sw_hash(skb, flow_hash_from_keys(&keys),
- flow_keys_have_l4(&keys));
-
- return skb->hash;
-}
-EXPORT_SYMBOL(__skb_get_hash_flowi4);
-
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
const struct flow_keys *keys, int hlen)
{
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index d9cb353..0b17175 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -44,6 +44,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
return "SEG6";
case LWTUNNEL_ENCAP_BPF:
return "BPF";
+ case LWTUNNEL_ENCAP_SEG6_LOCAL:
+ return "SEG6LOCAL";
case LWTUNNEL_ENCAP_IP6:
case LWTUNNEL_ENCAP_IP:
case LWTUNNEL_ENCAP_NONE:
@@ -65,7 +67,7 @@ struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
return lws;
}
-EXPORT_SYMBOL(lwtunnel_state_alloc);
+EXPORT_SYMBOL_GPL(lwtunnel_state_alloc);
static const struct lwtunnel_encap_ops __rcu *
lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;
@@ -80,7 +82,7 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
&lwtun_encaps[num],
NULL, ops) ? 0 : -1;
}
-EXPORT_SYMBOL(lwtunnel_encap_add_ops);
+EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops);
int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
unsigned int encap_type)
@@ -99,7 +101,7 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
return ret;
}
-EXPORT_SYMBOL(lwtunnel_encap_del_ops);
+EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops);
int lwtunnel_build_state(u16 encap_type,
struct nlattr *encap, unsigned int family,
@@ -138,7 +140,7 @@ int lwtunnel_build_state(u16 encap_type,
return ret;
}
-EXPORT_SYMBOL(lwtunnel_build_state);
+EXPORT_SYMBOL_GPL(lwtunnel_build_state);
int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
{
@@ -175,7 +177,7 @@ int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
return ret;
}
-EXPORT_SYMBOL(lwtunnel_valid_encap_type);
+EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type);
int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
struct netlink_ext_ack *extack)
@@ -205,7 +207,7 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
return 0;
}
-EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr);
+EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr);
void lwtstate_free(struct lwtunnel_state *lws)
{
@@ -219,7 +221,7 @@ void lwtstate_free(struct lwtunnel_state *lws)
}
module_put(ops->owner);
}
-EXPORT_SYMBOL(lwtstate_free);
+EXPORT_SYMBOL_GPL(lwtstate_free);
int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
{
@@ -259,7 +261,7 @@ nla_put_failure:
return (ret == -EOPNOTSUPP ? 0 : ret);
}
-EXPORT_SYMBOL(lwtunnel_fill_encap);
+EXPORT_SYMBOL_GPL(lwtunnel_fill_encap);
int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
{
@@ -281,7 +283,7 @@ int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
return ret;
}
-EXPORT_SYMBOL(lwtunnel_get_encap_size);
+EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
@@ -309,7 +311,7 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
return ret;
}
-EXPORT_SYMBOL(lwtunnel_cmp_encap);
+EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
@@ -343,7 +345,7 @@ drop:
return ret;
}
-EXPORT_SYMBOL(lwtunnel_output);
+EXPORT_SYMBOL_GPL(lwtunnel_output);
int lwtunnel_xmit(struct sk_buff *skb)
{
@@ -378,7 +380,7 @@ drop:
return ret;
}
-EXPORT_SYMBOL(lwtunnel_xmit);
+EXPORT_SYMBOL_GPL(lwtunnel_xmit);
int lwtunnel_input(struct sk_buff *skb)
{
@@ -412,4 +414,4 @@ drop:
return ret;
}
-EXPORT_SYMBOL(lwtunnel_input);
+EXPORT_SYMBOL_GPL(lwtunnel_input);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d071362..16a1a4c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3261,13 +3261,13 @@ EXPORT_SYMBOL(neigh_sysctl_unregister);
static int __init neigh_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
+ rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
- NULL);
- rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
+ 0);
+ rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
return 0;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b4f9922..927a6dc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -97,7 +97,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
return restart_syscall();
if (dev_isalive(netdev)) {
- if ((ret = (*set)(netdev, new)) == 0)
+ ret = (*set)(netdev, new);
+ if (ret == 0)
ret = len;
}
rtnl_unlock();
@@ -160,6 +161,7 @@ static ssize_t broadcast_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *ndev = to_net_dev(dev);
+
if (dev_isalive(ndev))
return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
return -EINVAL;
@@ -170,7 +172,7 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier)
{
if (!netif_running(dev))
return -EINVAL;
- return dev_change_carrier(dev, (bool) new_carrier);
+ return dev_change_carrier(dev, (bool)new_carrier);
}
static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
@@ -183,9 +185,10 @@ static ssize_t carrier_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
- if (netif_running(netdev)) {
+
+ if (netif_running(netdev))
return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
- }
+
return -EINVAL;
}
static DEVICE_ATTR_RW(carrier);
@@ -290,6 +293,7 @@ static ssize_t carrier_changes_show(struct device *dev,
char *buf)
{
struct net_device *netdev = to_net_dev(dev);
+
return sprintf(buf, fmt_dec,
atomic_read(&netdev->carrier_changes));
}
@@ -299,7 +303,7 @@ static DEVICE_ATTR_RO(carrier_changes);
static int change_mtu(struct net_device *dev, unsigned long new_mtu)
{
- return dev_set_mtu(dev, (int) new_mtu);
+ return dev_set_mtu(dev, (int)new_mtu);
}
static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
@@ -311,7 +315,7 @@ NETDEVICE_SHOW_RW(mtu, fmt_dec);
static int change_flags(struct net_device *dev, unsigned long new_flags)
{
- return dev_change_flags(dev, (unsigned int) new_flags);
+ return dev_change_flags(dev, (unsigned int)new_flags);
}
static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
@@ -362,8 +366,8 @@ static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
}
static ssize_t gro_flush_timeout_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t len)
+ struct device_attribute *attr,
+ const char *buf, size_t len)
{
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -412,7 +416,7 @@ static DEVICE_ATTR_RW(ifalias);
static int change_group(struct net_device *dev, unsigned long new_group)
{
- dev_set_group(dev, (int) new_group);
+ dev_set_group(dev, (int)new_group);
return 0;
}
@@ -426,7 +430,7 @@ static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
static int change_proto_down(struct net_device *dev, unsigned long proto_down)
{
- return dev_change_proto_down(dev, (bool) proto_down);
+ return dev_change_proto_down(dev, (bool)proto_down);
}
static ssize_t proto_down_store(struct device *dev,
@@ -508,7 +512,7 @@ static ssize_t phys_switch_id_show(struct device *dev,
}
static DEVICE_ATTR_RO(phys_switch_id);
-static struct attribute *net_class_attrs[] = {
+static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_netdev_group.attr,
&dev_attr_type.attr,
&dev_attr_dev_id.attr,
@@ -549,14 +553,14 @@ static ssize_t netstat_show(const struct device *d,
ssize_t ret = -EINVAL;
WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
- offset % sizeof(u64) != 0);
+ offset % sizeof(u64) != 0);
read_lock(&dev_base_lock);
if (dev_isalive(dev)) {
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
- ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
+ ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
}
read_unlock(&dev_base_lock);
return ret;
@@ -565,7 +569,7 @@ static ssize_t netstat_show(const struct device *d,
/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name) \
static ssize_t name##_show(struct device *d, \
- struct device_attribute *attr, char *buf) \
+ struct device_attribute *attr, char *buf) \
{ \
return netstat_show(d, attr, buf, \
offsetof(struct rtnl_link_stats64, name)); \
@@ -597,7 +601,7 @@ NETSTAT_ENTRY(rx_compressed);
NETSTAT_ENTRY(tx_compressed);
NETSTAT_ENTRY(rx_nohandler);
-static struct attribute *netstat_attrs[] = {
+static struct attribute *netstat_attrs[] __ro_after_init = {
&dev_attr_rx_packets.attr,
&dev_attr_tx_packets.attr,
&dev_attr_rx_bytes.attr,
@@ -625,7 +629,6 @@ static struct attribute *netstat_attrs[] = {
NULL
};
-
static const struct attribute_group netstat_group = {
.name = "statistics",
.attrs = netstat_attrs,
@@ -647,33 +650,33 @@ static const struct attribute_group wireless_group = {
#endif /* CONFIG_SYSFS */
#ifdef CONFIG_SYSFS
-#define to_rx_queue_attr(_attr) container_of(_attr, \
- struct rx_queue_attribute, attr)
+#define to_rx_queue_attr(_attr) \
+ container_of(_attr, struct rx_queue_attribute, attr)
#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
- struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+ const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
struct netdev_rx_queue *queue = to_rx_queue(kobj);
if (!attribute->show)
return -EIO;
- return attribute->show(queue, attribute, buf);
+ return attribute->show(queue, buf);
}
static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t count)
{
- struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+ const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
struct netdev_rx_queue *queue = to_rx_queue(kobj);
if (!attribute->store)
return -EIO;
- return attribute->store(queue, attribute, buf, count);
+ return attribute->store(queue, buf, count);
}
static const struct sysfs_ops rx_queue_sysfs_ops = {
@@ -682,8 +685,7 @@ static const struct sysfs_ops rx_queue_sysfs_ops = {
};
#ifdef CONFIG_RPS
-static ssize_t show_rps_map(struct netdev_rx_queue *queue,
- struct rx_queue_attribute *attribute, char *buf)
+static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf)
{
struct rps_map *map;
cpumask_var_t mask;
@@ -706,8 +708,7 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue,
}
static ssize_t store_rps_map(struct netdev_rx_queue *queue,
- struct rx_queue_attribute *attribute,
- const char *buf, size_t len)
+ const char *buf, size_t len)
{
struct rps_map *old_map, *map;
cpumask_var_t mask;
@@ -727,8 +728,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
}
map = kzalloc(max_t(unsigned int,
- RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
- GFP_KERNEL);
+ RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
+ GFP_KERNEL);
if (!map) {
free_cpumask_var(mask);
return -ENOMEM;
@@ -738,9 +739,9 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
for_each_cpu_and(cpu, mask, cpu_online_mask)
map->cpus[i++] = cpu;
- if (i)
+ if (i) {
map->len = i;
- else {
+ } else {
kfree(map);
map = NULL;
}
@@ -765,7 +766,6 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
}
static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
- struct rx_queue_attribute *attr,
char *buf)
{
struct rps_dev_flow_table *flow_table;
@@ -788,8 +788,7 @@ static void rps_dev_flow_table_release(struct rcu_head *rcu)
}
static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
- struct rx_queue_attribute *attr,
- const char *buf, size_t len)
+ const char *buf, size_t len)
{
unsigned long mask, count;
struct rps_dev_flow_table *table, *old_table;
@@ -831,8 +830,9 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
table->mask = mask;
for (count = 0; count <= mask; count++)
table->flows[count].cpu = RPS_NO_CPU;
- } else
+ } else {
table = NULL;
+ }
spin_lock(&rps_dev_flow_lock);
old_table = rcu_dereference_protected(queue->rps_flow_table,
@@ -846,16 +846,15 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
return len;
}
-static struct rx_queue_attribute rps_cpus_attribute =
- __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
-
+static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
+ = __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
-static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
- __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
- show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
+static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
+ = __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+ show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
#endif /* CONFIG_RPS */
-static struct attribute *rx_queue_default_attrs[] = {
+static struct attribute *rx_queue_default_attrs[] __ro_after_init = {
#ifdef CONFIG_RPS
&rps_cpus_attribute.attr,
&rps_dev_flow_table_cnt_attribute.attr,
@@ -870,7 +869,6 @@ static void rx_queue_release(struct kobject *kobj)
struct rps_map *map;
struct rps_dev_flow_table *flow_table;
-
map = rcu_dereference_protected(queue->rps_map, 1);
if (map) {
RCU_INIT_POINTER(queue->rps_map, NULL);
@@ -900,7 +898,7 @@ static const void *rx_queue_namespace(struct kobject *kobj)
return ns;
}
-static struct kobj_type rx_queue_ktype = {
+static struct kobj_type rx_queue_ktype __ro_after_init = {
.sysfs_ops = &rx_queue_sysfs_ops,
.release = rx_queue_release,
.default_attrs = rx_queue_default_attrs,
@@ -915,23 +913,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
- "rx-%u", index);
+ "rx-%u", index);
if (error)
- goto exit;
+ return error;
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
- if (error)
- goto exit;
+ if (error) {
+ kobject_put(kobj);
+ return error;
+ }
}
kobject_uevent(kobj, KOBJ_ADD);
dev_hold(queue->dev);
return error;
-exit:
- kobject_put(kobj);
- return error;
}
#endif /* CONFIG_SYSFS */
@@ -976,39 +973,40 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
*/
struct netdev_queue_attribute {
struct attribute attr;
- ssize_t (*show)(struct netdev_queue *queue,
- struct netdev_queue_attribute *attr, char *buf);
+ ssize_t (*show)(struct netdev_queue *queue, char *buf);
ssize_t (*store)(struct netdev_queue *queue,
- struct netdev_queue_attribute *attr, const char *buf, size_t len);
+ const char *buf, size_t len);
};
-#define to_netdev_queue_attr(_attr) container_of(_attr, \
- struct netdev_queue_attribute, attr)
+#define to_netdev_queue_attr(_attr) \
+ container_of(_attr, struct netdev_queue_attribute, attr)
#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
static ssize_t netdev_queue_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
- struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+ const struct netdev_queue_attribute *attribute
+ = to_netdev_queue_attr(attr);
struct netdev_queue *queue = to_netdev_queue(kobj);
if (!attribute->show)
return -EIO;
- return attribute->show(queue, attribute, buf);
+ return attribute->show(queue, buf);
}
static ssize_t netdev_queue_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t count)
{
- struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+ const struct netdev_queue_attribute *attribute
+ = to_netdev_queue_attr(attr);
struct netdev_queue *queue = to_netdev_queue(kobj);
if (!attribute->store)
return -EIO;
- return attribute->store(queue, attribute, buf, count);
+ return attribute->store(queue, buf, count);
}
static const struct sysfs_ops netdev_queue_sysfs_ops = {
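/* Illustrative sketch, not part of this patch: with the attribute pointer
 * dropped from the callbacks, a read-only tx queue attribute is declared
 * like this ("example" is a made-up attribute name).
 */
static ssize_t example_show(struct netdev_queue *queue, char *buf)
{
        return sprintf(buf, "%lu\n", queue->trans_start);
}

static struct netdev_queue_attribute queue_example __ro_after_init
        = __ATTR_RO(example);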
@@ -1016,9 +1014,7 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
.store = netdev_queue_attr_store,
};
-static ssize_t show_trans_timeout(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute,
- char *buf)
+static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
{
unsigned long trans_timeout;
@@ -1040,8 +1036,7 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
return i;
}
-static ssize_t show_traffic_class(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute,
+static ssize_t traffic_class_show(struct netdev_queue *queue,
char *buf)
{
struct net_device *dev = queue->dev;
@@ -1055,16 +1050,14 @@ static ssize_t show_traffic_class(struct netdev_queue *queue,
}
#ifdef CONFIG_XPS
-static ssize_t show_tx_maxrate(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute,
+static ssize_t tx_maxrate_show(struct netdev_queue *queue,
char *buf)
{
return sprintf(buf, "%lu\n", queue->tx_maxrate);
}
-static ssize_t set_tx_maxrate(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute,
- const char *buf, size_t len)
+static ssize_t tx_maxrate_store(struct netdev_queue *queue,
+ const char *buf, size_t len)
{
struct net_device *dev = queue->dev;
int err, index = get_netdev_queue_index(queue);
@@ -1089,16 +1082,15 @@ static ssize_t set_tx_maxrate(struct netdev_queue *queue,
return err;
}
-static struct netdev_queue_attribute queue_tx_maxrate =
- __ATTR(tx_maxrate, S_IRUGO | S_IWUSR,
- show_tx_maxrate, set_tx_maxrate);
+static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init
+ = __ATTR_RW(tx_maxrate);
#endif
-static struct netdev_queue_attribute queue_trans_timeout =
- __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+static struct netdev_queue_attribute queue_trans_timeout __ro_after_init
+ = __ATTR_RO(tx_timeout);
-static struct netdev_queue_attribute queue_traffic_class =
- __ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL);
+static struct netdev_queue_attribute queue_traffic_class __ro_after_init
+ = __ATTR_RO(traffic_class);
#ifdef CONFIG_BQL
/*
@@ -1115,9 +1107,9 @@ static ssize_t bql_set(const char *buf, const size_t count,
unsigned int value;
int err;
- if (!strcmp(buf, "max") || !strcmp(buf, "max\n"))
+ if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) {
value = DQL_MAX_LIMIT;
- else {
+ } else {
err = kstrtouint(buf, 10, &value);
if (err < 0)
return err;
@@ -1131,7 +1123,6 @@ static ssize_t bql_set(const char *buf, const size_t count,
}
static ssize_t bql_show_hold_time(struct netdev_queue *queue,
- struct netdev_queue_attribute *attr,
char *buf)
{
struct dql *dql = &queue->dql;
@@ -1140,7 +1131,6 @@ static ssize_t bql_show_hold_time(struct netdev_queue *queue,
}
static ssize_t bql_set_hold_time(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute,
const char *buf, size_t len)
{
struct dql *dql = &queue->dql;
@@ -1156,12 +1146,11 @@ static ssize_t bql_set_hold_time(struct netdev_queue *queue,
return len;
}
-static struct netdev_queue_attribute bql_hold_time_attribute =
- __ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time,
- bql_set_hold_time);
+static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
+ = __ATTR(hold_time, S_IRUGO | S_IWUSR,
+ bql_show_hold_time, bql_set_hold_time);
static ssize_t bql_show_inflight(struct netdev_queue *queue,
- struct netdev_queue_attribute *attr,
char *buf)
{
struct dql *dql = &queue->dql;
@@ -1169,33 +1158,31 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue,
return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed);
}
-static struct netdev_queue_attribute bql_inflight_attribute =
+static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
__ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
#define BQL_ATTR(NAME, FIELD) \
static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \
- struct netdev_queue_attribute *attr, \
char *buf) \
{ \
return bql_show(buf, queue->dql.FIELD); \
} \
\
static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \
- struct netdev_queue_attribute *attr, \
const char *buf, size_t len) \
{ \
return bql_set(buf, len, &queue->dql.FIELD); \
} \
\
-static struct netdev_queue_attribute bql_ ## NAME ## _attribute = \
- __ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME, \
- bql_set_ ## NAME);
+static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
+ = __ATTR(NAME, S_IRUGO | S_IWUSR, \
+ bql_show_ ## NAME, bql_set_ ## NAME)
-BQL_ATTR(limit, limit)
-BQL_ATTR(limit_max, max_limit)
-BQL_ATTR(limit_min, min_limit)
+BQL_ATTR(limit, limit);
+BQL_ATTR(limit_max, max_limit);
+BQL_ATTR(limit_min, min_limit);
-static struct attribute *dql_attrs[] = {
+static struct attribute *dql_attrs[] __ro_after_init = {
&bql_limit_attribute.attr,
&bql_limit_max_attribute.attr,
&bql_limit_min_attribute.attr,
@@ -1211,8 +1198,8 @@ static const struct attribute_group dql_group = {
#endif /* CONFIG_BQL */
#ifdef CONFIG_XPS
-static ssize_t show_xps_map(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute, char *buf)
+static ssize_t xps_cpus_show(struct netdev_queue *queue,
+ char *buf)
{
struct net_device *dev = queue->dev;
int cpu, len, num_tc = 1, tc = 0;
@@ -1258,9 +1245,8 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
return len < PAGE_SIZE ? len : -EINVAL;
}
-static ssize_t store_xps_map(struct netdev_queue *queue,
- struct netdev_queue_attribute *attribute,
- const char *buf, size_t len)
+static ssize_t xps_cpus_store(struct netdev_queue *queue,
+ const char *buf, size_t len)
{
struct net_device *dev = queue->dev;
unsigned long index;
@@ -1288,11 +1274,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
return err ? : len;
}
-static struct netdev_queue_attribute xps_cpus_attribute =
- __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
+ = __ATTR_RW(xps_cpus);
#endif /* CONFIG_XPS */
-static struct attribute *netdev_queue_default_attrs[] = {
+static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
&queue_trans_timeout.attr,
&queue_traffic_class.attr,
#ifdef CONFIG_XPS
@@ -1322,7 +1308,7 @@ static const void *netdev_queue_namespace(struct kobject *kobj)
return ns;
}
-static struct kobj_type netdev_queue_ktype = {
+static struct kobj_type netdev_queue_ktype __ro_after_init = {
.sysfs_ops = &netdev_queue_sysfs_ops,
.release = netdev_queue_release,
.default_attrs = netdev_queue_default_attrs,
@@ -1337,23 +1323,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
- "tx-%u", index);
+ "tx-%u", index);
if (error)
- goto exit;
+ return error;
#ifdef CONFIG_BQL
error = sysfs_create_group(kobj, &dql_group);
- if (error)
- goto exit;
+ if (error) {
+ kobject_put(kobj);
+ return error;
+ }
#endif
kobject_uevent(kobj, KOBJ_ADD);
dev_hold(queue->dev);
return 0;
-exit:
- kobject_put(kobj);
- return error;
}
#endif /* CONFIG_SYSFS */
@@ -1395,7 +1380,7 @@ static int register_queue_kobjects(struct net_device *dev)
#ifdef CONFIG_SYSFS
dev->queues_kset = kset_create_and_add("queues",
- NULL, &dev->dev.kobj);
+ NULL, &dev->dev.kobj);
if (!dev->queues_kset)
return -ENOMEM;
real_rx = dev->real_num_rx_queues;
@@ -1463,7 +1448,7 @@ static const void *net_netlink_ns(struct sock *sk)
return sock_net(sk);
}
-struct kobj_ns_type_operations net_ns_type_operations = {
+const struct kobj_ns_type_operations net_ns_type_operations = {
.type = KOBJ_NS_TYPE_NET,
.current_may_mount = net_current_may_mount,
.grab_current_ns = net_grab_current_ns,
@@ -1485,7 +1470,8 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
/* pass ifindex to uevent.
* ifindex is useful as it won't change (interface name may change)
- * and is what RtNetlink uses natively. */
+ * and is what RtNetlink uses natively.
+ */
retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);
exit:
@@ -1513,7 +1499,7 @@ static const void *net_namespace(struct device *d)
return dev_net(dev);
}
-static struct class net_class = {
+static struct class net_class __ro_after_init = {
.name = "net",
.dev_release = netdev_release,
.dev_groups = net_class_groups,
@@ -1560,7 +1546,7 @@ EXPORT_SYMBOL(of_find_net_device_by_node);
*/
void netdev_unregister_kobject(struct net_device *ndev)
{
- struct device *dev = &(ndev->dev);
+ struct device *dev = &ndev->dev;
if (!atomic_read(&dev_net(ndev)->count))
dev_set_uevent_suppress(dev, 1);
@@ -1577,7 +1563,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
/* Create sysfs entries for network device. */
int netdev_register_kobject(struct net_device *ndev)
{
- struct device *dev = &(ndev->dev);
+ struct device *dev = &ndev->dev;
const struct attribute_group **groups = ndev->sysfs_groups;
int error = 0;
@@ -1620,14 +1606,14 @@ int netdev_register_kobject(struct net_device *ndev)
return error;
}
-int netdev_class_create_file_ns(struct class_attribute *class_attr,
+int netdev_class_create_file_ns(const struct class_attribute *class_attr,
const void *ns)
{
return class_create_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_create_file_ns);
-void netdev_class_remove_file_ns(struct class_attribute *class_attr,
+void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
const void *ns)
{
class_remove_file_ns(&net_class, class_attr, ns);
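
The net-sysfs.c hunks above are mostly mechanical: the per-queue show/store
callbacks drop their now-unused attribute argument, the attribute structs
switch to __ATTR_RW()/__ATTR_RO() and gain __ro_after_init, and the
netdev_queue_add_kobject() error path only does kobject_put() where the BQL
group creation actually failed. The sysfs layout itself does not change. A
minimal userspace sketch that reads two of the files involved (the device
name "eth0" is an assumption, and byte_queue_limits/ exists only with
CONFIG_BQL):

#include <stdio.h>
#include <stdlib.h>

static long read_queue_attr(const char *path)
{
	char buf[64];
	FILE *f = fopen(path, "r");
	long val;

	if (!f)
		return -1;
	val = fgets(buf, sizeof(buf), f) ? strtol(buf, NULL, 0) : -1;
	fclose(f);
	return val;
}

int main(void)
{
	/* tx_maxrate is in Mbit/s; 0 means no limit */
	printf("tx_maxrate: %ld\n", read_queue_attr(
		"/sys/class/net/eth0/queues/tx-0/tx_maxrate"));
	printf("BQL limit:  %ld bytes\n", read_queue_attr(
		"/sys/class/net/eth0/queues/tx-0/byte_queue_limits/limit"));
	return 0;
}
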
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 92da5e4..1132820 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -32,10 +32,18 @@
#include <trace/events/sock.h>
#include <trace/events/udp.h>
#include <trace/events/fib.h>
+#include <trace/events/qdisc.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <trace/events/fib6.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
#endif
+#if IS_ENABLED(CONFIG_BRIDGE)
+#include <trace/events/bridge.h>
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add);
+EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
+#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 8726d05..6cfdc7c 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -855,9 +855,10 @@ static int __init net_ns_init(void)
register_pernet_subsys(&net_ns_ops);
- rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
- NULL);
+ RTNL_FLAG_DOIT_UNLOCKED);
return 0;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9201e36..a78fd61 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -62,7 +62,7 @@
struct rtnl_link {
rtnl_doit_func doit;
rtnl_dumpit_func dumpit;
- rtnl_calcit_func calcit;
+ unsigned int flags;
};
static DEFINE_MUTEX(rtnl_mutex);
@@ -127,7 +127,8 @@ bool lockdep_rtnl_is_held(void)
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
-static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
+static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
+static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1];
static inline int rtm_msgindex(int msgtype)
{
@@ -143,58 +144,13 @@ static inline int rtm_msgindex(int msgtype)
return msgindex;
}
-static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
-{
- struct rtnl_link *tab;
-
- if (protocol <= RTNL_FAMILY_MAX)
- tab = rtnl_msg_handlers[protocol];
- else
- tab = NULL;
-
- if (tab == NULL || tab[msgindex].doit == NULL)
- tab = rtnl_msg_handlers[PF_UNSPEC];
-
- return tab[msgindex].doit;
-}
-
-static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
-{
- struct rtnl_link *tab;
-
- if (protocol <= RTNL_FAMILY_MAX)
- tab = rtnl_msg_handlers[protocol];
- else
- tab = NULL;
-
- if (tab == NULL || tab[msgindex].dumpit == NULL)
- tab = rtnl_msg_handlers[PF_UNSPEC];
-
- return tab[msgindex].dumpit;
-}
-
-static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
-{
- struct rtnl_link *tab;
-
- if (protocol <= RTNL_FAMILY_MAX)
- tab = rtnl_msg_handlers[protocol];
- else
- tab = NULL;
-
- if (tab == NULL || tab[msgindex].calcit == NULL)
- tab = rtnl_msg_handlers[PF_UNSPEC];
-
- return tab[msgindex].calcit;
-}
-
/**
* __rtnl_register - Register a rtnetlink message type
* @protocol: Protocol family or PF_UNSPEC
* @msgtype: rtnetlink message type
* @doit: Function pointer called for each request message
* @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @calcit: Function pointer to calc size of dump message
+ * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
*
* Registers the specified function pointers (at least one of them has
* to be non-NULL) to be called whenever a request message for the
@@ -208,7 +164,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
*/
int __rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit,
- rtnl_calcit_func calcit)
+ unsigned int flags)
{
struct rtnl_link *tab;
int msgindex;
@@ -216,23 +172,20 @@ int __rtnl_register(int protocol, int msgtype,
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
- tab = rtnl_msg_handlers[protocol];
+ tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]);
if (tab == NULL) {
tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
if (tab == NULL)
return -ENOBUFS;
- rtnl_msg_handlers[protocol] = tab;
+ rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
}
if (doit)
tab[msgindex].doit = doit;
-
if (dumpit)
tab[msgindex].dumpit = dumpit;
-
- if (calcit)
- tab[msgindex].calcit = calcit;
+ tab[msgindex].flags |= flags;
return 0;
}
@@ -249,9 +202,9 @@ EXPORT_SYMBOL_GPL(__rtnl_register);
*/
void rtnl_register(int protocol, int msgtype,
rtnl_doit_func doit, rtnl_dumpit_func dumpit,
- rtnl_calcit_func calcit)
+ unsigned int flags)
{
- if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0)
+ if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0)
panic("Unable to register rtnetlink message handler, "
"protocol = %d, message type = %d\n",
protocol, msgtype);
@@ -267,17 +220,23 @@ EXPORT_SYMBOL_GPL(rtnl_register);
*/
int rtnl_unregister(int protocol, int msgtype)
{
+ struct rtnl_link *handlers;
int msgindex;
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
msgindex = rtm_msgindex(msgtype);
- if (rtnl_msg_handlers[protocol] == NULL)
+ rtnl_lock();
+ handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ if (!handlers) {
+ rtnl_unlock();
return -ENOENT;
+ }
- rtnl_msg_handlers[protocol][msgindex].doit = NULL;
- rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
- rtnl_msg_handlers[protocol][msgindex].calcit = NULL;
+ handlers[msgindex].doit = NULL;
+ handlers[msgindex].dumpit = NULL;
+ handlers[msgindex].flags = 0;
+ rtnl_unlock();
return 0;
}
@@ -292,10 +251,20 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
*/
void rtnl_unregister_all(int protocol)
{
+ struct rtnl_link *handlers;
+
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
- kfree(rtnl_msg_handlers[protocol]);
- rtnl_msg_handlers[protocol] = NULL;
+ rtnl_lock();
+ handlers = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
+ rtnl_unlock();
+
+ synchronize_net();
+
+ while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1)
+ schedule();
+ kfree(handlers);
}
EXPORT_SYMBOL_GPL(rtnl_unregister_all);
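
rtnl_unregister_all() now tears a family's handler table down in three
steps: unpublish the RCU pointer under rtnl_lock, synchronize_net() so no
new reader can pick the table up, then spin until the family's refcount
drops back to its baseline of 1 (set in rtnetlink_init() further down)
before the kfree(). A self-contained userspace sketch of the same
unpublish/drain/free shape, with C11 atomics standing in for RCU and
refcount_t — the names are illustrative, not kernel API:

#include <sched.h>
#include <stdatomic.h>
#include <stdlib.h>

struct handlers { int dummy; };

static _Atomic(struct handlers *) table;
static atomic_int readers = 1;	/* baseline of 1, like rtnl_msg_handlers_ref */

struct handlers *reader_get(void)
{
	atomic_fetch_add(&readers, 1);
	return atomic_load(&table);	/* may be NULL after teardown */
}

void reader_put(void)
{
	atomic_fetch_sub(&readers, 1);
}

void unregister_all(void)
{
	struct handlers *h = atomic_exchange(&table, NULL); /* unpublish */

	while (atomic_load(&readers) > 1)	/* drain in-flight readers */
		sched_yield();
	free(h);	/* no reader can still hold the old table */
}
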
@@ -433,16 +402,24 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev)
{
struct net_device *master_dev;
const struct rtnl_link_ops *ops;
+ size_t size = 0;
- master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
+ rcu_read_lock();
+
+ master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
if (!master_dev)
- return 0;
+ goto out;
+
ops = master_dev->rtnl_link_ops;
if (!ops || !ops->get_slave_size)
- return 0;
+ goto out;
/* IFLA_INFO_SLAVE_DATA + nested data */
- return nla_total_size(sizeof(struct nlattr)) +
+ size = nla_total_size(sizeof(struct nlattr)) +
ops->get_slave_size(master_dev, dev);
+
+out:
+ rcu_read_unlock();
+ return size;
}
static size_t rtnl_link_get_size(const struct net_device *dev)
@@ -1644,8 +1621,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
s_h = cb->args[0];
s_idx = cb->args[1];
- cb->seq = net->dev_base_seq;
-
/* A hack to preserve kernel<->userspace interface.
* The correct header is ifinfomsg. It is consistent with rtnl_getlink.
* However, before Linux v3.9 the code here assumed rtgenmsg and that's
@@ -1691,8 +1666,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
goto out_err;
}
-
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
@@ -1702,6 +1675,8 @@ out:
out_err:
cb->args[1] = idx;
cb->args[0] = h;
+ cb->seq = net->dev_base_seq;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
}
@@ -2831,11 +2806,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
* traverse the list of net devices and compute the minimum
* buffer size based upon the filter mask.
*/
- list_for_each_entry(dev, &net->dev_base_head, dev_list) {
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
if_nlmsg_size(dev,
ext_filter_mask));
}
+ rcu_read_unlock();
return nlmsg_total_size(min_ifinfo_dump_size);
}
@@ -2847,19 +2824,29 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
if (s_idx == 0)
s_idx = 1;
+
for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
int type = cb->nlh->nlmsg_type-RTM_BASE;
+ struct rtnl_link *handlers;
+ rtnl_dumpit_func dumpit;
+
if (idx < s_idx || idx == PF_PACKET)
continue;
- if (rtnl_msg_handlers[idx] == NULL ||
- rtnl_msg_handlers[idx][type].dumpit == NULL)
+
+ handlers = rtnl_dereference(rtnl_msg_handlers[idx]);
+ if (!handlers)
continue;
+
+ dumpit = READ_ONCE(handlers[type].dumpit);
+ if (!dumpit)
+ continue;
+
if (idx > s_idx) {
memset(&cb->args[0], 0, sizeof(cb->args));
cb->prev_seq = 0;
cb->seq = 0;
}
- if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
+ if (dumpit(skb, cb))
break;
}
cb->family = idx;
@@ -4162,11 +4149,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
+ struct rtnl_link *handlers;
+ int err = -EOPNOTSUPP;
rtnl_doit_func doit;
+ unsigned int flags;
int kind;
int family;
int type;
- int err;
type = nlh->nlmsg_type;
if (type > RTM_MAX)
@@ -4184,20 +4173,40 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
+ if (family >= ARRAY_SIZE(rtnl_msg_handlers))
+ family = PF_UNSPEC;
+
+ rcu_read_lock();
+ handlers = rcu_dereference(rtnl_msg_handlers[family]);
+ if (!handlers) {
+ family = PF_UNSPEC;
+ handlers = rcu_dereference(rtnl_msg_handlers[family]);
+ }
+
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
- rtnl_calcit_func calcit;
u16 min_dump_alloc = 0;
- dumpit = rtnl_get_dumpit(family, type);
- if (dumpit == NULL)
- return -EOPNOTSUPP;
- calcit = rtnl_get_calcit(family, type);
- if (calcit)
- min_dump_alloc = calcit(skb, nlh);
+ dumpit = READ_ONCE(handlers[type].dumpit);
+ if (!dumpit) {
+ family = PF_UNSPEC;
+ handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]);
+ if (!handlers)
+ goto err_unlock;
+
+ dumpit = READ_ONCE(handlers[type].dumpit);
+ if (!dumpit)
+ goto err_unlock;
+ }
+
+ refcount_inc(&rtnl_msg_handlers_ref[family]);
+
+ if (type == RTM_GETLINK - RTM_BASE)
+ min_dump_alloc = rtnl_calcit(skb, nlh);
+
+ rcu_read_unlock();
- __rtnl_unlock();
rtnl = net->rtnl;
{
struct netlink_dump_control c = {
@@ -4206,22 +4215,47 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
};
err = netlink_dump_start(rtnl, skb, nlh, &c);
}
- rtnl_lock();
+ refcount_dec(&rtnl_msg_handlers_ref[family]);
return err;
}
- doit = rtnl_get_doit(family, type);
- if (doit == NULL)
- return -EOPNOTSUPP;
+ doit = READ_ONCE(handlers[type].doit);
+ if (!doit) {
+ family = PF_UNSPEC;
+ handlers = rcu_dereference(rtnl_msg_handlers[family]);
+ }
+
+ flags = READ_ONCE(handlers[type].flags);
+ if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
+ refcount_inc(&rtnl_msg_handlers_ref[family]);
+ doit = READ_ONCE(handlers[type].doit);
+ rcu_read_unlock();
+ if (doit)
+ err = doit(skb, nlh, extack);
+ refcount_dec(&rtnl_msg_handlers_ref[family]);
+ return err;
+ }
- return doit(skb, nlh, extack);
+ rcu_read_unlock();
+
+ rtnl_lock();
+ handlers = rtnl_dereference(rtnl_msg_handlers[family]);
+ if (handlers) {
+ doit = READ_ONCE(handlers[type].doit);
+ if (doit)
+ err = doit(skb, nlh, extack);
+ }
+ rtnl_unlock();
+ return err;
+
+err_unlock:
+ rcu_read_unlock();
+ return -EOPNOTSUPP;
}
static void rtnetlink_rcv(struct sk_buff *skb)
{
- rtnl_lock();
netlink_rcv_skb(skb, &rtnetlink_rcv_msg);
- rtnl_unlock();
}
static int rtnetlink_bind(struct net *net, int group)
@@ -4294,29 +4328,34 @@ static struct pernet_operations rtnetlink_net_ops = {
void __init rtnetlink_init(void)
{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++)
+ refcount_set(&rtnl_msg_handlers_ref[i], 1);
+
if (register_pernet_subsys(&rtnetlink_net_ops))
panic("rtnetlink_init: cannot initialize rtnetlink\n");
register_netdevice_notifier(&rtnetlink_dev_notifier);
rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
- rtnl_dump_ifinfo, rtnl_calcit);
- rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL);
+ rtnl_dump_ifinfo, 0);
+ rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0);
- rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL);
- rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL);
- rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL);
+ rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, 0);
- rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
- rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
- rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
+ rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
- NULL);
+ 0);
}
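
Every rtnl_register() call site loses the calcit argument and gains a flags
word; RTM_GETLINK was the only message with a calcit, so rtnetlink_rcv_msg()
now calls rtnl_calcit() for it directly. Nothing changes for userspace — a
dump is still an ordinary NETLINK_ROUTE request. A minimal RTM_GETLINK dump
sketch (error handling trimmed; a real reader walks each batch with
NLMSG_OK()/NLMSG_NEXT()):

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct ifinfomsg ifm;
	} req;
	char buf[8192];
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifm));
	req.nlh.nlmsg_type = RTM_GETLINK;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.ifm.ifi_family = AF_UNSPEC;
	send(fd, &req, req.nlh.nlmsg_len, 0);

	/* each recv() returns a batch of RTM_NEWLINK messages,
	 * terminated by NLMSG_DONE */
	for (;;) {
		ssize_t n = recv(fd, buf, sizeof(buf), 0);
		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

		if (n <= 0 || nlh->nlmsg_type == NLMSG_DONE)
			break;
	}
	close(fd);
	return 0;
}
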
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e075566..68065d7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -158,31 +158,6 @@ out:
*
*/
-struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
-{
- struct sk_buff *skb;
-
- /* Get the HEAD */
- skb = kmem_cache_alloc_node(skbuff_head_cache,
- gfp_mask & ~__GFP_DMA, node);
- if (!skb)
- goto out;
-
- /*
- * Only clear those fields we need to clear, not those that we will
- * actually initialise below. Hence, don't put any more fields after
- * the tail pointer in struct sk_buff!
- */
- memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->head = NULL;
- skb->truesize = sizeof(struct sk_buff);
- refcount_set(&skb->users, 1);
-
- skb->mac_header = (typeof(skb->mac_header))~0U;
-out:
- return skb;
-}
-
/**
* __alloc_skb - allocate a network buffer
* @size: size to allocate
@@ -592,21 +567,10 @@ static void skb_release_data(struct sk_buff *skb)
for (i = 0; i < shinfo->nr_frags; i++)
__skb_frag_unref(&shinfo->frags[i]);
- /*
- * If skb buf is from userspace, we need to notify the caller
- * the lower device DMA has done;
- */
- if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
- struct ubuf_info *uarg;
-
- uarg = shinfo->destructor_arg;
- if (uarg->callback)
- uarg->callback(uarg, true);
- }
-
if (shinfo->frag_list)
kfree_skb_list(shinfo->frag_list);
+ skb_zcopy_clear(skb, true);
skb_free_head(skb);
}
@@ -720,14 +684,7 @@ EXPORT_SYMBOL(kfree_skb_list);
*/
void skb_tx_error(struct sk_buff *skb)
{
- if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
- struct ubuf_info *uarg;
-
- uarg = skb_shinfo(skb)->destructor_arg;
- if (uarg->callback)
- uarg->callback(uarg, false);
- skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
- }
+ skb_zcopy_clear(skb, true);
}
EXPORT_SYMBOL(skb_tx_error);
@@ -762,8 +719,7 @@ void consume_stateless_skb(struct sk_buff *skb)
return;
trace_consume_skb(skb);
- if (likely(skb->head))
- skb_release_data(skb);
+ skb_release_data(skb);
kfree_skbmem(skb);
}
@@ -941,6 +897,267 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
}
EXPORT_SYMBOL_GPL(skb_morph);
+static int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
+{
+ unsigned long max_pg, num_pg, new_pg, old_pg;
+ struct user_struct *user;
+
+ if (capable(CAP_IPC_LOCK) || !size)
+ return 0;
+
+ num_pg = (size >> PAGE_SHIFT) + 2; /* worst case */
+ max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ user = mmp->user ? : current_user();
+
+ do {
+ old_pg = atomic_long_read(&user->locked_vm);
+ new_pg = old_pg + num_pg;
+ if (new_pg > max_pg)
+ return -ENOBUFS;
+ } while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) !=
+ old_pg);
+
+ if (!mmp->user) {
+ mmp->user = get_uid(user);
+ mmp->num_pg = num_pg;
+ } else {
+ mmp->num_pg += num_pg;
+ }
+
+ return 0;
+}
+
+static void mm_unaccount_pinned_pages(struct mmpin *mmp)
+{
+ if (mmp->user) {
+ atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm);
+ free_uid(mmp->user);
+ }
+}
+
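
mm_account_pinned_pages() charges the pinned pages against RLIMIT_MEMLOCK
with a lock-free cmpxchg loop: read the counter, give up if the reservation
would exceed the limit, retry if another thread raced in between. The same
reservation shape in portable C11 atomics (a sketch, not the kernel code):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_ulong locked_pages;

/* Reserve num_pg pages against max_pg, lock-free; mirrors the
 * atomic_long_cmpxchg() loop above. Returns false when over the limit. */
bool reserve_pages(unsigned long num_pg, unsigned long max_pg)
{
	unsigned long old_pg = atomic_load(&locked_pages);
	unsigned long new_pg;

	do {
		new_pg = old_pg + num_pg;
		if (new_pg > max_pg)
			return false;	/* would exceed RLIMIT_MEMLOCK */
	} while (!atomic_compare_exchange_weak(&locked_pages,
					       &old_pg, new_pg));
	return true;
}
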
+struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
+{
+ struct ubuf_info *uarg;
+ struct sk_buff *skb;
+
+ WARN_ON_ONCE(!in_task());
+
+ if (!sock_flag(sk, SOCK_ZEROCOPY))
+ return NULL;
+
+ skb = sock_omalloc(sk, 0, GFP_KERNEL);
+ if (!skb)
+ return NULL;
+
+ BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb));
+ uarg = (void *)skb->cb;
+ uarg->mmp.user = NULL;
+
+ if (mm_account_pinned_pages(&uarg->mmp, size)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ uarg->callback = sock_zerocopy_callback;
+ uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
+ uarg->len = 1;
+ uarg->bytelen = size;
+ uarg->zerocopy = 1;
+ refcount_set(&uarg->refcnt, 1);
+ sock_hold(sk);
+
+ return uarg;
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_alloc);
+
+static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
+{
+ return container_of((void *)uarg, struct sk_buff, cb);
+}
+
+struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
+ struct ubuf_info *uarg)
+{
+ if (uarg) {
+ const u32 byte_limit = 1 << 19; /* limit to a few TSO packets */
+ u32 bytelen, next;
+
+ /* realloc only when socket is locked (TCP, UDP cork),
+ * so uarg->len and sk_zckey accesses are serialized
+ */
+ if (!sock_owned_by_user(sk)) {
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ bytelen = uarg->bytelen + size;
+ if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
+ /* TCP can create new skb to attach new uarg */
+ if (sk->sk_type == SOCK_STREAM)
+ goto new_alloc;
+ return NULL;
+ }
+
+ next = (u32)atomic_read(&sk->sk_zckey);
+ if ((u32)(uarg->id + uarg->len) == next) {
+ if (mm_account_pinned_pages(&uarg->mmp, size))
+ return NULL;
+ uarg->len++;
+ uarg->bytelen = bytelen;
+ atomic_set(&sk->sk_zckey, ++next);
+ sock_zerocopy_get(uarg);
+ return uarg;
+ }
+ }
+
+new_alloc:
+ return sock_zerocopy_alloc(sk, size);
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_realloc);
+
+static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
+{
+ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+ u32 old_lo, old_hi;
+ u64 sum_len;
+
+ old_lo = serr->ee.ee_info;
+ old_hi = serr->ee.ee_data;
+ sum_len = old_hi - old_lo + 1ULL + len;
+
+ if (sum_len >= (1ULL << 32))
+ return false;
+
+ if (lo != old_hi + 1)
+ return false;
+
+ serr->ee.ee_data += len;
+ return true;
+}
+
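
skb_zerocopy_notify_extend() is the coalescing step: a queued notification
covers the completed send range [ee_info, ee_data], and a newly completed
range is folded into it only when it is exactly contiguous (lo == old_hi + 1)
and the merged length still fits in 32 bits. The same check in isolation:

#include <stdbool.h>
#include <stdint.h>

/* Try to extend a pending [*lo, *hi] zerocopy notification by a new
 * range of len ids starting at new_lo; mirrors the logic above. */
bool notify_extend(uint32_t *lo, uint32_t *hi, uint32_t new_lo, uint16_t len)
{
	uint64_t sum_len = (uint64_t)*hi - *lo + 1 + len;

	if (sum_len >= (1ULL << 32))
		return false;		/* combined range would overflow */
	if (new_lo != *hi + 1)
		return false;		/* not contiguous, queue separately */
	*hi += len;
	return true;
}
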
+void sock_zerocopy_callback(struct ubuf_info *uarg, bool success)
+{
+ struct sk_buff *tail, *skb = skb_from_uarg(uarg);
+ struct sock_exterr_skb *serr;
+ struct sock *sk = skb->sk;
+ struct sk_buff_head *q;
+ unsigned long flags;
+ u32 lo, hi;
+ u16 len;
+
+ mm_unaccount_pinned_pages(&uarg->mmp);
+
+ /* if !len, there was only 1 call, and it was aborted
+ * so do not queue a completion notification
+ */
+ if (!uarg->len || sock_flag(sk, SOCK_DEAD))
+ goto release;
+
+ len = uarg->len;
+ lo = uarg->id;
+ hi = uarg->id + len - 1;
+
+ serr = SKB_EXT_ERR(skb);
+ memset(serr, 0, sizeof(*serr));
+ serr->ee.ee_errno = 0;
+ serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
+ serr->ee.ee_data = hi;
+ serr->ee.ee_info = lo;
+ if (!success)
+ serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
+
+ q = &sk->sk_error_queue;
+ spin_lock_irqsave(&q->lock, flags);
+ tail = skb_peek_tail(q);
+ if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY ||
+ !skb_zerocopy_notify_extend(tail, lo, len)) {
+ __skb_queue_tail(q, skb);
+ skb = NULL;
+ }
+ spin_unlock_irqrestore(&q->lock, flags);
+
+ sk->sk_error_report(sk);
+
+release:
+ consume_skb(skb);
+ sock_put(sk);
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_callback);
+
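
The notifications queued by sock_zerocopy_callback() are consumed from the
socket error queue. A sketch of the reader for a TCP/IPv4 socket — the
SO_EE_* constants come from linux/errqueue.h as extended by this series,
IPv6 sockets use SOL_IPV6/IPV6_RECVERR instead, and recvmsg(MSG_ERRQUEUE)
returns -1/EAGAIN once the queue is empty:

#include <stdio.h>
#include <netinet/in.h>
#include <linux/errqueue.h>
#include <sys/socket.h>

void read_zerocopy_completions(int fd)
{
	char control[128];
	struct msghdr msg = { .msg_control = control,
			      .msg_controllen = sizeof(control) };
	struct cmsghdr *cm;
	struct sock_extended_err *serr;

	while (recvmsg(fd, &msg, MSG_ERRQUEUE) != -1) {
		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
			if (cm->cmsg_level != SOL_IP ||
			    cm->cmsg_type != IP_RECVERR)
				continue;
			serr = (struct sock_extended_err *)CMSG_DATA(cm);
			if (serr->ee_errno != 0 ||
			    serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
				continue;
			if (serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED)
				printf("range [%u, %u] fell back to copying\n",
				       serr->ee_info, serr->ee_data);
			else
				printf("range [%u, %u] completed\n",
				       serr->ee_info, serr->ee_data);
		}
		msg.msg_controllen = sizeof(control);
	}
}
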
+void sock_zerocopy_put(struct ubuf_info *uarg)
+{
+ if (uarg && refcount_dec_and_test(&uarg->refcnt)) {
+ if (uarg->callback)
+ uarg->callback(uarg, uarg->zerocopy);
+ else
+ consume_skb(skb_from_uarg(uarg));
+ }
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_put);
+
+void sock_zerocopy_put_abort(struct ubuf_info *uarg)
+{
+ if (uarg) {
+ struct sock *sk = skb_from_uarg(uarg)->sk;
+
+ atomic_dec(&sk->sk_zckey);
+ uarg->len--;
+
+ sock_zerocopy_put(uarg);
+ }
+}
+EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
+
+extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
+ struct iov_iter *from, size_t length);
+
+int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
+ struct msghdr *msg, int len,
+ struct ubuf_info *uarg)
+{
+ struct ubuf_info *orig_uarg = skb_zcopy(skb);
+ struct iov_iter orig_iter = msg->msg_iter;
+ int err, orig_len = skb->len;
+
+ /* An skb can only point to one uarg. This edge case happens when
+ * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
+ */
+ if (orig_uarg && uarg != orig_uarg)
+ return -EEXIST;
+
+ err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
+ if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
+ /* Streams do not free skb on error. Reset to prev state. */
+ msg->msg_iter = orig_iter;
+ ___pskb_trim(skb, orig_len);
+ return err;
+ }
+
+ skb_zcopy_set(skb, uarg);
+ return skb->len - orig_len;
+}
+EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
+
+static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
+ gfp_t gfp_mask)
+{
+ if (skb_zcopy(orig)) {
+ if (skb_zcopy(nskb)) {
+ /* callers passing !gfp_mask are verified to have !skb_zcopy(nskb) */
+ if (!gfp_mask) {
+ WARN_ON_ONCE(1);
+ return -ENOMEM;
+ }
+ if (skb_uarg(nskb) == skb_uarg(orig))
+ return 0;
+ if (skb_copy_ubufs(nskb, GFP_ATOMIC))
+ return -EIO;
+ }
+ skb_zcopy_set(nskb, skb_uarg(orig));
+ }
+ return 0;
+}
+
/**
* skb_copy_ubufs - copy userspace skb frags buffers to kernel
* @skb: the skb to modify
@@ -958,15 +1175,19 @@ EXPORT_SYMBOL_GPL(skb_morph);
*/
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
{
- int i;
int num_frags = skb_shinfo(skb)->nr_frags;
struct page *page, *head = NULL;
- struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;
+ int i, new_frags;
+ u32 d_off;
- for (i = 0; i < num_frags; i++) {
- u8 *vaddr;
- skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+ if (!num_frags)
+ return 0;
+
+ if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
+ return -EINVAL;
+ new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ for (i = 0; i < new_frags; i++) {
page = alloc_page(gfp_mask);
if (!page) {
while (head) {
@@ -976,28 +1197,51 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
}
return -ENOMEM;
}
- vaddr = kmap_atomic(skb_frag_page(f));
- memcpy(page_address(page),
- vaddr + f->page_offset, skb_frag_size(f));
- kunmap_atomic(vaddr);
set_page_private(page, (unsigned long)head);
head = page;
}
+ page = head;
+ d_off = 0;
+ for (i = 0; i < num_frags; i++) {
+ skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+ u32 p_off, p_len, copied;
+ struct page *p;
+ u8 *vaddr;
+
+ skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f),
+ p, p_off, p_len, copied) {
+ u32 copy, done = 0;
+ vaddr = kmap_atomic(p);
+
+ while (done < p_len) {
+ if (d_off == PAGE_SIZE) {
+ d_off = 0;
+ page = (struct page *)page_private(page);
+ }
+ copy = min_t(u32, PAGE_SIZE - d_off, p_len - done);
+ memcpy(page_address(page) + d_off,
+ vaddr + p_off + done, copy);
+ done += copy;
+ d_off += copy;
+ }
+ kunmap_atomic(vaddr);
+ }
+ }
+
/* skb frags release userspace buffers */
for (i = 0; i < num_frags; i++)
skb_frag_unref(skb, i);
- uarg->callback(uarg, false);
-
/* skb frags point to kernel buffers */
- for (i = num_frags - 1; i >= 0; i--) {
- __skb_fill_page_desc(skb, i, head, 0,
- skb_shinfo(skb)->frags[i].size);
+ for (i = 0; i < new_frags - 1; i++) {
+ __skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE);
head = (struct page *)page_private(head);
}
+ __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
+ skb_shinfo(skb)->nr_frags = new_frags;
- skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
+ skb_zcopy_clear(skb, false);
return 0;
}
EXPORT_SYMBOL_GPL(skb_copy_ubufs);
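
The rewritten skb_copy_ubufs() no longer assumes one destination page per
source frag: it allocates just enough pages for __skb_pagelen(), then rolls
the copy across them with d_off so the data is packed tightly. The packing
loop on plain buffers (a userspace sketch; 4096 stands in for PAGE_SIZE and
the caller must supply enough pages):

#include <stddef.h>
#include <string.h>

#define PAGE_SZ 4096

struct buf { const char *data; size_t len; };

/* Pack scattered source buffers tightly into fixed-size pages; returns
 * the bytes used in the last page, like d_off above. */
size_t pack_pages(char pages[][PAGE_SZ], const struct buf *src, int n)
{
	size_t d_off = 0;
	int page = 0;

	for (int i = 0; i < n; i++) {
		size_t done = 0;

		while (done < src[i].len) {
			size_t copy = src[i].len - done;

			if (d_off == PAGE_SZ) {	/* current page is full */
				d_off = 0;
				page++;
			}
			if (copy > PAGE_SZ - d_off)
				copy = PAGE_SZ - d_off;
			memcpy(pages[page] + d_off, src[i].data + done, copy);
			done += copy;
			d_off += copy;
		}
	}
	return d_off;
}
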
@@ -1158,7 +1402,8 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
if (skb_shinfo(skb)->nr_frags) {
int i;
- if (skb_orphan_frags(skb, gfp_mask)) {
+ if (skb_orphan_frags(skb, gfp_mask) ||
+ skb_zerocopy_clone(n, skb, gfp_mask)) {
kfree_skb(n);
n = NULL;
goto out;
@@ -1235,9 +1480,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
* be since all we did is relocate the values
*/
if (skb_cloned(skb)) {
- /* copy this zero copy skb frags */
if (skb_orphan_frags(skb, gfp_mask))
goto nofrags;
+ if (skb_zcopy(skb))
+ refcount_inc(&skb_uarg(skb)->refcnt);
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
skb_frag_ref(skb, i);
@@ -1722,6 +1968,8 @@ pull_pages:
if (eat) {
skb_shinfo(skb)->frags[k].page_offset += eat;
skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
+ if (!i)
+ goto end;
eat = 0;
}
k++;
@@ -1729,9 +1977,13 @@ pull_pages:
}
skb_shinfo(skb)->nr_frags = k;
+end:
skb->tail += delta;
skb->data_len -= delta;
+ if (!skb->data_len)
+ skb_zcopy_clear(skb, false);
+
return skb_tail_pointer(skb);
}
EXPORT_SYMBOL(__pskb_pull_tail);
@@ -1779,16 +2031,20 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
end = start + skb_frag_size(f);
if ((copy = end - offset) > 0) {
+ u32 p_off, p_len, copied;
+ struct page *p;
u8 *vaddr;
if (copy > len)
copy = len;
- vaddr = kmap_atomic(skb_frag_page(f));
- memcpy(to,
- vaddr + f->page_offset + offset - start,
- copy);
- kunmap_atomic(vaddr);
+ skb_frag_foreach_page(f,
+ f->page_offset + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_atomic(p);
+ memcpy(to + copied, vaddr + p_off, p_len);
+ kunmap_atomic(vaddr);
+ }
if ((len -= copy) == 0)
return 0;
@@ -2008,6 +2264,107 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
}
EXPORT_SYMBOL_GPL(skb_splice_bits);
+/* Send skb data on a socket. Socket must be locked. */
+int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
+ int len)
+{
+ unsigned int orig_len = len;
+ struct sk_buff *head = skb;
+ unsigned short fragidx;
+ int slen, ret;
+
+do_frag_list:
+
+ /* Deal with head data */
+ while (offset < skb_headlen(skb) && len) {
+ struct kvec kv;
+ struct msghdr msg;
+
+ slen = min_t(int, len, skb_headlen(skb) - offset);
+ kv.iov_base = skb->data + offset;
+ kv.iov_len = slen;
+ memset(&msg, 0, sizeof(msg));
+
+ ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
+ if (ret <= 0)
+ goto error;
+
+ offset += ret;
+ len -= ret;
+ }
+
+ /* All the data was skb head? */
+ if (!len)
+ goto out;
+
+ /* Make offset relative to start of frags */
+ offset -= skb_headlen(skb);
+
+ /* Find where we are in frag list */
+ for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
+
+ if (offset < frag->size)
+ break;
+
+ offset -= frag->size;
+ }
+
+ for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx];
+
+ slen = min_t(size_t, len, frag->size - offset);
+
+ while (slen) {
+ ret = kernel_sendpage_locked(sk, frag->page.p,
+ frag->page_offset + offset,
+ slen, MSG_DONTWAIT);
+ if (ret <= 0)
+ goto error;
+
+ len -= ret;
+ offset += ret;
+ slen -= ret;
+ }
+
+ offset = 0;
+ }
+
+ if (len) {
+ /* Process any frag lists */
+
+ if (skb == head) {
+ if (skb_has_frag_list(skb)) {
+ skb = skb_shinfo(skb)->frag_list;
+ goto do_frag_list;
+ }
+ } else if (skb->next) {
+ skb = skb->next;
+ goto do_frag_list;
+ }
+ }
+
+out:
+ return orig_len - len;
+
+error:
+ return orig_len == len ? ret : orig_len - len;
+}
+EXPORT_SYMBOL_GPL(skb_send_sock_locked);
+
+/* Send skb data on a socket. */
+int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
+{
+ int ret = 0;
+
+ lock_sock(sk);
+ ret = skb_send_sock_locked(sk, skb, offset, len);
+ release_sock(sk);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(skb_send_sock);
+
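
skb_send_sock_locked() has to survive short sends at every stage — linear
head data, each page frag, then the frag list — and resume from the exact
byte where it stopped. The same resume discipline for an ordinary iovec in
userspace (a sketch; sendmsg() is free to accept fewer bytes than asked):

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Send an entire iovec array, resuming after partial sendmsg() calls;
 * returns 0 on success, -1 on error. */
int send_all(int fd, struct iovec *iov, int iovcnt)
{
	struct msghdr msg = { .msg_iov = iov, .msg_iovlen = iovcnt };

	while (msg.msg_iovlen) {
		ssize_t n = sendmsg(fd, &msg, MSG_NOSIGNAL);

		if (n < 0)
			return -1;
		/* skip the iovec entries the kernel fully consumed */
		while (n > 0 && (size_t)n >= msg.msg_iov->iov_len) {
			n -= msg.msg_iov->iov_len;
			msg.msg_iov++;
			msg.msg_iovlen--;
		}
		/* partially consumed entry: advance base, shrink len */
		if (n > 0) {
			msg.msg_iov->iov_base =
				(char *)msg.msg_iov->iov_base + n;
			msg.msg_iov->iov_len -= n;
		}
	}
	return 0;
}
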
/**
* skb_store_bits - store bits from kernel buffer to skb
* @skb: destination buffer
@@ -2047,15 +2404,20 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
+ u32 p_off, p_len, copied;
+ struct page *p;
u8 *vaddr;
if (copy > len)
copy = len;
- vaddr = kmap_atomic(skb_frag_page(frag));
- memcpy(vaddr + frag->page_offset + offset - start,
- from, copy);
- kunmap_atomic(vaddr);
+ skb_frag_foreach_page(frag,
+ frag->page_offset + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_atomic(p);
+ memcpy(vaddr + p_off, from + copied, p_len);
+ kunmap_atomic(vaddr);
+ }
if ((len -= copy) == 0)
return 0;
@@ -2120,20 +2482,27 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
end = start + skb_frag_size(frag);
if ((copy = end - offset) > 0) {
+ u32 p_off, p_len, copied;
+ struct page *p;
__wsum csum2;
u8 *vaddr;
if (copy > len)
copy = len;
- vaddr = kmap_atomic(skb_frag_page(frag));
- csum2 = ops->update(vaddr + frag->page_offset +
- offset - start, copy, 0);
- kunmap_atomic(vaddr);
- csum = ops->combine(csum, csum2, pos, copy);
+
+ skb_frag_foreach_page(frag,
+ frag->page_offset + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_atomic(p);
+ csum2 = ops->update(vaddr + p_off, p_len, 0);
+ kunmap_atomic(vaddr);
+ csum = ops->combine(csum, csum2, pos, p_len);
+ pos += p_len;
+ }
+
if (!(len -= copy))
return csum;
offset += copy;
- pos += copy;
}
start = end;
}
@@ -2206,24 +2575,31 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
if ((copy = end - offset) > 0) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ u32 p_off, p_len, copied;
+ struct page *p;
__wsum csum2;
u8 *vaddr;
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
if (copy > len)
copy = len;
- vaddr = kmap_atomic(skb_frag_page(frag));
- csum2 = csum_partial_copy_nocheck(vaddr +
- frag->page_offset +
- offset - start, to,
- copy, 0);
- kunmap_atomic(vaddr);
- csum = csum_block_add(csum, csum2, pos);
+
+ skb_frag_foreach_page(frag,
+ frag->page_offset + offset - start,
+ copy, p, p_off, p_len, copied) {
+ vaddr = kmap_atomic(p);
+ csum2 = csum_partial_copy_nocheck(vaddr + p_off,
+ to + copied,
+ p_len, 0);
+ kunmap_atomic(vaddr);
+ csum = csum_block_add(csum, csum2, pos);
+ pos += p_len;
+ }
+
if (!(len -= copy))
return csum;
offset += copy;
to += copy;
- pos += copy;
}
start = end;
}
@@ -2363,6 +2739,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
skb_tx_error(from);
return -ENOMEM;
}
+ skb_zerocopy_clone(to, from, GFP_ATOMIC);
for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
if (!len)
@@ -2660,6 +3037,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags &
SKBTX_SHARED_FRAG;
+ skb_zerocopy_clone(skb1, skb, 0);
if (len < pos) /* Split line is inside header. */
skb_split_inside_header(skb, skb1, len, pos);
else /* Second chunk has no header, nothing to copy. */
@@ -2703,6 +3081,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
if (skb_headlen(skb))
return 0;
+ if (skb_zcopy(tgt) || skb_zcopy(skb))
+ return 0;
todo = shiftlen;
from = 0;
@@ -3276,6 +3656,8 @@ normal:
skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
SKBTX_SHARED_FRAG;
+ if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
+ goto err;
while (pos < offset + len) {
if (i >= nfrags) {
@@ -4399,6 +4781,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (skb_has_frag_list(to) || skb_has_frag_list(from))
return false;
+ if (skb_zcopy(to) || skb_zcopy(from))
+ return false;
if (skb_headlen(from) != 0) {
struct page *page;
diff --git a/net/core/sock.c b/net/core/sock.c
index ac2a404..9b7b6bb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -307,16 +307,6 @@ static struct lock_class_key af_wlock_keys[AF_MAX];
static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];
-/* Take into consideration the size of the struct sk_buff overhead in the
- * determination of these values, since that is non-constant across
- * platforms. This makes socket queueing behavior and performance
- * not depend upon such differences.
- */
-#define _SK_MEM_PACKETS 256
-#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
-#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
-
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
@@ -1055,6 +1045,20 @@ set_rcvbuf:
if (val == 1)
dst_negative_advice(sk);
break;
+
+ case SO_ZEROCOPY:
+ if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6)
+ ret = -ENOTSUPP;
+ else if (sk->sk_protocol != IPPROTO_TCP)
+ ret = -ENOTSUPP;
+ else if (sk->sk_state != TCP_CLOSE)
+ ret = -EBUSY;
+ else if (val < 0 || val > 1)
+ ret = -EINVAL;
+ else
+ sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
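
At this point in the series SO_ZEROCOPY is accepted only for TCP over
IPv4/IPv6, and only while the socket is still in TCP_CLOSE, so userspace
opts in before connect() and then again per call with MSG_ZEROCOPY. A
hedged sketch — the fallback defines are for pre-4.14 userspace headers,
and the SO_ZEROCOPY value is per-architecture (60 on asm-generic):

#include <sys/socket.h>

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY	60
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY	0x4000000
#endif

/* Step 1: per-socket opt-in, before connect() — otherwise the
 * TCP_CLOSE check above makes this fail with EBUSY. */
static int enable_zerocopy(int fd)
{
	int one = 1;

	return setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
}

/* Step 2: per-call opt-in; the buffer must stay untouched until its
 * completion arrives on the error queue (see the recvmsg(MSG_ERRQUEUE)
 * sketch earlier). */
static ssize_t send_zerocopy(int fd, const void *buf, size_t len)
{
	return send(fd, buf, len, MSG_ZEROCOPY);
}
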
@@ -1383,6 +1387,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val64 = sock_gen_cookie(sk);
break;
+ case SO_ZEROCOPY:
+ v.val = sock_flag(sk, SOCK_ZEROCOPY);
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1670,6 +1678,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+ atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
@@ -1757,7 +1766,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
sk->sk_route_caps &= ~sk->sk_route_nocaps;
if (sk_can_gso(sk)) {
- if (dst->header_len) {
+ if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
} else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
@@ -1923,6 +1932,33 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
}
EXPORT_SYMBOL(sock_wmalloc);
+static void sock_ofree(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+
+ atomic_sub(skb->truesize, &sk->sk_omem_alloc);
+}
+
+struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+ gfp_t priority)
+{
+ struct sk_buff *skb;
+
+ /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
+ if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
+ sysctl_optmem_max)
+ return NULL;
+
+ skb = alloc_skb(size, priority);
+ if (!skb)
+ return NULL;
+
+ atomic_add(skb->truesize, &sk->sk_omem_alloc);
+ skb->sk = sk;
+ skb->destructor = sock_ofree;
+ return skb;
+}
+
/*
* Allocate a memory block from the socket's option memory buffer.
*/
@@ -2408,9 +2444,6 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
int sk_set_peek_off(struct sock *sk, int val)
{
- if (val < 0)
- return -EINVAL;
-
sk->sk_peek_off = val;
return 0;
}
@@ -2500,6 +2533,12 @@ int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
}
EXPORT_SYMBOL(sock_no_sendmsg);
+int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
+{
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(sock_no_sendmsg_locked);
+
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
int flags)
{
@@ -2528,6 +2567,22 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz
}
EXPORT_SYMBOL(sock_no_sendpage);
+ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ ssize_t res;
+ struct msghdr msg = {.msg_flags = flags};
+ struct kvec iov;
+ char *kaddr = kmap(page);
+
+ iov.iov_base = kaddr + offset;
+ iov.iov_len = size;
+ res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
+ kunmap(page);
+ return res;
+}
+EXPORT_SYMBOL(sock_no_sendpage_locked);
+
/*
* Default Socket Callbacks
*/
@@ -2673,6 +2728,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
sk->sk_stamp = SK_DEFAULT_STAMP;
+ atomic_set(&sk->sk_zckey, 0);
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 733f523..bae7d78 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1938,8 +1938,8 @@ static int __init dcbnl_init(void)
{
INIT_LIST_HEAD(&dcb_app_list);
- rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
return 0;
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 1b202f1..001c086 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
sk = __inet_lookup_established(net, &dccp_hashinfo,
iph->daddr, dh->dccph_dport,
iph->saddr, ntohs(dh->dccph_sport),
- inet_iif(skb));
+ inet_iif(skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
@@ -804,7 +804,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
lookup:
sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
- dh->dccph_sport, dh->dccph_dport, &refcounted);
+ dh->dccph_sport, dh->dccph_dport, 0, &refcounted);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
"get corresponding socket\n");
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 1b58eac..5df7857 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -16,6 +16,7 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/xfrm.h>
+#include <linux/string.h>
#include <net/addrconf.h>
#include <net/inet_common.h>
@@ -30,6 +31,7 @@
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
+#include <net/sock.h>
#include "dccp.h"
#include "ipv6.h"
@@ -89,7 +91,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __inet6_lookup_established(net, &dccp_hashinfo,
&hdr->daddr, dh->dccph_dport,
&hdr->saddr, ntohs(dh->dccph_sport),
- inet6_iif(skb));
+ inet6_iif(skb), 0);
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
@@ -597,19 +599,13 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- /*
- * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below
- * (wrt ipv6_pktopions) and net/ipv6/tcp_ipv6.c for an example.
- */
opt_skb = skb_clone(skb, GFP_ATOMIC);
if (sk->sk_state == DCCP_OPEN) { /* Fast path */
if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
goto reset;
- if (opt_skb) {
- /* XXX This is where we would goto ipv6_pktoptions. */
- __kfree_skb(opt_skb);
- }
+ if (opt_skb)
+ goto ipv6_pktoptions;
return 0;
}
@@ -640,10 +636,8 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
goto reset;
- if (opt_skb) {
- /* XXX This is where we would goto ipv6_pktoptions. */
- __kfree_skb(opt_skb);
- }
+ if (opt_skb)
+ goto ipv6_pktoptions;
return 0;
reset:
@@ -653,6 +647,35 @@ discard:
__kfree_skb(opt_skb);
kfree_skb(skb);
return 0;
+
+/* Handle the IPV6_PKTOPTIONS skb in a similar way to how
+ * it is done in net/ipv6/tcp_ipv6.c
+ */
+ipv6_pktoptions:
+ if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) {
+ if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
+ np->mcast_oif = inet6_iif(opt_skb);
+ if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
+ np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+ if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
+ np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
+ if (np->repflow)
+ np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
+ if (ipv6_opt_accepted(sk, opt_skb,
+ &DCCP_SKB_CB(opt_skb)->header.h6)) {
+ skb_set_owner_r(opt_skb, sk);
+ memmove(IP6CB(opt_skb),
+ &DCCP_SKB_CB(opt_skb)->header.h6,
+ sizeof(struct inet6_skb_parm));
+ opt_skb = xchg(&np->pktoptions, opt_skb);
+ } else {
+ __kfree_skb(opt_skb);
+ opt_skb = xchg(&np->pktoptions, NULL);
+ }
+ }
+
+ kfree_skb(opt_skb);
+ return 0;
}
static int dccp_v6_rcv(struct sk_buff *skb)
@@ -687,7 +710,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
lookup:
sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport,
- inet6_iif(skb), &refcounted);
+ inet6_iif(skb), 0, &refcounted);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
"get corresponding socket\n");
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index fa0110b..4d339de 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1419,9 +1419,9 @@ void __init dn_dev_init(void)
dn_dev_devices_on();
- rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, NULL);
- rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, NULL);
- rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, NULL);
+ rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, 0);
+ rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, 0);
+ rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0);
proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops);
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index f9f6fb3..3d37464 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -791,8 +791,8 @@ void __init dn_fib_init(void)
register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
- rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, NULL);
- rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, NULL);
+ rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, 0);
+ rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, 0);
}
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 21dedf6..22bf0b9 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -94,7 +94,7 @@ struct neigh_table dn_neigh_table = {
[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
- [NEIGH_VAR_QUEUE_LEN_BYTES] = 64*1024,
+ [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
[NEIGH_VAR_PROXY_QLEN] = 0,
[NEIGH_VAR_ANYCAST_DELAY] = 0,
[NEIGH_VAR_PROXY_DELAY] = 0,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index bcbe548..0bd3afd 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1922,10 +1922,10 @@ void __init dn_route_init(void)
#ifdef CONFIG_DECNET_ROUTER
rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
- dn_fib_dump, NULL);
+ dn_fib_dump, 0);
#else
rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
- dn_cache_dump, NULL);
+ dn_cache_dump, 0);
#endif
}
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index aa8ffec..ab395e5 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -115,7 +115,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
RCV_SKB_FAIL(-EINVAL);
}
-static struct nf_hook_ops dnrmg_ops __read_mostly = {
+static const struct nf_hook_ops dnrmg_ops = {
.hook = dnrmg_hook,
.pf = NFPROTO_DECNET,
.hooknum = NF_DN_ROUTE,
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 416ac4e..03c58b0 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -67,17 +67,17 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
[DSA_TAG_PROTO_NONE] = &none_ops,
};
-int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
- struct dsa_port *dport, int port)
+int dsa_cpu_dsa_setup(struct dsa_port *port)
{
- struct device_node *port_dn = dport->dn;
+ struct device_node *port_dn = port->dn;
+ struct dsa_switch *ds = port->ds;
struct phy_device *phydev;
int ret, mode;
if (of_phy_is_fixed_link(port_dn)) {
ret = of_phy_register_fixed_link(port_dn);
if (ret) {
- dev_err(dev, "failed to register fixed PHY\n");
+ dev_err(ds->dev, "failed to register fixed PHY\n");
return ret;
}
phydev = of_phy_find_device(port_dn);
@@ -90,7 +90,7 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
genphy_config_init(phydev);
genphy_read_status(phydev);
if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
+ ds->ops->adjust_link(ds, port->index, phydev);
put_device(&phydev->mdio.dev);
}
@@ -186,10 +186,12 @@ struct net_device *dsa_dev_to_net_device(struct device *dev)
EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+ struct packet_type *pt, struct net_device *unused)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct sk_buff *nskb = NULL;
+ struct pcpu_sw_netstats *s;
+ struct dsa_slave_priv *p;
if (unlikely(dst == NULL)) {
kfree_skb(skb);
@@ -200,19 +202,23 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
if (!skb)
return 0;
- nskb = dst->rcv(skb, dev, pt, orig_dev);
+ nskb = dst->rcv(skb, dev, pt);
if (!nskb) {
kfree_skb(skb);
return 0;
}
skb = nskb;
+ p = netdev_priv(skb->dev);
skb_push(skb, ETH_HLEN);
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
- skb->dev->stats.rx_packets++;
- skb->dev->stats.rx_bytes += skb->len;
+ s = this_cpu_ptr(p->stats64);
+ u64_stats_update_begin(&s->syncp);
+ s->rx_packets++;
+ s->rx_bytes += skb->len;
+ u64_stats_update_end(&s->syncp);
netif_receive_skb(skb);
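
The receive hook now counts packets in per-CPU pcpu_sw_netstats under
u64_stats_update_begin()/end() — a seqcount that lets readers on 32-bit
systems assemble consistent 64-bit counters without taking a lock. A
userspace analogue of that reader/writer protocol (illustrative only;
assumes a single writer per stats struct, which per-CPU data guarantees
in the kernel):

#include <stdatomic.h>
#include <stdint.h>

struct stats {
	atomic_uint seq;		/* odd while an update is in flight */
	_Atomic uint64_t rx_packets;
	_Atomic uint64_t rx_bytes;
};

void stats_add(struct stats *s, uint64_t bytes)		/* writer side */
{
	atomic_fetch_add(&s->seq, 1);			/* begin: seq odd */
	atomic_fetch_add_explicit(&s->rx_packets, 1, memory_order_relaxed);
	atomic_fetch_add_explicit(&s->rx_bytes, bytes, memory_order_relaxed);
	atomic_fetch_add(&s->seq, 1);			/* end: seq even */
}

void stats_read(struct stats *s, uint64_t *pkts, uint64_t *bytes)
{
	unsigned int start;

	do {
		while ((start = atomic_load(&s->seq)) & 1)
			;				/* update in flight */
		*pkts = atomic_load_explicit(&s->rx_packets,
					     memory_order_relaxed);
		*bytes = atomic_load_explicit(&s->rx_bytes,
					      memory_order_relaxed);
	} while (atomic_load(&s->seq) != start);
}
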
@@ -220,6 +226,11 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
}
#ifdef CONFIG_PM_SLEEP
+static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
+{
+ return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev;
+}
+
int dsa_switch_suspend(struct dsa_switch *ds)
{
int i, ret = 0;
@@ -271,10 +282,22 @@ static struct packet_type dsa_pack_type __read_mostly = {
.func = dsa_switch_rcv,
};
+static struct workqueue_struct *dsa_owq;
+
+bool dsa_schedule_work(struct work_struct *work)
+{
+ return queue_work(dsa_owq, work);
+}
+
static int __init dsa_init_module(void)
{
int rc;
+ dsa_owq = alloc_ordered_workqueue("dsa_ordered",
+ WQ_MEM_RECLAIM);
+ if (!dsa_owq)
+ return -ENOMEM;
+
rc = dsa_slave_register_notifier();
if (rc)
return rc;
@@ -294,6 +317,7 @@ static void __exit dsa_cleanup_module(void)
dsa_slave_unregister_notifier();
dev_remove_pack(&dsa_pack_type);
dsa_legacy_unregister();
+ destroy_workqueue(dsa_owq);
}
module_exit(dsa_cleanup_module);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 20bc9c5..873af01 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -219,7 +219,7 @@ static int dsa_dsa_port_apply(struct dsa_port *port)
struct dsa_switch *ds = port->ds;
int err;
- err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index);
+ err = dsa_cpu_dsa_setup(port);
if (err) {
dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n",
port->index, err);
@@ -243,7 +243,7 @@ static int dsa_cpu_port_apply(struct dsa_port *port)
struct dsa_switch *ds = port->ds;
int err;
- err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index);
+ err = dsa_cpu_dsa_setup(port);
if (err) {
dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n",
port->index, err);
@@ -275,7 +275,7 @@ static int dsa_user_port_apply(struct dsa_port *port)
if (!name)
name = "eth%d";
- err = dsa_slave_create(ds, ds->dev, port->index, name);
+ err = dsa_slave_create(port, name);
if (err) {
dev_warn(ds->dev, "Failed to create slave %d: %d\n",
port->index, err);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 55982cc..9c3eeb7 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -43,10 +43,10 @@ struct dsa_notifier_bridge_info {
/* DSA_NOTIFIER_FDB_* */
struct dsa_notifier_fdb_info {
- const struct switchdev_obj_port_fdb *fdb;
- struct switchdev_trans *trans;
int sw_index;
int port;
+ const unsigned char *addr;
+ u16 vid;
};
/* DSA_NOTIFIER_MDB_* */
@@ -65,18 +65,13 @@ struct dsa_notifier_vlan_info {
int port;
};
-struct dsa_device_ops {
- struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
- struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev);
-};
-
struct dsa_slave_priv {
/* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */
struct sk_buff * (*xmit)(struct sk_buff *skb,
struct net_device *dev);
+ struct pcpu_sw_netstats *stats64;
+
/* DSA port data, such as switch, port index, etc. */
struct dsa_port *dp;
@@ -99,16 +94,23 @@ struct dsa_slave_priv {
};
/* dsa.c */
-int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev,
- struct dsa_port *dport, int port);
+int dsa_cpu_dsa_setup(struct dsa_port *port);
void dsa_cpu_dsa_destroy(struct dsa_port *dport);
const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp);
void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp);
+bool dsa_schedule_work(struct work_struct *work);
/* legacy.c */
int dsa_legacy_register(void);
void dsa_legacy_unregister(void);
+int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid,
+ u16 flags);
+int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid);
/* port.c */
int dsa_port_set_state(struct dsa_port *dp, u8 state,
@@ -120,35 +122,25 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct switchdev_trans *trans);
int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
struct switchdev_trans *trans);
-int dsa_port_fdb_add(struct dsa_port *dp,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans);
-int dsa_port_fdb_del(struct dsa_port *dp,
- const struct switchdev_obj_port_fdb *fdb);
-int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb);
+int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
int dsa_port_mdb_add(struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans);
int dsa_port_mdb_del(struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
-int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
- switchdev_obj_dump_cb_t *cb);
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct switchdev_trans *trans);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
-int dsa_port_vlan_dump(struct dsa_port *dp,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb);
-
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops);
-int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
- int port, const char *name);
+int dsa_slave_create(struct dsa_port *port, const char *name);
void dsa_slave_destroy(struct net_device *slave_dev);
int dsa_slave_suspend(struct net_device *slave_dev);
int dsa_slave_resume(struct net_device *slave_dev);
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 1d7a328..91e6f79 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -78,25 +78,23 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
}
/* basic switch operations **************************************************/
-static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev)
+static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
{
- struct dsa_port *dport;
int ret, port;
for (port = 0; port < ds->num_ports; port++) {
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- dport = &ds->ports[port];
- ret = dsa_cpu_dsa_setup(ds, dev, dport, port);
+ ret = dsa_cpu_dsa_setup(&ds->ports[port]);
if (ret)
return ret;
}
return 0;
}
-static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master,
- struct device *parent)
+static int dsa_switch_setup_one(struct dsa_switch *ds,
+ struct net_device *master)
{
const struct dsa_switch_ops *ops = ds->ops;
struct dsa_switch_tree *dst = ds->dst;
@@ -176,7 +174,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
}
if (!ds->slave_mii_bus && ops->phy_read) {
- ds->slave_mii_bus = devm_mdiobus_alloc(parent);
+ ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
if (!ds->slave_mii_bus)
return -ENOMEM;
dsa_slave_mii_bus_init(ds);
@@ -196,14 +194,14 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master
if (!(ds->enabled_port_mask & (1 << i)))
continue;
- ret = dsa_slave_create(ds, parent, i, cd->port_names[i]);
+ ret = dsa_slave_create(&ds->ports[i], cd->port_names[i]);
if (ret < 0)
netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
index, i, cd->port_names[i], ret);
}
/* Perform configuration of the CPU and DSA ports */
- ret = dsa_cpu_dsa_setups(ds, parent);
+ ret = dsa_cpu_dsa_setups(ds);
if (ret < 0)
netdev_err(master, "[%d] : can't configure CPU and DSA ports\n",
index);
@@ -252,7 +250,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, struct net_device *master,
ds->ops = ops;
ds->priv = priv;
- ret = dsa_switch_setup_one(ds, master, parent);
+ ret = dsa_switch_setup_one(ds, master);
if (ret)
return ERR_PTR(ret);
@@ -741,6 +739,28 @@ static int dsa_resume(struct device *d)
}
#endif
+/* legacy way, bypassing the bridge *****************************************/
+int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid,
+ u16 flags)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
+
+ return dsa_port_fdb_add(dp, addr, vid);
+}
+
+int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
+
+ return dsa_port_fdb_del(dp, addr, vid);
+}
+
static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
static const struct of_device_id dsa_of_match_table[] = {
diff --git a/net/dsa/port.c b/net/dsa/port.c
index efc3bce..659676b 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -146,43 +146,33 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
}
-int dsa_port_fdb_add(struct dsa_port *dp,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
+int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid)
{
struct dsa_notifier_fdb_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
- .trans = trans,
- .fdb = fdb,
+ .addr = addr,
+ .vid = vid,
};
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info);
}
-int dsa_port_fdb_del(struct dsa_port *dp,
- const struct switchdev_obj_port_fdb *fdb)
+int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid)
{
struct dsa_notifier_fdb_info info = {
.sw_index = dp->ds->index,
.port = dp->index,
- .fdb = fdb,
+ .addr = addr,
+ .vid = vid,
};
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
}
-int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_switch *ds = dp->ds;
-
- if (ds->ops->port_fdb_dump)
- return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb);
-
- return -EOPNOTSUPP;
-}
-
int dsa_port_mdb_add(struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans)
@@ -209,17 +199,6 @@ int dsa_port_mdb_del(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
}
-int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_switch *ds = dp->ds;
-
- if (ds->ops->port_mdb_dump)
- return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb);
-
- return -EOPNOTSUPP;
-}
-
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct switchdev_trans *trans)
@@ -245,15 +224,3 @@ int dsa_port_vlan_del(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
}
-
-int dsa_port_vlan_dump(struct dsa_port *dp,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_switch *ds = dp->ds;
-
- if (ds->ops->port_vlan_dump)
- return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb);
-
- return -EOPNOTSUPP;
-}
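For reference, a hedged sketch of the reworked FDB API from a caller's point of view; example_learn_address() is hypothetical, but the prototype matches the one above, with the address and VID passed directly and fanned out through the DSA_NOTIFIER_FDB_ADD chain:

#include <linux/if_ether.h>

static int example_learn_address(struct dsa_port *dp)
{
	/* hypothetical locally administered address, installed on VLAN 1 */
	static const unsigned char addr[ETH_ALEN] = {
		0x02, 0x00, 0x00, 0x00, 0x00, 0x01
	};

	/* errors propagate back from the switch driver via the notifier */
	return dsa_port_fdb_add(dp, addr, 1);
}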
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9507bd3..2afa995 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -199,6 +199,83 @@ out:
return 0;
}
+struct dsa_slave_dump_ctx {
+ struct net_device *dev;
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+ int idx;
+};
+
+static int
+dsa_slave_port_fdb_do_dump(const unsigned char *addr, u16 vid,
+ bool is_static, void *data)
+{
+ struct dsa_slave_dump_ctx *dump = data;
+ u32 portid = NETLINK_CB(dump->cb->skb).portid;
+ u32 seq = dump->cb->nlh->nlmsg_seq;
+ struct nlmsghdr *nlh;
+ struct ndmsg *ndm;
+
+ if (dump->idx < dump->cb->args[2])
+ goto skip;
+
+ nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
+ sizeof(*ndm), NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ ndm = nlmsg_data(nlh);
+ ndm->ndm_family = AF_BRIDGE;
+ ndm->ndm_pad1 = 0;
+ ndm->ndm_pad2 = 0;
+ ndm->ndm_flags = NTF_SELF;
+ ndm->ndm_type = 0;
+ ndm->ndm_ifindex = dump->dev->ifindex;
+ ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
+
+ if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, addr))
+ goto nla_put_failure;
+
+ if (vid && nla_put_u16(dump->skb, NDA_VLAN, vid))
+ goto nla_put_failure;
+
+ nlmsg_end(dump->skb, nlh);
+
+skip:
+ dump->idx++;
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(dump->skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int
+dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev, struct net_device *filter_dev,
+ int *idx)
+{
+ struct dsa_slave_dump_ctx dump = {
+ .dev = dev,
+ .skb = skb,
+ .cb = cb,
+ .idx = *idx,
+ };
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
+ struct dsa_switch *ds = dp->ds;
+ int err;
+
+ if (!ds->ops->port_fdb_dump)
+ return -EOPNOTSUPP;
+
+ err = ds->ops->port_fdb_dump(ds, dp->index,
+ dsa_slave_port_fdb_do_dump,
+ &dump);
+ *idx = dump.idx;
+ return err;
+}
+
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -250,9 +327,6 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
*/
switch (obj->id) {
- case SWITCHDEV_OBJ_ID_PORT_FDB:
- err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans);
- break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
break;
@@ -276,9 +350,6 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
int err;
switch (obj->id) {
- case SWITCHDEV_OBJ_ID_PORT_FDB:
- err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj));
- break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
@@ -293,32 +364,6 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
return err;
}
-static int dsa_slave_port_obj_dump(struct net_device *dev,
- struct switchdev_obj *obj,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
- int err;
-
- switch (obj->id) {
- case SWITCHDEV_OBJ_ID_PORT_FDB:
- err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb);
- break;
- case SWITCHDEV_OBJ_ID_PORT_MDB:
- err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb);
- break;
- case SWITCHDEV_OBJ_ID_PORT_VLAN:
- err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb);
- break;
- default:
- err = -EOPNOTSUPP;
- break;
- }
-
- return err;
-}
-
static int dsa_slave_port_attr_get(struct net_device *dev,
struct switchdev_attr *attr)
{
@@ -330,6 +375,9 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
attr->u.ppid.id_len = sizeof(ds->index);
memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len);
break;
+ case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
+ attr->u.brport_flags_support = 0;
+ break;
default:
return -EOPNOTSUPP;
}
@@ -352,10 +400,14 @@ static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p,
static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
+ struct pcpu_sw_netstats *s;
struct sk_buff *nskb;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
+ s = this_cpu_ptr(p->stats64);
+ u64_stats_update_begin(&s->syncp);
+ s->tx_packets++;
+ s->tx_bytes += skb->len;
+ u64_stats_update_end(&s->syncp);
/* Transmit function may have to reallocate the original SKB,
* in which case it must have freed it. Only free it here on error.
@@ -594,11 +646,26 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
-
- data[0] = dev->stats.tx_packets;
- data[1] = dev->stats.tx_bytes;
- data[2] = dev->stats.rx_packets;
- data[3] = dev->stats.rx_bytes;
+ struct pcpu_sw_netstats *s;
+ unsigned int start;
+ int i;
+
+ for_each_possible_cpu(i) {
+ u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+
+ s = per_cpu_ptr(p->stats64, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&s->syncp);
+ tx_packets = s->tx_packets;
+ tx_bytes = s->tx_bytes;
+ rx_packets = s->rx_packets;
+ rx_bytes = s->rx_bytes;
+ } while (u64_stats_fetch_retry_irq(&s->syncp, start));
+ data[0] += tx_packets;
+ data[1] += tx_bytes;
+ data[2] += rx_packets;
+ data[3] += rx_bytes;
+ }
if (ds->ops->get_ethtool_stats)
ds->ops->get_ethtool_stats(ds, p->dp->index, data + 4);
}
@@ -648,17 +715,24 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
struct dsa_switch *ds = p->dp->ds;
int ret;
- if (!ds->ops->set_eee)
+ /* Port's PHY and MAC both need to be EEE capable */
+ if (!p->phy)
+ return -ENODEV;
+
+ if (!ds->ops->set_mac_eee)
return -EOPNOTSUPP;
- ret = ds->ops->set_eee(ds, p->dp->index, p->phy, e);
+ ret = ds->ops->set_mac_eee(ds, p->dp->index, e);
if (ret)
return ret;
- if (p->phy)
- ret = phy_ethtool_set_eee(p->phy, e);
+ if (e->eee_enabled) {
+ ret = phy_init_eee(p->phy, 0);
+ if (ret)
+ return ret;
+ }
- return ret;
+ return phy_ethtool_set_eee(p->phy, e);
}
static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
@@ -667,17 +741,18 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
struct dsa_switch *ds = p->dp->ds;
int ret;
- if (!ds->ops->get_eee)
+ /* Port's PHY and MAC both need to be EEE capable */
+ if (!p->phy)
+ return -ENODEV;
+
+ if (!ds->ops->get_mac_eee)
return -EOPNOTSUPP;
- ret = ds->ops->get_eee(ds, p->dp->index, e);
+ ret = ds->ops->get_mac_eee(ds, p->dp->index, e);
if (ret)
return ret;
- if (p->phy)
- ret = phy_ethtool_get_eee(p->phy, e);
-
- return ret;
+ return phy_ethtool_get_eee(p->phy, e);
}
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -747,12 +822,12 @@ dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p,
}
static int dsa_slave_add_cls_matchall(struct net_device *dev,
- __be16 protocol,
struct tc_cls_matchall_offload *cls,
bool ingress)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
+ __be16 protocol = cls->common.protocol;
struct dsa_switch *ds = p->dp->ds;
struct net *net = dev_net(dev);
struct dsa_slave_priv *to_p;
@@ -765,7 +840,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
if (!ds->ops->port_mirror_add)
return err;
- if (!tc_single_action(cls->exts))
+ if (!tcf_exts_has_one_action(cls->exts))
return err;
tcf_exts_to_list(cls->exts, &actions);
@@ -836,31 +911,71 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
kfree(mall_tc_entry);
}
-static int dsa_slave_setup_tc(struct net_device *dev, u32 handle,
- u32 chain_index, __be16 protocol,
- struct tc_to_netdev *tc)
+static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
+ struct tc_cls_matchall_offload *cls)
{
- bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS);
+ bool ingress;
- if (chain_index)
+ if (is_classid_clsact_ingress(cls->common.classid))
+ ingress = true;
+ else if (is_classid_clsact_egress(cls->common.classid))
+ ingress = false;
+ else
return -EOPNOTSUPP;
- switch (tc->type) {
- case TC_SETUP_MATCHALL:
- switch (tc->cls_mall->command) {
- case TC_CLSMATCHALL_REPLACE:
- return dsa_slave_add_cls_matchall(dev, protocol,
- tc->cls_mall,
- ingress);
- case TC_CLSMATCHALL_DESTROY:
- dsa_slave_del_cls_matchall(dev, tc->cls_mall);
- return 0;
- }
+ if (cls->common.chain_index)
+ return -EOPNOTSUPP;
+
+ switch (cls->command) {
+ case TC_CLSMATCHALL_REPLACE:
+ return dsa_slave_add_cls_matchall(dev, cls, ingress);
+ case TC_CLSMATCHALL_DESTROY:
+ dsa_slave_del_cls_matchall(dev, cls);
+ return 0;
default:
return -EOPNOTSUPP;
}
}
+static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_CLSMATCHALL:
+ return dsa_slave_setup_tc_cls_matchall(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void dsa_slave_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct pcpu_sw_netstats *s;
+ unsigned int start;
+ int i;
+
+ netdev_stats_to_stats64(stats, &dev->stats);
+ for_each_possible_cpu(i) {
+ u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+
+ s = per_cpu_ptr(p->stats64, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&s->syncp);
+ tx_packets = s->tx_packets;
+ tx_bytes = s->tx_bytes;
+ rx_packets = s->rx_packets;
+ rx_bytes = s->rx_bytes;
+ } while (u64_stats_fetch_retry_irq(&s->syncp, start));
+
+ stats->tx_packets += tx_packets;
+ stats->tx_bytes += tx_bytes;
+ stats->rx_packets += rx_packets;
+ stats->rx_bytes += rx_bytes;
+ }
+}
+
void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
{
ops->get_sset_count = dsa_cpu_port_get_sset_count;
@@ -921,9 +1036,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_change_rx_flags = dsa_slave_change_rx_flags,
.ndo_set_rx_mode = dsa_slave_set_rx_mode,
.ndo_set_mac_address = dsa_slave_set_mac_address,
- .ndo_fdb_add = switchdev_port_fdb_add,
- .ndo_fdb_del = switchdev_port_fdb_del,
- .ndo_fdb_dump = switchdev_port_fdb_dump,
+ .ndo_fdb_add = dsa_legacy_fdb_add,
+ .ndo_fdb_del = dsa_legacy_fdb_del,
+ .ndo_fdb_dump = dsa_slave_fdb_dump,
.ndo_do_ioctl = dsa_slave_ioctl,
.ndo_get_iflink = dsa_slave_get_iflink,
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -931,11 +1046,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_netpoll_cleanup = dsa_slave_netpoll_cleanup,
.ndo_poll_controller = dsa_slave_poll_controller,
#endif
- .ndo_bridge_getlink = switchdev_port_bridge_getlink,
- .ndo_bridge_setlink = switchdev_port_bridge_setlink,
- .ndo_bridge_dellink = switchdev_port_bridge_dellink,
.ndo_get_phys_port_name = dsa_slave_get_phys_port_name,
.ndo_setup_tc = dsa_slave_setup_tc,
+ .ndo_get_stats64 = dsa_slave_get_stats64,
};
static const struct switchdev_ops dsa_slave_switchdev_ops = {
@@ -943,7 +1056,6 @@ static const struct switchdev_ops dsa_slave_switchdev_ops = {
.switchdev_port_attr_set = dsa_slave_port_attr_set,
.switchdev_port_obj_add = dsa_slave_port_obj_add,
.switchdev_port_obj_del = dsa_slave_port_obj_del,
- .switchdev_port_obj_dump = dsa_slave_port_obj_dump,
};
static struct device_type dsa_type = {
@@ -1134,9 +1246,9 @@ int dsa_slave_resume(struct net_device *slave_dev)
return 0;
}
-int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
- int port, const char *name)
+int dsa_slave_create(struct dsa_port *port, const char *name)
{
+ struct dsa_switch *ds = port->ds;
struct dsa_switch_tree *dst = ds->dst;
struct net_device *master;
struct net_device *slave_dev;
@@ -1147,8 +1259,12 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
cpu_dp = ds->dst->cpu_dp;
master = cpu_dp->netdev;
- slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name,
- NET_NAME_UNKNOWN, ether_setup);
+ if (!ds->num_tx_queues)
+ ds->num_tx_queues = 1;
+
+ slave_dev = alloc_netdev_mqs(sizeof(struct dsa_slave_priv), name,
+ NET_NAME_UNKNOWN, ether_setup,
+ ds->num_tx_queues, 1);
if (slave_dev == NULL)
return -ENOMEM;
@@ -1166,12 +1282,17 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
NULL);
- SET_NETDEV_DEV(slave_dev, parent);
- slave_dev->dev.of_node = ds->ports[port].dn;
+ SET_NETDEV_DEV(slave_dev, port->ds->dev);
+ slave_dev->dev.of_node = port->dn;
slave_dev->vlan_features = master->vlan_features;
p = netdev_priv(slave_dev);
- p->dp = &ds->ports[port];
+ p->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!p->stats64) {
+ free_netdev(slave_dev);
+ return -ENOMEM;
+ }
+ p->dp = port;
INIT_LIST_HEAD(&p->mall_tc_list);
p->xmit = dst->tag_ops->xmit;
@@ -1179,12 +1300,13 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
p->old_link = -1;
p->old_duplex = -1;
- ds->ports[port].netdev = slave_dev;
+ port->netdev = slave_dev;
ret = register_netdev(slave_dev);
if (ret) {
netdev_err(master, "error %d registering interface %s\n",
ret, slave_dev->name);
- ds->ports[port].netdev = NULL;
+ port->netdev = NULL;
+ free_percpu(p->stats64);
free_netdev(slave_dev);
return ret;
}
@@ -1195,6 +1317,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
if (ret) {
netdev_err(master, "error %d setting up slave phy\n", ret);
unregister_netdev(slave_dev);
+ free_percpu(p->stats64);
free_netdev(slave_dev);
return ret;
}
@@ -1217,6 +1340,7 @@ void dsa_slave_destroy(struct net_device *slave_dev)
of_phy_deregister_fixed_link(port_dn);
}
unregister_netdev(slave_dev);
+ free_percpu(p->stats64);
free_netdev(slave_dev);
}
@@ -1259,19 +1383,142 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
return NOTIFY_DONE;
}
+struct dsa_switchdev_event_work {
+ struct work_struct work;
+ struct switchdev_notifier_fdb_info fdb_info;
+ struct net_device *dev;
+ unsigned long event;
+};
+
+static void dsa_slave_switchdev_event_work(struct work_struct *work)
+{
+ struct dsa_switchdev_event_work *switchdev_work =
+ container_of(work, struct dsa_switchdev_event_work, work);
+ struct net_device *dev = switchdev_work->dev;
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ int err;
+
+ rtnl_lock();
+ switch (switchdev_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ fdb_info = &switchdev_work->fdb_info;
+ err = dsa_port_fdb_add(p->dp, fdb_info->addr, fdb_info->vid);
+ if (err) {
+ netdev_dbg(dev, "fdb add failed err=%d\n", err);
+ break;
+ }
+ call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
+ &fdb_info->info);
+ break;
+
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ fdb_info = &switchdev_work->fdb_info;
+ err = dsa_port_fdb_del(p->dp, fdb_info->addr, fdb_info->vid);
+ if (err) {
+ netdev_dbg(dev, "fdb del failed err=%d\n", err);
+ dev_close(dev);
+ }
+ break;
+ }
+ rtnl_unlock();
+
+ kfree(switchdev_work->fdb_info.addr);
+ kfree(switchdev_work);
+ dev_put(dev);
+}
+
+static int
+dsa_slave_switchdev_fdb_work_init(struct dsa_switchdev_event_work *
+ switchdev_work,
+ const struct switchdev_notifier_fdb_info *
+ fdb_info)
+{
+ memcpy(&switchdev_work->fdb_info, fdb_info,
+ sizeof(switchdev_work->fdb_info));
+ switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+ if (!switchdev_work->fdb_info.addr)
+ return -ENOMEM;
+ ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
+ fdb_info->addr);
+ return 0;
+}
+
+/* Called under rcu_read_lock() */
+static int dsa_slave_switchdev_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct dsa_switchdev_event_work *switchdev_work;
+
+ if (!dsa_slave_dev_check(dev))
+ return NOTIFY_DONE;
+
+ switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+ if (!switchdev_work)
+ return NOTIFY_BAD;
+
+ INIT_WORK(&switchdev_work->work,
+ dsa_slave_switchdev_event_work);
+ switchdev_work->dev = dev;
+ switchdev_work->event = event;
+
+ switch (event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ if (dsa_slave_switchdev_fdb_work_init(switchdev_work,
+ ptr))
+ goto err_fdb_work_init;
+ dev_hold(dev);
+ break;
+ default:
+ kfree(switchdev_work);
+ return NOTIFY_DONE;
+ }
+
+ dsa_schedule_work(&switchdev_work->work);
+ return NOTIFY_OK;
+
+err_fdb_work_init:
+ kfree(switchdev_work);
+ return NOTIFY_BAD;
+}
+
static struct notifier_block dsa_slave_nb __read_mostly = {
- .notifier_call = dsa_slave_netdevice_event,
+ .notifier_call = dsa_slave_netdevice_event,
+};
+
+static struct notifier_block dsa_slave_switchdev_notifier = {
+ .notifier_call = dsa_slave_switchdev_event,
};
int dsa_slave_register_notifier(void)
{
- return register_netdevice_notifier(&dsa_slave_nb);
+ int err;
+
+ err = register_netdevice_notifier(&dsa_slave_nb);
+ if (err)
+ return err;
+
+ err = register_switchdev_notifier(&dsa_slave_switchdev_notifier);
+ if (err)
+ goto err_switchdev_nb;
+
+ return 0;
+
+err_switchdev_nb:
+ unregister_netdevice_notifier(&dsa_slave_nb);
+ return err;
}
void dsa_slave_unregister_notifier(void)
{
int err;
+ err = unregister_switchdev_notifier(&dsa_slave_switchdev_notifier);
+ if (err)
+ pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err);
+
err = unregister_netdevice_notifier(&dsa_slave_nb);
if (err)
pr_err("DSA: failed to unregister slave notifier (%d)\n", err);
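The statistics conversion above follows the standard pcpu_sw_netstats pattern: writers touch only their own CPU's counters inside a u64_stats section, and readers sum over all CPUs, retrying a CPU whenever a writer raced them. A self-contained sketch of the writer side (example_tx_account() is hypothetical):

#include <linux/netdevice.h>
#include <linux/u64_stats_sync.h>

static void example_tx_account(struct pcpu_sw_netstats *stats64,
			       unsigned int len)
{
	struct pcpu_sw_netstats *s = this_cpu_ptr(stats64);

	u64_stats_update_begin(&s->syncp);
	s->tx_packets++;
	s->tx_bytes += len;
	u64_stats_update_end(&s->syncp);
}

On 64-bit the syncp is compiled away; on 32-bit it is a seqcount that lets the reader loops in dsa_slave_get_stats64() and dsa_slave_get_ethtool_stats() detect torn 64-bit updates.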
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 97e2e9c..e6c06aa 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -83,30 +83,20 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
static int dsa_switch_fdb_add(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
- const struct switchdev_obj_port_fdb *fdb = info->fdb;
- struct switchdev_trans *trans = info->trans;
-
/* Do not care yet about other switch chips of the fabric */
if (ds->index != info->sw_index)
return 0;
- if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
- return -EOPNOTSUPP;
-
- return ds->ops->port_fdb_prepare(ds, info->port, fdb, trans);
- }
-
- ds->ops->port_fdb_add(ds, info->port, fdb, trans);
+ if (!ds->ops->port_fdb_add)
+ return -EOPNOTSUPP;
- return 0;
+ return ds->ops->port_fdb_add(ds, info->port, info->addr,
+ info->vid);
}
static int dsa_switch_fdb_del(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
- const struct switchdev_obj_port_fdb *fdb = info->fdb;
-
/* Do not care yet about other switch chips of the fabric */
if (ds->index != info->sw_index)
return 0;
@@ -114,7 +104,8 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
if (!ds->ops->port_fdb_del)
return -EOPNOTSUPP;
- return ds->ops->port_fdb_del(ds, info->port, fdb);
+ return ds->ops->port_fdb_del(ds, info->port, info->addr,
+ info->vid);
}
static int dsa_switch_mdb_add(struct dsa_switch *ds,
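With the switchdev prepare phase gone, a switch driver's side of the FDB contract shrinks to the two callbacks below. A hedged sketch with hypothetical foo_* names; the prototypes are the ones this patch introduces (addr/vid instead of a switchdev object):

static int foo_port_fdb_add(struct dsa_switch *ds, int port,
			    const unsigned char *addr, u16 vid)
{
	/* program the hardware address table; return 0 or -errno */
	return 0;
}

static int foo_port_fdb_del(struct dsa_switch *ds, int port,
			    const unsigned char *addr, u16 vid)
{
	/* remove the entry; return -ENOENT if it was never there */
	return 0;
}

static const struct dsa_switch_ops foo_switch_ops = {
	.port_fdb_add	= foo_port_fdb_add,
	.port_fdb_del	= foo_port_fdb_del,
	/* other mandatory ops elided */
};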
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index c697d98..dbb0164 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -62,6 +62,7 @@
static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
+ u16 queue = skb_get_queue_mapping(skb);
u8 *brcm_tag;
if (skb_cow_head(skb, BRCM_TAG_LEN) < 0)
@@ -78,7 +79,7 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
* deprecated
*/
brcm_tag[0] = (1 << BRCM_OPCODE_SHIFT) |
- ((skb->priority << BRCM_IG_TC_SHIFT) & BRCM_IG_TC_MASK);
+ ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT);
brcm_tag[1] = 0;
brcm_tag[2] = 0;
if (p->dp->index == 8)
@@ -89,8 +90,7 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
}
static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 12867a4..fbf9ca9 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -65,8 +65,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
}
static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 67a9d26..76367ba 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -78,8 +78,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
}
static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index fcd90f7..010ca0a 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -78,8 +78,7 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
}
static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 247774d..0b98261 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -39,7 +39,6 @@
*/
#define LAN9303_TAG_LEN 4
-#define LAN9303_MAX_PORTS 3
static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
{
@@ -69,7 +68,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
}
static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+ struct packet_type *pt)
{
u16 *lan9303_tag;
struct dsa_switch_tree *dst = dev->dsa_ptr;
@@ -104,7 +103,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
source_port = ntohs(lan9303_tag[1]) & 0x3;
- if (source_port >= LAN9303_MAX_PORTS) {
+ if (source_port >= ds->num_ports) {
dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n");
return NULL;
}
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 2f32b7e..ec8ee5f 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -44,8 +44,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
}
static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_switch *ds;
@@ -87,7 +86,17 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
return skb;
}
+static int mtk_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto,
+ int *offset)
+{
+ *offset = 4;
+ *proto = ((__be16 *)skb->data)[1];
+
+ return 0;
+}
+
const struct dsa_device_ops mtk_netdev_ops = {
- .xmit = mtk_tag_xmit,
- .rcv = mtk_tag_rcv,
+ .xmit = mtk_tag_xmit,
+ .rcv = mtk_tag_rcv,
+ .flow_dissect = mtk_tag_flow_dissect,
};
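The new flow_dissect hook lets the core flow dissector skip a CPU tag when hashing packets received on the DSA master, so RPS/RSS keep spreading flows correctly. A sketch of what the hook looks like for any tagger that prepends a fixed 4-byte tag (example_tag_flow_dissect() is hypothetical; the mtk version above is the real instance):

static int example_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto,
				    int *offset)
{
	*offset = 4;				/* bytes of tag to skip */
	*proto = ((__be16 *)skb->data)[1];	/* ethertype behind the tag */

	return 0;
}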
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 1867a3d..1d4c707 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -63,8 +63,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
}
static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 9c7b1d7..d2fd492 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -56,8 +56,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
}
static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt,
- struct net_device *orig_dev)
+ struct packet_type *pt)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index de2661c..974765b 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -54,7 +54,7 @@
static int open_count;
-static struct header_ops lowpan_header_ops = {
+static const struct header_ops lowpan_header_ops = {
.create = lowpan_header_create,
};
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 30d875d..f85b08b 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -580,19 +580,14 @@ static int __net_init lowpan_frags_init_net(struct net *net)
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
- int res;
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
- res = inet_frags_init_net(&ieee802154_lowpan->frags);
- if (res)
- return res;
- res = lowpan_frags_ns_sysctl_register(net);
- if (res)
- inet_frags_uninit_net(&ieee802154_lowpan->frags);
- return res;
+ inet_frags_init_net(&ieee802154_lowpan->frags);
+
+ return lowpan_frags_ns_sysctl_register(net);
}
static void __net_exit lowpan_frags_exit_net(struct net *net)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 2e548ec..e31108e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -944,6 +944,8 @@ const struct proto_ops inet_stream_ops = {
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
+ .sendmsg_locked = tcp_sendmsg_locked,
+ .sendpage_locked = tcp_sendpage_locked,
.peek_len = tcp_peek_len,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
@@ -1219,10 +1221,9 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
struct sk_buff *inet_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
- bool udpfrag = false, fixedid = false, gso_partial, encap;
+ bool fixedid = false, gso_partial, encap;
struct sk_buff *segs = ERR_PTR(-EINVAL);
const struct net_offload *ops;
- unsigned int offset = 0;
struct iphdr *iph;
int proto, tot_len;
int nhoff;
@@ -1257,7 +1258,6 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
segs = ERR_PTR(-EPROTONOSUPPORT);
if (!skb->encapsulation || encap) {
- udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
/* fixed ID is invalid if DF bit is not set */
@@ -1277,13 +1277,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
skb = segs;
do {
iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
- if (udpfrag) {
- iph->frag_off = htons(offset >> 3);
- if (skb->next)
- iph->frag_off |= htons(IP_MF);
- offset += skb->len - nhoff - ihl;
- tot_len = skb->len - nhoff;
- } else if (skb_is_gso(skb)) {
+ if (skb_is_gso(skb)) {
if (!fixedid) {
iph->id = htons(id);
id += skb_shinfo(skb)->gso_segs;
@@ -1602,6 +1596,9 @@ static const struct net_protocol igmp_protocol = {
};
#endif
+/* thinking of making this const? Don't.
+ * early_demux can change based on sysctl.
+ */
static struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
.early_demux_handler = tcp_v4_early_demux,
@@ -1612,6 +1609,9 @@ static struct net_protocol tcp_protocol = {
.icmp_strict_tag_validation = 1,
};
+/* thinking of making this const? Don't.
+ * early_demux can change based on sysctl.
+ */
static struct net_protocol udp_protocol = {
.early_demux = udp_v4_early_demux,
.early_demux_handler = udp_v4_early_demux,
@@ -1778,6 +1778,11 @@ static const struct net_offload ipip_offload = {
},
};
+static int __init ipip_offload_init(void)
+{
+ return inet_add_offload(&ipip_offload, IPPROTO_IPIP);
+}
+
static int __init ipv4_offload_init(void)
{
/*
@@ -1787,9 +1792,10 @@ static int __init ipv4_offload_init(void)
pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
if (tcpv4_offload_init() < 0)
pr_crit("%s: Cannot add TCP protocol offload\n", __func__);
+ if (ipip_offload_init() < 0)
+ pr_crit("%s: Cannot add IPIP protocol offload\n", __func__);
dev_add_offload(&ip_packet_offload);
- inet_add_offload(&ipip_offload, IPPROTO_IPIP);
return 0;
}
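The ipip_offload_init() wrapper exists so the caller can check a return value that was previously discarded. A hedged sketch of the shape being registered (example_offload mirrors ipip_offload; the names are placeholders):

static const struct net_offload example_offload = {
	.callbacks = {
		.gso_segment = inet_gso_segment,	/* placeholder handler */
	},
};

static int __init example_offload_init(void)
{
	/* non-zero means the offload slot could not be installed */
	return inet_add_offload(&example_offload, IPPROTO_IPIP);
}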
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8b52179..7c45b88 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -171,7 +171,7 @@ struct neigh_table arp_tbl = {
[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
- [NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+ [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
[NEIGH_VAR_PROXY_QLEN] = 64,
[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 38d9af9..d7adc06 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2491,9 +2491,9 @@ void __init devinet_init(void)
rtnl_af_register(&inet_af_ops);
- rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
- rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
- rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
+ rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
+ rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
+ rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
- inet_netconf_dump_devconf, NULL);
+ inet_netconf_dump_devconf, 0);
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index df68963..b00e4a4 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -501,18 +501,59 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
return esp_output_tail(x, skb, &esp);
}
+static inline int esp_remove_trailer(struct sk_buff *skb)
+{
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct crypto_aead *aead = x->data;
+ int alen, hlen, elen;
+ int padlen, trimlen;
+ __wsum csumdiff;
+ u8 nexthdr[2];
+ int ret;
+
+ alen = crypto_aead_authsize(aead);
+ hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+ elen = skb->len - hlen;
+
+ if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
+ ret = xo->proto;
+ goto out;
+ }
+
+ if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
+ BUG();
+
+ ret = -EINVAL;
+ padlen = nexthdr[0];
+ if (padlen + 2 + alen >= elen) {
+ net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
+ padlen + 2, elen - alen);
+ goto out;
+ }
+
+ trimlen = alen + padlen + 2;
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0);
+ skb->csum = csum_block_sub(skb->csum, csumdiff,
+ skb->len - trimlen);
+ }
+ pskb_trim(skb, skb->len - trimlen);
+
+ ret = nexthdr[1];
+
+out:
+ return ret;
+}
+
int esp_input_done2(struct sk_buff *skb, int err)
{
const struct iphdr *iph;
struct xfrm_state *x = xfrm_input_state(skb);
struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
- int alen = crypto_aead_authsize(aead);
int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
- int elen = skb->len - hlen;
int ihl;
- u8 nexthdr[2];
- int padlen;
if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
kfree(ESP_SKB_CB(skb)->tmp);
@@ -520,16 +561,10 @@ int esp_input_done2(struct sk_buff *skb, int err)
if (unlikely(err))
goto out;
- if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
- BUG();
-
- err = -EINVAL;
- padlen = nexthdr[0];
- if (padlen + 2 + alen >= elen)
+ err = esp_remove_trailer(skb);
+ if (unlikely(err < 0))
goto out;
- /* ... check padding bits here. Silly. :-) */
-
iph = ip_hdr(skb);
ihl = iph->ihl * 4;
@@ -570,15 +605,12 @@ int esp_input_done2(struct sk_buff *skb, int err)
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- pskb_trim(skb, skb->len - alen - padlen - 2);
- __skb_pull(skb, hlen);
+ skb_pull_rcsum(skb, hlen);
if (x->props.mode == XFRM_MODE_TUNNEL)
skb_reset_transport_header(skb);
else
skb_set_transport_header(skb, -ihl);
- err = nexthdr[1];
-
/* RFC4303: Drop dummy packets without any error */
if (err == IPPROTO_NONE)
err = -EINVAL;
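The subtle part of esp_remove_trailer() is keeping skb->csum valid on CHECKSUM_COMPLETE paths: the checksum of the bytes about to be trimmed must be subtracted before the trim, which is also what allows the caller to switch from __skb_pull() to skb_pull_rcsum(). A standalone sketch of just that step (example_trim_rcsum() is hypothetical):

static void example_trim_rcsum(struct sk_buff *skb, int trimlen)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		__wsum diff = skb_checksum(skb, skb->len - trimlen,
					   trimlen, 0);

		/* remove the trailer's contribution from the full csum */
		skb->csum = csum_block_sub(skb->csum, diff,
					   skb->len - trimlen);
	}
	pskb_trim(skb, skb->len - trimlen);
}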
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 5011232..f8b918c 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -182,11 +182,13 @@ out:
static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb)
{
struct crypto_aead *aead = x->data;
+ struct xfrm_offload *xo = xfrm_offload(skb);
if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
return -EINVAL;
- skb->ip_summed = CHECKSUM_NONE;
+ if (!(xo->flags & CRYPTO_DONE))
+ skb->ip_summed = CHECKSUM_NONE;
return esp_input_done2(skb, 0);
}
@@ -303,3 +305,4 @@ module_init(esp4_offload_init);
module_exit(esp4_offload_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
+MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET, XFRM_PROTO_ESP);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 044d2a1..37819ab 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1247,22 +1247,28 @@ static int __net_init ip_fib_net_init(struct net *net)
int err;
size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
- net->ipv4.fib_seq = 0;
+ err = fib4_notifier_init(net);
+ if (err)
+ return err;
/* Avoid false sharing : Use at least a full cache line */
size = max_t(size_t, size, L1_CACHE_BYTES);
net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
- if (!net->ipv4.fib_table_hash)
- return -ENOMEM;
+ if (!net->ipv4.fib_table_hash) {
+ err = -ENOMEM;
+ goto err_table_hash_alloc;
+ }
err = fib4_rules_init(net);
if (err < 0)
- goto fail;
+ goto err_rules_init;
return 0;
-fail:
+err_rules_init:
kfree(net->ipv4.fib_table_hash);
+err_table_hash_alloc:
+ fib4_notifier_exit(net);
return err;
}
@@ -1292,6 +1298,7 @@ static void ip_fib_net_exit(struct net *net)
#endif
rtnl_unlock();
kfree(net->ipv4.fib_table_hash);
+ fib4_notifier_exit(net);
}
static int __net_init fib_net_init(struct net *net)
@@ -1341,7 +1348,7 @@ void __init ip_fib_init(void)
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
+ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0);
}
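ip_fib_net_init() now follows the usual unwind-ladder convention: each error label undoes exactly the steps that succeeded before it, in reverse order. A minimal sketch of the idiom, with a hypothetical example_alloc_tables() standing in for the hash allocation:

static int example_alloc_tables(struct net *net)
{
	return 0;	/* hypothetical: allocate fib_table_hash, rules, ... */
}

static int example_net_init(struct net *net)
{
	int err;

	err = fib4_notifier_init(net);
	if (err)
		return err;

	err = example_alloc_tables(net);
	if (err)
		goto err_tables;

	return 0;

err_tables:
	fib4_notifier_exit(net);	/* undo only what succeeded */
	return err;
}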
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 769ab87..5b2af19 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -32,6 +32,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
struct netlink_ext_ack *extack);
int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
struct netlink_ext_ack *extack);
+bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi);
int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi,
unsigned int);
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index e0714d9..cfd420b0 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -1,86 +1,73 @@
#include <linux/rtnetlink.h>
#include <linux/notifier.h>
-#include <linux/rcupdate.h>
+#include <linux/socket.h>
#include <linux/kernel.h>
+#include <linux/export.h>
#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
#include <net/netns/ipv4.h>
#include <net/ip_fib.h>
-static ATOMIC_NOTIFIER_HEAD(fib_chain);
-
-int call_fib_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_notifier_info *info)
+int call_fib4_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
{
- info->net = net;
- return nb->notifier_call(nb, event_type, info);
+ info->family = AF_INET;
+ return call_fib_notifier(nb, net, event_type, info);
}
-int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
- struct fib_notifier_info *info)
+int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
{
+ ASSERT_RTNL();
+
+ info->family = AF_INET;
net->ipv4.fib_seq++;
- info->net = net;
- return atomic_notifier_call_chain(&fib_chain, event_type, info);
+ return call_fib_notifiers(net, event_type, info);
}
-static unsigned int fib_seq_sum(void)
+static unsigned int fib4_seq_read(struct net *net)
{
- unsigned int fib_seq = 0;
- struct net *net;
-
- rtnl_lock();
- for_each_net(net)
- fib_seq += net->ipv4.fib_seq;
- rtnl_unlock();
+ ASSERT_RTNL();
- return fib_seq;
+ return net->ipv4.fib_seq + fib4_rules_seq_read(net);
}
-static bool fib_dump_is_consistent(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb),
- unsigned int fib_seq)
+static int fib4_dump(struct net *net, struct notifier_block *nb)
{
- atomic_notifier_chain_register(&fib_chain, nb);
- if (fib_seq == fib_seq_sum())
- return true;
- atomic_notifier_chain_unregister(&fib_chain, nb);
- if (cb)
- cb(nb);
- return false;
+ int err;
+
+ err = fib4_rules_dump(net, nb);
+ if (err)
+ return err;
+
+ fib_notify(net, nb);
+
+ return 0;
}
-#define FIB_DUMP_MAX_RETRIES 5
-int register_fib_notifier(struct notifier_block *nb,
- void (*cb)(struct notifier_block *nb))
-{
- int retries = 0;
+static const struct fib_notifier_ops fib4_notifier_ops_template = {
+ .family = AF_INET,
+ .fib_seq_read = fib4_seq_read,
+ .fib_dump = fib4_dump,
+ .owner = THIS_MODULE,
+};
- do {
- unsigned int fib_seq = fib_seq_sum();
- struct net *net;
+int __net_init fib4_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
- /* Mutex semantics guarantee that every change done to
- * FIB tries before we read the change sequence counter
- * is now visible to us.
- */
- rcu_read_lock();
- for_each_net_rcu(net) {
- fib_rules_notify(net, nb);
- fib_notify(net, nb);
- }
- rcu_read_unlock();
+ net->ipv4.fib_seq = 0;
- if (fib_dump_is_consistent(nb, cb, fib_seq))
- return 0;
- } while (++retries < FIB_DUMP_MAX_RETRIES);
+ ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv4.notifier_ops = ops;
- return -EBUSY;
+ return 0;
}
-EXPORT_SYMBOL(register_fib_notifier);
-int unregister_fib_notifier(struct notifier_block *nb)
+void __net_exit fib4_notifier_exit(struct net *net)
{
- return atomic_notifier_chain_unregister(&fib_chain, nb);
+ fib_notifier_ops_unregister(net->ipv4.notifier_ops);
}
-EXPORT_SYMBOL(unregister_fib_notifier);
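After this refactor the notifier chain itself lives in the shared fib_notifier core and every event carries an address family, so one listener can subscribe once and filter. A hedged sketch of a consumer (example_* names are hypothetical; registration would go through register_fib_notifier() in the new core):

static int example_fib_event(struct notifier_block *nb,
			     unsigned long event, void *ptr)
{
	struct fib_notifier_info *info = ptr;

	if (info->family != AF_INET)	/* only IPv4 routes, please */
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_ADD:
	case FIB_EVENT_ENTRY_DEL:
		/* mirror the route change into hardware here */
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_fib_nb = {
	.notifier_call = example_fib_event,
};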
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 778ecf9..35d646a 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -68,6 +68,16 @@ bool fib4_rule_default(const struct fib_rule *rule)
}
EXPORT_SYMBOL_GPL(fib4_rule_default);
+int fib4_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, AF_INET);
+}
+
+unsigned int fib4_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, AF_INET);
+}
+
int __fib_lookup(struct net *net, struct flowi4 *flp,
struct fib_result *res, unsigned int flags)
{
@@ -185,38 +195,6 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}
-static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
- enum fib_event_type event_type,
- struct fib_rule *rule)
-{
- struct fib_rule_notifier_info info = {
- .rule = rule,
- };
-
- return call_fib_notifier(nb, net, event_type, &info.info);
-}
-
-static int call_fib_rule_notifiers(struct net *net,
- enum fib_event_type event_type,
- struct fib_rule *rule)
-{
- struct fib_rule_notifier_info info = {
- .rule = rule,
- };
-
- return call_fib_notifiers(net, event_type, &info.info);
-}
-
-/* Called with rcu_read_lock() */
-void fib_rules_notify(struct net *net, struct notifier_block *nb)
-{
- struct fib_rules_ops *ops = net->ipv4.rules_ops;
- struct fib_rule *rule;
-
- list_for_each_entry_rcu(rule, &ops->rules_list, list)
- call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule);
-}
-
static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
FRA_GENERIC_POLICY,
[FRA_FLOW] = { .type = NLA_U32 },
@@ -273,7 +251,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->tos = frh->tos;
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule);
err = 0;
errout:
@@ -295,7 +272,6 @@ static int fib4_rule_delete(struct fib_rule *rule)
net->ipv4.fib_num_tclassid_users--;
#endif
net->ipv4.fib_has_custom_rules = true;
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule);
errout:
return err;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ec3a9ce..57a5d48 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -44,6 +44,7 @@
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
#include "fib_lookup.h"
@@ -219,7 +220,7 @@ static void free_fib_info_rcu(struct rcu_head *head)
} endfor_nexthops(fi);
m = fi->fib_metrics;
- if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
+ if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
kfree(m);
kfree(fi);
}
@@ -695,6 +696,40 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
return 0;
}
+bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
+{
+ struct nlattr *nla;
+ int remaining;
+
+ if (!cfg->fc_mx)
+ return true;
+
+ nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
+ int type = nla_type(nla);
+ u32 val;
+
+ if (!type)
+ continue;
+ if (type > RTAX_MAX)
+ return false;
+
+ if (type == RTAX_CC_ALGO) {
+ char tmp[TCP_CA_NAME_MAX];
+ bool ecn_ca = false;
+
+ nla_strlcpy(tmp, nla, sizeof(tmp));
+ val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
+ } else {
+ val = nla_get_u32(nla);
+ }
+
+ if (fi->fib_metrics->metrics[type - 1] != val)
+ return false;
+ }
+
+ return true;
+}
+
/*
* Picture
@@ -1089,7 +1124,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
kfree(fi);
return ERR_PTR(err);
}
- atomic_set(&fi->fib_metrics->refcnt, 1);
+ refcount_set(&fi->fib_metrics->refcnt, 1);
} else {
fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
}
@@ -1344,6 +1379,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
rtm->rtm_flags |= RTNH_F_DEAD;
}
+ if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD)
+ rtm->rtm_flags |= RTNH_F_OFFLOAD;
#ifdef CONFIG_IP_ROUTE_CLASSID
if (fi->fib_nh[0].nh_tclassid &&
nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
@@ -1451,14 +1488,14 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN)
break;
- return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
- &info.info);
+ return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type,
+ &info.info);
case FIB_EVENT_NH_DEL:
if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
(fib_nh->nh_flags & RTNH_F_DEAD))
- return call_fib_notifiers(dev_net(fib_nh->nh_dev),
- event_type, &info.info);
+ return call_fib4_notifiers(dev_net(fib_nh->nh_dev),
+ event_type, &info.info);
default:
break;
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 64668c6..c636650 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -81,6 +81,7 @@
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
+#include <net/fib_notifier.h>
#include <trace/events/fib.h>
#include "fib_lookup.h"
@@ -97,7 +98,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
.type = type,
.tb_id = tb_id,
};
- return call_fib_notifier(nb, net, event_type, &info.info);
+ return call_fib4_notifier(nb, net, event_type, &info.info);
}
static int call_fib_entry_notifiers(struct net *net,
@@ -113,7 +114,7 @@ static int call_fib_entry_notifiers(struct net *net,
.type = type,
.tb_id = tb_id,
};
- return call_fib_notifiers(net, event_type, &info.info);
+ return call_fib4_notifiers(net, event_type, &info.info);
}
#define MAX_STAT_DEPTH 32
@@ -1562,7 +1563,8 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
fi->fib_prefsrc == cfg->fc_prefsrc) &&
(!cfg->fc_protocol ||
fi->fib_protocol == cfg->fc_protocol) &&
- fib_nh_match(cfg, fi, extack) == 0) {
+ fib_nh_match(cfg, fi, extack) == 0 &&
+ fib_metrics_match(cfg, fi)) {
fa_to_delete = fa;
break;
}
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index d5cac99..416bb30 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
__be16 protocol = skb->protocol;
u16 mac_len = skb->mac_len;
int gre_offset, outer_hlen;
- bool need_csum, ufo, gso_partial;
+ bool need_csum, gso_partial;
if (!skb->encapsulation)
goto out;
@@ -47,20 +47,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
skb->encap_hdr_csum = need_csum;
- ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
-
features &= skb->dev->hw_enc_features;
- /* The only checksum offload we care about from here on out is the
- * outer one so strip the existing checksum feature flags based
- * on the fact that we will be computing our checksum in software.
- */
- if (ufo) {
- features &= ~NETIF_F_CSUM_MASK;
- if (!need_csum)
- features |= NETIF_F_HW_CSUM;
- }
-
/* segment inner packet. */
segs = skb_mac_gso_segment(skb, features);
if (IS_ERR_OR_NULL(segs)) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c2be26b..681e339 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -412,7 +412,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
int type = icmp_param->data.icmph.type;
int code = icmp_param->data.icmph.code;
- if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
+ if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
return;
/* Needed by both icmp_global_allow and icmp_xmit_lock */
@@ -694,7 +694,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
iph->tos;
mark = IP4_REPLY_MARK(net, skb_in->mark);
- if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))
+ if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
goto out_unlock;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index caf2f11..ab183af 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2557,7 +2557,8 @@ done:
/*
* check if a multicast source filter allows delivery for a given <src,dst,intf>
*/
-int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
+int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr,
+ int dif, int sdif)
{
struct inet_sock *inet = inet_sk(sk);
struct ip_mc_socklist *pmc;
@@ -2572,7 +2573,8 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
rcu_read_lock();
for_each_pmc_rcu(inet, pmc) {
if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
- pmc->multi.imr_ifindex == dif)
+ (pmc->multi.imr_ifindex == dif ||
+ (sdif && pmc->multi.imr_ifindex == sdif)))
break;
}
ret = inet->mc_all;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 3828b3a..c9c35b6 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -93,8 +93,17 @@ void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);
-static size_t inet_sk_attr_size(void)
+static size_t inet_sk_attr_size(struct sock *sk,
+ const struct inet_diag_req_v2 *req,
+ bool net_admin)
{
+ const struct inet_diag_handler *handler;
+ size_t aux = 0;
+
+ handler = inet_diag_table[req->sdiag_protocol];
+ if (handler && handler->idiag_get_aux_size)
+ aux = handler->idiag_get_aux_size(sk, net_admin);
+
return nla_total_size(sizeof(struct tcp_info))
+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+ nla_total_size(1) /* INET_DIAG_TOS */
@@ -105,6 +114,7 @@ static size_t inet_sk_attr_size(void)
+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
+ nla_total_size(TCP_CA_NAME_MAX)
+ nla_total_size(sizeof(struct tcpvegas_info))
+ + aux
+ 64;
}
@@ -260,6 +270,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
handler->idiag_get_info(sk, r, info);
+ if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux)
+ if (handler->idiag_get_aux(sk, net_admin, skb) < 0)
+ goto errout;
+
if (sk->sk_state < TCP_TIME_WAIT) {
union tcp_cc_info info;
size_t sz = 0;
@@ -274,6 +288,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
goto errout;
}
+ if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) {
+ u32 classid = 0;
+
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ classid = sock_cgroup_classid(&sk->sk_cgrp_data);
+#endif
+
+ if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
+ goto errout;
+ }
+
out:
nlmsg_end(skb, nlh);
return 0;
@@ -438,6 +463,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
const struct nlmsghdr *nlh,
const struct inet_diag_req_v2 *req)
{
+ bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
struct net *net = sock_net(in_skb->sk);
struct sk_buff *rep;
struct sock *sk;
@@ -447,7 +473,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
if (IS_ERR(sk))
return PTR_ERR(sk);
- rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
+ rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL);
if (!rep) {
err = -ENOMEM;
goto out;
@@ -456,8 +482,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
err = sk_diag_fill(sk, rep, req,
sk_user_ns(NETLINK_CB(in_skb).sk),
NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0, nlh,
- netlink_net_capable(in_skb, CAP_NET_ADMIN));
+ nlh->nlmsg_seq, 0, nlh, net_admin);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
nlmsg_free(rep);
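The two new handler hooks split responsibility: idiag_get_aux_size() is consulted while sizing the reply skb, and idiag_get_aux() emits the extra attributes during the fill. A hedged sketch of a protocol handler using them (example_* names and the choice of attribute are hypothetical):

static size_t example_diag_aux_size(struct sock *sk, bool net_admin)
{
	/* reserve room for one u32 attribute, privileged requesters only */
	return net_admin ? nla_total_size(sizeof(u32)) : 0;
}

static int example_diag_aux(struct sock *sk, bool net_admin,
			    struct sk_buff *skb)
{
	if (!net_admin)
		return 0;

	return nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark);
}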
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 96e95e8..af74d04 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -234,10 +234,8 @@ evict_again:
cond_resched();
if (read_seqretry(&f->rnd_seqlock, seq) ||
- percpu_counter_sum(&nf->mem))
+ sum_frag_mem_limit(nf))
goto evict_again;
-
- percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2e3389d..597bb4c 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -170,7 +170,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port);
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum, const __be32 daddr,
- const int dif, bool exact_dif)
+ const int dif, const int sdif, bool exact_dif)
{
int score = -1;
struct inet_sock *inet = inet_sk(sk);
@@ -185,9 +185,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
score += 4;
}
if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
+ bool dev_match = (sk->sk_bound_dev_if == dif ||
+ sk->sk_bound_dev_if == sdif);
+
+ if (exact_dif && !dev_match)
return -1;
- score += 4;
+ if (sk->sk_bound_dev_if && dev_match)
+ score += 4;
}
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
@@ -208,7 +212,7 @@ struct sock *__inet_lookup_listener(struct net *net,
struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
const __be32 daddr, const unsigned short hnum,
- const int dif)
+ const int dif, const int sdif)
{
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -218,7 +222,8 @@ struct sock *__inet_lookup_listener(struct net *net,
u32 phash = 0;
sk_for_each_rcu(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
+ score = compute_score(sk, net, hnum, daddr,
+ dif, sdif, exact_dif);
if (score > hiscore) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -268,7 +273,7 @@ struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 hnum,
- const int dif)
+ const int dif, const int sdif)
{
INET_ADDR_COOKIE(acookie, saddr, daddr);
const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
@@ -286,11 +291,12 @@ begin:
if (sk->sk_hash != hash)
continue;
if (likely(INET_MATCH(sk, net, acookie,
- saddr, daddr, ports, dif))) {
+ saddr, daddr, ports, dif, sdif))) {
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
if (unlikely(!INET_MATCH(sk, net, acookie,
- saddr, daddr, ports, dif))) {
+ saddr, daddr, ports,
+ dif, sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -321,9 +327,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
__be32 daddr = inet->inet_rcv_saddr;
__be32 saddr = inet->inet_daddr;
int dif = sk->sk_bound_dev_if;
+ struct net *net = sock_net(sk);
+ int sdif = l3mdev_master_ifindex_by_index(net, dif);
INET_ADDR_COOKIE(acookie, saddr, daddr);
const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
- struct net *net = sock_net(sk);
unsigned int hash = inet_ehashfn(net, daddr, lport,
saddr, inet->inet_dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
@@ -339,7 +346,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
continue;
if (likely(INET_MATCH(sk2, net, acookie,
- saddr, daddr, ports, dif))) {
+ saddr, daddr, ports, dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
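compute_score() above now accepts a packet for a bound socket when the packet's arrival device matches either the inbound ifindex (dif) or its L3 master device (sdif); that is what lets traffic received on a VRF slave reach a socket bound to the VRF device itself. A standalone sketch of just that predicate, with the l3mdev lookup replaced by a plain parameter:

    #include <stdio.h>
    #include <stdbool.h>

    /* would a socket bound to 'bound_dev_if' accept a packet that arrived
     * on device 'dif' whose L3 master device (VRF), if any, is 'sdif'?
     */
    static bool dev_match(int bound_dev_if, int dif, int sdif)
    {
            return bound_dev_if == dif || bound_dev_if == sdif;
    }

    int main(void)
    {
            /* socket bound to VRF device 10; packet arrives on slave 3,
             * which is enslaved to VRF 10, so sdif == 10 and it matches.
             */
            printf("%d\n", dev_match(10, 3, 10));   /* 1 */
            printf("%d\n", dev_match(10, 3, 0));    /* 0: not in the VRF */
            return 0;
    }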
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index c5a117c..e7eb590 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -33,7 +33,7 @@
* also be removed if the pool is overloaded i.e. if the total amount of
* entries is greater-or-equal than the threshold.
*
- * Node pool is organised as an AVL tree.
+ * Node pool is organised as an RB tree.
* Such an implementation has been chosen not just for fun. It's a way to
* prevent easy and efficient DoS attacks by creating hash collisions. A huge
* amount of long living nodes in a single hash slot would significantly delay
@@ -45,7 +45,7 @@
* AND reference count being 0.
* 3. Global variable peer_total is modified under the pool lock.
* 4. struct inet_peer fields modification:
- * avl_left, avl_right, avl_parent, avl_height: pool lock
+ * rb_node: pool lock
* refcnt: atomically against modifications on other CPU;
* usually under some other lock to prevent node disappearing
* daddr: unchangeable
@@ -53,30 +53,15 @@
static struct kmem_cache *peer_cachep __read_mostly;
-static LIST_HEAD(gc_list);
-static const int gc_delay = 60 * HZ;
-static struct delayed_work gc_work;
-static DEFINE_SPINLOCK(gc_lock);
-
-#define node_height(x) x->avl_height
-
-#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
-#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
-static const struct inet_peer peer_fake_node = {
- .avl_left = peer_avl_empty_rcu,
- .avl_right = peer_avl_empty_rcu,
- .avl_height = 0
-};
-
void inet_peer_base_init(struct inet_peer_base *bp)
{
- bp->root = peer_avl_empty_rcu;
+ bp->rb_root = RB_ROOT;
seqlock_init(&bp->lock);
bp->total = 0;
}
EXPORT_SYMBOL_GPL(inet_peer_base_init);
-#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
+#define PEER_MAX_GC 32
/* Exported for sysctl_net_ipv4. */
int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more
@@ -84,53 +69,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m
int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */
-static void inetpeer_gc_worker(struct work_struct *work)
-{
- struct inet_peer *p, *n, *c;
- struct list_head list;
-
- spin_lock_bh(&gc_lock);
- list_replace_init(&gc_list, &list);
- spin_unlock_bh(&gc_lock);
-
- if (list_empty(&list))
- return;
-
- list_for_each_entry_safe(p, n, &list, gc_list) {
-
- if (need_resched())
- cond_resched();
-
- c = rcu_dereference_protected(p->avl_left, 1);
- if (c != peer_avl_empty) {
- list_add_tail(&c->gc_list, &list);
- p->avl_left = peer_avl_empty_rcu;
- }
-
- c = rcu_dereference_protected(p->avl_right, 1);
- if (c != peer_avl_empty) {
- list_add_tail(&c->gc_list, &list);
- p->avl_right = peer_avl_empty_rcu;
- }
-
- n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
-
- if (refcount_read(&p->refcnt) == 1) {
- list_del(&p->gc_list);
- kmem_cache_free(peer_cachep, p);
- }
- }
-
- if (list_empty(&list))
- return;
-
- spin_lock_bh(&gc_lock);
- list_splice(&list, &gc_list);
- spin_unlock_bh(&gc_lock);
-
- schedule_delayed_work(&gc_work, gc_delay);
-}
-
/* Called from ip_output.c:ip_init */
void __init inet_initpeers(void)
{
@@ -153,225 +91,65 @@ void __init inet_initpeers(void)
sizeof(struct inet_peer),
0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
NULL);
-
- INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
}
-#define rcu_deref_locked(X, BASE) \
- rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock))
-
-/*
- * Called with local BH disabled and the pool lock held.
- */
-#define lookup(_daddr, _stack, _base) \
-({ \
- struct inet_peer *u; \
- struct inet_peer __rcu **v; \
- \
- stackptr = _stack; \
- *stackptr++ = &_base->root; \
- for (u = rcu_deref_locked(_base->root, _base); \
- u != peer_avl_empty;) { \
- int cmp = inetpeer_addr_cmp(_daddr, &u->daddr); \
- if (cmp == 0) \
- break; \
- if (cmp == -1) \
- v = &u->avl_left; \
- else \
- v = &u->avl_right; \
- *stackptr++ = v; \
- u = rcu_deref_locked(*v, _base); \
- } \
- u; \
-})
-
-/*
- * Called with rcu_read_lock()
- * Because we hold no lock against a writer, its quite possible we fall
- * in an endless loop.
- * But every pointer we follow is guaranteed to be valid thanks to RCU.
- * We exit from this function if number of links exceeds PEER_MAXDEPTH
- */
-static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
- struct inet_peer_base *base)
+/* Called with rcu_read_lock() or base->lock held */
+static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
+ struct inet_peer_base *base,
+ unsigned int seq,
+ struct inet_peer *gc_stack[],
+ unsigned int *gc_cnt,
+ struct rb_node **parent_p,
+ struct rb_node ***pp_p)
{
- struct inet_peer *u = rcu_dereference(base->root);
- int count = 0;
+ struct rb_node **pp, *parent, *next;
+ struct inet_peer *p;
+
+ pp = &base->rb_root.rb_node;
+ parent = NULL;
+ while (1) {
+ int cmp;
- while (u != peer_avl_empty) {
- int cmp = inetpeer_addr_cmp(daddr, &u->daddr);
+ next = rcu_dereference_raw(*pp);
+ if (!next)
+ break;
+ parent = next;
+ p = rb_entry(parent, struct inet_peer, rb_node);
+ cmp = inetpeer_addr_cmp(daddr, &p->daddr);
if (cmp == 0) {
- /* Before taking a reference, check if this entry was
- * deleted (refcnt=0)
- */
- if (!refcount_inc_not_zero(&u->refcnt)) {
- u = NULL;
- }
- return u;
+ if (!refcount_inc_not_zero(&p->refcnt))
+ break;
+ return p;
+ }
+ if (gc_stack) {
+ if (*gc_cnt < PEER_MAX_GC)
+ gc_stack[(*gc_cnt)++] = p;
+ } else if (unlikely(read_seqretry(&base->lock, seq))) {
+ break;
}
if (cmp == -1)
- u = rcu_dereference(u->avl_left);
+ pp = &(*pp)->rb_left;
else
- u = rcu_dereference(u->avl_right);
- if (unlikely(++count == PEER_MAXDEPTH))
- break;
+ pp = &(*pp)->rb_right;
}
+ *parent_p = parent;
+ *pp_p = pp;
return NULL;
}
-/* Called with local BH disabled and the pool lock held. */
-#define lookup_rightempty(start, base) \
-({ \
- struct inet_peer *u; \
- struct inet_peer __rcu **v; \
- *stackptr++ = &start->avl_left; \
- v = &start->avl_left; \
- for (u = rcu_deref_locked(*v, base); \
- u->avl_right != peer_avl_empty_rcu;) { \
- v = &u->avl_right; \
- *stackptr++ = v; \
- u = rcu_deref_locked(*v, base); \
- } \
- u; \
-})
-
-/* Called with local BH disabled and the pool lock held.
- * Variable names are the proof of operation correctness.
- * Look into mm/map_avl.c for more detail description of the ideas.
- */
-static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
- struct inet_peer __rcu ***stackend,
- struct inet_peer_base *base)
-{
- struct inet_peer __rcu **nodep;
- struct inet_peer *node, *l, *r;
- int lh, rh;
-
- while (stackend > stack) {
- nodep = *--stackend;
- node = rcu_deref_locked(*nodep, base);
- l = rcu_deref_locked(node->avl_left, base);
- r = rcu_deref_locked(node->avl_right, base);
- lh = node_height(l);
- rh = node_height(r);
- if (lh > rh + 1) { /* l: RH+2 */
- struct inet_peer *ll, *lr, *lrl, *lrr;
- int lrh;
- ll = rcu_deref_locked(l->avl_left, base);
- lr = rcu_deref_locked(l->avl_right, base);
- lrh = node_height(lr);
- if (lrh <= node_height(ll)) { /* ll: RH+1 */
- RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
- RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
- node->avl_height = lrh + 1; /* RH+1 or RH+2 */
- RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */
- RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */
- l->avl_height = node->avl_height + 1;
- RCU_INIT_POINTER(*nodep, l);
- } else { /* ll: RH, lr: RH+1 */
- lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */
- lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */
- RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
- RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
- node->avl_height = rh + 1; /* node: RH+1 */
- RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */
- RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */
- l->avl_height = rh + 1; /* l: RH+1 */
- RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */
- RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */
- lr->avl_height = rh + 2;
- RCU_INIT_POINTER(*nodep, lr);
- }
- } else if (rh > lh + 1) { /* r: LH+2 */
- struct inet_peer *rr, *rl, *rlr, *rll;
- int rlh;
- rr = rcu_deref_locked(r->avl_right, base);
- rl = rcu_deref_locked(r->avl_left, base);
- rlh = node_height(rl);
- if (rlh <= node_height(rr)) { /* rr: LH+1 */
- RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
- RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
- node->avl_height = rlh + 1; /* LH+1 or LH+2 */
- RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */
- RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */
- r->avl_height = node->avl_height + 1;
- RCU_INIT_POINTER(*nodep, r);
- } else { /* rr: RH, rl: RH+1 */
- rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */
- rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */
- RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
- RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
- node->avl_height = lh + 1; /* node: LH+1 */
- RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */
- RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */
- r->avl_height = lh + 1; /* r: LH+1 */
- RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */
- RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */
- rl->avl_height = lh + 2;
- RCU_INIT_POINTER(*nodep, rl);
- }
- } else {
- node->avl_height = (lh > rh ? lh : rh) + 1;
- }
- }
-}
-
-/* Called with local BH disabled and the pool lock held. */
-#define link_to_pool(n, base) \
-do { \
- n->avl_height = 1; \
- n->avl_left = peer_avl_empty_rcu; \
- n->avl_right = peer_avl_empty_rcu; \
- /* lockless readers can catch us now */ \
- rcu_assign_pointer(**--stackptr, n); \
- peer_avl_rebalance(stack, stackptr, base); \
-} while (0)
-
static void inetpeer_free_rcu(struct rcu_head *head)
{
kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
}
-static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
- struct inet_peer __rcu **stack[PEER_MAXDEPTH])
-{
- struct inet_peer __rcu ***stackptr, ***delp;
-
- if (lookup(&p->daddr, stack, base) != p)
- BUG();
- delp = stackptr - 1; /* *delp[0] == p */
- if (p->avl_left == peer_avl_empty_rcu) {
- *delp[0] = p->avl_right;
- --stackptr;
- } else {
- /* look for a node to insert instead of p */
- struct inet_peer *t;
- t = lookup_rightempty(p, base);
- BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t);
- **--stackptr = t->avl_left;
- /* t is removed, t->daddr > x->daddr for any
- * x in p->avl_left subtree.
- * Put t in the old place of p. */
- RCU_INIT_POINTER(*delp[0], t);
- t->avl_left = p->avl_left;
- t->avl_right = p->avl_right;
- t->avl_height = p->avl_height;
- BUG_ON(delp[1] != &p->avl_left);
- delp[1] = &t->avl_left; /* was &p->avl_left */
- }
- peer_avl_rebalance(stack, stackptr, base);
- base->total--;
- call_rcu(&p->rcu, inetpeer_free_rcu);
-}
-
/* perform garbage collect on all items stacked during a lookup */
-static int inet_peer_gc(struct inet_peer_base *base,
- struct inet_peer __rcu **stack[PEER_MAXDEPTH],
- struct inet_peer __rcu ***stackptr)
+static void inet_peer_gc(struct inet_peer_base *base,
+ struct inet_peer *gc_stack[],
+ unsigned int gc_cnt)
{
- struct inet_peer *p, *gchead = NULL;
+ struct inet_peer *p;
__u32 delta, ttl;
- int cnt = 0;
+ int i;
if (base->total >= inet_peer_threshold)
ttl = 0; /* be aggressive */
@@ -379,43 +157,38 @@ static int inet_peer_gc(struct inet_peer_base *base,
ttl = inet_peer_maxttl
- (inet_peer_maxttl - inet_peer_minttl) / HZ *
base->total / inet_peer_threshold * HZ;
- stackptr--; /* last stack slot is peer_avl_empty */
- while (stackptr > stack) {
- stackptr--;
- p = rcu_deref_locked(**stackptr, base);
- if (refcount_read(&p->refcnt) == 1) {
- smp_rmb();
- delta = (__u32)jiffies - p->dtime;
- if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) {
- p->gc_next = gchead;
- gchead = p;
- }
- }
+ for (i = 0; i < gc_cnt; i++) {
+ p = gc_stack[i];
+ delta = (__u32)jiffies - p->dtime;
+ if (delta < ttl || !refcount_dec_if_one(&p->refcnt))
+ gc_stack[i] = NULL;
}
- while ((p = gchead) != NULL) {
- gchead = p->gc_next;
- cnt++;
- unlink_from_pool(p, base, stack);
+ for (i = 0; i < gc_cnt; i++) {
+ p = gc_stack[i];
+ if (p) {
+ rb_erase(&p->rb_node, &base->rb_root);
+ base->total--;
+ call_rcu(&p->rcu, inetpeer_free_rcu);
+ }
}
- return cnt;
}
struct inet_peer *inet_getpeer(struct inet_peer_base *base,
const struct inetpeer_addr *daddr,
int create)
{
- struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
- struct inet_peer *p;
- unsigned int sequence;
- int invalidated, gccnt = 0;
+ struct inet_peer *p, *gc_stack[PEER_MAX_GC];
+ struct rb_node **pp, *parent;
+ unsigned int gc_cnt, seq;
+ int invalidated;
/* Attempt a lockless lookup first.
* Because of a concurrent writer, we might not find an existing entry.
*/
rcu_read_lock();
- sequence = read_seqbegin(&base->lock);
- p = lookup_rcu(daddr, base);
- invalidated = read_seqretry(&base->lock, sequence);
+ seq = read_seqbegin(&base->lock);
+ p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
+ invalidated = read_seqretry(&base->lock, seq);
rcu_read_unlock();
if (p)
@@ -428,36 +201,31 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
/* retry an exact lookup, taking the lock before.
* At least, nodes should be hot in our cache.
*/
+ parent = NULL;
write_seqlock_bh(&base->lock);
-relookup:
- p = lookup(daddr, stack, base);
- if (p != peer_avl_empty) {
- refcount_inc(&p->refcnt);
- write_sequnlock_bh(&base->lock);
- return p;
- }
- if (!gccnt) {
- gccnt = inet_peer_gc(base, stack, stackptr);
- if (gccnt && create)
- goto relookup;
- }
- p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
- if (p) {
- p->daddr = *daddr;
- refcount_set(&p->refcnt, 2);
- atomic_set(&p->rid, 0);
- p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
- p->rate_tokens = 0;
- /* 60*HZ is arbitrary, but chosen enough high so that the first
- * calculation of tokens is at its maximum.
- */
- p->rate_last = jiffies - 60*HZ;
- INIT_LIST_HEAD(&p->gc_list);
- /* Link the node. */
- link_to_pool(p, base);
- base->total++;
+ gc_cnt = 0;
+ p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp);
+ if (!p && create) {
+ p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
+ if (p) {
+ p->daddr = *daddr;
+ refcount_set(&p->refcnt, 2);
+ atomic_set(&p->rid, 0);
+ p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
+ p->rate_tokens = 0;
+ /* 60*HZ is arbitrary, but chosen high enough so that the first
+ * calculation of tokens is at its maximum.
+ */
+ p->rate_last = jiffies - 60*HZ;
+
+ rb_link_node(&p->rb_node, parent, pp);
+ rb_insert_color(&p->rb_node, &base->rb_root);
+ base->total++;
+ }
}
+ if (gc_cnt)
+ inet_peer_gc(base, gc_stack, gc_cnt);
write_sequnlock_bh(&base->lock);
return p;
@@ -467,8 +235,9 @@ EXPORT_SYMBOL_GPL(inet_getpeer);
void inet_putpeer(struct inet_peer *p)
{
p->dtime = (__u32)jiffies;
- smp_mb__before_atomic();
- refcount_dec(&p->refcnt);
+
+ if (refcount_dec_and_test(&p->refcnt))
+ call_rcu(&p->rcu, inetpeer_free_rcu);
}
EXPORT_SYMBOL_GPL(inet_putpeer);
@@ -513,30 +282,16 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
}
EXPORT_SYMBOL(inet_peer_xrlim_allow);
-static void inetpeer_inval_rcu(struct rcu_head *head)
-{
- struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu);
-
- spin_lock_bh(&gc_lock);
- list_add_tail(&p->gc_list, &gc_list);
- spin_unlock_bh(&gc_lock);
-
- schedule_delayed_work(&gc_work, gc_delay);
-}
-
void inetpeer_invalidate_tree(struct inet_peer_base *base)
{
- struct inet_peer *root;
-
- write_seqlock_bh(&base->lock);
+ struct inet_peer *p, *n;
- root = rcu_deref_locked(base->root, base);
- if (root != peer_avl_empty) {
- base->root = peer_avl_empty_rcu;
- base->total = 0;
- call_rcu(&root->gc_rcu, inetpeer_inval_rcu);
+ rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) {
+ inet_putpeer(p);
+ cond_resched();
}
- write_sequnlock_bh(&base->lock);
+ base->rb_root = RB_ROOT;
+ base->total = 0;
}
EXPORT_SYMBOL(inetpeer_invalidate_tree);
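The conversion above drops the hand-rolled AVL code in favour of the kernel rbtree: lookup() descends the tree while remembering the parent and the child slot (pp) where a missing key would go, and inet_getpeer() then links the new node with rb_link_node()/rb_insert_color() under the write seqlock. A userspace sketch of that "remember the slot during descent" pattern on a plain binary search tree; the kernel's struct rb_node and its rebalancing are deliberately not reproduced:

    #include <stdio.h>
    #include <stdlib.h>

    struct node {
            int key;
            struct node *left, *right;
    };

    /* on a miss, *pp_p points at the NULL slot where the new node
     * belongs -- the analogue of lookup() filling parent_p/pp_p.
     */
    static struct node *lookup(struct node **rootp, int key,
                               struct node ***pp_p)
    {
            struct node **pp = rootp;

            while (*pp) {
                    struct node *n = *pp;

                    if (key == n->key)
                            return n;
                    pp = key < n->key ? &n->left : &n->right;
            }
            *pp_p = pp;
            return NULL;
    }

    int main(void)
    {
            struct node *root = NULL;
            int keys[] = { 4, 2, 7 };

            for (int i = 0; i < 3; i++) {
                    struct node **pp;

                    if (!lookup(&root, keys[i], &pp)) {
                            struct node *n = calloc(1, sizeof(*n));

                            n->key = keys[i];
                            *pp = n;        /* rb_link_node() analogue */
                            /* rb_insert_color() would rebalance here */
                    }
            }
            printf("root=%d left=%d right=%d\n",
                   root->key, root->left->key, root->right->key);
            return 0;
    }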
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9a8cfac..46408c2 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -844,8 +844,6 @@ static void __init ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
- int res;
-
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -871,13 +869,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.max_dist = 64;
- res = inet_frags_init_net(&net->ipv4.frags);
- if (res)
- return res;
- res = ip4_frags_ns_ctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->ipv4.frags);
- return res;
+ inet_frags_init_net(&net->ipv4.frags);
+
+ return ip4_frags_ns_ctl_register(net);
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7a7829e..0162fb9 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -48,6 +48,7 @@
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
+#include <net/erspan.h>
/*
Problems & solutions
@@ -112,9 +113,12 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
+static void erspan_build_header(struct sk_buff *skb,
+ __be32 id, u32 index, bool truncate);
static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
+static unsigned int erspan_net_id __read_mostly;
static void ipgre_err(struct sk_buff *skb, u32 info,
const struct tnl_ptk_info *tpi)
@@ -246,6 +250,81 @@ static void gre_err(struct sk_buff *skb, u32 info)
ipgre_err(skb, info, &tpi);
}
+static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ int gre_hdr_len)
+{
+ struct net *net = dev_net(skb->dev);
+ struct metadata_dst *tun_dst = NULL;
+ struct ip_tunnel_net *itn;
+ struct ip_tunnel *tunnel;
+ struct erspanhdr *ershdr;
+ const struct iphdr *iph;
+ __be32 session_id;
+ __be32 index;
+ int len;
+
+ itn = net_generic(net, erspan_net_id);
+ len = gre_hdr_len + sizeof(*ershdr);
+
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return -ENOMEM;
+
+ iph = ip_hdr(skb);
+ ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
+
+ /* The original GRE header does not have a key field,
+ * so use the ERSPAN 10-bit session ID as the key.
+ */
+ session_id = cpu_to_be32(ntohs(ershdr->session_id));
+ tpi->key = session_id;
+ index = ershdr->md.index;
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
+ tpi->flags | TUNNEL_KEY,
+ iph->saddr, iph->daddr, tpi->key);
+
+ if (tunnel) {
+ if (__iptunnel_pull_header(skb,
+ gre_hdr_len + sizeof(*ershdr),
+ htons(ETH_P_TEB),
+ false, false) < 0)
+ goto drop;
+
+ if (tunnel->collect_md) {
+ struct ip_tunnel_info *info;
+ struct erspan_metadata *md;
+ __be64 tun_id;
+ __be16 flags;
+
+ tpi->flags |= TUNNEL_KEY;
+ flags = tpi->flags;
+ tun_id = key32_to_tunnel_id(tpi->key);
+
+ tun_dst = ip_tun_rx_dst(skb, flags,
+ tun_id, sizeof(*md));
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+ if (!md)
+ return PACKET_REJECT;
+
+ md->index = index;
+ info = &tun_dst->u.tun_info;
+ info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ info->options_len = sizeof(*md);
+ } else {
+ tunnel->index = ntohl(index);
+ }
+
+ skb_reset_mac_header(skb);
+ ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+ return PACKET_RCVD;
+ }
+drop:
+ kfree_skb(skb);
+ return PACKET_RCVD;
+}
+
static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
{
@@ -328,6 +407,11 @@ static int gre_rcv(struct sk_buff *skb)
if (hdr_len < 0)
goto drop;
+ if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
+ if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
+ return 0;
+ }
+
if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
return 0;
@@ -376,39 +460,33 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
return ip_route_output_key(net, fl);
}
-static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
- __be16 proto)
+static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
+ struct net_device *dev,
+ struct flowi4 *fl,
+ int tunnel_hlen)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
- struct flowi4 fl;
int min_headroom;
- int tunnel_hlen;
- __be16 df, flags;
bool use_cache;
int err;
tun_info = skb_tunnel_info(skb);
- if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
- ip_tunnel_info_af(tun_info) != AF_INET))
- goto err_free_skb;
-
key = &tun_info->key;
use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
+
if (use_cache)
- rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
+ rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
if (!rt) {
- rt = gre_get_rt(skb, dev, &fl, key);
+ rt = gre_get_rt(skb, dev, fl, key);
if (IS_ERR(rt))
- goto err_free_skb;
+ goto err_free_skb;
if (use_cache)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
- fl.saddr);
+ fl->saddr);
}
- tunnel_hlen = gre_calc_hlen(key->tun_flags);
-
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ tunnel_hlen + sizeof(struct iphdr);
if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
@@ -420,6 +498,37 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (unlikely(err))
goto err_free_rt;
}
+ return rt;
+
+err_free_rt:
+ ip_rt_put(rt);
+err_free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+ return NULL;
+}
+
+static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+ __be16 proto)
+{
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ struct rtable *rt = NULL;
+ struct flowi4 fl;
+ int tunnel_hlen;
+ __be16 df, flags;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET))
+ goto err_free_skb;
+
+ key = &tun_info->key;
+ tunnel_hlen = gre_calc_hlen(key->tun_flags);
+
+ rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+ if (!rt)
+ return;
/* Push Tunnel header. */
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
@@ -442,6 +551,64 @@ err_free_skb:
dev->stats.tx_dropped++;
}
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+ __be16 proto)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ struct erspan_metadata *md;
+ struct rtable *rt = NULL;
+ bool truncate = false;
+ struct flowi4 fl;
+ int tunnel_hlen;
+ __be16 df;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET))
+ goto err_free_skb;
+
+ key = &tun_info->key;
+
+ /* ERSPAN has a fixed 8-byte GRE header */
+ tunnel_hlen = 8 + sizeof(struct erspanhdr);
+
+ rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+ if (!rt)
+ return;
+
+ if (gre_handle_offloads(skb, false))
+ goto err_free_rt;
+
+ if (skb->len > dev->mtu) {
+ pskb_trim(skb, dev->mtu);
+ truncate = true;
+ }
+
+ md = ip_tunnel_info_opts(tun_info);
+ if (!md)
+ goto err_free_rt;
+
+ erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+ ntohl(md->index), truncate);
+
+ gre_build_header(skb, 8, TUNNEL_SEQ,
+ htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
+
+ df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+
+ iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
+ key->tos, key->ttl, df, false);
+ return;
+
+err_free_rt:
+ ip_rt_put(rt);
+err_free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+}
+
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -503,6 +670,86 @@ free_skb:
return NETDEV_TX_OK;
}
+static inline u8 tos_to_cos(u8 tos)
+{
+ u8 dscp, cos;
+
+ dscp = tos >> 2;
+ cos = dscp >> 3;
+ return cos;
+}
+
+static void erspan_build_header(struct sk_buff *skb,
+ __be32 id, u32 index, bool truncate)
+{
+ struct iphdr *iphdr = ip_hdr(skb);
+ struct ethhdr *eth = eth_hdr(skb);
+ enum erspan_encap_type enc_type;
+ struct erspanhdr *ershdr;
+ struct qtag_prefix {
+ __be16 eth_type;
+ __be16 tci;
+ } *qp;
+ u16 vlan_tci = 0;
+
+ enc_type = ERSPAN_ENCAP_NOVLAN;
+
+ /* If the mirrored packet has a vlan tag, extract the tci and
+ * preserve the vlan header in the mirrored frame.
+ */
+ if (eth->h_proto == htons(ETH_P_8021Q)) {
+ qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
+ vlan_tci = ntohs(qp->tci);
+ enc_type = ERSPAN_ENCAP_INFRAME;
+ }
+
+ skb_push(skb, sizeof(*ershdr));
+ ershdr = (struct erspanhdr *)skb->data;
+ memset(ershdr, 0, sizeof(*ershdr));
+
+ ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
+ (ERSPAN_VERSION << VER_OFFSET));
+ ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
+ ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
+ (enc_type << EN_OFFSET & EN_MASK) |
+ ((truncate << T_OFFSET) & T_MASK));
+ ershdr->md.index = htonl(index & INDEX_MASK);
+}
+
+static netdev_tx_t erspan_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ bool truncate = false;
+
+ if (tunnel->collect_md) {
+ erspan_fb_xmit(skb, dev, skb->protocol);
+ return NETDEV_TX_OK;
+ }
+
+ if (gre_handle_offloads(skb, false))
+ goto free_skb;
+
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto free_skb;
+
+ if (skb->len > dev->mtu) {
+ pskb_trim(skb, dev->mtu);
+ truncate = true;
+ }
+
+ /* Push ERSPAN header */
+ erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
+ tunnel->parms.o_flags &= ~TUNNEL_KEY;
+ __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
+ return NETDEV_TX_OK;
+
+free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+}
+
static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
struct net_device *dev)
{
@@ -828,6 +1075,42 @@ out:
return ipgre_tunnel_validate(tb, data, extack);
}
+static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ __be16 flags = 0;
+ int ret;
+
+ if (!data)
+ return 0;
+
+ ret = ipgre_tap_validate(tb, data, extack);
+ if (ret)
+ return ret;
+
+ /* ERSPAN should only have the GRE sequence and key flags */
+ if (data[IFLA_GRE_OFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ if (data[IFLA_GRE_IFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ if (!data[IFLA_GRE_COLLECT_METADATA] &&
+ flags != (GRE_SEQ | GRE_KEY))
+ return -EINVAL;
+
+ /* The ERSPAN session ID is only 10 bits wide. Since we reuse
+ * the 32-bit key field as the ID, check its range.
+ */
+ if (data[IFLA_GRE_IKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ if (data[IFLA_GRE_OKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ return 0;
+}
+
static int ipgre_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
@@ -892,6 +1175,13 @@ static int ipgre_netlink_parms(struct net_device *dev,
if (data[IFLA_GRE_FWMARK])
*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
+ if (data[IFLA_GRE_ERSPAN_INDEX]) {
+ t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+
+ if (t->index & ~INDEX_MASK)
+ return -EINVAL;
+ }
+
return 0;
}
@@ -949,6 +1239,36 @@ static const struct net_device_ops gre_tap_netdev_ops = {
.ndo_fill_metadata_dst = gre_fill_metadata_dst,
};
+static int erspan_tunnel_init(struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int t_hlen;
+
+ tunnel->tun_hlen = 8;
+ tunnel->parms.iph.protocol = IPPROTO_GRE;
+ t_hlen = tunnel->hlen + sizeof(struct iphdr) + sizeof(struct erspanhdr);
+
+ dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
+ dev->mtu = ETH_DATA_LEN - t_hlen - 4;
+ dev->features |= GRE_FEATURES;
+ dev->hw_features |= GRE_FEATURES;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
+ return ip_tunnel_init(dev);
+}
+
+static const struct net_device_ops erspan_netdev_ops = {
+ .ndo_init = erspan_tunnel_init,
+ .ndo_uninit = ip_tunnel_uninit,
+ .ndo_start_xmit = erspan_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ip_tunnel_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_iflink = ip_tunnel_get_iflink,
+ .ndo_fill_metadata_dst = gre_fill_metadata_dst,
+};
+
static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
@@ -1041,6 +1361,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_GRE_FWMARK */
nla_total_size(4) +
+ /* IFLA_GRE_ERSPAN_INDEX */
+ nla_total_size(4) +
0;
}
@@ -1083,12 +1405,25 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
goto nla_put_failure;
}
+ if (t->index)
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
return -EMSGSIZE;
}
+static void erspan_setup(struct net_device *dev)
+{
+ ether_setup(dev);
+ dev->netdev_ops = &erspan_netdev_ops;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ ip_tunnel_setup(dev, erspan_net_id);
+}
+
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_LINK] = { .type = NLA_U32 },
[IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
@@ -1107,6 +1442,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
[IFLA_GRE_FWMARK] = { .type = NLA_U32 },
+ [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
@@ -1139,6 +1475,21 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
.get_link_net = ip_tunnel_get_link_net,
};
+static struct rtnl_link_ops erspan_link_ops __read_mostly = {
+ .kind = "erspan",
+ .maxtype = IFLA_GRE_MAX,
+ .policy = ipgre_policy,
+ .priv_size = sizeof(struct ip_tunnel),
+ .setup = erspan_setup,
+ .validate = erspan_validate,
+ .newlink = ipgre_newlink,
+ .changelink = ipgre_changelink,
+ .dellink = ip_tunnel_dellink,
+ .get_size = ipgre_get_size,
+ .fill_info = ipgre_fill_info,
+ .get_link_net = ip_tunnel_get_link_net,
+};
+
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
u8 name_assign_type)
{
@@ -1202,6 +1553,26 @@ static struct pernet_operations ipgre_tap_net_ops = {
.size = sizeof(struct ip_tunnel_net),
};
+static int __net_init erspan_init_net(struct net *net)
+{
+ return ip_tunnel_init_net(net, erspan_net_id,
+ &erspan_link_ops, "erspan0");
+}
+
+static void __net_exit erspan_exit_net(struct net *net)
+{
+ struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);
+
+ ip_tunnel_delete_net(itn, &erspan_link_ops);
+}
+
+static struct pernet_operations erspan_net_ops = {
+ .init = erspan_init_net,
+ .exit = erspan_exit_net,
+ .id = &erspan_net_id,
+ .size = sizeof(struct ip_tunnel_net),
+};
+
static int __init ipgre_init(void)
{
int err;
@@ -1214,7 +1585,11 @@ static int __init ipgre_init(void)
err = register_pernet_device(&ipgre_tap_net_ops);
if (err < 0)
- goto pnet_tap_faied;
+ goto pnet_tap_failed;
+
+ err = register_pernet_device(&erspan_net_ops);
+ if (err < 0)
+ goto pnet_erspan_failed;
err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
if (err < 0) {
@@ -1230,15 +1605,23 @@ static int __init ipgre_init(void)
if (err < 0)
goto tap_ops_failed;
+ err = rtnl_link_register(&erspan_link_ops);
+ if (err < 0)
+ goto erspan_link_failed;
+
return 0;
+erspan_link_failed:
+ rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
+ unregister_pernet_device(&erspan_net_ops);
+pnet_erspan_failed:
unregister_pernet_device(&ipgre_tap_net_ops);
-pnet_tap_faied:
+pnet_tap_failed:
unregister_pernet_device(&ipgre_net_ops);
return err;
}
@@ -1247,9 +1630,11 @@ static void __exit ipgre_fini(void)
{
rtnl_link_unregister(&ipgre_tap_ops);
rtnl_link_unregister(&ipgre_link_ops);
+ rtnl_link_unregister(&erspan_link_ops);
gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
unregister_pernet_device(&ipgre_tap_net_ops);
unregister_pernet_device(&ipgre_net_ops);
+ unregister_pernet_device(&erspan_net_ops);
}
module_init(ipgre_init);
@@ -1257,5 +1642,7 @@ module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
+MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
+MODULE_ALIAS_NETDEV("erspan0");
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 93157f2..525ae88 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -86,8 +86,8 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
* NOTE: dopt cannot point to skb.
*/
-int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
- const struct ip_options *sopt)
+int __ip_options_echo(struct net *net, struct ip_options *dopt,
+ struct sk_buff *skb, const struct ip_options *sopt)
{
unsigned char *sptr, *dptr;
int soffset, doffset;
@@ -140,7 +140,7 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
__be32 addr;
memcpy(&addr, dptr+soffset-1, 4);
- if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) {
+ if (inet_addr_type(net, addr) != RTN_UNICAST) {
dopt->ts_needtime = 1;
soffset += 8;
}
@@ -174,9 +174,6 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb,
doffset -= 4;
}
if (doffset > 3) {
- __be32 daddr = fib_compute_spec_dst(skb);
-
- memcpy(&start[doffset-1], &daddr, 4);
dopt->faddr = faddr;
dptr[0] = start[0];
dptr[1] = doffset+3;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e153c40..e8e675b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -853,61 +853,6 @@ csum_page(struct page *page, int offset, int copy)
return csum;
}
-static inline int ip_ufo_append_data(struct sock *sk,
- struct sk_buff_head *queue,
- int getfrag(void *from, char *to, int offset, int len,
- int odd, struct sk_buff *skb),
- void *from, int length, int hh_len, int fragheaderlen,
- int transhdrlen, int maxfraglen, unsigned int flags)
-{
- struct sk_buff *skb;
- int err;
-
- /* There is support for UDP fragmentation offload by network
- * device, so create one single skb packet containing complete
- * udp datagram
- */
- skb = skb_peek_tail(queue);
- if (!skb) {
- skb = sock_alloc_send_skb(sk,
- hh_len + fragheaderlen + transhdrlen + 20,
- (flags & MSG_DONTWAIT), &err);
-
- if (!skb)
- return err;
-
- /* reserve space for Hardware header */
- skb_reserve(skb, hh_len);
-
- /* create space for UDP/IP header */
- skb_put(skb, fragheaderlen + transhdrlen);
-
- /* initialize network header pointer */
- skb_reset_network_header(skb);
-
- /* initialize protocol header pointer */
- skb->transport_header = skb->network_header + fragheaderlen;
-
- skb->csum = 0;
-
- if (flags & MSG_CONFIRM)
- skb_set_dst_pending_confirm(skb, 1);
-
- __skb_queue_tail(queue, skb);
- } else if (skb_is_gso(skb)) {
- goto append;
- }
-
- skb->ip_summed = CHECKSUM_PARTIAL;
- /* specify the length of each IP datagram fragment */
- skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-
-append:
- return skb_append_datato_frags(sk, skb, getfrag, from,
- (length - transhdrlen));
-}
-
static int __ip_append_data(struct sock *sk,
struct flowi4 *fl4,
struct sk_buff_head *queue,
@@ -965,19 +910,6 @@ static int __ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
cork->length += length;
- if ((skb && skb_is_gso(skb)) ||
- (((length + (skb ? skb->len : fragheaderlen)) > mtu) &&
- (skb_queue_len(queue) <= 1) &&
- (sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
- (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) {
- err = ip_ufo_append_data(sk, queue, getfrag, from, length,
- hh_len, fragheaderlen, transhdrlen,
- maxfraglen, flags);
- if (err)
- goto error;
- return 0;
- }
/* So, what's going on in the loop below?
*
@@ -1288,28 +1220,14 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
if (!skb)
return -EINVAL;
- if ((size + skb->len > mtu) &&
- (skb_queue_len(&sk->sk_write_queue) == 1) &&
- (sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO)) {
- if (skb->ip_summed != CHECKSUM_PARTIAL)
- return -EOPNOTSUPP;
-
- skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
- }
cork->length += size;
while (size > 0) {
- if (skb_is_gso(skb)) {
- len = size;
- } else {
+ /* Check if the remaining data fits into the current packet. */
+ len = mtu - skb->len;
+ if (len < size)
+ len = maxfraglen - skb->len;
- /* Check if the remaining data fits into current packet. */
- len = mtu - skb->len;
- if (len < size)
- len = maxfraglen - skb->len;
- }
if (len <= 0) {
struct sk_buff *skb_prev;
int alloclen;
@@ -1603,7 +1521,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
int err;
int oif;
- if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
+ if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt))
return;
ipc.addr = daddr;
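With the UFO path removed, ip_append_page() above always computes how much of the remaining data fits in the current skb: first against the MTU, and, when more data is still pending, against the 8-byte-aligned fragment boundary (maxfraglen). A worked example of that arithmetic with made-up sizes:

    #include <stdio.h>

    int main(void)
    {
            int mtu = 1006, fragheaderlen = 20;
            /* largest fragment end that keeps the payload 8-byte aligned */
            int maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

            int skb_len = 900;      /* bytes already queued in this skb */
            int size = 2000;        /* bytes left to append */

            /* mirror of the loop above: fill up to the MTU; if more data
             * remains, stop at the fragment boundary instead.
             */
            int len = mtu - skb_len;
            if (len < size)
                    len = maxfraglen - skb_len;

            printf("append %d of %d bytes (maxfraglen=%d)\n",
                   len, size, maxfraglen);  /* append 104 ... 1004 */
            return 0;
    }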
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ecc4b4a..e558e4f 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -80,7 +80,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
}
-static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
+static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
+ struct sk_buff *skb)
{
unsigned char optbuf[sizeof(struct ip_options) + 40];
struct ip_options *opt = (struct ip_options *)optbuf;
@@ -88,7 +89,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
if (IPCB(skb)->opt.optlen == 0)
return;
- if (ip_options_echo(opt, skb)) {
+ if (ip_options_echo(net, opt, skb)) {
msg->msg_flags |= MSG_CTRUNC;
return;
}
@@ -204,7 +205,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
}
if (flags & IP_CMSG_RETOPTS) {
- ip_cmsg_recv_retopts(msg, skb);
+ ip_cmsg_recv_retopts(sock_net(sk), msg, skb);
flags &= ~IP_CMSG_RETOPTS;
if (!flags)
@@ -1206,6 +1207,7 @@ e_inval:
void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
{
struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
+ bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
ipv6_sk_rxinfo(sk);
@@ -1219,7 +1221,7 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
* (e.g., process binds socket to eth0 for Tx which is
* redirected to loopback in the rtable/dst).
*/
- if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
+ if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX || l3slave)
pktinfo->ipi_ifindex = inet_iif(skb);
pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
@@ -1227,14 +1229,7 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
pktinfo->ipi_ifindex = 0;
pktinfo->ipi_spec_dst.s_addr = 0;
}
- /* We need to keep the dst for __ip_options_echo()
- * We could restrict the test to opt.ts_needtime || opt.srr,
- * but the following is good enough as IP options are not often used.
- */
- if (unlikely(IPCB(skb)->opt.optlen))
- skb_dst_force(skb);
- else
- skb_dst_drop(skb);
+ skb_dst_drop(skb);
}
int ip_setsockopt(struct sock *sk, int level,
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 0192c25..5ed63d2 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -584,33 +584,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
.get_link_net = ip_tunnel_get_link_net,
};
-static bool is_vti_tunnel(const struct net_device *dev)
-{
- return dev->netdev_ops == &vti_netdev_ops;
-}
-
-static int vti_device_event(struct notifier_block *unused,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct ip_tunnel *tunnel = netdev_priv(dev);
-
- if (!is_vti_tunnel(dev))
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_DOWN:
- if (!net_eq(tunnel->net, dev_net(dev)))
- xfrm_garbage_collect(tunnel->net);
- break;
- }
- return NOTIFY_DONE;
-}
-
-static struct notifier_block vti_notifier_block __read_mostly = {
- .notifier_call = vti_device_event,
-};
-
static int __init vti_init(void)
{
const char *msg;
@@ -618,8 +591,6 @@ static int __init vti_init(void)
pr_info("IPv4 over IPsec tunneling driver\n");
- register_netdevice_notifier(&vti_notifier_block);
-
msg = "tunnel device";
err = register_pernet_device(&vti_net_ops);
if (err < 0)
@@ -652,7 +623,6 @@ xfrm_proto_ah_failed:
xfrm_proto_esp_failed:
unregister_pernet_device(&vti_net_ops);
pernet_dev_failed:
- unregister_netdevice_notifier(&vti_notifier_block);
pr_err("vti init: failed to register %s\n", msg);
return err;
}
@@ -664,7 +634,6 @@ static void __exit vti_fini(void)
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti_net_ops);
- unregister_netdevice_notifier(&vti_notifier_block);
}
module_init(vti_init);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 06863ea..c9b3e6e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -3114,14 +3114,14 @@ int __init ip_mr_init(void)
}
#endif
rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
- ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL);
+ ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
- ipmr_rtm_route, NULL, NULL);
+ ipmr_rtm_route, NULL, 0);
rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
- ipmr_rtm_route, NULL, NULL);
+ ipmr_rtm_route, NULL, 0);
rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
- NULL, ipmr_rtm_dumplink, NULL);
+ NULL, ipmr_rtm_dumplink, 0);
return 0;
#ifdef CONFIG_IP_PIMSM_V2
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 9e9d9af..e044571 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1117,7 +1117,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
- struct xt_target *target;
struct arpt_entry *de;
unsigned int origsize;
int h;
@@ -1132,7 +1131,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
de->target_offset = e->target_offset - (origsize - *size);
t = compat_arpt_get_target(e);
- target = t->u.kernel.target;
xt_compat_target_from_user(t, dstptr, size);
de->next_offset = e->next_offset - (origsize - *size);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 622ed28..576cba2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -35,12 +35,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");
-#ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x) WARN_ON(!(x))
-#else
-#define IP_NF_ASSERT(x)
-#endif
-
void *ipt_alloc_initial_table(const struct xt_table *info)
{
return xt_alloc_initial_table(ipt, IPT);
@@ -151,7 +145,7 @@ static const char *const comments[] = {
[NF_IP_TRACE_COMMENT_POLICY] = "policy",
};
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
@@ -263,7 +257,7 @@ ipt_do_table(struct sk_buff *skb,
acpar.hotdrop = false;
acpar.state = state;
- IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+ WARN_ON(!(table->valid_hooks & (1 << hook)));
local_bh_disable();
addend = xt_write_recseq_begin();
private = table->private;
@@ -293,7 +287,7 @@ ipt_do_table(struct sk_buff *skb,
const struct xt_entry_match *ematch;
struct xt_counters *counter;
- IP_NF_ASSERT(e);
+ WARN_ON(!e);
if (!ip_packet_match(ip, indev, outdev,
&e->ip, acpar.fragoff)) {
no_match:
@@ -312,7 +306,7 @@ ipt_do_table(struct sk_buff *skb,
ADD_COUNTER(*counter, skb->len, 1);
t = ipt_get_target(e);
- IP_NF_ASSERT(t->u.kernel.target);
+ WARN_ON(!t->u.kernel.target);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
@@ -1356,7 +1350,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
- struct xt_target *target;
struct ipt_entry *de;
unsigned int origsize;
int h;
@@ -1375,7 +1368,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ipt_get_target(e);
- target = t->u.kernel.target;
xt_compat_target_from_user(t, dstptr, size);
de->next_offset = e->next_offset - (origsize - *size);
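The IP_NF_ASSERT() removal above is mechanical: the old macro asserted its condition only under CONFIG_NETFILTER_DEBUG, while the replacement warns unconditionally when the condition is false, hence every call site negates its argument. A tiny userspace model of the inversion; warn_on() here is a stand-in, not the kernel macro:

    #include <stdio.h>

    #define warn_on(cond)                                           \
            do {                                                    \
                    if (cond)                                       \
                            fprintf(stderr, "WARN at %s:%d\n",      \
                                    __FILE__, __LINE__);            \
            } while (0)

    int main(void)
    {
            unsigned valid_hooks = 0x5;     /* hooks 0 and 2 valid */
            int hook = 1;

            /* old: IP_NF_ASSERT(valid_hooks & (1 << hook))
             * new: warn when that condition does NOT hold
             */
            warn_on(!(valid_hooks & (1u << hook))); /* fires: bit 1 unset */
            return 0;
    }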
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index efaa04d..17b4ca5 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -625,7 +625,7 @@ arp_mangle(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops cip_arp_ops __read_mostly = {
+static const struct nf_hook_ops cip_arp_ops = {
.hook = arp_mangle,
.pf = NFPROTO_ARP,
.hooknum = NF_ARP_OUT,
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index f1528f7..811689e 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -416,7 +416,7 @@ static unsigned int ipv4_synproxy_hook(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv4_synproxy_ops[] = {
{
.hook = ipv4_synproxy_hook,
.pf = NFPROTO_IPV4,
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 138a24bc..a1a07b3 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -67,7 +67,7 @@ static unsigned int iptable_nat_ipv4_local_fn(void *priv,
return nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain);
}
-static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
+static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
/* Before packet filtering, change destination */
{
.hook = iptable_nat_ipv4_in,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2e14ed1..fe374da 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -63,13 +63,6 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static void ipv4_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "src=%pI4 dst=%pI4 ",
- &tuple->src.u3.ip, &tuple->dst.u3.ip);
-}
-
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
@@ -174,7 +167,7 @@ static unsigned int ipv4_conntrack_local(void *priv,
/* Connection tracking may drop packets, but never alters them, so
make it the first hook. */
-static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv4_conntrack_ops[] = {
{
.hook = ipv4_conntrack_in,
.pf = NFPROTO_IPV4,
@@ -303,11 +296,6 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
-
-static int ipv4_nlattr_tuple_size(void)
-{
- return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
-}
#endif
static struct nf_sockopt_ops so_getorigdst = {
@@ -358,16 +346,15 @@ static void ipv4_hooks_unregister(struct net *net)
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.l3proto = PF_INET,
- .name = "ipv4",
.pkt_to_tuple = ipv4_pkt_to_tuple,
.invert_tuple = ipv4_invert_tuple,
- .print_tuple = ipv4_print_tuple,
.get_l4proto = ipv4_get_l4proto,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
- .nlattr_tuple_size = ipv4_nlattr_tuple_size,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
.nla_policy = ipv4_nla_policy,
+ .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */
+ NLA_ALIGN(NLA_HDRLEN + sizeof(u32)), /* CTA_IP_V4_DST */
#endif
.net_ns_get = ipv4_hooks_register,
.net_ns_put = ipv4_hooks_unregister,
@@ -398,24 +385,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
static int ipv4_net_init(struct net *net)
{
- int ret = 0;
-
- ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
- ARRAY_SIZE(builtin_l4proto4));
- if (ret < 0)
- return ret;
- ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv4: pernet registration failed\n");
- nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
- ARRAY_SIZE(builtin_l4proto4));
- }
- return ret;
+ return nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
+ ARRAY_SIZE(builtin_l4proto4));
}
static void ipv4_net_exit(struct net *net)
{
- nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
ARRAY_SIZE(builtin_l4proto4));
}
@@ -433,6 +408,11 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
need_conntrack();
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) !=
+ nf_conntrack_l3proto_ipv4.nla_size))
+ return -EINVAL;
+#endif
ret = nf_register_sockopt(&so_getorigdst);
if (ret < 0) {
pr_err("Unable to register netfilter socket option\n");
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 73c591d..a046c29 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -71,16 +71,6 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void icmp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "type=%u code=%u id=%u ",
- tuple->dst.u.icmp.type,
- tuple->dst.u.icmp.code,
- ntohs(tuple->src.u.icmp.id));
-}
-
static unsigned int *icmp_get_timeouts(struct net *net)
{
return &icmp_pernet(net)->timeout;
@@ -92,7 +82,6 @@ static int icmp_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeout)
{
/* Do not immediately delete the connection after the first
@@ -137,7 +126,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
- NF_CT_ASSERT(!skb_nfct(skb));
+ WARN_ON(skb_nfct(skb));
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
/* Are they talking about one of our connections? */
@@ -362,10 +351,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_ICMP,
- .name = "icmp",
.pkt_to_tuple = icmp_pkt_to_tuple,
.invert_tuple = icmp_invert_tuple,
- .print_tuple = icmp_print_tuple,
.packet = icmp_packet,
.get_timeouts = icmp_get_timeouts,
.new = icmp_new,
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 346bf7c..37fe1616 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -90,7 +90,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ipv4_defrag_ops[] = {
+static const struct nf_hook_ops ipv4_defrag_ops[] = {
{
.hook = ipv4_conntrack_defrag,
.pf = NFPROTO_IPV4,
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index 2f3895d..df5c2a2 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -25,7 +25,7 @@
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index c83a996..4388de0 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -24,7 +24,7 @@
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 574f7eb..ac8342d 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -252,16 +252,16 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
if (set_h245_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons((port & htons(1)) ? nated_port + 1 :
- nated_port)) == 0) {
- /* Save ports */
- info->rtp_port[i][dir] = rtp_port;
- info->rtp_port[i][!dir] = htons(nated_port);
- } else {
+ nated_port))) {
nf_ct_unexpect_related(rtp_exp);
nf_ct_unexpect_related(rtcp_exp);
return -1;
}
+ /* Save ports */
+ info->rtp_port[i][dir] = rtp_port;
+ info->rtp_port[i][!dir] = htons(nated_port);
+
/* Success */
pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n",
&rtp_exp->tuple.src.u3.ip,
@@ -370,15 +370,15 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
/* Modify signal */
if (set_h225_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
- htons(nated_port)) == 0) {
- /* Save ports */
- info->sig_port[dir] = port;
- info->sig_port[!dir] = htons(nated_port);
- } else {
+ htons(nated_port))) {
nf_ct_unexpect_related(exp);
return -1;
}
+ /* Save ports */
+ info->sig_port[dir] = port;
+ info->sig_port[!dir] = htons(nated_port);
+
pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n",
&exp->tuple.src.u3.ip,
ntohs(exp->tuple.src.u.tcp.port),
@@ -462,24 +462,27 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
/* Modify signal */
if (set_h225_addr(skb, protoff, data, 0, &taddr[idx],
&ct->tuplehash[!dir].tuple.dst.u3,
- htons(nated_port)) == 0) {
- /* Save ports */
- info->sig_port[dir] = port;
- info->sig_port[!dir] = htons(nated_port);
-
- /* Fix for Gnomemeeting */
- if (idx > 0 &&
- get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
- (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
- set_h225_addr(skb, protoff, data, 0, &taddr[0],
- &ct->tuplehash[!dir].tuple.dst.u3,
- info->sig_port[!dir]);
- }
- } else {
+ htons(nated_port))) {
nf_ct_unexpect_related(exp);
return -1;
}
+ /* Save ports */
+ info->sig_port[dir] = port;
+ info->sig_port[!dir] = htons(nated_port);
+
+ /* Fix for Gnomemeeting */
+ if (idx > 0 &&
+ get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
+ (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
+ if (set_h225_addr(skb, protoff, data, 0, &taddr[0],
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ info->sig_port[!dir])) {
+ nf_ct_unexpect_related(exp);
+ return -1;
+ }
+ }
+
/* Success */
pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n",
&exp->tuple.src.u3.ip,
@@ -550,9 +553,9 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
}
/* Modify signal */
- if (!set_h225_addr(skb, protoff, data, dataoff, taddr,
- &ct->tuplehash[!dir].tuple.dst.u3,
- htons(nated_port)) == 0) {
+ if (set_h225_addr(skb, protoff, data, dataoff, taddr,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons(nated_port))) {
nf_ct_unexpect_related(exp);
return -1;
}
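The h323 NAT hunks above invert the control flow: rather than doing the port bookkeeping inside an "== 0" success arm and unwinding in an else branch, a failed set_h225_addr()/set_h245_addr() now bails out immediately and the bookkeeping runs on the single remaining path, which also lets the Gnomemeeting fixup report its own failure. A schematic of that refactor with hypothetical names:

    #include <stdio.h>

    static int modify_signal(int fail) { return fail ? -1 : 0; }

    static int nat_signal(int fail)
    {
            /* early return keeps the unwind next to the failing call */
            if (modify_signal(fail)) {
                    /* nf_ct_unexpect_related(exp) would go here */
                    return -1;
            }

            /* save ports: reached only on success */
            printf("ports saved\n");
            return 0;
    }

    int main(void)
    {
            nat_signal(0);                          /* success path */
            return nat_signal(1) == -1 ? 0 : 1;     /* failure unwinds */
    }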
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index feedd75..a0f37b2 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -190,7 +190,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
struct nf_conntrack_tuple target;
unsigned long statusbit;
- NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+ WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
return 0;
@@ -306,8 +306,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
default:
/* ESTABLISHED */
- NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
- ctinfo == IP_CT_ESTABLISHED_REPLY);
+ WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY);
if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
goto oif_changed;
}
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index f39037f..0c366aa 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -34,12 +34,12 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
const struct rtable *rt;
__be32 newsrc, nh;
- NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
+ WARN_ON(hooknum != NF_INET_POST_ROUTING);
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
+ WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
/* Source address is 0.0.0.0 - locally generated packet that is
* probably not supposed to be masqueraded.
@@ -96,7 +96,7 @@ static int masq_device_event(struct notifier_block *this,
* conntracks which were associated with that device,
* and forget them.
*/
- NF_CT_ASSERT(dev->ifindex != 0);
+ WARN_ON(dev->ifindex == 0);
nf_ct_iterate_cleanup_net(net, device_cmp,
(void *)(long)dev->ifindex, 0, 0);
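
Note the predicate flip in these NF_CT_ASSERT conversions: NF_CT_ASSERT(cond) complained when cond was false, whereas WARN_ON(cond) complains when cond is true, so every condition is negated (with De Morgan applied to the compound ones). A toy model of the two macros, simplified from the kernel versions:

    /* Standalone model of the NF_CT_ASSERT -> WARN_ON inversion; these
     * macros are simplified stand-ins for the kernel ones.
     */
    #include <stdio.h>

    #define ASSERT(cond)  do { if (!(cond)) puts("assert failed: " #cond); } while (0)
    #define WARN_ON(cond) do { if (cond)    puts("warning: " #cond); } while (0)

    int main(void)
    {
        int hooknum = 1, POST_ROUTING = 4;

        ASSERT(hooknum == POST_ROUTING);   /* old style: assert the invariant */
        WARN_ON(hooknum != POST_ROUTING);  /* new style: warn on its negation */
        return 0;
    }
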
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index de3681d..e50976e 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -32,9 +32,10 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_fib *priv = nft_expr_priv(expr);
+ int noff = skb_network_offset(pkt->skb);
u32 *dst = &regs->data[priv->dreg];
const struct net_device *dev = NULL;
- const struct iphdr *iph;
+ struct iphdr *iph, _iph;
__be32 addr;
if (priv->flags & NFTA_FIB_F_IIF)
@@ -42,7 +43,12 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
- iph = ip_hdr(pkt->skb);
+ iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+ if (!iph) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
if (priv->flags & NFTA_FIB_F_DADDR)
addr = iph->daddr;
else
@@ -61,8 +67,9 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_fib *priv = nft_expr_priv(expr);
+ int noff = skb_network_offset(pkt->skb);
u32 *dest = &regs->data[priv->dreg];
- const struct iphdr *iph;
+ struct iphdr *iph, _iph;
struct fib_result res;
struct flowi4 fl4 = {
.flowi4_scope = RT_SCOPE_UNIVERSE,
@@ -95,7 +102,12 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
return;
}
- iph = ip_hdr(pkt->skb);
+ iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+ if (!iph) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
if (ipv4_is_zeronet(iph->saddr)) {
if (ipv4_is_lbcast(iph->daddr) ||
ipv4_is_local_multicast(iph->daddr)) {
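
ip_hdr() presumes the IPv4 header sits in the skb's linear area; nft_fib4 can be evaluated where that is not guaranteed, so the hunks above switch to skb_header_pointer(), which hands back the header (copying it into the on-stack _iph when necessary) or NULL, in which case the expression bails with NFT_BREAK. A userspace model of the helper's contract; for simplicity this version always copies, while the kernel helper returns a direct pointer when the span is already linear:

    #include <stdio.h>
    #include <string.h>

    static void *header_pointer(const void *data, size_t datalen,
                                size_t offset, size_t len, void *scratch)
    {
        if (offset + len > datalen)
            return NULL;        /* truncated packet: caller must bail */
        memcpy(scratch, (const char *)data + offset, len);
        return scratch;
    }

    int main(void)
    {
        unsigned char pkt[40] = { 0x45 };   /* fake IPv4 version/IHL byte */
        unsigned char _iph[20];
        void *iph = header_pointer(pkt, sizeof(pkt), 0, sizeof(_iph), _iph);

        printf("iph %s\n", iph ? "ok" : "missing (NFT_BREAK)");
        return 0;
    }
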
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 43eb6567..127153f 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -206,12 +206,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST),
SNMP_MIB_ITEM("ListenOverflows", LINUX_MIB_LISTENOVERFLOWS),
SNMP_MIB_ITEM("ListenDrops", LINUX_MIB_LISTENDROPS),
- SNMP_MIB_ITEM("TCPPrequeued", LINUX_MIB_TCPPREQUEUED),
- SNMP_MIB_ITEM("TCPDirectCopyFromBacklog", LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG),
- SNMP_MIB_ITEM("TCPDirectCopyFromPrequeue", LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE),
- SNMP_MIB_ITEM("TCPPrequeueDropped", LINUX_MIB_TCPPREQUEUEDROPPED),
SNMP_MIB_ITEM("TCPHPHits", LINUX_MIB_TCPHPHITS),
- SNMP_MIB_ITEM("TCPHPHitsToUser", LINUX_MIB_TCPHPHITSTOUSER),
SNMP_MIB_ITEM("TCPPureAcks", LINUX_MIB_TCPPUREACKS),
SNMP_MIB_ITEM("TCPHPAcks", LINUX_MIB_TCPHPACKS),
SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY),
@@ -230,14 +225,12 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES),
SNMP_MIB_ITEM("TCPLossFailures", LINUX_MIB_TCPLOSSFAILURES),
SNMP_MIB_ITEM("TCPFastRetrans", LINUX_MIB_TCPFASTRETRANS),
- SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS),
SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS),
SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS),
SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES),
SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY),
SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
- SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED),
SNMP_MIB_ITEM("TCPRcvCollapsed", LINUX_MIB_TCPRCVCOLLAPSED),
SNMP_MIB_ITEM("TCPDSACKOldSent", LINUX_MIB_TCPDSACKOLDSENT),
SNMP_MIB_ITEM("TCPDSACKOfoSent", LINUX_MIB_TCPDSACKOFOSENT),
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index b0bb5d0..33b70bf 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -122,7 +122,8 @@ void raw_unhash_sk(struct sock *sk)
EXPORT_SYMBOL_GPL(raw_unhash_sk);
struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
- unsigned short num, __be32 raddr, __be32 laddr, int dif)
+ unsigned short num, __be32 raddr, __be32 laddr,
+ int dif, int sdif)
{
sk_for_each_from(sk) {
struct inet_sock *inet = inet_sk(sk);
@@ -130,7 +131,8 @@ struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
!(inet->inet_daddr && inet->inet_daddr != raddr) &&
!(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
- !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != sdif))
goto found; /* gotcha */
}
sk = NULL;
@@ -171,6 +173,7 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
*/
static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
{
+ int sdif = inet_sdif(skb);
struct sock *sk;
struct hlist_head *head;
int delivered = 0;
@@ -184,13 +187,13 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
net = dev_net(skb->dev);
sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
iph->saddr, iph->daddr,
- skb->dev->ifindex);
+ skb->dev->ifindex, sdif);
while (sk) {
delivered = 1;
if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) &&
ip_mc_sf_allow(sk, iph->daddr, iph->saddr,
- skb->dev->ifindex)) {
+ skb->dev->ifindex, sdif)) {
struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
/* Not releasing hash table! */
@@ -199,7 +202,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
}
sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
iph->saddr, iph->daddr,
- skb->dev->ifindex);
+ skb->dev->ifindex, sdif);
}
out:
read_unlock(&raw_v4_hashinfo.lock);
@@ -297,12 +300,15 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
read_lock(&raw_v4_hashinfo.lock);
raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]);
if (raw_sk) {
+ int dif = skb->dev->ifindex;
+ int sdif = inet_sdif(skb);
+
iph = (const struct iphdr *)skb->data;
net = dev_net(skb->dev);
while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol,
iph->daddr, iph->saddr,
- skb->dev->ifindex)) != NULL) {
+ dif, sdif)) != NULL) {
raw_err(raw_sk, skb, info);
raw_sk = sk_next(raw_sk);
iph = (const struct iphdr *)skb->data;
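
raw.c is part of the series threading sdif (the VRF slave device index, obtained via inet_sdif()) through the IPv4 receive-side lookups: a socket bound to a VRF slave interface keeps matching even when the packet is scoped to the l3mdev master. The bound-device test reduces to the predicate below (standalone model):

    /* Device-match rule after the change: an unbound socket matches
     * anything; a bound socket matches if its ifindex equals either the
     * receiving device (dif) or the VRF slave device (sdif).
     */
    #include <stdbool.h>
    #include <stdio.h>

    static bool dev_match(int bound_dev_if, int dif, int sdif)
    {
        return !bound_dev_if || bound_dev_if == dif || bound_dev_if == sdif;
    }

    int main(void)
    {
        printf("%d\n", dev_match(0, 3, 7)); /* unbound: 1 */
        printf("%d\n", dev_match(7, 3, 7)); /* bound to slave: 1 */
        printf("%d\n", dev_match(5, 3, 7)); /* bound elsewhere: 0 */
        return 0;
    }
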
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index e1a51ca..c200065 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -46,13 +46,13 @@ static struct sock *raw_lookup(struct net *net, struct sock *from,
sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol,
r->id.idiag_dst[0],
r->id.idiag_src[0],
- r->id.idiag_if);
+ r->id.idiag_if, 0);
#if IS_ENABLED(CONFIG_IPV6)
else
sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol,
(const struct in6_addr *)r->id.idiag_src,
(const struct in6_addr *)r->id.idiag_dst,
- r->id.idiag_if);
+ r->id.idiag_if, 0);
#endif
return sk;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2331de2..94d4cd2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1398,7 +1398,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rtable *rt = (struct rtable *) dst;
- if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+ if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
kfree(p);
if (!list_empty(&rt->rt_uncached)) {
@@ -1456,7 +1456,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
if (fi->fib_metrics != &dst_default_metrics) {
rt->dst._metrics |= DST_METRICS_REFCOUNTED;
- atomic_inc(&fi->fib_metrics->refcnt);
+ refcount_inc(&fi->fib_metrics->refcnt);
}
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
@@ -2236,7 +2236,7 @@ add:
if (!rth)
return ERR_PTR(-ENOBUFS);
- rth->rt_iif = orig_oif ? : 0;
+ rth->rt_iif = orig_oif;
if (res->table)
rth->rt_table_id = res->table->tb_id;
@@ -2439,6 +2439,12 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
/* L3 master device is the loopback for that domain */
dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
net->loopback_dev;
+
+ /* make sure orig_oif points to fib result device even
+ * though packet rx/tx happens over loopback or l3mdev
+ */
+ orig_oif = FIB_RES_OIF(*res);
+
fl4->flowi4_oif = dev_out->ifindex;
flags |= RTCF_LOCAL;
goto make_route;
@@ -3075,7 +3081,8 @@ int __init ip_rt_init(void)
xfrm_init();
xfrm4_init();
#endif
- rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);
+ rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&sysctl_route_ops);
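
The dst-metrics refcount moves from atomic_t to refcount_t, whose API traps increment-from-zero and overflow instead of silently wrapping; the call sites translate one-for-one, atomic_inc() to refcount_inc() and atomic_dec_and_test() to refcount_dec_and_test(). A minimal, deliberately non-atomic model of the free-on-last-put lifetime being protected:

    #include <stdio.h>
    #include <stdlib.h>

    struct metrics { int refcnt; };    /* plain int stands in for refcount_t */

    static void metrics_put(struct metrics *p)
    {
        if (--p->refcnt == 0) {        /* refcount_dec_and_test() */
            puts("last reference dropped, kfree(p)");
            free(p);
        }
    }

    int main(void)
    {
        struct metrics *p = malloc(sizeof(*p));

        p->refcnt = 1;                 /* owned by fib_info */
        p->refcnt++;                   /* refcount_inc(): a route takes a ref */
        metrics_put(p);                /* route destroyed */
        metrics_put(p);                /* fib_info destroyed: frees */
        return 0;
    }
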
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 03ad877..b1bb1b3 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* We threw the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
*/
- ireq->opt = tcp_v4_save_options(skb);
+ ireq->opt = tcp_v4_save_options(sock_net(sk), skb);
if (security_inet_conn_request(sk, skb, req)) {
reqsk_free(req);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9bf8097..0d3c038 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -45,6 +45,9 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
+/* obsolete */
+static int sysctl_tcp_low_latency __read_mostly;
+
/* Update system visible IP port range */
static void set_local_port_range(struct net *net, int range[2])
{
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a3e91b5..5091402 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -269,6 +269,7 @@
#include <linux/err.h>
#include <linux/time.h>
#include <linux/slab.h>
+#include <linux/errqueue.h>
#include <net/icmp.h>
#include <net/inet_common.h>
@@ -388,6 +389,19 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
return period;
}
+static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp)
+{
+ u32 rate = READ_ONCE(tp->rate_delivered);
+ u32 intv = READ_ONCE(tp->rate_interval_us);
+ u64 rate64 = 0;
+
+ if (rate && intv) {
+ rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
+ do_div(rate64, intv);
+ }
+ return rate64;
+}
+
/* Address-family independent initialization for a tcp_sock.
*
* NOTE: A lot of things set to zero explicitly by call to
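
tcp_compute_delivery_rate() factors out the computation tcp_get_info() previously did inline (and that the new opt-stats path further down reuses): rate_delivered packets over rate_interval_us microseconds, scaled to bytes per second via mss_cache. A userspace rendering with made-up numbers:

    #include <inttypes.h>
    #include <stdio.h>

    static uint64_t delivery_rate(uint32_t rate, uint32_t intv_us, uint32_t mss)
    {
        if (!rate || !intv_us)
            return 0;                         /* no sample yet */
        return (uint64_t)rate * mss * 1000000ull / intv_us;
    }

    int main(void)
    {
        /* 100 packets of 1448 bytes over 10 ms -> 14,480,000 bytes/sec */
        printf("%" PRIu64 " bytes/sec\n", delivery_rate(100, 10000, 1448));
        return 0;
    }
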
@@ -400,7 +414,6 @@ void tcp_init_sock(struct sock *sk)
tp->out_of_order_queue = RB_ROOT;
tcp_init_xmit_timers(sk);
- tcp_prequeue_init(tp);
INIT_LIST_HEAD(&tp->tsq_node);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
@@ -1034,23 +1047,29 @@ out_err:
}
EXPORT_SYMBOL_GPL(do_tcp_sendpages);
-int tcp_sendpage(struct sock *sk, struct page *page, int offset,
- size_t size, int flags)
+int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
{
- ssize_t res;
-
if (!(sk->sk_route_caps & NETIF_F_SG) ||
!sk_check_csum_caps(sk))
- return sock_no_sendpage(sk->sk_socket, page, offset, size,
- flags);
-
- lock_sock(sk);
+ return sock_no_sendpage_locked(sk, page, offset, size, flags);
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
- res = do_tcp_sendpages(sk, page, offset, size, flags);
+ return do_tcp_sendpages(sk, page, offset, size, flags);
+}
+EXPORT_SYMBOL_GPL(tcp_sendpage_locked);
+
+int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
+{
+ int ret;
+
+ lock_sock(sk);
+ ret = tcp_sendpage_locked(sk, page, offset, size, flags);
release_sock(sk);
- return res;
+
+ return ret;
}
EXPORT_SYMBOL(tcp_sendpage);
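
tcp_sendpage() is split into tcp_sendpage_locked(), which requires the caller to already hold the socket lock, plus a thin lock/call/unlock wrapper; tcp_sendmsg() receives the same treatment further down. In-kernel callers that already own the lock can now invoke the core directly. The generic shape, with pthreads standing in for lock_sock()/release_sock():

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t sk_lock = PTHREAD_MUTEX_INITIALIZER;

    static int op_locked(int arg)   /* caller must hold sk_lock */
    {
        printf("doing work with %d under the lock\n", arg);
        return 0;
    }

    static int op(int arg)          /* public entry point */
    {
        int ret;

        pthread_mutex_lock(&sk_lock);
        ret = op_locked(arg);
        pthread_mutex_unlock(&sk_lock);
        return ret;
    }

    int main(void) { return op(42); }
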
@@ -1144,9 +1163,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
return err;
}
-int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct ubuf_info *uarg = NULL;
struct sk_buff *skb;
struct sockcm_cookie sockc;
int flags, err, copied = 0;
@@ -1155,9 +1175,25 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
bool sg;
long timeo;
- lock_sock(sk);
-
flags = msg->msg_flags;
+
+ if (flags & MSG_ZEROCOPY && size) {
+ if (sk->sk_state != TCP_ESTABLISHED) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ skb = tcp_send_head(sk) ? tcp_write_queue_tail(sk) : NULL;
+ uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
+ if (!uarg) {
+ err = -ENOBUFS;
+ goto out_err;
+ }
+
+ if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG))
+ uarg->zerocopy = 0;
+ }
+
if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
if (err == -EINPROGRESS && copied_syn > 0)
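
This is the send-side MSG_ZEROCOPY hook: on an established socket a ubuf_info completion cookie is allocated (or shared with the tail skb's), and the payload pages are later pinned instead of copied; routes without checksum offload plus scatter-gather quietly fall back to copying by clearing uarg->zerocopy. From userspace the feature is opt-in; an abbreviated sketch of the intended calling sequence (error handling and extended-error parsing trimmed; the fallback #defines are assumptions for older headers):

    #include <string.h>
    #include <sys/socket.h>
    #include <linux/errqueue.h>    /* SO_EE_ORIGIN_ZEROCOPY completions */

    #ifndef SO_ZEROCOPY
    #define SO_ZEROCOPY 60
    #endif
    #ifndef MSG_ZEROCOPY
    #define MSG_ZEROCOPY 0x4000000
    #endif

    static void send_zerocopy(int fd, const char *buf, size_t len)
    {
        int one = 1;
        char ctrl[100];
        struct msghdr msg = { .msg_control = ctrl,
                              .msg_controllen = sizeof(ctrl) };

        setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
        send(fd, buf, len, MSG_ZEROCOPY);

        /* completion arrives as an extended error on the error queue;
         * the buffer must stay untouched until it does
         */
        recvmsg(fd, &msg, MSG_ERRQUEUE);
    }
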
@@ -1281,7 +1317,7 @@ new_segment:
err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
if (err)
goto do_fault;
- } else {
+ } else if (!uarg || !uarg->zerocopy) {
bool merge = true;
int i = skb_shinfo(skb)->nr_frags;
struct page_frag *pfrag = sk_page_frag(sk);
@@ -1319,6 +1355,13 @@ new_segment:
page_ref_inc(pfrag->page);
}
pfrag->offset += copy;
+ } else {
+ err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
+ if (err == -EMSGSIZE || err == -EEXIST)
+ goto new_segment;
+ if (err < 0)
+ goto do_error;
+ copy = err;
}
if (!copied)
@@ -1365,7 +1408,7 @@ out:
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
}
out_nopush:
- release_sock(sk);
+ sock_zerocopy_put(uarg);
return copied + copied_syn;
do_fault:
@@ -1382,6 +1425,7 @@ do_error:
if (copied + copied_syn)
goto out;
out_err:
+ sock_zerocopy_put_abort(uarg);
err = sk_stream_error(sk, flags, err);
/* make sure we wake any epoll edge trigger waiter */
if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
@@ -1389,9 +1433,20 @@ out_err:
sk->sk_write_space(sk);
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
}
- release_sock(sk);
return err;
}
+EXPORT_SYMBOL_GPL(tcp_sendmsg_locked);
+
+int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+ int ret;
+
+ lock_sock(sk);
+ ret = tcp_sendmsg_locked(sk, msg, size);
+ release_sock(sk);
+
+ return ret;
+}
EXPORT_SYMBOL(tcp_sendmsg);
/*
@@ -1525,20 +1580,6 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
tcp_send_ack(sk);
}
-static void tcp_prequeue_process(struct sock *sk)
-{
- struct sk_buff *skb;
- struct tcp_sock *tp = tcp_sk(sk);
-
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
-
- while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
- sk_backlog_rcv(sk, skb);
-
- /* Clear memory counter. */
- tp->ucopy.memory = 0;
-}
-
static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
{
struct sk_buff *skb;
@@ -1652,6 +1693,61 @@ int tcp_peek_len(struct socket *sock)
}
EXPORT_SYMBOL(tcp_peek_len);
+static void tcp_update_recv_tstamps(struct sk_buff *skb,
+ struct scm_timestamping *tss)
+{
+ if (skb->tstamp)
+ tss->ts[0] = ktime_to_timespec(skb->tstamp);
+ else
+ tss->ts[0] = (struct timespec) {0};
+
+ if (skb_hwtstamps(skb)->hwtstamp)
+ tss->ts[2] = ktime_to_timespec(skb_hwtstamps(skb)->hwtstamp);
+ else
+ tss->ts[2] = (struct timespec) {0};
+}
+
+/* Similar to __sock_recv_timestamp, but does not require an skb */
+void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+ struct scm_timestamping *tss)
+{
+ struct timeval tv;
+ bool has_timestamping = false;
+
+ if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
+ if (sock_flag(sk, SOCK_RCVTSTAMP)) {
+ if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
+ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
+ sizeof(tss->ts[0]), &tss->ts[0]);
+ } else {
+ tv.tv_sec = tss->ts[0].tv_sec;
+ tv.tv_usec = tss->ts[0].tv_nsec / 1000;
+
+ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
+ sizeof(tv), &tv);
+ }
+ }
+
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
+ has_timestamping = true;
+ else
+ tss->ts[0] = (struct timespec) {0};
+ }
+
+ if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
+ has_timestamping = true;
+ else
+ tss->ts[2] = (struct timespec) {0};
+ }
+
+ if (has_timestamping) {
+ tss->ts[1] = (struct timespec) {0};
+ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING,
+ sizeof(*tss), tss);
+ }
+}
+
/*
* This routine copies from a sock struct into the user buffer.
*
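
tcp_recv_timestamp() lets TCP return receive timestamps through recvmsg() control messages even though the skbs that carried them have already been consumed: ts[0] holds the software stamp, ts[2] the raw hardware stamp, and ts[1] is zeroed. A consumer-side sketch, assuming the socket was configured with SO_TIMESTAMPING receive flags beforehand; the SCM_TIMESTAMPING fallback define is an assumption for older libcs:

    #include <stdio.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <linux/errqueue.h>    /* struct scm_timestamping */

    #ifndef SCM_TIMESTAMPING
    #define SCM_TIMESTAMPING 37    /* SO_TIMESTAMPING on most architectures */
    #endif

    static void read_rx_tstamp(int fd, char *buf, size_t len)
    {
        char ctrl[CMSG_SPACE(sizeof(struct scm_timestamping))];
        struct iovec iov = { .iov_base = buf, .iov_len = len };
        struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
                              .msg_control = ctrl,
                              .msg_controllen = sizeof(ctrl) };
        struct cmsghdr *cm;

        if (recvmsg(fd, &msg, 0) <= 0)
            return;
        for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
            if (cm->cmsg_level == SOL_SOCKET &&
                cm->cmsg_type == SCM_TIMESTAMPING) {
                struct scm_timestamping *tss = (void *)CMSG_DATA(cm);

                /* ts[0] = software, ts[2] = raw hardware */
                printf("sw %lld.%09ld\n",
                       (long long)tss->ts[0].tv_sec, tss->ts[0].tv_nsec);
            }
        }
    }
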
@@ -1671,9 +1767,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int err;
int target; /* Read at least this many bytes */
long timeo;
- struct task_struct *user_recv = NULL;
struct sk_buff *skb, *last;
u32 urg_hole = 0;
+ struct scm_timestamping tss;
+ bool has_tss = false;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
@@ -1806,51 +1903,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
tcp_cleanup_rbuf(sk, copied);
- if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
- /* Install new reader */
- if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
- user_recv = current;
- tp->ucopy.task = user_recv;
- tp->ucopy.msg = msg;
- }
-
- tp->ucopy.len = len;
-
- WARN_ON(tp->copied_seq != tp->rcv_nxt &&
- !(flags & (MSG_PEEK | MSG_TRUNC)));
-
- /* Ugly... If prequeue is not empty, we have to
- * process it before releasing socket, otherwise
- * order will be broken at second iteration.
- * More elegant solution is required!!!
- *
- * Look: we have the following (pseudo)queues:
- *
- * 1. packets in flight
- * 2. backlog
- * 3. prequeue
- * 4. receive_queue
- *
- * Each queue can be processed only if the next ones
- * are empty. At this point we have empty receive_queue.
- * But prequeue _can_ be not empty after 2nd iteration,
- * when we jumped to start of loop because backlog
- * processing added something to receive_queue.
- * We cannot release_sock(), because backlog contains
- * packets arrived _after_ prequeued ones.
- *
- * Shortly, algorithm is clear --- to process all
- * the queues in order. We could make it more directly,
- * requeueing packets from backlog to prequeue, if
- * is not empty. It is more elegant, but eats cycles,
- * unfortunately.
- */
- if (!skb_queue_empty(&tp->ucopy.prequeue))
- goto do_prequeue;
-
- /* __ Set realtime policy in scheduler __ */
- }
-
if (copied >= target) {
/* Do not sleep, just process backlog. */
release_sock(sk);
@@ -1859,31 +1911,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
sk_wait_data(sk, &timeo, last);
}
- if (user_recv) {
- int chunk;
-
- /* __ Restore normal policy in scheduler __ */
-
- chunk = len - tp->ucopy.len;
- if (chunk != 0) {
- NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
- len -= chunk;
- copied += chunk;
- }
-
- if (tp->rcv_nxt == tp->copied_seq &&
- !skb_queue_empty(&tp->ucopy.prequeue)) {
-do_prequeue:
- tcp_prequeue_process(sk);
-
- chunk = len - tp->ucopy.len;
- if (chunk != 0) {
- NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
- len -= chunk;
- copied += chunk;
- }
- }
- }
if ((flags & MSG_PEEK) &&
(peek_seq - copied - urg_hole != tp->copied_seq)) {
net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
@@ -1941,6 +1968,10 @@ skip_copy:
if (used + offset < skb->len)
continue;
+ if (TCP_SKB_CB(skb)->has_rxtstamp) {
+ tcp_update_recv_tstamps(skb, &tss);
+ has_tss = true;
+ }
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
if (!(flags & MSG_PEEK))
@@ -1955,29 +1986,13 @@ skip_copy:
break;
} while (len > 0);
- if (user_recv) {
- if (!skb_queue_empty(&tp->ucopy.prequeue)) {
- int chunk;
-
- tp->ucopy.len = copied > 0 ? len : 0;
-
- tcp_prequeue_process(sk);
-
- if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
- len -= chunk;
- copied += chunk;
- }
- }
-
- tp->ucopy.task = NULL;
- tp->ucopy.len = 0;
- }
-
/* According to UNIX98, msg_name/msg_namelen are ignored
* on connected socket. I was just happy when found this 8) --ANK
*/
+ if (has_tss)
+ tcp_recv_timestamp(msg, sk, &tss);
+
/* Clean up data we have read: This will do ACK frames. */
tcp_cleanup_rbuf(sk, copied);
@@ -2823,7 +2838,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
const struct inet_connection_sock *icsk = inet_csk(sk);
- u32 now, intv;
+ u32 now;
u64 rate64;
bool slow;
u32 rate;
@@ -2922,13 +2937,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_data_segs_out = tp->data_segs_out;
info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
- rate = READ_ONCE(tp->rate_delivered);
- intv = READ_ONCE(tp->rate_interval_us);
- if (rate && intv) {
- rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
- do_div(rate64, intv);
+ rate64 = tcp_compute_delivery_rate(tp);
+ if (rate64)
info->tcpi_delivery_rate = rate64;
- }
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -2938,8 +2949,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *stats;
struct tcp_info info;
+ u64 rate64;
+ u32 rate;
- stats = alloc_skb(5 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC);
+ stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) +
+ 3 * nla_total_size(sizeof(u32)) +
+ 2 * nla_total_size(sizeof(u8)), GFP_ATOMIC);
if (!stats)
return NULL;
@@ -2954,6 +2969,20 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
tp->data_segs_out, TCP_NLA_PAD);
nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS,
tp->total_retrans, TCP_NLA_PAD);
+
+ rate = READ_ONCE(sk->sk_pacing_rate);
+ rate64 = rate != ~0U ? rate : ~0ULL;
+ nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD);
+
+ rate64 = tcp_compute_delivery_rate(tp);
+ nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
+
+ nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
+ nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
+ nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
+
+ nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits);
+ nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited);
return stats;
}
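
The opt-stats skb grows from five u64 attributes to seven u64s, three u32s and two u8s, and the alloc_skb() budget must mirror the nla_put_* calls exactly or the puts fail. A back-of-envelope check of the new size, reproducing the netlink sizing rules (4-byte attribute header, 4-byte alignment, with 64-bit attributes reserving an extra pad attribute, as nla_total_size_64bit() does):

    #include <stdio.h>

    #define NLA_ALIGN(x)    (((x) + 3) & ~3)
    #define NLA_HDRLEN      4

    static int nla_total_size(int payload)
    {
        return NLA_ALIGN(NLA_HDRLEN + payload);
    }

    static int nla_total_size_64bit(int payload)
    {
        /* attribute plus a zero-length pad attribute */
        return nla_total_size(payload) + nla_total_size(0);
    }

    int main(void)
    {
        int size = 7 * nla_total_size_64bit(8) +   /* 7 x 16 = 112 */
                   3 * nla_total_size(4) +         /* 3 x  8 =  24 */
                   2 * nla_total_size(1);          /* 2 x  8 =  16 */

        printf("opt-stats skb payload budget: %d bytes\n", size); /* 152 */
        return 0;
    }
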
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 609965f..fc36143 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -49,7 +49,6 @@ MODULE_PARM_DESC(smooth_part, "log(B/(B*Smin))/log(B/(B-1))+B, # of RTT from Wma
struct bictcp {
u32 cnt; /* increase cwnd by 1 after ACKs */
u32 last_max_cwnd; /* last maximum snd_cwnd */
- u32 loss_cwnd; /* congestion window at last loss */
u32 last_cwnd; /* the last snd_cwnd */
u32 last_time; /* time when updated last_cwnd */
u32 epoch_start; /* beginning of an epoch */
@@ -72,7 +71,6 @@ static void bictcp_init(struct sock *sk)
struct bictcp *ca = inet_csk_ca(sk);
bictcp_reset(ca);
- ca->loss_cwnd = 0;
if (initial_ssthresh)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
@@ -172,22 +170,12 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
else
ca->last_max_cwnd = tp->snd_cwnd;
- ca->loss_cwnd = tp->snd_cwnd;
-
if (tp->snd_cwnd <= low_window)
return max(tp->snd_cwnd >> 1U, 2U);
else
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
-static u32 bictcp_undo_cwnd(struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
- const struct bictcp *ca = inet_csk_ca(sk);
-
- return max(tp->snd_cwnd, ca->loss_cwnd);
-}
-
static void bictcp_state(struct sock *sk, u8 new_state)
{
if (new_state == TCP_CA_Loss)
@@ -214,7 +202,7 @@ static struct tcp_congestion_ops bictcp __read_mostly = {
.ssthresh = bictcp_recalc_ssthresh,
.cong_avoid = bictcp_cong_avoid,
.set_state = bictcp_state,
- .undo_cwnd = bictcp_undo_cwnd,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.pkts_acked = bictcp_acked,
.owner = THIS_MODULE,
.name = "bic",
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 50a0f3e..66ac69f 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -85,7 +85,6 @@ struct cdg {
u8 state;
u8 delack;
u32 rtt_seq;
- u32 undo_cwnd;
u32 shadow_wnd;
u16 backoff_cnt;
u16 sample_cnt;
@@ -330,8 +329,6 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
struct cdg *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- ca->undo_cwnd = tp->snd_cwnd;
-
if (ca->state == CDG_BACKOFF)
return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10);
@@ -344,13 +341,6 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
return max(2U, tp->snd_cwnd >> 1);
}
-static u32 tcp_cdg_undo_cwnd(struct sock *sk)
-{
- struct cdg *ca = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, ca->undo_cwnd);
-}
-
static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
{
struct cdg *ca = inet_csk_ca(sk);
@@ -403,7 +393,7 @@ struct tcp_congestion_ops tcp_cdg __read_mostly = {
.cong_avoid = tcp_cdg_cong_avoid,
.cwnd_event = tcp_cdg_cwnd_event,
.pkts_acked = tcp_cdg_acked,
- .undo_cwnd = tcp_cdg_undo_cwnd,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.ssthresh = tcp_cdg_ssthresh,
.release = tcp_cdg_release,
.init = tcp_cdg_init,
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 421ea1b..2f26124 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -465,7 +465,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+ return max(tp->snd_cwnd, tp->prior_cwnd);
}
EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
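
Paired with the tp->prior_cwnd snapshot added to tcp_enter_loss() (see the tcp_input.c hunk further down), the generic undo now restores max(snd_cwnd, prior_cwnd). That is precisely what bic and cdg above, and cubic, highspeed, illinois, nv, scalable, veno and yeah below, each derived from a private loss_cwnd field, so those modules switch to tcp_reno_undo_cwnd and drop the duplicated per-socket state. The consolidated rule in isolation:

    /* The undo rule: never shrink below the cwnd that was in effect
     * when loss was signalled. Standalone model.
     */
    #include <stdio.h>

    static unsigned undo_cwnd(unsigned snd_cwnd, unsigned prior_cwnd)
    {
        return snd_cwnd > prior_cwnd ? snd_cwnd : prior_cwnd;
    }

    int main(void)
    {
        /* cwnd was 40 at loss, halved to 20, then the loss proves spurious */
        printf("restored cwnd: %u\n", undo_cwnd(20, 40));
        return 0;
    }
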
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 57ae5b5..78bfadf 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -83,7 +83,6 @@ MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (mse
struct bictcp {
u32 cnt; /* increase cwnd by 1 after ACKs */
u32 last_max_cwnd; /* last maximum snd_cwnd */
- u32 loss_cwnd; /* congestion window at last loss */
u32 last_cwnd; /* the last snd_cwnd */
u32 last_time; /* time when updated last_cwnd */
u32 bic_origin_point;/* origin point of bic function */
@@ -142,7 +141,6 @@ static void bictcp_init(struct sock *sk)
struct bictcp *ca = inet_csk_ca(sk);
bictcp_reset(ca);
- ca->loss_cwnd = 0;
if (hystart)
bictcp_hystart_reset(sk);
@@ -366,18 +364,9 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
else
ca->last_max_cwnd = tp->snd_cwnd;
- ca->loss_cwnd = tp->snd_cwnd;
-
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
-static u32 bictcp_undo_cwnd(struct sock *sk)
-{
- struct bictcp *ca = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
static void bictcp_state(struct sock *sk, u8 new_state)
{
if (new_state == TCP_CA_Loss) {
@@ -470,7 +459,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
.ssthresh = bictcp_recalc_ssthresh,
.cong_avoid = bictcp_cong_avoid,
.set_state = bictcp_state,
- .undo_cwnd = bictcp_undo_cwnd,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cwnd_event = bictcp_cwnd_event,
.pkts_acked = bictcp_acked,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index a748c74..abbf0ed 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -16,6 +16,7 @@
#include <linux/tcp.h>
+#include <net/netlink.h>
#include <net/tcp.h>
static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
@@ -36,6 +37,100 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
tcp_get_info(sk, info);
}
+#ifdef CONFIG_TCP_MD5SIG
+static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
+ const struct tcp_md5sig_key *key)
+{
+ info->tcpm_family = key->family;
+ info->tcpm_prefixlen = key->prefixlen;
+ info->tcpm_keylen = key->keylen;
+ memcpy(info->tcpm_key, key->key, key->keylen);
+
+ if (key->family == AF_INET)
+ info->tcpm_addr[0] = key->addr.a4.s_addr;
+ #if IS_ENABLED(CONFIG_IPV6)
+ else if (key->family == AF_INET6)
+ memcpy(&info->tcpm_addr, &key->addr.a6,
+ sizeof(info->tcpm_addr));
+ #endif
+}
+
+static int tcp_diag_put_md5sig(struct sk_buff *skb,
+ const struct tcp_md5sig_info *md5sig)
+{
+ const struct tcp_md5sig_key *key;
+ struct tcp_diag_md5sig *info;
+ struct nlattr *attr;
+ int md5sig_count = 0;
+
+ hlist_for_each_entry_rcu(key, &md5sig->head, node)
+ md5sig_count++;
+ if (md5sig_count == 0)
+ return 0;
+
+ attr = nla_reserve(skb, INET_DIAG_MD5SIG,
+ md5sig_count * sizeof(struct tcp_diag_md5sig));
+ if (!attr)
+ return -EMSGSIZE;
+
+ info = nla_data(attr);
+ memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig));
+ hlist_for_each_entry_rcu(key, &md5sig->head, node) {
+ tcp_diag_md5sig_fill(info++, key);
+ if (--md5sig_count == 0)
+ break;
+ }
+
+ return 0;
+}
+#endif
+
+static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
+ struct sk_buff *skb)
+{
+#ifdef CONFIG_TCP_MD5SIG
+ if (net_admin) {
+ struct tcp_md5sig_info *md5sig;
+ int err = 0;
+
+ rcu_read_lock();
+ md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
+ if (md5sig)
+ err = tcp_diag_put_md5sig(skb, md5sig);
+ rcu_read_unlock();
+ if (err < 0)
+ return err;
+ }
+#endif
+
+ return 0;
+}
+
+static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
+{
+ size_t size = 0;
+
+#ifdef CONFIG_TCP_MD5SIG
+ if (net_admin && sk_fullsock(sk)) {
+ const struct tcp_md5sig_info *md5sig;
+ const struct tcp_md5sig_key *key;
+ size_t md5sig_count = 0;
+
+ rcu_read_lock();
+ md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
+ if (md5sig) {
+ hlist_for_each_entry_rcu(key, &md5sig->head, node)
+ md5sig_count++;
+ }
+ rcu_read_unlock();
+ size += nla_total_size(md5sig_count *
+ sizeof(struct tcp_diag_md5sig));
+ }
+#endif
+
+ return size;
+}
+
static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r, struct nlattr *bc)
{
@@ -68,13 +163,15 @@ static int tcp_diag_destroy(struct sk_buff *in_skb,
#endif
static const struct inet_diag_handler tcp_diag_handler = {
- .dump = tcp_diag_dump,
- .dump_one = tcp_diag_dump_one,
- .idiag_get_info = tcp_diag_get_info,
- .idiag_type = IPPROTO_TCP,
- .idiag_info_size = sizeof(struct tcp_info),
+ .dump = tcp_diag_dump,
+ .dump_one = tcp_diag_dump_one,
+ .idiag_get_info = tcp_diag_get_info,
+ .idiag_get_aux = tcp_diag_get_aux,
+ .idiag_get_aux_size = tcp_diag_get_aux_size,
+ .idiag_type = IPPROTO_TCP,
+ .idiag_info_size = sizeof(struct tcp_info),
#ifdef CONFIG_INET_DIAG_DESTROY
- .destroy = tcp_diag_destroy,
+ .destroy = tcp_diag_destroy,
#endif
};
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index ce9c7fe..e3c3322 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -171,7 +171,6 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
static struct sock *tcp_fastopen_create_child(struct sock *sk,
struct sk_buff *skb,
- struct dst_entry *dst,
struct request_sock *req)
{
struct tcp_sock *tp;
@@ -278,8 +277,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
*/
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct tcp_fastopen_cookie *foc,
- struct dst_entry *dst)
+ struct tcp_fastopen_cookie *foc)
{
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
@@ -312,7 +310,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
* data in SYN_RECV state.
*/
fastopen:
- child = tcp_fastopen_create_child(sk, skb, dst, req);
+ child = tcp_fastopen_create_child(sk, skb, req);
if (child) {
foc->len = -1;
NET_INC_STATS(sock_net(sk),
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 6d9879e..d1c33c9 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -94,7 +94,6 @@ static const struct hstcp_aimd_val {
struct hstcp {
u32 ai;
- u32 loss_cwnd;
};
static void hstcp_init(struct sock *sk)
@@ -153,22 +152,14 @@ static u32 hstcp_ssthresh(struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
struct hstcp *ca = inet_csk_ca(sk);
- ca->loss_cwnd = tp->snd_cwnd;
/* Do multiplicative decrease */
return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
}
-static u32 hstcp_cwnd_undo(struct sock *sk)
-{
- const struct hstcp *ca = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
.init = hstcp_init,
.ssthresh = hstcp_ssthresh,
- .undo_cwnd = hstcp_cwnd_undo,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = hstcp_cong_avoid,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 3eb78cd..082d479 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -66,7 +66,6 @@ static inline void htcp_reset(struct htcp *ca)
static u32 htcp_cwnd_undo(struct sock *sk)
{
- const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
if (ca->undo_last_cong) {
@@ -76,7 +75,7 @@ static u32 htcp_cwnd_undo(struct sock *sk)
ca->undo_last_cong = 0;
}
- return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta);
+ return tcp_reno_undo_cwnd(sk);
}
static inline void measure_rtt(struct sock *sk, u32 srtt)
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 60352ff..7c84357 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -48,7 +48,6 @@ struct illinois {
u32 end_seq; /* right edge of current RTT */
u32 alpha; /* Additive increase */
u32 beta; /* Muliplicative decrease */
- u32 loss_cwnd; /* cwnd on loss */
u16 acked; /* # packets acked by current ACK */
u8 rtt_above; /* average rtt has gone above threshold */
u8 rtt_low; /* # of rtts measurements below threshold */
@@ -297,18 +296,10 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
- ca->loss_cwnd = tp->snd_cwnd;
/* Multiplicative decrease */
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
}
-static u32 tcp_illinois_cwnd_undo(struct sock *sk)
-{
- const struct illinois *ca = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
/* Extract info for Tcp socket info provided via netlink. */
static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info)
@@ -336,7 +327,7 @@ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
static struct tcp_congestion_ops tcp_illinois __read_mostly = {
.init = tcp_illinois_init,
.ssthresh = tcp_illinois_ssthresh,
- .undo_cwnd = tcp_illinois_cwnd_undo,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = tcp_illinois_cong_avoid,
.set_state = tcp_illinois_state,
.get_info = tcp_illinois_info,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bab7f04..c5d7656 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1952,6 +1952,7 @@ void tcp_enter_loss(struct sock *sk)
!after(tp->high_seq, tp->snd_una) ||
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
+ tp->prior_cwnd = tp->snd_cwnd;
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_LOSS);
tcp_init_undo(tp);
@@ -4267,9 +4268,15 @@ static void tcp_sack_remove(struct tcp_sock *tp)
tp->rx_opt.num_sacks = num_sacks;
}
+enum tcp_queue {
+ OOO_QUEUE,
+ RCV_QUEUE,
+};
+
/**
* tcp_try_coalesce - try to merge skb to prior one
* @sk: socket
+ * @dest: destination queue
* @to: prior buffer
* @from: buffer to add in queue
* @fragstolen: pointer to boolean
@@ -4281,6 +4288,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
* Returns true if caller should free @from instead of queueing it
*/
static bool tcp_try_coalesce(struct sock *sk,
+ enum tcp_queue dest,
struct sk_buff *to,
struct sk_buff *from,
bool *fragstolen)
@@ -4302,6 +4310,15 @@ static bool tcp_try_coalesce(struct sock *sk,
TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
+
+ if (TCP_SKB_CB(from)->has_rxtstamp) {
+ TCP_SKB_CB(to)->has_rxtstamp = true;
+ if (dest == OOO_QUEUE)
+ TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
+ else
+ to->tstamp = from->tstamp;
+ }
+
return true;
}
@@ -4336,6 +4353,9 @@ static void tcp_ofo_queue(struct sock *sk)
}
p = rb_next(p);
rb_erase(&skb->rbnode, &tp->out_of_order_queue);
+ /* Replace tstamp which was stomped by rbnode */
+ if (TCP_SKB_CB(skb)->has_rxtstamp)
+ skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4347,7 +4367,8 @@ static void tcp_ofo_queue(struct sock *sk)
TCP_SKB_CB(skb)->end_seq);
tail = skb_peek_tail(&sk->sk_receive_queue);
- eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
+ eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
+ tail, skb, &fragstolen);
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (!eaten)
@@ -4401,6 +4422,10 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
return;
}
+ /* Stash tstamp to avoid being stomped on by rbnode */
+ if (TCP_SKB_CB(skb)->has_rxtstamp)
+ TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
+
/* Disable header prediction. */
tp->pred_flags = 0;
inet_csk_schedule_ack(sk);
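
The out-of-order queue is an rbtree, and per the comments above skb->rbnode shares storage with skb->tstamp, so linking an skb into the tree destroys its software receive stamp. The stamp is therefore parked in TCP_SKB_CB(skb)->swtstamp on insertion and restored in tcp_ofo_queue(); tcp_try_coalesce()'s new dest argument selects which copy to propagate when merging. A toy demonstration of the stash/restore dance (the fields model sk_buff loosely, not its real layout):

    #include <stdio.h>

    struct toy_skb {
        union {
            struct { void *left, *right, *parent; } rbnode;
            long long tstamp;
        };
        long long stashed;  /* plays the role of TCP_SKB_CB()->swtstamp */
    };

    int main(void)
    {
        struct toy_skb skb = { .tstamp = 123456789 };

        skb.stashed = skb.tstamp;   /* stash before tree insertion */
        skb.rbnode.parent = &skb;   /* insertion stomps the union */
        skb.tstamp = skb.stashed;   /* restore on dequeue */
        printf("%lld\n", skb.tstamp);
        return 0;
    }
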
@@ -4428,7 +4453,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
/* In the typical case, we are adding an skb to the end of the list.
* Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
*/
- if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+ if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+ skb, &fragstolen)) {
coalesce_done:
tcp_grow_window(sk, skb);
kfree_skb_partial(skb, fragstolen);
@@ -4478,7 +4504,8 @@ coalesce_done:
__kfree_skb(skb1);
goto merge_right;
}
- } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+ } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+ skb, &fragstolen)) {
goto coalesce_done;
}
p = &parent->rb_right;
@@ -4529,7 +4556,8 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
__skb_pull(skb, hdrlen);
eaten = (tail &&
- tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
+ tcp_try_coalesce(sk, RCV_QUEUE, tail,
+ skb, fragstolen)) ? 1 : 0;
tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) {
__skb_queue_tail(&sk->sk_receive_queue, skb);
@@ -4591,8 +4619,8 @@ err:
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- bool fragstolen = false;
- int eaten = -1;
+ bool fragstolen;
+ int eaten;
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
__kfree_skb(skb);
@@ -4614,32 +4642,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
goto out_of_window;
/* Ok. In sequence. In window. */
- if (tp->ucopy.task == current &&
- tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
- sock_owned_by_user(sk) && !tp->urg_data) {
- int chunk = min_t(unsigned int, skb->len,
- tp->ucopy.len);
-
- __set_current_state(TASK_RUNNING);
-
- if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
- tp->ucopy.len -= chunk;
- tp->copied_seq += chunk;
- eaten = (chunk == skb->len);
- tcp_rcv_space_adjust(sk);
- }
- }
-
- if (eaten <= 0) {
queue_and_out:
- if (eaten < 0) {
- if (skb_queue_len(&sk->sk_receive_queue) == 0)
- sk_forced_mem_schedule(sk, skb->truesize);
- else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
- goto drop;
- }
- eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
- }
+ if (skb_queue_len(&sk->sk_receive_queue) == 0)
+ sk_forced_mem_schedule(sk, skb->truesize);
+ else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+ goto drop;
+
+ eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
if (skb->len)
tcp_event_data_recv(sk, skb);
@@ -5189,26 +5198,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t
}
}
-static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- int chunk = skb->len - hlen;
- int err;
-
- if (skb_csum_unnecessary(skb))
- err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
- else
- err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
-
- if (!err) {
- tp->ucopy.len -= chunk;
- tp->copied_seq += chunk;
- tcp_rcv_space_adjust(sk);
- }
-
- return err;
-}
-
/* Accept RST for rcv_nxt - 1 after a FIN.
* When tcp connections are abruptly terminated from Mac OSX (via ^C), a
* FIN is sent followed by a RST packet. The RST is sent with the same
@@ -5361,8 +5350,9 @@ discard:
* tcp_data_queue when everything is OK.
*/
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct tcphdr *th, unsigned int len)
+ const struct tcphdr *th)
{
+ unsigned int len = skb->len;
struct tcp_sock *tp = tcp_sk(sk);
tcp_mstamp_refresh(tp);
@@ -5448,56 +5438,28 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
int eaten = 0;
bool fragstolen = false;
- if (tp->ucopy.task == current &&
- tp->copied_seq == tp->rcv_nxt &&
- len - tcp_header_len <= tp->ucopy.len &&
- sock_owned_by_user(sk)) {
- __set_current_state(TASK_RUNNING);
-
- if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
- /* Predicted packet is in window by definition.
- * seq == rcv_nxt and rcv_wup <= rcv_nxt.
- * Hence, check seq<=rcv_wup reduces to:
- */
- if (tcp_header_len ==
- (sizeof(struct tcphdr) +
- TCPOLEN_TSTAMP_ALIGNED) &&
- tp->rcv_nxt == tp->rcv_wup)
- tcp_store_ts_recent(tp);
-
- tcp_rcv_rtt_measure_ts(sk, skb);
-
- __skb_pull(skb, tcp_header_len);
- tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPHPHITSTOUSER);
- eaten = 1;
- }
- }
- if (!eaten) {
- if (tcp_checksum_complete(skb))
- goto csum_error;
+ if (tcp_checksum_complete(skb))
+ goto csum_error;
- if ((int)skb->truesize > sk->sk_forward_alloc)
- goto step5;
+ if ((int)skb->truesize > sk->sk_forward_alloc)
+ goto step5;
- /* Predicted packet is in window by definition.
- * seq == rcv_nxt and rcv_wup <= rcv_nxt.
- * Hence, check seq<=rcv_wup reduces to:
- */
- if (tcp_header_len ==
- (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
- tp->rcv_nxt == tp->rcv_wup)
- tcp_store_ts_recent(tp);
+ /* Predicted packet is in window by definition.
+ * seq == rcv_nxt and rcv_wup <= rcv_nxt.
+ * Hence, check seq<=rcv_wup reduces to:
+ */
+ if (tcp_header_len ==
+ (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
+ tp->rcv_nxt == tp->rcv_wup)
+ tcp_store_ts_recent(tp);
- tcp_rcv_rtt_measure_ts(sk, skb);
+ tcp_rcv_rtt_measure_ts(sk, skb);
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
- /* Bulk data transfer: receiver */
- eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
- &fragstolen);
- }
+ /* Bulk data transfer: receiver */
+ eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
+ &fragstolen);
tcp_event_data_recv(sk, skb);
@@ -5591,7 +5553,6 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
__tcp_fast_path_on(tp, tp->snd_wnd);
else
tp->pred_flags = 0;
-
}
static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
@@ -6306,9 +6267,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
struct sock *fastopen_sk = NULL;
- struct dst_entry *dst = NULL;
struct request_sock *req;
bool want_cookie = false;
+ struct dst_entry *dst;
struct flowi fl;
/* TW buckets are converted to open requests without
@@ -6358,6 +6319,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (tmp_opt.tstamp_ok)
tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
+ dst = af_ops->route_req(sk, &fl, req);
+ if (!dst)
+ goto drop_and_free;
+
if (!want_cookie && !isn) {
/* Kill the following clause, if you dislike this way. */
if (!net->ipv4.sysctl_tcp_syncookies &&
@@ -6378,11 +6343,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
isn = af_ops->init_seq(skb);
}
- if (!dst) {
- dst = af_ops->route_req(sk, &fl, req);
- if (!dst)
- goto drop_and_free;
- }
tcp_ecn_create_request(req, skb, sk, dst);
@@ -6398,7 +6358,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_openreq_init_rwin(req, sk, dst);
if (!want_cookie) {
tcp_reqsk_record_syn(sk, req, skb);
- fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
+ fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc);
}
if (fastopen_sk) {
af_ops->send_synack(fastopen_sk, dst, &fl, req,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e9252c7..a63486a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -85,8 +85,6 @@
#include <crypto/hash.h>
#include <linux/scatterlist.h>
-int sysctl_tcp_low_latency __read_mostly;
-
#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
__be32 daddr, __be32 saddr, const struct tcphdr *th);
@@ -385,7 +383,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
th->dest, iph->saddr, ntohs(th->source),
- inet_iif(icmp_skb));
+ inet_iif(icmp_skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return;
@@ -661,7 +659,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
ip_hdr(skb)->saddr,
th->source, ip_hdr(skb)->daddr,
- ntohs(th->source), inet_iif(skb));
+ ntohs(th->source), inet_iif(skb),
+ tcp_v4_sdif(skb));
/* don't send rst if it can't find key */
if (!sk1)
goto out;
@@ -1269,7 +1268,7 @@ static void tcp_v4_init_req(struct request_sock *req,
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
- ireq->opt = tcp_v4_save_options(skb);
+ ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb);
}
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
@@ -1458,7 +1457,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
sk->sk_rx_dst = NULL;
}
}
- tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
+ tcp_rcv_established(sk, skb, tcp_hdr(skb));
return 0;
}
@@ -1525,7 +1524,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
iph->saddr, th->source,
iph->daddr, ntohs(th->dest),
- skb->skb_iif);
+ skb->skb_iif, inet_sdif(skb));
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
@@ -1541,61 +1540,6 @@ void tcp_v4_early_demux(struct sk_buff *skb)
}
}
-/* Packet is added to VJ-style prequeue for processing in process
- * context, if a reader task is waiting. Apparently, this exciting
- * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
- * failed somewhere. Latency? Burstiness? Well, at least now we will
- * see, why it failed. 8)8) --ANK
- *
- */
-bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (sysctl_tcp_low_latency || !tp->ucopy.task)
- return false;
-
- if (skb->len <= tcp_hdrlen(skb) &&
- skb_queue_len(&tp->ucopy.prequeue) == 0)
- return false;
-
- /* Before escaping RCU protected region, we need to take care of skb
- * dst. Prequeue is only enabled for established sockets.
- * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
- * Instead of doing full sk_rx_dst validity here, let's perform
- * an optimistic check.
- */
- if (likely(sk->sk_rx_dst))
- skb_dst_drop(skb);
- else
- skb_dst_force_safe(skb);
-
- __skb_queue_tail(&tp->ucopy.prequeue, skb);
- tp->ucopy.memory += skb->truesize;
- if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
- tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
- struct sk_buff *skb1;
-
- BUG_ON(sock_owned_by_user(sk));
- __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
- skb_queue_len(&tp->ucopy.prequeue));
-
- while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
- sk_backlog_rcv(sk, skb1);
-
- tp->ucopy.memory = 0;
- } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
- wake_up_interruptible_sync_poll(sk_sleep(sk),
- POLLIN | POLLRDNORM | POLLRDBAND);
- if (!inet_csk_ack_scheduled(sk))
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
- (3 * tcp_rto_min(sk)) / 4,
- TCP_RTO_MAX);
- }
- return true;
-}
-EXPORT_SYMBOL(tcp_prequeue);
-
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
@@ -1645,6 +1589,7 @@ EXPORT_SYMBOL(tcp_filter);
int tcp_v4_rcv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ int sdif = inet_sdif(skb);
const struct iphdr *iph;
const struct tcphdr *th;
bool refcounted;
@@ -1692,10 +1637,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
+ TCP_SKB_CB(skb)->has_rxtstamp =
+ skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
lookup:
sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
- th->dest, &refcounted);
+ th->dest, sdif, &refcounted);
if (!sk)
goto no_tcp_socket;
@@ -1770,8 +1717,7 @@ process:
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
- if (!tcp_prequeue(sk, skb))
- ret = tcp_v4_do_rcv(sk, skb);
+ ret = tcp_v4_do_rcv(sk, skb);
} else if (tcp_add_backlog(sk, skb)) {
goto discard_and_relse;
}
@@ -1824,7 +1770,8 @@ do_time_wait:
__tcp_hdrlen(th),
iph->saddr, th->source,
iph->daddr, th->dest,
- inet_iif(skb));
+ inet_iif(skb),
+ sdif);
if (sk2) {
inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
@@ -1936,9 +1883,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
}
#endif
- /* Clean prequeue, it must be empty really */
- __skb_queue_purge(&tp->ucopy.prequeue);
-
/* Clean up a referenced TCP bind bucket. */
if (inet_csk(sk)->icsk_bind_hash)
inet_put_port(sk);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0ff83c1..188a6f3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -445,7 +445,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->snd_sml = newtp->snd_una =
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
- tcp_prequeue_init(newtp);
INIT_LIST_HEAD(&newtp->tsq_node);
tcp_init_wl(newtp, treq->rcv_isn);
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 6d650ed..1ff7398 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -86,7 +86,6 @@ struct tcpnv {
* < 0 => less than 1 packet/RTT */
u8 available8;
u16 available16;
- u32 loss_cwnd; /* cwnd at last loss */
u8 nv_allow_cwnd_growth:1, /* whether cwnd can grow */
nv_reset:1, /* whether to reset values */
nv_catchup:1; /* whether we are growing because
@@ -121,7 +120,6 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
ca->nv_reset = 0;
- ca->loss_cwnd = 0;
ca->nv_no_cong_cnt = 0;
ca->nv_rtt_cnt = 0;
ca->nv_last_rtt = 0;
@@ -177,19 +175,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
static u32 tcpnv_recalc_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct tcpnv *ca = inet_csk_ca(sk);
- ca->loss_cwnd = tp->snd_cwnd;
return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U);
}
-static u32 tcpnv_undo_cwnd(struct sock *sk)
-{
- struct tcpnv *ca = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
static void tcpnv_state(struct sock *sk, u8 new_state)
{
struct tcpnv *ca = inet_csk_ca(sk);
@@ -446,7 +435,7 @@ static struct tcp_congestion_ops tcpnv __read_mostly = {
.ssthresh = tcpnv_recalc_ssthresh,
.cong_avoid = tcpnv_cong_avoid,
.set_state = tcpnv_state,
- .undo_cwnd = tcpnv_undo_cwnd,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.pkts_acked = tcpnv_acked,
.get_info = tcpnv_get_info,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b7661a6..5b6690d0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2377,7 +2377,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
u32 timeout, rto_delta_us;
/* Don't do any loss probe on a Fast Open connection before 3WHS
@@ -2398,15 +2397,19 @@ bool tcp_schedule_loss_probe(struct sock *sk)
tcp_send_head(sk))
return false;
- /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+ /* Probe timeout is 2*rtt. Add minimum RTO to account
* for delayed ack when there's one outstanding packet. If no RTT
* sample is available then probe after TCP_TIMEOUT_INIT.
*/
- timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
- if (tp->packets_out == 1)
- timeout = max_t(u32, timeout,
- (rtt + (rtt >> 1) + TCP_DELACK_MAX));
- timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+ if (tp->srtt_us) {
+ timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+ if (tp->packets_out == 1)
+ timeout += TCP_RTO_MIN;
+ else
+ timeout += TCP_TIMEOUT_MIN;
+ } else {
+ timeout = TCP_TIMEOUT_INIT;
+ }
/* If the RTO formula yields an earlier time, then use that time. */
rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */
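
tp->srtt_us stores eight times the smoothed RTT, so srtt_us >> 2 is the 2*RTT the comment describes; one outstanding packet adds TCP_RTO_MIN to cover a delayed ACK, anything else adds the new TCP_TIMEOUT_MIN floor, and TCP_TIMEOUT_INIT remains the no-sample fallback. Worked numbers in microseconds (RTO_MIN/TIMEOUT_MIN taken as 200 ms and 2 ms, i.e. 2 jiffies at HZ=1000):

    #include <stdio.h>

    #define RTO_MIN_US      200000
    #define TIMEOUT_MIN_US  2000
    #define TIMEOUT_INIT_US 1000000

    static unsigned tlp_timeout_us(unsigned srtt_us_x8, unsigned packets_out)
    {
        if (!srtt_us_x8)
            return TIMEOUT_INIT_US;             /* no RTT sample yet */
        return (srtt_us_x8 >> 2) +              /* 2 * SRTT */
               (packets_out == 1 ? RTO_MIN_US : TIMEOUT_MIN_US);
    }

    int main(void)
    {
        /* SRTT = 50 ms is stored as 400000 (8x, in usec) */
        printf("%u us\n", tlp_timeout_us(400000, 1));   /* 300000 */
        printf("%u us\n", tlp_timeout_us(400000, 4));   /* 102000 */
        return 0;
    }
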
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index f6c50af..697f4c6 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -105,8 +105,9 @@ static inline int tcp_probe_avail(void)
* Note: arguments must match tcp_rcv_established()!
*/
static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct tcphdr *th, unsigned int len)
+ const struct tcphdr *th)
{
+ unsigned int len = skb->len;
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
@@ -145,7 +146,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
BUG();
}
- p->length = skb->len;
+ p->length = len;
p->snd_nxt = tp->snd_nxt;
p->snd_una = tp->snd_una;
p->snd_cwnd = tp->snd_cwnd;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index fe9a493..449cd91 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -113,7 +113,7 @@ void tcp_rack_mark_lost(struct sock *sk)
tp->rack.advanced = 0;
tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
- timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN);
+ timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
timeout, inet_csk(sk)->icsk_rto);
}
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index f212307..addc122 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,10 +15,6 @@
#define TCP_SCALABLE_AI_CNT 50U
#define TCP_SCALABLE_MD_SCALE 3
-struct scalable {
- u32 loss_cwnd;
-};
-
static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -36,23 +32,13 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
static u32 tcp_scalable_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct scalable *ca = inet_csk_ca(sk);
-
- ca->loss_cwnd = tp->snd_cwnd;
return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
}
-static u32 tcp_scalable_cwnd_undo(struct sock *sk)
-{
- const struct scalable *ca = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
-}
-
static struct tcp_congestion_ops tcp_scalable __read_mostly = {
.ssthresh = tcp_scalable_ssthresh,
- .undo_cwnd = tcp_scalable_cwnd_undo,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = tcp_scalable_cong_avoid,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index e906014..655dd8d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -239,7 +239,6 @@ static int tcp_write_timeout(struct sock *sk)
/* Called with BH disabled */
void tcp_delack_timer_handler(struct sock *sk)
{
- struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
sk_mem_reclaim_partial(sk);
@@ -254,17 +253,6 @@ void tcp_delack_timer_handler(struct sock *sk)
}
icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
- if (!skb_queue_empty(&tp->ucopy.prequeue)) {
- struct sk_buff *skb;
-
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
-
- while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
- sk_backlog_rcv(sk, skb);
-
- tp->ucopy.memory = 0;
- }
-
if (inet_csk_ack_scheduled(sk)) {
if (!icsk->icsk_ack.pingpong) {
/* Delayed ACK missed: inflate ATO. */
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 76005d4..6fcf482 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -30,7 +30,6 @@ struct veno {
u32 basertt; /* the min of all Veno rtt measurements seen (in usec) */
u32 inc; /* decide whether to increase cwnd */
u32 diff; /* calculate the diff rate */
- u32 loss_cwnd; /* cwnd when loss occured */
};
/* There are several situations when we must "re-start" Veno:
@@ -194,7 +193,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
struct veno *veno = inet_csk_ca(sk);
- veno->loss_cwnd = tp->snd_cwnd;
if (veno->diff < beta)
/* in "non-congestive state", cut cwnd by 1/5 */
return max(tp->snd_cwnd * 4 / 5, 2U);
@@ -203,17 +201,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
return max(tp->snd_cwnd >> 1U, 2U);
}
-static u32 tcp_veno_cwnd_undo(struct sock *sk)
-{
- const struct veno *veno = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, veno->loss_cwnd);
-}
-
static struct tcp_congestion_ops tcp_veno __read_mostly = {
.init = tcp_veno_init,
.ssthresh = tcp_veno_ssthresh,
- .undo_cwnd = tcp_veno_cwnd_undo,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = tcp_veno_cong_avoid,
.pkts_acked = tcp_veno_pkts_acked,
.set_state = tcp_veno_state,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index e6ff99c..96e829b2 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -37,7 +37,6 @@ struct yeah {
u32 fast_count;
u32 pkts_acked;
- u32 loss_cwnd;
};
static void tcp_yeah_init(struct sock *sk)
@@ -220,22 +219,14 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
yeah->fast_count = 0;
yeah->reno_count = max(yeah->reno_count>>1, 2U);
- yeah->loss_cwnd = tp->snd_cwnd;
return max_t(int, tp->snd_cwnd - reduction, 2);
}
-static u32 tcp_yeah_cwnd_undo(struct sock *sk)
-{
- const struct yeah *yeah = inet_csk_ca(sk);
-
- return max(tcp_sk(sk)->snd_cwnd, yeah->loss_cwnd);
-}
-
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
.init = tcp_yeah_init,
.ssthresh = tcp_yeah_ssthresh,
- .undo_cwnd = tcp_yeah_cwnd_undo,
+ .undo_cwnd = tcp_reno_undo_cwnd,
.cong_avoid = tcp_yeah_cong_avoid,
.set_state = tcp_vegas_state,
.cwnd_event = tcp_vegas_cwnd_event,
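The scalable, veno and yeah hunks are one refactor seen three times: each module kept a private loss_cwnd snapshot solely to implement an identical undo_cwnd callback, while the core TCP stack already records the pre-loss window. Delegating to tcp_reno_undo_cwnd() drops the duplicated state; in this series that helper effectively amounts to the sketch below (a model, not the in-tree definition):

    #include <stdint.h>

    /* Model of the shared undo these three modules now delegate to:
     * restore the larger of the current window and the window recorded
     * before the loss episode (tp->prior_cwnd in the kernel).
     */
    static uint32_t reno_undo_cwnd_model(uint32_t snd_cwnd, uint32_t prior_cwnd)
    {
            return snd_cwnd > prior_cwnd ? snd_cwnd : prior_cwnd;
    }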
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 38e795e..db1c9e7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -380,8 +380,8 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
static int compute_score(struct sock *sk, struct net *net,
__be32 saddr, __be16 sport,
- __be32 daddr, unsigned short hnum, int dif,
- bool exact_dif)
+ __be32 daddr, unsigned short hnum,
+ int dif, int sdif, bool exact_dif)
{
int score;
struct inet_sock *inet;
@@ -413,10 +413,15 @@ static int compute_score(struct sock *sk, struct net *net,
}
if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
+ bool dev_match = (sk->sk_bound_dev_if == dif ||
+ sk->sk_bound_dev_if == sdif);
+
+ if (exact_dif && !dev_match)
return -1;
- score += 4;
+ if (sk->sk_bound_dev_if && dev_match)
+ score += 4;
}
+
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
return score;
@@ -436,10 +441,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
- __be32 saddr, __be16 sport,
- __be32 daddr, unsigned int hnum, int dif, bool exact_dif,
- struct udp_hslot *hslot2,
- struct sk_buff *skb)
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned int hnum,
+ int dif, int sdif, bool exact_dif,
+ struct udp_hslot *hslot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
int score, badness, matches = 0, reuseport = 0;
@@ -449,7 +455,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
badness = 0;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, exact_dif);
+ daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -477,8 +483,8 @@ static struct sock *udp4_lib_lookup2(struct net *net,
* harder than this. -DaveM
*/
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
- __be16 sport, __be32 daddr, __be16 dport,
- int dif, struct udp_table *udptable, struct sk_buff *skb)
+ __be16 sport, __be32 daddr, __be16 dport, int dif,
+ int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
struct sock *sk, *result;
unsigned short hnum = ntohs(dport);
@@ -496,7 +502,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
goto begin;
result = udp4_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif,
+ daddr, hnum, dif, sdif,
exact_dif, hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
@@ -511,7 +517,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
goto begin;
result = udp4_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif,
+ daddr, hnum, dif, sdif,
exact_dif, hslot2, skb);
}
return result;
@@ -521,7 +527,7 @@ begin:
badness = 0;
sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, exact_dif);
+ daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -554,7 +560,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
iph->daddr, dport, inet_iif(skb),
- udptable, skb);
+ inet_sdif(skb), udptable, skb);
}
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
@@ -576,7 +582,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
struct sock *sk;
sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
- dif, &udp_table, NULL);
+ dif, 0, &udp_table, NULL);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
@@ -587,7 +593,7 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup);
static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
- int dif, unsigned short hnum)
+ int dif, int sdif, unsigned short hnum)
{
struct inet_sock *inet = inet_sk(sk);
@@ -597,9 +603,10 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
(inet->inet_dport != rmt_port && inet->inet_dport) ||
(inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
ipv6_only_sock(sk) ||
- (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != sdif))
return false;
- if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif))
+ if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif))
return false;
return true;
}
@@ -628,8 +635,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
struct net *net = dev_net(skb->dev);
sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
- iph->saddr, uh->source, skb->dev->ifindex, udptable,
- NULL);
+ iph->saddr, uh->source, skb->dev->ifindex, 0,
+ udptable, NULL);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return; /* No socket for error */
@@ -802,7 +809,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
if (is_udplite) /* UDP-Lite */
csum = udplite_csum(skb);
- else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */
+ else if (sk->sk_no_check_tx) { /* UDP csum off */
skb->ip_summed = CHECKSUM_NONE;
goto send;
@@ -1176,7 +1183,11 @@ static void udp_set_dev_scratch(struct sk_buff *skb)
scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
scratch->is_linear = !skb_is_nonlinear(skb);
#endif
- if (likely(!skb->_skb_refdst && !skb_sec_path(skb)))
+ /* all head states except sp (dst, sk, nf) are always cleared by
+ * udp_rcv() and we need to preserve secpath, if present, to eventually
+ * process IP_CMSG_PASSSEC at recvmsg() time
+ */
+ if (likely(!skb_sec_path(skb)))
scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
}
@@ -1783,13 +1794,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sk_mark_napi_id_once(sk, skb);
}
- /* At recvmsg() time we may access skb->dst or skb->sp depending on
- * the IP options and the cmsg flags, elsewhere can we clear all
- * pending head states while they are hot in the cache
- */
- if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb)))
- skb_release_head_state(skb);
-
rc = __udp_enqueue_schedule_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
@@ -1958,6 +1962,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
unsigned int offset = offsetof(typeof(*sk), sk_node);
int dif = skb->dev->ifindex;
+ int sdif = inet_sdif(skb);
struct hlist_node *node;
struct sk_buff *nskb;
@@ -1972,7 +1977,7 @@ start_lookup:
sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
- uh->source, saddr, dif, hnum))
+ uh->source, saddr, dif, sdif, hnum))
continue;
if (!first) {
@@ -2162,7 +2167,7 @@ drop:
static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
- int dif)
+ int dif, int sdif)
{
struct sock *sk, *result;
unsigned short hnum = ntohs(loc_port);
@@ -2176,7 +2181,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
result = NULL;
sk_for_each_rcu(sk, &hslot->head) {
if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
- rmt_port, rmt_addr, dif, hnum)) {
+ rmt_port, rmt_addr, dif, sdif, hnum)) {
if (result)
return NULL;
result = sk;
@@ -2193,7 +2198,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
static struct sock *__udp4_lib_demux_lookup(struct net *net,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
- int dif)
+ int dif, int sdif)
{
unsigned short hnum = ntohs(loc_port);
unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
@@ -2205,7 +2210,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (INET_MATCH(sk, net, acookie, rmt_addr,
- loc_addr, ports, dif))
+ loc_addr, ports, dif, sdif))
return sk;
/* Only check first socket in chain */
break;
@@ -2221,6 +2226,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
struct sock *sk = NULL;
struct dst_entry *dst;
int dif = skb->dev->ifindex;
+ int sdif = inet_sdif(skb);
int ours;
/* validate the packet */
@@ -2246,10 +2252,11 @@ void udp_v4_early_demux(struct sk_buff *skb)
}
sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
- uh->source, iph->saddr, dif);
+ uh->source, iph->saddr,
+ dif, sdif);
} else if (skb->pkt_type == PACKET_HOST) {
sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
- uh->source, iph->saddr, dif);
+ uh->source, iph->saddr, dif, sdif);
}
if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
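All of the dif/sdif plumbing in udp.c serves VRF: when a packet arrives on an interface enslaved to an L3 master device, skb->dev is rewritten to the master, so dif is the VRF ifindex and inet_sdif() recovers the original slave ifindex. A socket bound to either device should match. A standalone model of the scoring rule introduced in compute_score():

    /* Model of the bound-device check above. Returns -1 to reject the
     * socket, otherwise the score bonus. exact_dif mirrors the kernel's
     * "must match a device" mode for L3-domain lookups.
     */
    static int bound_dev_score(int bound_if, int dif, int sdif, int exact_dif)
    {
            if (bound_if || exact_dif) {
                    int dev_match = (bound_if == dif || bound_if == sdif);

                    if (exact_dif && !dev_match)
                            return -1;
                    if (bound_if && dev_match)
                            return 4;   /* mirrors "score += 4" */
            }
            return 0;
    }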
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 4515836..d0390d8 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -45,7 +45,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
sk = __udp4_lib_lookup(net,
req->id.idiag_src[0], req->id.idiag_sport,
req->id.idiag_dst[0], req->id.idiag_dport,
- req->id.idiag_if, tbl, NULL);
+ req->id.idiag_if, 0, tbl, NULL);
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6)
sk = __udp6_lib_lookup(net,
@@ -53,7 +53,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
req->id.idiag_sport,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
- req->id.idiag_if, tbl, NULL);
+ req->id.idiag_if, 0, tbl, NULL);
#endif
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -182,7 +182,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
sk = __udp4_lib_lookup(net,
req->id.idiag_dst[0], req->id.idiag_dport,
req->id.idiag_src[0], req->id.idiag_sport,
- req->id.idiag_if, tbl, NULL);
+ req->id.idiag_if, 0, tbl, NULL);
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6) {
if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
@@ -190,7 +190,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
sk = __udp4_lib_lookup(net,
req->id.idiag_dst[3], req->id.idiag_dport,
req->id.idiag_src[3], req->id.idiag_sport,
- req->id.idiag_if, tbl, NULL);
+ req->id.idiag_if, 0, tbl, NULL);
else
sk = __udp6_lib_lookup(net,
@@ -198,7 +198,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
req->id.idiag_dport,
(struct in6_addr *)req->id.idiag_src,
req->id.idiag_sport,
- req->id.idiag_if, tbl, NULL);
+ req->id.idiag_if, 0, tbl, NULL);
}
#endif
else {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0932c85..97658bf 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
__be16 new_protocol, bool is_ipv6)
{
int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
- bool remcsum, need_csum, offload_csum, ufo, gso_partial;
+ bool remcsum, need_csum, offload_csum, gso_partial;
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct udphdr *uh = udp_hdr(skb);
u16 mac_offset = skb->mac_header;
@@ -61,8 +61,6 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
skb->remcsum_offload = remcsum;
- ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
-
need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
/* Try to offload checksum if possible */
offload_csum = !!(need_csum &&
@@ -77,7 +75,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
* outer one so strip the existing checksum feature flags and
* instead set the flag based on our outer checksum offload value.
*/
- if (remcsum || ufo) {
+ if (remcsum) {
features &= ~NETIF_F_CSUM_MASK;
if (!need_csum || offload_csum)
features |= NETIF_F_HW_CSUM;
@@ -189,66 +187,16 @@ out_unlock:
}
EXPORT_SYMBOL(skb_udp_tunnel_segment);
-static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
- netdev_features_t features)
+static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
- unsigned int mss;
- __wsum csum;
- struct udphdr *uh;
- struct iphdr *iph;
if (skb->encapsulation &&
(skb_shinfo(skb)->gso_type &
- (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
+ (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)))
segs = skb_udp_tunnel_segment(skb, features, false);
- goto out;
- }
- if (!pskb_may_pull(skb, sizeof(struct udphdr)))
- goto out;
-
- mss = skb_shinfo(skb)->gso_size;
- if (unlikely(skb->len <= mss))
- goto out;
-
- if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
- /* Packet is from an untrusted source, reset gso_segs. */
-
- skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
- segs = NULL;
- goto out;
- }
-
- /* Do software UFO. Complete and fill in the UDP checksum as
- * HW cannot do checksum of UDP packets sent as multiple
- * IP fragments.
- */
-
- uh = udp_hdr(skb);
- iph = ip_hdr(skb);
-
- uh->check = 0;
- csum = skb_checksum(skb, 0, skb->len, 0);
- uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-
- /* If there is no outer header we can fake a checksum offload
- * due to the fact that we have already done the checksum in
- * software prior to segmenting the frame.
- */
- if (!skb->encap_hdr_csum)
- features |= NETIF_F_HW_CSUM;
-
- /* Fragment the skb. IP headers of the fragments are updated in
- * inet_gso_segment()
- */
- segs = skb_segment(skb, features);
-out:
return segs;
}
@@ -382,7 +330,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
static const struct net_offload udpv4_offload = {
.callbacks = {
- .gso_segment = udp4_ufo_fragment,
+ .gso_segment = udp4_tunnel_segment,
.gro_receive = udp4_gro_receive,
.gro_complete = udp4_gro_complete,
},
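With UFO removed, the UDP GSO callback keeps only the tunnel path, and an over-MTU plain UDP datagram now takes ordinary IP fragmentation. The deleted software path's one notable trick was filling the UDP checksum before segmenting and then advertising NETIF_F_HW_CSUM so the fragments are not re-checksummed. For reference, a standalone model of that checksum fill (illustration only; the kernel uses skb_checksum() and udp_v4_check()):

    #include <stdint.h>
    #include <stddef.h>

    /* One's-complement UDP checksum over a linear buffer, seeded with
     * the pseudo-header sum; 0 is mapped to 0xFFFF as UDP requires
     * (the kernel's CSUM_MANGLED_0).
     */
    static uint16_t udp_csum_model(const uint8_t *buf, size_t len,
                                   uint32_t pseudo_sum)
    {
            uint32_t sum = pseudo_sum;
            size_t i;

            for (i = 0; i + 1 < len; i += 2)
                    sum += ((uint32_t)buf[i] << 8) | buf[i + 1];
            if (len & 1)
                    sum += (uint32_t)buf[len - 1] << 8;
            while (sum >> 16)
                    sum = (sum & 0xffff) + (sum >> 16);
            sum = ~sum & 0xffff;
            return sum ? (uint16_t)sum : 0xffff;
    }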
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 58bd39f..6539ff1 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -82,7 +82,8 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
struct sock *sk = sock->sk;
struct udp_tunnel_info ti;
- if (!dev->netdev_ops->ndo_udp_tunnel_add)
+ if (!dev->netdev_ops->ndo_udp_tunnel_add ||
+ !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
return;
ti.type = type;
@@ -93,6 +94,24 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
}
EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port);
+void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
+ unsigned short type)
+{
+ struct sock *sk = sock->sk;
+ struct udp_tunnel_info ti;
+
+ if (!dev->netdev_ops->ndo_udp_tunnel_del ||
+ !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+ return;
+
+ ti.type = type;
+ ti.sa_family = sk->sk_family;
+ ti.port = inet_sk(sk)->inet_sport;
+
+ dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port);
+
/* Notify netdevs that UDP port started listening */
void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
{
@@ -109,6 +128,8 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
for_each_netdev_rcu(net, dev) {
if (!dev->netdev_ops->ndo_udp_tunnel_add)
continue;
+ if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+ continue;
dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
}
rcu_read_unlock();
@@ -131,6 +152,8 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type)
for_each_netdev_rcu(net, dev) {
if (!dev->netdev_ops->ndo_udp_tunnel_del)
continue;
+ if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
+ continue;
dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
}
rcu_read_unlock();
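Every udp_tunnel.c path is now gated on NETIF_F_RX_UDP_TUNNEL_PORT, making tunnel-port offload a per-device, runtime-togglable feature (exposed through ethtool -K; the feature string added alongside this flag is rx-udp_tunnel-port-offload). A model of the gate, with stand-in names:

    #include <stdbool.h>
    #include <stdint.h>

    /* Stand-in names: NETIF_F_RX_UDP_TUNNEL_PORT is a netdev feature
     * bit in the kernel, and the ndo hook is a function pointer in
     * net_device_ops.
     */
    #define RX_UDP_TUNNEL_PORT (1ULL << 0)

    struct dev_model {
            uint64_t features;
            bool     has_ndo_udp_tunnel_add;
    };

    /* The new rule: notify the device of a listening tunnel port only
     * if the driver implements the hook AND the feature is enabled.
     */
    static bool should_push_tunnel_port(const struct dev_model *dev)
    {
            return dev->has_ndo_udp_tunnel_add &&
                   (dev->features & RX_UDP_TUNNEL_PORT);
    }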
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 71b4ecc1..d7bf0b0 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -20,7 +20,8 @@
static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
int tos, int oif,
const xfrm_address_t *saddr,
- const xfrm_address_t *daddr)
+ const xfrm_address_t *daddr,
+ u32 mark)
{
struct rtable *rt;
@@ -28,6 +29,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
fl4->daddr = daddr->a4;
fl4->flowi4_tos = tos;
fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
+ fl4->flowi4_mark = mark;
if (saddr)
fl4->saddr = saddr->a4;
@@ -42,20 +44,22 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
- const xfrm_address_t *daddr)
+ const xfrm_address_t *daddr,
+ u32 mark)
{
struct flowi4 fl4;
- return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr);
+ return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark);
}
static int xfrm4_get_saddr(struct net *net, int oif,
- xfrm_address_t *saddr, xfrm_address_t *daddr)
+ xfrm_address_t *saddr, xfrm_address_t *daddr,
+ u32 mark)
{
struct dst_entry *dst;
struct flowi4 fl4;
- dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr);
+ dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark);
if (IS_ERR(dst))
return -EHOSTUNREACH;
@@ -213,14 +217,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
fl4->flowi4_tos = iph->tos;
}
-static inline int xfrm4_garbage_collect(struct dst_ops *ops)
-{
- struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
-
- xfrm_garbage_collect_deferred(net);
- return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
-}
-
static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu)
{
@@ -259,14 +255,13 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static struct dst_ops xfrm4_dst_ops_template = {
.family = AF_INET,
- .gc = xfrm4_garbage_collect,
.update_pmtu = xfrm4_update_pmtu,
.redirect = xfrm4_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm4_dst_destroy,
.ifdown = xfrm4_dst_ifdown,
.local_out = __ip_local_out,
- .gc_thresh = INT_MAX,
+ .gc_thresh = 32768,
};
static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
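Threading `mark` through __xfrm4_dst_lookup() lets the caller's flow mark participate in the route lookup for the encapsulating tunnel, so fwmark-based ip rules can steer IPsec traffic like any other flow. A minimal sketch of the extended lookup key (simplified types, not the kernel's flowi4):

    #include <stdint.h>

    struct flow4_model {
            uint32_t daddr, saddr;
            uint32_t mark;      /* new participant in route selection */
            int      oif, tos;
    };

    static void fill_flow(struct flow4_model *fl4, uint32_t daddr,
                          uint32_t saddr, int tos, int oif, uint32_t mark)
    {
            fl4->daddr = daddr;
            fl4->saddr = saddr;
            fl4->tos   = tos;
            fl4->oif   = oif;
            fl4->mark  = mark;  /* previously left at zero */
    }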
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 48c4529..ea71e4b 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -308,22 +308,12 @@ config IPV6_SEG6_LWTUNNEL
depends on IPV6
select LWTUNNEL
select DST_CACHE
+ select IPV6_MULTIPLE_TABLES
---help---
Support for encapsulation of packets within an outer IPv6
header and a Segment Routing Header using the lightweight
- tunnels mechanism.
-
- If unsure, say N.
-
-config IPV6_SEG6_INLINE
- bool "IPv6: direct Segment Routing Header insertion "
- depends on IPV6_SEG6_LWTUNNEL
- ---help---
- Support for direct insertion of the Segment Routing Header,
- also known as inline mode. Be aware that direct insertion of
- extension headers (as opposed to encapsulation) may break
- multiple mechanisms such as PMTUD or IPSec AH. Use this feature
- only if you know exactly what you are doing.
+ tunnels mechanism. Also enable support for advanced local
+ processing of SRv6 packets based on their active segment.
If unsure, say N.
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 217e9ff..10e3423 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
- udp_offload.o seg6.o
+ udp_offload.o seg6.o fib6_notifier.o
ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
@@ -23,7 +23,7 @@ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
ipv6-$(CONFIG_NETLABEL) += calipso.o
-ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o
+ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 936e9ab..c2e2a78 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3030,9 +3030,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
static void init_loopback(struct net_device *dev)
{
struct inet6_dev *idev;
- struct net_device *sp_dev;
- struct inet6_ifaddr *sp_ifa;
- struct rt6_info *sp_rt;
/* ::1 */
@@ -3045,45 +3042,6 @@ static void init_loopback(struct net_device *dev)
}
add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
-
- /* Add routes to other interface's IPv6 addresses */
- for_each_netdev(dev_net(dev), sp_dev) {
- if (!strcmp(sp_dev->name, dev->name))
- continue;
-
- idev = __in6_dev_get(sp_dev);
- if (!idev)
- continue;
-
- read_lock_bh(&idev->lock);
- list_for_each_entry(sp_ifa, &idev->addr_list, if_list) {
-
- if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
- continue;
-
- if (sp_ifa->rt) {
- /* This dst has been added to garbage list when
- * lo device down, release this obsolete dst and
- * reallocate a new router for ifa.
- */
- if (!atomic_read(&sp_ifa->rt->rt6i_ref)) {
- ip6_rt_put(sp_ifa->rt);
- sp_ifa->rt = NULL;
- } else {
- continue;
- }
- }
-
- sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false);
-
- /* Failure cases are ignored */
- if (!IS_ERR(sp_rt)) {
- sp_ifa->rt = sp_rt;
- ip6_ins_rt(sp_rt);
- }
- }
- read_unlock_bh(&idev->lock);
- }
}
void addrconf_add_linklocal(struct inet6_dev *idev,
@@ -3321,11 +3279,11 @@ static void addrconf_gre_config(struct net_device *dev)
static int fixup_permanent_addr(struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
{
- /* rt6i_ref == 0 means the host route was removed from the
+ /* !rt6i_node means the host route was removed from the
* FIB, for example, if 'lo' device is taken down. In that
* case regenerate the host route.
*/
- if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
+ if (!ifp->rt || !ifp->rt->rt6i_node) {
struct rt6_info *rt, *prev;
rt = addrconf_dst_alloc(idev, &ifp->addr, false);
@@ -6605,21 +6563,21 @@ int __init addrconf_init(void)
rtnl_af_register(&inet6_ops);
err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
- NULL);
+ 0);
if (err < 0)
goto errout;
/* Only the first call to __rtnl_register can fail */
- __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL);
- __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL);
+ __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
+ __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
- inet6_dump_ifaddr, NULL);
+ inet6_dump_ifaddr, 0);
__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
- inet6_dump_ifmcaddr, NULL);
+ inet6_dump_ifmcaddr, 0);
__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
- inet6_dump_ifacaddr, NULL);
+ inet6_dump_ifacaddr, 0);
__rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
- inet6_netconf_dump_devconf, NULL);
+ inet6_netconf_dump_devconf, 0);
ipv6_addr_label_rtnl_register();
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 7a428f6..b055bc7 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -405,6 +405,18 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
[IFAL_LABEL] = { .len = sizeof(u32), },
};
+static bool addrlbl_ifindex_exists(struct net *net, int ifindex)
+{
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, ifindex);
+ rcu_read_unlock();
+
+ return dev != NULL;
+}
+
static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -439,7 +451,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
switch (nlh->nlmsg_type) {
case RTM_NEWADDRLABEL:
if (ifal->ifal_index &&
- !__dev_get_by_index(net, ifal->ifal_index))
+ !addrlbl_ifindex_exists(net, ifal->ifal_index))
return -EINVAL;
err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
@@ -548,7 +560,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return -EINVAL;
if (ifal->ifal_index &&
- !__dev_get_by_index(net, ifal->ifal_index))
+ !addrlbl_ifindex_exists(net, ifal->ifal_index))
return -EINVAL;
if (!tb[IFAL_ADDRESS])
@@ -593,10 +605,10 @@ out:
void __init ipv6_addr_label_rtnl_register(void)
{
__rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
- NULL, NULL);
+ NULL, RTNL_FLAG_DOIT_UNLOCKED);
__rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
- NULL, NULL);
+ NULL, RTNL_FLAG_DOIT_UNLOCKED);
__rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
- ip6addrlbl_dump, NULL);
+ ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a88b5b5..fe5262f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -210,7 +210,8 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk));
+ np->autoflowlabel = ip6_default_np_autolabel(net);
+ np->repflow = net->ipv6.sysctl.flowlabel_reflect;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets
@@ -554,6 +555,8 @@ const struct proto_ops inet6_stream_ops = {
.recvmsg = inet_recvmsg, /* ok */
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
+ .sendmsg_locked = tcp_sendmsg_locked,
+ .sendpage_locked = tcp_sendpage_locked,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
.peek_len = tcp_peek_len,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ab64f36..89910e2 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -463,28 +463,30 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
return esp6_output_tail(x, skb, &esp);
}
-int esp6_input_done2(struct sk_buff *skb, int err)
+static inline int esp_remove_trailer(struct sk_buff *skb)
{
struct xfrm_state *x = xfrm_input_state(skb);
struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
- int alen = crypto_aead_authsize(aead);
- int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
- int elen = skb->len - hlen;
- int hdr_len = skb_network_header_len(skb);
- int padlen;
+ int alen, hlen, elen;
+ int padlen, trimlen;
+ __wsum csumdiff;
u8 nexthdr[2];
+ int ret;
- if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
- kfree(ESP_SKB_CB(skb)->tmp);
+ alen = crypto_aead_authsize(aead);
+ hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+ elen = skb->len - hlen;
- if (unlikely(err))
+ if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
+ ret = xo->proto;
goto out;
+ }
if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
BUG();
- err = -EINVAL;
+ ret = -EINVAL;
padlen = nexthdr[0];
if (padlen + 2 + alen >= elen) {
net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n",
@@ -492,17 +494,46 @@ int esp6_input_done2(struct sk_buff *skb, int err)
goto out;
}
- /* ... check padding bits here. Silly. :-) */
+ trimlen = alen + padlen + 2;
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0);
+ skb->csum = csum_block_sub(skb->csum, csumdiff,
+ skb->len - trimlen);
+ }
+ pskb_trim(skb, skb->len - trimlen);
+
+ ret = nexthdr[1];
+
+out:
+ return ret;
+}
- pskb_trim(skb, skb->len - alen - padlen - 2);
- __skb_pull(skb, hlen);
+int esp6_input_done2(struct sk_buff *skb, int err)
+{
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct crypto_aead *aead = x->data;
+ int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
+ int hdr_len = skb_network_header_len(skb);
+
+ if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
+ kfree(ESP_SKB_CB(skb)->tmp);
+
+ if (unlikely(err))
+ goto out;
+
+ err = esp_remove_trailer(skb);
+ if (unlikely(err < 0))
+ goto out;
+
+ skb_postpull_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
+ skb_pull_rcsum(skb, hlen);
if (x->props.mode == XFRM_MODE_TUNNEL)
skb_reset_transport_header(skb);
else
skb_set_transport_header(skb, -hdr_len);
- err = nexthdr[1];
-
/* RFC4303: Drop dummy packets without any error */
if (err == IPPROTO_NONE)
err = -EINVAL;
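esp_remove_trailer() preserves hardware checksums across the trim: under CHECKSUM_COMPLETE the stored sum covers the whole packet, so the trailer's contribution is computed with skb_checksum() and removed with csum_block_sub() before pskb_trim(). A standalone model of the arithmetic (even trim offset assumed; the kernel helper additionally handles odd offsets by byte rotation):

    #include <stdint.h>
    #include <stddef.h>

    static uint32_t ones_add(uint32_t a, uint32_t b)
    {
            uint32_t s = a + b;
            return (s & 0xffff) + (s >> 16);
    }

    /* Remove 'trimlen' trailing bytes from a full-packet one's-complement
     * sum: subtracting in one's complement is adding the complement.
     */
    static uint32_t csum_trim_tail(uint32_t csum, const uint8_t *tail,
                                   size_t trimlen)
    {
            uint32_t tailsum = 0;
            size_t i;

            for (i = 0; i + 1 < trimlen; i += 2)
                    tailsum = ones_add(tailsum,
                                       ((uint32_t)tail[i] << 8) | tail[i + 1]);
            if (trimlen & 1)
                    tailsum = ones_add(tailsum, (uint32_t)tail[trimlen - 1] << 8);

            return ones_add(csum, ~tailsum & 0xffff);
    }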
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 1cf437f..333a478 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -209,11 +209,13 @@ out:
static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
{
struct crypto_aead *aead = x->data;
+ struct xfrm_offload *xo = xfrm_offload(skb);
if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead)))
return -EINVAL;
- skb->ip_summed = CHECKSUM_NONE;
+ if (!(xo->flags & CRYPTO_DONE))
+ skb->ip_summed = CHECKSUM_NONE;
return esp6_input_done2(skb, 0);
}
@@ -332,3 +334,4 @@ module_init(esp6_offload_init);
module_exit(esp6_offload_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
+MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 3cec529..9551613 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -882,7 +882,7 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
(hops - 1) * sizeof(struct in6_addr));
sr_phdr->segments[0] = **addr_p;
- *addr_p = &sr_ihdr->segments[hops - 1];
+ *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(sr_phdr)) {
@@ -1174,7 +1174,7 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
{
struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt;
- fl6->daddr = srh->segments[srh->first_segment];
+ fl6->daddr = srh->segments[srh->segments_left];
break;
}
default:
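Both exthdrs.c fixes are the same correction: in a Segment Routing Header the address to route toward is the active segment, segments[segments_left], not the last-written entry. The two indexes coincide only at the original source, where segments_left equals first_segment. In model form:

    #include <stdint.h>

    struct srh_model {
            uint8_t  segments_left;   /* index of the active segment */
            uint8_t  first_segment;   /* index of the last entry (hops - 1) */
            uint32_t segments[16];    /* stand-in for struct in6_addr[] */
    };

    /* The destination address must track the active segment; indexing
     * by first_segment is only coincidentally right at the source.
     */
    static uint32_t srh_active_segment(const struct srh_model *srh)
    {
            return srh->segments[srh->segments_left];
    }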
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
new file mode 100644
index 0000000..05f82ba
--- /dev/null
+++ b/net/ipv6/fib6_notifier.c
@@ -0,0 +1,63 @@
+#include <linux/notifier.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <net/net_namespace.h>
+#include <net/fib_notifier.h>
+#include <net/netns/ipv6.h>
+#include <net/ip6_fib.h>
+
+int call_fib6_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->family = AF_INET6;
+ return call_fib_notifier(nb, net, event_type, info);
+}
+
+int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
+ struct fib_notifier_info *info)
+{
+ info->family = AF_INET6;
+ return call_fib_notifiers(net, event_type, info);
+}
+
+static unsigned int fib6_seq_read(struct net *net)
+{
+ return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
+}
+
+static int fib6_dump(struct net *net, struct notifier_block *nb)
+{
+ int err;
+
+ err = fib6_rules_dump(net, nb);
+ if (err)
+ return err;
+
+ return fib6_tables_dump(net, nb);
+}
+
+static const struct fib_notifier_ops fib6_notifier_ops_template = {
+ .family = AF_INET6,
+ .fib_seq_read = fib6_seq_read,
+ .fib_dump = fib6_dump,
+ .owner = THIS_MODULE,
+};
+
+int __net_init fib6_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv6.notifier_ops = ops;
+
+ return 0;
+}
+
+void __net_exit fib6_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv6.notifier_ops);
+}
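fib6_notifier.c is the IPv6 side of the unified FIB notifier chain: it stamps info->family = AF_INET6 and supplies the dump/seq ops the notifier core uses for consistent replay. A hypothetical consumer (say, a switchdev driver) would look roughly like the sketch below; the handler name and the demux are illustrative, and kernel context (<linux/notifier.h>, <net/fib_notifier.h>, <net/ip6_fib.h>) is assumed.

    /* Hypothetical consumer sketch. FIB_EVENT_ENTRY_ADD/_REPLACE/_DEL
     * events now arrive with info->family set, so one handler can serve
     * both IPv4 and IPv6 routes.
     */
    static int my_fib6_event(struct notifier_block *nb,
                             unsigned long event, void *ptr)
    {
            struct fib_notifier_info *info = ptr;
            struct fib6_entry_notifier_info *en;

            if (info->family != AF_INET6)
                    return NOTIFY_DONE;

            en = container_of(info, struct fib6_entry_notifier_info, info);
            /* offload or mirror en->rt according to 'event' */
            (void)en;
            return NOTIFY_DONE;
    }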
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ec849d8..b240f24 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -14,6 +14,7 @@
*/
#include <linux/netdevice.h>
+#include <linux/notifier.h>
#include <linux/export.h>
#include <net/fib_rules.h>
@@ -29,22 +30,65 @@ struct fib6_rule {
u8 tclass;
};
-struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
- int flags, pol_lookup_t lookup)
+static bool fib6_rule_matchall(const struct fib_rule *rule)
+{
+ struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
+
+ if (r->dst.plen || r->src.plen || r->tclass)
+ return false;
+ return fib_rule_matchall(rule);
+}
+
+bool fib6_rule_default(const struct fib_rule *rule)
{
- struct fib_lookup_arg arg = {
- .lookup_ptr = lookup,
- .flags = FIB_LOOKUP_NOREF,
- };
+ if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
+ rule->l3mdev)
+ return false;
+ if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN)
+ return false;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fib6_rule_default);
- /* update flow if oif or iif point to device enslaved to l3mdev */
- l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+int fib6_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, AF_INET6);
+}
- fib_rules_lookup(net->ipv6.fib6_rules_ops,
- flowi6_to_flowi(fl6), flags, &arg);
+unsigned int fib6_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, AF_INET6);
+}
- if (arg.result)
- return arg.result;
+struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ int flags, pol_lookup_t lookup)
+{
+ if (net->ipv6.fib6_has_custom_rules) {
+ struct fib_lookup_arg arg = {
+ .lookup_ptr = lookup,
+ .flags = FIB_LOOKUP_NOREF,
+ };
+
+ /* update flow if oif or iif point to device enslaved to l3mdev */
+ l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+
+ fib_rules_lookup(net->ipv6.fib6_rules_ops,
+ flowi6_to_flowi(fl6), flags, &arg);
+
+ if (arg.result)
+ return arg.result;
+ } else {
+ struct rt6_info *rt;
+
+ rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+ if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
+ return &rt->dst;
+ ip6_rt_put(rt);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ if (rt->dst.error != -EAGAIN)
+ return &rt->dst;
+ ip6_rt_put(rt);
+ }
dst_hold(&net->ipv6.ip6_null_entry->dst);
return &net->ipv6.ip6_null_entry->dst;
@@ -214,6 +258,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
+ net->ipv6.fib6_has_custom_rules = true;
err = 0;
errout:
return err;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8d7b113..5acb544 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -399,6 +399,24 @@ relookup_failed:
return ERR_PTR(err);
}
+static int icmp6_iif(const struct sk_buff *skb)
+{
+ int iif = skb->dev->ifindex;
+
+ /* for local traffic to local address, skb dev is the loopback
+ * device. Check if there is a dst attached to the skb and if so
+ * get the real device index.
+ */
+ if (unlikely(iif == LOOPBACK_IFINDEX)) {
+ const struct rt6_info *rt6 = skb_rt6_info(skb);
+
+ if (rt6)
+ iif = rt6->rt6i_idev->dev->ifindex;
+ }
+
+ return iif;
+}
+
/*
* Send an ICMP message in response to a packet in error
*/
@@ -459,9 +477,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
* Source addr check
*/
- if (__ipv6_addr_needs_scope_id(addr_type))
- iif = skb->dev->ifindex;
- else {
+ if (__ipv6_addr_needs_scope_id(addr_type)) {
+ iif = icmp6_iif(skb);
+ } else {
dst = skb_dst(skb);
iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
}
@@ -508,6 +526,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
+ fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
@@ -682,7 +701,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
fl6.saddr = *saddr;
- fl6.flowi6_oif = skb->dev->ifindex;
+ fl6.flowi6_oif = icmp6_iif(skb);
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
fl6.flowi6_uid = sock_net_uid(net, NULL);
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 77f7f8c..5bd419c 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -208,7 +208,7 @@ ila_nf_input(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = {
+static const struct nf_hook_ops ila_nf_hook_ops[] = {
{
.hook = ila_nf_input,
.pf = NFPROTO_IPV6,
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b13b8f9..b01858f 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -56,7 +56,7 @@ struct sock *__inet6_lookup_established(struct net *net,
const __be16 sport,
const struct in6_addr *daddr,
const u16 hnum,
- const int dif)
+ const int dif, const int sdif)
{
struct sock *sk;
const struct hlist_nulls_node *node;
@@ -73,12 +73,12 @@ begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
+ if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
- if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+ if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established);
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum,
const struct in6_addr *daddr,
- const int dif, bool exact_dif)
+ const int dif, const int sdif, bool exact_dif)
{
int score = -1;
@@ -110,9 +110,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
score++;
}
if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
+ bool dev_match = (sk->sk_bound_dev_if == dif ||
+ sk->sk_bound_dev_if == sdif);
+
+ if (exact_dif && !dev_match)
return -1;
- score++;
+ if (sk->sk_bound_dev_if && dev_match)
+ score++;
}
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
@@ -126,7 +130,7 @@ struct sock *inet6_lookup_listener(struct net *net,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport, const struct in6_addr *daddr,
- const unsigned short hnum, const int dif)
+ const unsigned short hnum, const int dif, const int sdif)
{
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -136,7 +140,7 @@ struct sock *inet6_lookup_listener(struct net *net,
u32 phash = 0;
sk_for_each(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
+ score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
if (score > hiscore) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -171,7 +175,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
bool refcounted;
sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
- ntohs(dport), dif, &refcounted);
+ ntohs(dport), dif, 0, &refcounted);
if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
@@ -187,8 +191,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
const struct in6_addr *saddr = &sk->sk_v6_daddr;
const int dif = sk->sk_bound_dev_if;
- const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
struct net *net = sock_net(sk);
+ const int sdif = l3mdev_master_ifindex_by_index(net, dif);
+ const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
inet->inet_dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
@@ -203,7 +208,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
if (sk2->sk_hash != hash)
continue;
- if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
+ if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports,
+ dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e1c85bb..a3b5c16 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -33,6 +33,7 @@
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/lwtunnel.h>
+#include <net/fib_notifier.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
@@ -165,7 +166,7 @@ static void node_free(struct fib6_node *fn)
call_rcu(&fn->rcu, node_free_rcu);
}
-static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
{
int cpu;
@@ -188,15 +189,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
free_percpu(non_pcpu_rt->rt6i_pcpu);
non_pcpu_rt->rt6i_pcpu = NULL;
}
-
-static void rt6_release(struct rt6_info *rt)
-{
- if (atomic_dec_and_test(&rt->rt6i_ref)) {
- rt6_free_pcpu(rt);
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- }
-}
+EXPORT_SYMBOL_GPL(rt6_free_pcpu);
static void fib6_link_table(struct net *net, struct fib6_table *tb)
{
@@ -314,6 +307,109 @@ static void __net_init fib6_tables_init(struct net *net)
#endif
+unsigned int fib6_tables_seq_read(struct net *net)
+{
+ unsigned int h, fib_seq = 0;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ struct fib6_table *tb;
+
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
+ read_lock_bh(&tb->tb6_lock);
+ fib_seq += tb->fib_seq;
+ read_unlock_bh(&tb->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+
+ return fib_seq;
+}
+
+static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
+ enum fib_event_type event_type,
+ struct rt6_info *rt)
+{
+ struct fib6_entry_notifier_info info = {
+ .rt = rt,
+ };
+
+ return call_fib6_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_fib6_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct rt6_info *rt)
+{
+ struct fib6_entry_notifier_info info = {
+ .rt = rt,
+ };
+
+ rt->rt6i_table->fib_seq++;
+ return call_fib6_notifiers(net, event_type, &info.info);
+}
+
+struct fib6_dump_arg {
+ struct net *net;
+ struct notifier_block *nb;
+};
+
+static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg)
+{
+ if (rt == arg->net->ipv6.ip6_null_entry)
+ return;
+ call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt);
+}
+
+static int fib6_node_dump(struct fib6_walker *w)
+{
+ struct rt6_info *rt;
+
+ for (rt = w->leaf; rt; rt = rt->dst.rt6_next)
+ fib6_rt_dump(rt, w->args);
+ w->leaf = NULL;
+ return 0;
+}
+
+static void fib6_table_dump(struct net *net, struct fib6_table *tb,
+ struct fib6_walker *w)
+{
+ w->root = &tb->tb6_root;
+ read_lock_bh(&tb->tb6_lock);
+ fib6_walk(net, w);
+ read_unlock_bh(&tb->tb6_lock);
+}
+
+/* Called with rcu_read_lock() */
+int fib6_tables_dump(struct net *net, struct notifier_block *nb)
+{
+ struct fib6_dump_arg arg;
+ struct fib6_walker *w;
+ unsigned int h;
+
+ w = kzalloc(sizeof(*w), GFP_ATOMIC);
+ if (!w)
+ return -ENOMEM;
+
+ w->func = fib6_node_dump;
+ arg.net = net;
+ arg.nb = nb;
+ w->args = &arg;
+
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ struct fib6_table *tb;
+
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist)
+ fib6_table_dump(net, tb, w);
+ }
+
+ kfree(w);
+
+ return 0;
+}
+
static int fib6_dump_node(struct fib6_walker *w)
{
int res;
@@ -745,8 +841,6 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
}
fn = fn->parent;
}
- /* No more references are possible at this point. */
- BUG_ON(atomic_read(&rt->rt6i_ref) != 1);
}
}
@@ -891,6 +985,8 @@ add:
*ins = rt;
rcu_assign_pointer(rt->rt6i_node, fn);
atomic_inc(&rt->rt6i_ref);
+ call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
+ rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -918,6 +1014,8 @@ add:
rcu_assign_pointer(rt->rt6i_node, fn);
rt->dst.rt6_next = iter->dst.rt6_next;
atomic_inc(&rt->rt6i_ref);
+ call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
+ rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -925,6 +1023,7 @@ add:
fn->fn_flags |= RTN_RTINFO;
}
nsiblings = iter->rt6i_nsiblings;
+ iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
if (fn->rr_ptr == iter)
fn->rr_ptr = NULL;
@@ -939,6 +1038,7 @@ add:
break;
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->dst.rt6_next;
+ iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
if (fn->rr_ptr == iter)
fn->rr_ptr = NULL;
@@ -1473,6 +1573,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
fib6_purge_rt(rt, fn, net);
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt);
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
rt6_release(rt);
@@ -1858,6 +1959,11 @@ static void fib6_gc_timer_cb(unsigned long arg)
static int __net_init fib6_net_init(struct net *net)
{
size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+ int err;
+
+ err = fib6_notifier_init(net);
+ if (err)
+ return err;
spin_lock_init(&net->ipv6.fib6_gc_lock);
rwlock_init(&net->ipv6.fib6_walker_lock);
@@ -1910,6 +2016,7 @@ out_fib_table_hash:
out_rt6_stats:
kfree(net->ipv6.rt6_stats);
out_timer:
+ fib6_notifier_exit(net);
return -ENOMEM;
}
@@ -1926,6 +2033,7 @@ static void fib6_net_exit(struct net *net)
kfree(net->ipv6.fib6_main_tbl);
kfree(net->ipv6.fib_table_hash);
kfree(net->ipv6.rt6_stats);
+ fib6_notifier_exit(net);
}
static struct pernet_operations fib6_net_ops = {
@@ -1949,7 +2057,7 @@ int __init fib6_init(void)
goto out_kmem_cache_create;
ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
- NULL);
+ 0);
if (ret)
goto out_unregister_subsys;
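fib6_tables_seq_read() and fib6_tables_dump() exist so the notifier core can offer a consistent snapshot: the per-table fib_seq counters are bumped by call_fib6_entry_notifiers(), and a dumper that reads the summed sequence before and after its walk can detect a concurrent route change and retry. A sketch of that usage (the real loop lives in the generic fib_notifier core; this is a model built from the two helpers added above):

    /* Model of consistent-dump usage: retry while routes changed
     * underneath the walk.
     */
    static int fib6_dump_consistent(struct net *net, struct notifier_block *nb)
    {
            unsigned int seq;
            int err;

            do {
                    seq = fib6_tables_seq_read(net);
                    err = fib6_tables_dump(net, nb);
                    if (err)
                            return err;
            } while (seq != fib6_tables_seq_read(net));

            return 0;
    }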
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2dfe50d..43ca864 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1110,69 +1110,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
-static inline int ip6_ufo_append_data(struct sock *sk,
- struct sk_buff_head *queue,
- int getfrag(void *from, char *to, int offset, int len,
- int odd, struct sk_buff *skb),
- void *from, int length, int hh_len, int fragheaderlen,
- int exthdrlen, int transhdrlen, int mtu,
- unsigned int flags, const struct flowi6 *fl6)
-
-{
- struct sk_buff *skb;
- int err;
-
- /* There is support for UDP large send offload by network
- * device, so create one single skb packet containing complete
- * udp datagram
- */
- skb = skb_peek_tail(queue);
- if (!skb) {
- skb = sock_alloc_send_skb(sk,
- hh_len + fragheaderlen + transhdrlen + 20,
- (flags & MSG_DONTWAIT), &err);
- if (!skb)
- return err;
-
- /* reserve space for Hardware header */
- skb_reserve(skb, hh_len);
-
- /* create space for UDP/IP header */
- skb_put(skb, fragheaderlen + transhdrlen);
-
- /* initialize network header pointer */
- skb_set_network_header(skb, exthdrlen);
-
- /* initialize protocol header pointer */
- skb->transport_header = skb->network_header + fragheaderlen;
-
- skb->protocol = htons(ETH_P_IPV6);
- skb->csum = 0;
-
- if (flags & MSG_CONFIRM)
- skb_set_dst_pending_confirm(skb, 1);
-
- __skb_queue_tail(queue, skb);
- } else if (skb_is_gso(skb)) {
- goto append;
- }
-
- skb->ip_summed = CHECKSUM_PARTIAL;
- /* Specify the length of each IPv6 datagram fragment.
- * It has to be a multiple of 8.
- */
- skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
- sizeof(struct frag_hdr)) & ~7;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
- skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
- &fl6->daddr,
- &fl6->saddr);
-
-append:
- return skb_append_datato_frags(sk, skb, getfrag, from,
- (length - transhdrlen));
-}
-
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
gfp_t gfp)
{
@@ -1381,20 +1318,6 @@ emsgsize:
*/
cork->length += length;
- if ((skb && skb_is_gso(skb)) ||
- (((length + (skb ? skb->len : headersize)) > mtu) &&
- (skb_queue_len(queue) <= 1) &&
- (sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
- (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) {
- err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
- hh_len, fragheaderlen, exthdrlen,
- transhdrlen, mtu, flags, fl6);
- if (err)
- goto error;
- return 0;
- }
-
if (!skb)
goto alloc_new_skb;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 486c230..79444a4 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1145,33 +1145,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
.priority = 100,
};
-static bool is_vti6_tunnel(const struct net_device *dev)
-{
- return dev->netdev_ops == &vti6_netdev_ops;
-}
-
-static int vti6_device_event(struct notifier_block *unused,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct ip6_tnl *t = netdev_priv(dev);
-
- if (!is_vti6_tunnel(dev))
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_DOWN:
- if (!net_eq(t->net, dev_net(dev)))
- xfrm_garbage_collect(t->net);
- break;
- }
- return NOTIFY_DONE;
-}
-
-static struct notifier_block vti6_notifier_block __read_mostly = {
- .notifier_call = vti6_device_event,
-};
-
/**
* vti6_tunnel_init - register protocol and reserve needed resources
*
@@ -1182,8 +1155,6 @@ static int __init vti6_tunnel_init(void)
const char *msg;
int err;
- register_netdevice_notifier(&vti6_notifier_block);
-
msg = "tunnel device";
err = register_pernet_device(&vti6_net_ops);
if (err < 0)
@@ -1216,7 +1187,6 @@ xfrm_proto_ah_failed:
xfrm_proto_esp_failed:
unregister_pernet_device(&vti6_net_ops);
pernet_dev_failed:
- unregister_netdevice_notifier(&vti6_notifier_block);
pr_err("vti6 init: failed to register %s\n", msg);
return err;
}
@@ -1231,7 +1201,6 @@ static void __exit vti6_tunnel_cleanup(void)
xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
unregister_pernet_device(&vti6_net_ops);
- unregister_netdevice_notifier(&vti6_notifier_block);
}
module_init(vti6_tunnel_init);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7454850..f5500f5 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1427,7 +1427,7 @@ int __init ip6_mr_init(void)
}
#endif
rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
- ip6mr_rtm_dumproute, NULL);
+ ip6mr_rtm_dumproute, 0);
return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0327c1f..266a530 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -127,7 +127,7 @@ struct neigh_table nd_tbl = {
[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME,
[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
- [NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024,
+ [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
[NEIGH_VAR_PROXY_QLEN] = 64,
[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ,
[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10,
@@ -1779,6 +1779,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
static struct notifier_block ndisc_netdev_notifier = {
.notifier_call = ndisc_netdev_event,
+ .priority = ADDRCONF_NOTIFY_PRIORITY - 5,
};
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1f90644..54b1e75 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -39,12 +39,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv6 packet filter");
-#ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x) WARN_ON(!(x))
-#else
-#define IP_NF_ASSERT(x)
-#endif
-
void *ip6t_alloc_initial_table(const struct xt_table *info)
{
return xt_alloc_initial_table(ip6t, IP6T);
@@ -176,7 +170,7 @@ static const char *const comments[] = {
[NF_IP6_TRACE_COMMENT_POLICY] = "policy",
};
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
@@ -284,7 +278,7 @@ ip6t_do_table(struct sk_buff *skb,
acpar.hotdrop = false;
acpar.state = state;
- IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+ WARN_ON(!(table->valid_hooks & (1 << hook)));
local_bh_disable();
addend = xt_write_recseq_begin();
@@ -315,7 +309,7 @@ ip6t_do_table(struct sk_buff *skb,
const struct xt_entry_match *ematch;
struct xt_counters *counter;
- IP_NF_ASSERT(e);
+ WARN_ON(!e);
acpar.thoff = 0;
if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
&acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) {
@@ -335,7 +329,7 @@ ip6t_do_table(struct sk_buff *skb,
ADD_COUNTER(*counter, skb->len, 1);
t = ip6t_get_target_c(e);
- IP_NF_ASSERT(t->u.kernel.target);
+ WARN_ON(!t->u.kernel.target);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index ce203dd..a5cd43d 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -438,7 +438,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv6_synproxy_ops[] = {
{
.hook = ipv6_synproxy_hook,
.pf = NFPROTO_IPV6,
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 7d2bd94..9915125 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -69,7 +69,7 @@ static unsigned int ip6table_nat_local_fn(void *priv,
return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain);
}
-static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
+static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
/* Before packet filtering, change destination */
{
.hook = ip6table_nat_in,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4e34024..fe01dc9 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -67,13 +67,6 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static void ipv6_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "src=%pI6 dst=%pI6 ",
- tuple->src.u3.ip6, tuple->dst.u3.ip6);
-}
-
static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
@@ -191,7 +184,7 @@ static unsigned int ipv6_conntrack_local(void *priv,
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
-static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
+static const struct nf_hook_ops ipv6_conntrack_ops[] = {
{
.hook = ipv6_conntrack_in,
.pf = NFPROTO_IPV6,
@@ -308,11 +301,6 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
-
-static int ipv6_nlattr_tuple_size(void)
-{
- return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1);
-}
#endif
static int ipv6_hooks_register(struct net *net)
@@ -353,16 +341,15 @@ static void ipv6_hooks_unregister(struct net *net)
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
.l3proto = PF_INET6,
- .name = "ipv6",
.pkt_to_tuple = ipv6_pkt_to_tuple,
.invert_tuple = ipv6_invert_tuple,
- .print_tuple = ipv6_print_tuple,
.get_l4proto = ipv6_get_l4proto,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = ipv6_tuple_to_nlattr,
- .nlattr_tuple_size = ipv6_nlattr_tuple_size,
.nlattr_to_tuple = ipv6_nlattr_to_tuple,
.nla_policy = ipv6_nla_policy,
+ .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) +
+ NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])),
#endif
.net_ns_get = ipv6_hooks_register,
.net_ns_put = ipv6_hooks_unregister,
@@ -398,25 +385,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
static int ipv6_net_init(struct net *net)
{
- int ret = 0;
-
- ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
- ARRAY_SIZE(builtin_l4proto6));
- if (ret < 0)
- return ret;
-
- ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
- nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
- ARRAY_SIZE(builtin_l4proto6));
- }
- return ret;
+ return nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
+ ARRAY_SIZE(builtin_l4proto6));
}
static void ipv6_net_exit(struct net *net)
{
- nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
ARRAY_SIZE(builtin_l4proto6));
}
@@ -434,6 +408,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
need_conntrack();
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) !=
+ nf_conntrack_l3proto_ipv6.nla_size))
+ return -EINVAL;
+#endif
+
ret = nf_register_sockopt(&so_getorigdst6);
if (ret < 0) {
pr_err("Unable to register netfilter socket option\n");
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index d5f028e..a9e1fd1 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -84,16 +84,6 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void icmpv6_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "type=%u code=%u id=%u ",
- tuple->dst.u.icmp.type,
- tuple->dst.u.icmp.code,
- ntohs(tuple->src.u.icmp.id));
-}
-
static unsigned int *icmpv6_get_timeouts(struct net *net)
{
return &icmpv6_pernet(net)->timeout;
@@ -105,7 +95,6 @@ static int icmpv6_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeout)
{
/* Do not immediately delete the connection after the first
@@ -131,11 +120,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
pr_debug("icmpv6: can't create new conn with type %u\n",
type + 128);
nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
- if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
- nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
- NULL, NULL,
- "nf_ct_icmpv6: invalid new with type %d ",
- type + 128);
return false;
}
return true;
@@ -144,8 +128,7 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
static int
icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
- unsigned int icmp6off,
- unsigned int hooknum)
+ unsigned int icmp6off)
{
struct nf_conntrack_tuple intuple, origtuple;
const struct nf_conntrack_tuple_hash *h;
@@ -153,7 +136,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
- NF_CT_ASSERT(!skb_nfct(skb));
+ WARN_ON(skb_nfct(skb));
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuplepr(skb,
@@ -229,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
if (icmp6h->icmp6_type >= 128)
return NF_ACCEPT;
- return icmpv6_error_message(net, tmpl, skb, dataoff, hooknum);
+ return icmpv6_error_message(net, tmpl, skb, dataoff);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -367,10 +350,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_ICMPV6,
- .name = "icmpv6",
.pkt_to_tuple = icmpv6_pkt_to_tuple,
.invert_tuple = icmpv6_invert_tuple,
- .print_tuple = icmpv6_print_tuple,
.packet = icmpv6_packet,
.get_timeouts = icmpv6_get_timeouts,
.new = icmpv6_new,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 986d4ca..b263bf3 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -622,18 +622,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
static int nf_ct_net_init(struct net *net)
{
- int res;
-
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- res = inet_frags_init_net(&net->nf_frag.frags);
- if (res)
- return res;
- res = nf_ct_frag6_sysctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->nf_frag.frags);
- return res;
+ inet_frags_init_net(&net->nf_frag.frags);
+
+ return nf_ct_frag6_sysctl_register(net);
}
static void nf_ct_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index ada60d1..b326da5 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -74,7 +74,7 @@ static unsigned int ipv6_defrag(void *priv,
return err == 0 ? NF_ACCEPT : NF_DROP;
}
-static struct nf_hook_ops ipv6_defrag_ops[] = {
+static const struct nf_hook_ops ipv6_defrag_ops[] = {
{
.hook = ipv6_defrag,
.pf = NFPROTO_IPV6,
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 97c7242..b397a8f 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -25,7 +25,7 @@
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index b2b4f03..46d6dba 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -196,7 +196,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
struct nf_conntrack_tuple target;
unsigned long statusbit;
- NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+ WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
return 0;
@@ -319,8 +319,8 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
default:
/* ESTABLISHED */
- NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
- ctinfo == IP_CT_ESTABLISHED_REPLY);
+ WARN_ON(ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY);
if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
goto oif_changed;
}
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index d7b6790..98f61fc 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -36,8 +36,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
struct nf_nat_range newrange;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
+ WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
&ipv6_hdr(skb)->daddr, 0, &src) < 0)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 43f91d9..54b5899 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -25,9 +25,9 @@ static int get_ifindex(const struct net_device *dev)
static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
const struct nft_pktinfo *pkt,
- const struct net_device *dev)
+ const struct net_device *dev,
+ struct ipv6hdr *iph)
{
- const struct ipv6hdr *iph = ipv6_hdr(pkt->skb);
int lookup_flags = 0;
if (priv->flags & NFTA_FIB_F_DADDR) {
@@ -55,7 +55,8 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
}
static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
- const struct nft_pktinfo *pkt)
+ const struct nft_pktinfo *pkt,
+ struct ipv6hdr *iph)
{
const struct net_device *dev = NULL;
const struct nf_ipv6_ops *v6ops;
@@ -77,7 +78,7 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
- nft_fib6_flowi_init(&fl6, priv, pkt, dev);
+ nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
v6ops = nf_get_ipv6_ops();
if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
@@ -131,9 +132,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_fib *priv = nft_expr_priv(expr);
+ int noff = skb_network_offset(pkt->skb);
u32 *dest = &regs->data[priv->dreg];
+ struct ipv6hdr *iph, _iph;
- *dest = __nft_fib6_eval_type(priv, pkt);
+ iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+ if (!iph) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ *dest = __nft_fib6_eval_type(priv, pkt, iph);
}
EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
@@ -141,8 +150,10 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_fib *priv = nft_expr_priv(expr);
+ int noff = skb_network_offset(pkt->skb);
const struct net_device *oif = NULL;
u32 *dest = &regs->data[priv->dreg];
+ struct ipv6hdr *iph, _iph;
struct flowi6 fl6 = {
.flowi6_iif = LOOPBACK_IFINDEX,
.flowi6_proto = pkt->tprot,
@@ -155,7 +166,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
else if (priv->flags & NFTA_FIB_F_OIF)
oif = nft_out(pkt);
- lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);
+ iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
+ if (!iph) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
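[Note: nft_fib6 now reads the IPv6 header through skb_header_pointer() instead of trusting ipv6_hdr(), since at ingress the header may not be linear or even present; a truncated packet breaks the rule (NFT_BREAK) rather than being read out of bounds. A self-contained sketch of the idiom; the real helper also avoids the copy when the data is already linear:]

#include <stdio.h>
#include <string.h>

static void *header_pointer(const void *pkt, size_t pktlen,
			    size_t offset, size_t len, void *buffer)
{
	if (offset + len > pktlen)
		return NULL;	/* truncated: no header to read */
	memcpy(buffer, (const char *)pkt + offset, len);
	return buffer;
}

int main(void)
{
	unsigned char pkt[40] = { 0x60 };	/* IPv6 version nibble */
	unsigned char hdr[40];

	if (!header_pointer(pkt, sizeof(pkt), 0, sizeof(hdr), hdr))
		return 1;	/* caller would set NFT_BREAK here */
	printf("version=%u\n", hdr[0] >> 4);
	return 0;
}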
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 60be012..e4462b0 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
unsigned short num, const struct in6_addr *loc_addr,
- const struct in6_addr *rmt_addr, int dif)
+ const struct in6_addr *rmt_addr, int dif, int sdif)
{
bool is_multicast = ipv6_addr_is_multicast(loc_addr);
@@ -86,7 +86,9 @@ struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
continue;
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != sdif)
continue;
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
@@ -178,7 +180,8 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
goto out;
net = dev_net(skb->dev);
- sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, inet6_iif(skb));
+ sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr,
+ inet6_iif(skb), inet6_sdif(skb));
while (sk) {
int filtered;
@@ -222,7 +225,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
}
}
sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr,
- inet6_iif(skb));
+ inet6_iif(skb), inet6_sdif(skb));
}
out:
read_unlock(&raw_v6_hashinfo.lock);
@@ -378,7 +381,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
net = dev_net(skb->dev);
while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr,
- inet6_iif(skb)))) {
+ inet6_iif(skb), inet6_iif(skb)))) {
rawv6_err(sk, skb, NULL, type, code,
inner_offset, info);
sk = sk_next(sk);
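[Note: the raw socket lookup gains an sdif argument, the ifindex of the original VRF slave device, so sockets bound to the slave still match after skb->dev has been switched to the L3 master. A userspace sketch of the widened test; names are illustrative:]

#include <stdio.h>
#include <stdbool.h>

static bool bound_dev_match(int bound_dev_if, int dif, int sdif)
{
	if (!bound_dev_if)
		return true;	/* unbound sockets match any device */
	return bound_dev_if == dif || bound_dev_if == sdif;
}

int main(void)
{
	/* socket bound to slave ifindex 4; packet arrives on master 10 */
	printf("%d\n", bound_dev_match(4, 10, 4));	/* 1: now matches */
	printf("%d\n", bound_dev_match(4, 10, 0));	/* 0: no sdif */
	return 0;
}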
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e1da5b8..846012e 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -714,19 +714,13 @@ static void ip6_frags_sysctl_unregister(void)
static int __net_init ipv6_frags_init_net(struct net *net)
{
- int res;
-
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
- res = inet_frags_init_net(&net->ipv6.frags);
- if (res)
- return res;
- res = ip6_frags_ns_sysctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->ipv6.frags);
- return res;
+ inet_frags_init_net(&net->ipv6.frags);
+
+ return ip6_frags_ns_sysctl_register(net);
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2d0e779..26cc9f4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -446,16 +446,6 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-/* Multipath route selection:
- * Hash based function using packet header and flowlabel.
- * Adapted from fib_info_hashfn()
- */
-static int rt6_info_hash_nhsfn(unsigned int candidate_count,
- const struct flowi6 *fl6)
-{
- return get_hash_from_flowi6(fl6) % candidate_count;
-}
-
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
struct flowi6 *fl6, int oif,
int strict)
@@ -463,7 +453,13 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
struct rt6_info *sibling, *next_sibling;
int route_choosen;
- route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
+	/* We might have already computed the hash for ICMPv6 errors. In such
+	 * a case it will always be non-zero. Otherwise now is the time to
+	 * do it.
+	 */
+ if (!fl6->mp_hash)
+ fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+
+ route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
	/* Don't change the route if route_choosen == 0
	 * (the siblings list does not include ourselves)
	 */
@@ -959,10 +955,34 @@ int ip6_ins_rt(struct rt6_info *rt)
return __ip6_ins_rt(rt, &info, &mxc, NULL);
}
+/* called with rcu_read_lock() held */
+static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
+{
+ struct net_device *dev = rt->dst.dev;
+
+ if (rt->rt6i_flags & RTF_LOCAL) {
+		/* for copies of local routes, dst->dev needs to be the
+		 * device itself if it is an L3 master device, the master
+		 * device if the device is enslaved, and the loopback
+		 * device by default
+		 */
+ if (netif_is_l3_slave(dev) &&
+ !rt6_need_strict(&rt->rt6i_dst.addr))
+ dev = l3mdev_master_dev_rcu(dev);
+ else if (!netif_is_l3_master(dev))
+ dev = dev_net(dev)->loopback_dev;
+		/* the remaining case is netif_is_l3_master(dev), in which
+		 * case dev is already the device we want to return
+		 */
+ }
+
+ return dev;
+}
+
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
+ struct net_device *dev;
struct rt6_info *rt;
/*
@@ -972,8 +992,10 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
ort = (struct rt6_info *)ort->dst.from;
- rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
-
+ rcu_read_lock();
+ dev = ip6_rt_get_dev_rcu(ort);
+ rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
+ rcu_read_unlock();
if (!rt)
return NULL;
@@ -1001,11 +1023,13 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
+ struct net_device *dev;
struct rt6_info *pcpu_rt;
- pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
- rt->dst.dev, rt->dst.flags);
-
+ rcu_read_lock();
+ dev = ip6_rt_get_dev_rcu(rt);
+ pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
+ rcu_read_unlock();
if (!pcpu_rt)
return NULL;
ip6_rt_copy_init(pcpu_rt, rt);
@@ -1187,6 +1211,54 @@ struct dst_entry *ip6_route_input_lookup(struct net *net,
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
+static void ip6_multipath_l3_keys(const struct sk_buff *skb,
+ struct flow_keys *keys)
+{
+ const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
+ const struct ipv6hdr *key_iph = outer_iph;
+ const struct ipv6hdr *inner_iph;
+ const struct icmp6hdr *icmph;
+ struct ipv6hdr _inner_iph;
+
+ if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
+ goto out;
+
+ icmph = icmp6_hdr(skb);
+ if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
+ icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
+ icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
+ icmph->icmp6_type != ICMPV6_PARAMPROB)
+ goto out;
+
+ inner_iph = skb_header_pointer(skb,
+ skb_transport_offset(skb) + sizeof(*icmph),
+ sizeof(_inner_iph), &_inner_iph);
+ if (!inner_iph)
+ goto out;
+
+ key_iph = inner_iph;
+out:
+ memset(keys, 0, sizeof(*keys));
+ keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ keys->addrs.v6addrs.src = key_iph->saddr;
+ keys->addrs.v6addrs.dst = key_iph->daddr;
+ keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->basic.ip_proto = key_iph->nexthdr;
+}
+
+/* if skb is set it will be used, and fl6 may then be NULL */
+u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+{
+ struct flow_keys hash_keys;
+
+ if (skb) {
+ ip6_multipath_l3_keys(skb, &hash_keys);
+ return flow_hash_from_keys(&hash_keys);
+ }
+
+ return get_hash_from_flowi6(fl6);
+}
+
void ip6_route_input(struct sk_buff *skb)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -1205,6 +1277,8 @@ void ip6_route_input(struct sk_buff *skb)
tun_info = skb_tunnel_info(skb);
if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+ if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
+ fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
skb_dst_drop(skb);
skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
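[Note: rt6_multipath_select() now consumes a hash that may have been computed earlier: for ICMPv6 errors, ip6_route_input() hashes the offending packet's inner header via rt6_multipath_hash() so the error follows the same sibling route as the flow that triggered it. The flow dissector never produces a zero hash, so zero can safely mean "not yet computed". A userspace sketch of the selection:]

#include <stdio.h>
#include <stdint.h>

static unsigned int pick_sibling(uint32_t *mp_hash, uint32_t flow_hash,
				 unsigned int nsiblings)
{
	if (!*mp_hash)		/* flow hashes are never 0 */
		*mp_hash = flow_hash;
	return *mp_hash % (nsiblings + 1);
}

int main(void)
{
	uint32_t mp_hash = 0;

	printf("route %u\n", pick_sibling(&mp_hash, 0xdeadbeef, 3));
	/* a stored hash is reused, keeping errors on the same path */
	printf("route %u (same)\n", pick_sibling(&mp_hash, 0x12345678, 3));
	return 0;
}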
@@ -2698,15 +2772,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
{
u32 tb_id;
struct net *net = dev_net(idev->dev);
- struct net_device *dev = net->loopback_dev;
+ struct net_device *dev = idev->dev;
struct rt6_info *rt;
- /* use L3 Master device as loopback for host routes if device
- * is enslaved and address is not link local or multicast
- */
- if (!rt6_need_strict(addr))
- dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
-
rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
if (!rt)
return ERR_PTR(-ENOMEM);
@@ -3337,6 +3405,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
goto nla_put_failure;
}
+ if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
+ *flags |= RTNH_F_OFFLOAD;
+
/* not needed for multipath encoding b/c it has a rtnexthop struct */
if (!skip_oif && rt->dst.dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
@@ -3615,8 +3686,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct net_device *dev;
int flags = 0;
- dev = __dev_get_by_index(net, iif);
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, iif);
if (!dev) {
+ rcu_read_unlock();
err = -ENODEV;
goto errout;
}
@@ -3628,15 +3702,19 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!fibmatch)
dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+ else
+ dst = ip6_route_lookup(net, &fl6, 0);
+
+ rcu_read_unlock();
} else {
fl6.flowi6_oif = oif;
if (!fibmatch)
dst = ip6_route_output(net, NULL, &fl6);
+ else
+ dst = ip6_route_lookup(net, &fl6, 0);
}
- if (fibmatch)
- dst = ip6_route_lookup(net, &fl6, 0);
rt = container_of(dst, struct rt6_info, dst);
if (rt->dst.error) {
@@ -3928,6 +4006,7 @@ static int __net_init ip6_route_net_init(struct net *net)
ip6_template_metrics, true);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ net->ipv6.fib6_has_custom_rules = false;
net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
sizeof(*net->ipv6.ip6_prohibit_entry),
GFP_KERNEL);
@@ -4103,9 +4182,10 @@ int __init ip6_route_init(void)
goto fib6_rules_init;
ret = -ENOBUFS;
- if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
- __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
- __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
+ if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
+ __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
+ __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED))
goto out_register_late_subsys;
ret = register_netdevice_notifier(&ip6_route_dev_notifier);
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 15fba55..c814077 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -40,7 +40,7 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
if (((srh->hdrlen + 1) << 3) != len)
return false;
- if (srh->segments_left != srh->first_segment)
+ if (srh->segments_left > srh->first_segment)
return false;
tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
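[Note: the check above is relaxed from requiring segments_left == first_segment to segments_left <= first_segment, since in-flight SRHs, including SL=0 packets decapsulated by the seg6local code added later in this diff, must now validate too. A userspace sketch of the length arithmetic, assuming the 8-byte fixed part of struct ipv6_sr_hdr and 16-byte IPv6 segments:]

#include <stdio.h>

int main(void)
{
	int first_segment = 2;			/* 3 segments: indices 0..2 */
	int len = 8 + (first_segment + 1) * 16;	/* total SRH length: 56 */
	int hdrlen = (len >> 3) - 1;		/* on-the-wire hdrlen: 6 */

	/* hdrlen counts 8-byte units beyond the first 8 bytes */
	printf("len=%d hdrlen=%d ok=%d\n", len, hdrlen,
	       ((hdrlen + 1) << 3) == len);
	return 0;
}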
@@ -456,6 +456,10 @@ int __init seg6_init(void)
err = seg6_iptunnel_init();
if (err)
goto out_unregister_pernet;
+
+ err = seg6_local_init();
+ if (err)
+ goto out_unregister_pernet;
#endif
#ifdef CONFIG_IPV6_SEG6_HMAC
@@ -471,6 +475,7 @@ out:
#ifdef CONFIG_IPV6_SEG6_HMAC
out_unregister_iptun:
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+ seg6_local_exit();
seg6_iptunnel_exit();
#endif
#endif
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index f950cb5..33fb35c 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -12,6 +12,7 @@
*/
#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -110,7 +111,7 @@ static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id)
struct seg6_hmac_algo *algo;
int i, alg_count;
- alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+ alg_count = ARRAY_SIZE(hmac_algos);
for (i = 0; i < alg_count; i++) {
algo = &hmac_algos[i];
if (algo->alg_id == alg_id)
@@ -360,7 +361,7 @@ static int seg6_hmac_init_algo(void)
struct shash_desc *shash;
int i, alg_count, cpu;
- alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+ alg_count = ARRAY_SIZE(hmac_algos);
for (i = 0; i < alg_count; i++) {
struct crypto_shash **p_tfm;
@@ -421,7 +422,7 @@ void seg6_hmac_exit(void)
struct seg6_hmac_algo *algo = NULL;
int i, alg_count, cpu;
- alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+ alg_count = ARRAY_SIZE(hmac_algos);
for (i = 0; i < alg_count; i++) {
algo = &hmac_algos[i];
for_each_possible_cpu(cpu) {
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 264d772..bd6cc68 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -91,7 +91,7 @@ static void set_tun_src(struct net *net, struct net_device *dev,
}
/* encapsulate a packet within an outer IPv6 header with a given SRH */
-static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
{
struct net *net = dev_net(skb_dst(skb)->dev);
struct ipv6hdr *hdr, *inner_hdr;
@@ -116,15 +116,22 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
* hlim will be decremented in ip6_forward() afterwards and
* decapsulation will overwrite inner hlim with outer hlim
*/
- ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
- ip6_flowlabel(inner_hdr));
- hdr->hop_limit = inner_hdr->hop_limit;
+
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
+ ip6_flowlabel(inner_hdr));
+ hdr->hop_limit = inner_hdr->hop_limit;
+ } else {
+ ip6_flow_hdr(hdr, 0, 0);
+ hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+ }
+
hdr->nexthdr = NEXTHDR_ROUTING;
isrh = (void *)hdr + sizeof(*hdr);
memcpy(isrh, osrh, hdrlen);
- isrh->nexthdr = NEXTHDR_IPV6;
+ isrh->nexthdr = proto;
hdr->daddr = isrh->segments[isrh->first_segment];
set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
@@ -141,10 +148,10 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
return 0;
}
+EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
/* insert an SRH within an IPv6 packet, just after the IPv6 header */
-#ifdef CONFIG_IPV6_SEG6_INLINE
-static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
{
struct ipv6hdr *hdr, *oldhdr;
struct ipv6_sr_hdr *isrh;
@@ -193,13 +200,13 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
return 0;
}
-#endif
+EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
static int seg6_do_srh(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct seg6_iptunnel_encap *tinfo;
- int err = 0;
+ int proto, err = 0;
tinfo = seg6_encap_lwtunnel(dst->lwtstate);
@@ -209,19 +216,47 @@ static int seg6_do_srh(struct sk_buff *skb)
}
switch (tinfo->mode) {
-#ifdef CONFIG_IPV6_SEG6_INLINE
case SEG6_IPTUN_MODE_INLINE:
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return -EINVAL;
+
err = seg6_do_srh_inline(skb, tinfo->srh);
+ if (err)
+ return err;
+
skb_reset_inner_headers(skb);
break;
-#endif
case SEG6_IPTUN_MODE_ENCAP:
- err = seg6_do_srh_encap(skb, tinfo->srh);
+ if (skb->protocol == htons(ETH_P_IPV6))
+ proto = IPPROTO_IPV6;
+ else if (skb->protocol == htons(ETH_P_IP))
+ proto = IPPROTO_IPIP;
+ else
+ return -EINVAL;
+
+ err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+ if (err)
+ return err;
+
+ skb->protocol = htons(ETH_P_IPV6);
break;
- }
+ case SEG6_IPTUN_MODE_L2ENCAP:
+ if (!skb_mac_header_was_set(skb))
+ return -EINVAL;
- if (err)
- return err;
+ if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
+ return -ENOMEM;
+
+ skb_mac_header_rebuild(skb);
+ skb_push(skb, skb->mac_len);
+
+ err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE);
+ if (err)
+ return err;
+
+ skb->protocol = htons(ETH_P_IPV6);
+ break;
+ }
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
@@ -336,6 +371,9 @@ static int seg6_build_state(struct nlattr *nla,
struct seg6_lwt *slwt;
int err;
+ if (family != AF_INET && family != AF_INET6)
+ return -EINVAL;
+
err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
seg6_iptunnel_policy, extack);
@@ -357,12 +395,15 @@ static int seg6_build_state(struct nlattr *nla,
return -EINVAL;
switch (tuninfo->mode) {
-#ifdef CONFIG_IPV6_SEG6_INLINE
case SEG6_IPTUN_MODE_INLINE:
+ if (family != AF_INET6)
+ return -EINVAL;
+
break;
-#endif
case SEG6_IPTUN_MODE_ENCAP:
break;
+ case SEG6_IPTUN_MODE_L2ENCAP:
+ break;
default:
return -EINVAL;
}
@@ -386,8 +427,11 @@ static int seg6_build_state(struct nlattr *nla,
memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
newts->type = LWTUNNEL_ENCAP_SEG6;
- newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
- LWTUNNEL_STATE_INPUT_REDIRECT;
+ newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
+
+ if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
+ newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+
newts->headroom = seg6_lwt_headroom(tuninfo);
*ts = newts;
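[Note: seg6_do_srh_encap() now takes the inner protocol explicitly, carried in the SRH nexthdr field so the decapsulating endpoint knows what follows the routing header: IPv6-in-IPv6, IPv4 (IPIP), or NEXTHDR_NONE for the new L2ENCAP mode that re-pushes the Ethernet header before encapsulating. A userspace sketch of the mapping, protocol numbers per IANA:]

#include <stdio.h>

enum { ETH_P_IP = 0x0800, ETH_P_IPV6 = 0x86DD };

static int inner_proto(int skb_protocol, int l2_mode)
{
	if (l2_mode)
		return 59;			/* NEXTHDR_NONE */
	if (skb_protocol == ETH_P_IPV6)
		return 41;			/* IPPROTO_IPV6 */
	if (skb_protocol == ETH_P_IP)
		return 4;			/* IPPROTO_IPIP */
	return -1;				/* -EINVAL in the kernel */
}

int main(void)
{
	printf("%d %d %d\n", inner_proto(ETH_P_IPV6, 0),
	       inner_proto(ETH_P_IP, 0), inner_proto(0, 1));
	return 0;
}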
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
new file mode 100644
index 0000000..7ff54db
--- /dev/null
+++ b/net/ipv6/seg6_local.c
@@ -0,0 +1,938 @@
+/*
+ * SR-IPv6 implementation
+ *
+ * Author:
+ * David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/lwtunnel.h>
+#include <net/netevent.h>
+#include <net/netns/generic.h>
+#include <net/ip6_fib.h>
+#include <net/route.h>
+#include <net/seg6.h>
+#include <linux/seg6.h>
+#include <linux/seg6_local.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/dst_cache.h>
+#ifdef CONFIG_IPV6_SEG6_HMAC
+#include <net/seg6_hmac.h>
+#endif
+#include <linux/etherdevice.h>
+
+struct seg6_local_lwt;
+
+struct seg6_action_desc {
+ int action;
+ unsigned long attrs;
+ int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
+ int static_headroom;
+};
+
+struct seg6_local_lwt {
+ int action;
+ struct ipv6_sr_hdr *srh;
+ int table;
+ struct in_addr nh4;
+ struct in6_addr nh6;
+ int iif;
+ int oif;
+
+ int headroom;
+ struct seg6_action_desc *desc;
+};
+
+static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
+{
+ return (struct seg6_local_lwt *)lwt->data;
+}
+
+static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
+{
+ struct ipv6_sr_hdr *srh;
+ int len, srhoff = 0;
+
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ return NULL;
+
+ if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
+ return NULL;
+
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
+ /* make sure it's a Segment Routing header (Routing Type 4) */
+ if (srh->type != IPV6_SRCRT_TYPE_4)
+ return NULL;
+
+ len = (srh->hdrlen + 1) << 3;
+
+ if (!pskb_may_pull(skb, srhoff + len))
+ return NULL;
+
+ if (!seg6_validate_srh(srh, len))
+ return NULL;
+
+ return srh;
+}
+
+static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
+{
+ struct ipv6_sr_hdr *srh;
+
+ srh = get_srh(skb);
+ if (!srh)
+ return NULL;
+
+ if (srh->segments_left == 0)
+ return NULL;
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (!seg6_hmac_validate_skb(skb))
+ return NULL;
+#endif
+
+ return srh;
+}
+
+static bool decap_and_validate(struct sk_buff *skb, int proto)
+{
+ struct ipv6_sr_hdr *srh;
+ unsigned int off = 0;
+
+ srh = get_srh(skb);
+ if (srh && srh->segments_left > 0)
+ return false;
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (srh && !seg6_hmac_validate_skb(skb))
+ return false;
+#endif
+
+ if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
+ return false;
+
+ if (!pskb_pull(skb, off))
+ return false;
+
+ skb_postpull_rcsum(skb, skb_network_header(skb), off);
+
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb->encapsulation = 0;
+
+ return true;
+}
+
+static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
+{
+ struct in6_addr *addr;
+
+ srh->segments_left--;
+ addr = srh->segments + srh->segments_left;
+ *daddr = *addr;
+}
+
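+/* Perform a FIB lookup for the packet and attach the result to the
+ * skb. When non-NULL, nhaddr overrides the packet's destination
+ * address as the lookup key; tbl_id selects a specific routing table
+ * (0 means the regular input path). If the lookup fails or resolves
+ * to a loopback device without error, the blackhole route is
+ * installed so that dst_input() drops the packet.
+ */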
+static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+ u32 tbl_id)
+{
+ struct net *net = dev_net(skb->dev);
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct dst_entry *dst = NULL;
+ struct rt6_info *rt;
+ struct flowi6 fl6;
+
+ fl6.flowi6_iif = skb->dev->ifindex;
+ fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
+ fl6.saddr = hdr->saddr;
+ fl6.flowlabel = ip6_flowinfo(hdr);
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = hdr->nexthdr;
+
+ if (nhaddr)
+ fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
+
+ if (!tbl_id) {
+ dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+ } else {
+ struct fib6_table *table;
+
+ table = fib6_get_table(net, tbl_id);
+ if (!table)
+ goto out;
+
+ rt = ip6_pol_route(net, table, 0, &fl6, flags);
+ dst = &rt->dst;
+ }
+
+ if (dst && dst->dev->flags & IFF_LOOPBACK && !dst->error) {
+ dst_release(dst);
+ dst = NULL;
+ }
+
+out:
+ if (!dst) {
+ rt = net->ipv6.ip6_blk_hole_entry;
+ dst = &rt->dst;
+ dst_hold(dst);
+ }
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+}
+
+/* regular endpoint function */
+static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ lookup_nexthop(skb, NULL, 0);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* regular endpoint, and forward to specified nexthop */
+static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ lookup_nexthop(skb, &slwt->nh6, 0);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ lookup_nexthop(skb, NULL, slwt->table);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* decapsulate and forward inner L2 frame on specified interface */
+static int input_action_end_dx2(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct net *net = dev_net(skb->dev);
+ struct net_device *odev;
+ struct ethhdr *eth;
+
+ if (!decap_and_validate(skb, NEXTHDR_NONE))
+ goto drop;
+
+ if (!pskb_may_pull(skb, ETH_HLEN))
+ goto drop;
+
+ skb_reset_mac_header(skb);
+ eth = (struct ethhdr *)skb->data;
+
+ /* To determine the frame's protocol, we assume it is 802.3. This avoids
+ * a call to eth_type_trans(), which is not really relevant for our
+ * use case.
+ */
+ if (!eth_proto_is_802_3(eth->h_proto))
+ goto drop;
+
+ odev = dev_get_by_index_rcu(net, slwt->oif);
+ if (!odev)
+ goto drop;
+
+ /* As we accept Ethernet frames, make sure the egress device is of
+ * the correct type.
+ */
+ if (odev->type != ARPHRD_ETHER)
+ goto drop;
+
+ if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
+ goto drop;
+
+ skb_orphan(skb);
+
+ if (skb_warn_if_lro(skb))
+ goto drop;
+
+ skb_forward_csum(skb);
+
+ if (skb->len - ETH_HLEN > odev->mtu)
+ goto drop;
+
+ skb->dev = odev;
+ skb->protocol = eth->h_proto;
+
+ return dev_queue_xmit(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* decapsulate and forward to specified nexthop */
+static int input_action_end_dx6(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct in6_addr *nhaddr = NULL;
+
+ /* this function accepts IPv6 encapsulated packets, with either
+ * an SRH with SL=0, or no SRH.
+ */
+
+ if (!decap_and_validate(skb, IPPROTO_IPV6))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto drop;
+
+	/* The inner packet is not associated with any local interface,
+	 * so we do not call netif_rx().
+	 *
+	 * If slwt->nh6 is set to ::, then look up the nexthop for the
+	 * inner packet's DA. Otherwise, use the specified nexthop.
+ */
+
+ if (!ipv6_addr_any(&slwt->nh6))
+ nhaddr = &slwt->nh6;
+
+ lookup_nexthop(skb, nhaddr, 0);
+
+ return dst_input(skb);
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int input_action_end_dx4(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct iphdr *iph;
+ __be32 nhaddr;
+ int err;
+
+ if (!decap_and_validate(skb, IPPROTO_IPIP))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto drop;
+
+ skb->protocol = htons(ETH_P_IP);
+
+ iph = ip_hdr(skb);
+
+ nhaddr = slwt->nh4.s_addr ?: iph->daddr;
+
+ skb_dst_drop(skb);
+
+ err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
+ if (err)
+ goto drop;
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int input_action_end_dt6(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ if (!decap_and_validate(skb, IPPROTO_IPV6))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto drop;
+
+ lookup_nexthop(skb, NULL, slwt->table);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* push an SRH on top of the current one */
+static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+ int err = -EINVAL;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ err = seg6_do_srh_inline(skb, slwt->srh);
+ if (err)
+ goto drop;
+
+ ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+ lookup_nexthop(skb, NULL, 0);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
+}
+
+/* encapsulate within an outer IPv6 header and a specified SRH */
+static int input_action_end_b6_encap(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+ int err = -EINVAL;
+
+ srh = get_and_validate_srh(skb);
+ if (!srh)
+ goto drop;
+
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+
+ err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
+ if (err)
+ goto drop;
+
+ ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+ lookup_nexthop(skb, NULL, 0);
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
+}
+
+static struct seg6_action_desc seg6_action_table[] = {
+ {
+ .action = SEG6_LOCAL_ACTION_END,
+ .attrs = 0,
+ .input = input_action_end,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_X,
+ .attrs = (1 << SEG6_LOCAL_NH6),
+ .input = input_action_end_x,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_T,
+ .attrs = (1 << SEG6_LOCAL_TABLE),
+ .input = input_action_end_t,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_DX2,
+ .attrs = (1 << SEG6_LOCAL_OIF),
+ .input = input_action_end_dx2,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_DX6,
+ .attrs = (1 << SEG6_LOCAL_NH6),
+ .input = input_action_end_dx6,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_DX4,
+ .attrs = (1 << SEG6_LOCAL_NH4),
+ .input = input_action_end_dx4,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_DT6,
+ .attrs = (1 << SEG6_LOCAL_TABLE),
+ .input = input_action_end_dt6,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_B6,
+ .attrs = (1 << SEG6_LOCAL_SRH),
+ .input = input_action_end_b6,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_B6_ENCAP,
+ .attrs = (1 << SEG6_LOCAL_SRH),
+ .input = input_action_end_b6_encap,
+ .static_headroom = sizeof(struct ipv6hdr),
+ }
+};
+
+static struct seg6_action_desc *__get_action_desc(int action)
+{
+ struct seg6_action_desc *desc;
+ int i, count;
+
+ count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc);
+ for (i = 0; i < count; i++) {
+ desc = &seg6_action_table[i];
+ if (desc->action == action)
+ return desc;
+ }
+
+ return NULL;
+}
+
+static int seg6_local_input(struct sk_buff *skb)
+{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct seg6_action_desc *desc;
+ struct seg6_local_lwt *slwt;
+
+ if (skb->protocol != htons(ETH_P_IPV6)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
+ desc = slwt->desc;
+
+ return desc->input(skb, slwt);
+}
+
+static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
+ [SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
+ [SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
+ [SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
+ [SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
+ .len = sizeof(struct in_addr) },
+ [SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
+ .len = sizeof(struct in6_addr) },
+ [SEG6_LOCAL_IIF] = { .type = NLA_U32 },
+ [SEG6_LOCAL_OIF] = { .type = NLA_U32 },
+};
+
+static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+ int len;
+
+ srh = nla_data(attrs[SEG6_LOCAL_SRH]);
+ len = nla_len(attrs[SEG6_LOCAL_SRH]);
+
+ /* SRH must contain at least one segment */
+ if (len < sizeof(*srh) + sizeof(struct in6_addr))
+ return -EINVAL;
+
+ if (!seg6_validate_srh(srh, len))
+ return -EINVAL;
+
+ slwt->srh = kmalloc(len, GFP_KERNEL);
+ if (!slwt->srh)
+ return -ENOMEM;
+
+ memcpy(slwt->srh, srh, len);
+
+ slwt->headroom += len;
+
+ return 0;
+}
+
+static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
+ struct nlattr *nla;
+ int len;
+
+ srh = slwt->srh;
+ len = (srh->hdrlen + 1) << 3;
+
+ nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
+ if (!nla)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), srh, len);
+
+ return 0;
+}
+
+static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ int len = (a->srh->hdrlen + 1) << 3;
+
+ if (len != ((b->srh->hdrlen + 1) << 3))
+ return 1;
+
+ return memcmp(a->srh, b->srh, len);
+}
+
+static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
+
+ return 0;
+}
+
+static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ if (a->table != b->table)
+ return 1;
+
+ return 0;
+}
+
+static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
+ sizeof(struct in_addr));
+
+ return 0;
+}
+
+static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct nlattr *nla;
+
+ nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
+ if (!nla)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));
+
+ return 0;
+}
+
+static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
+}
+
+static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
+ sizeof(struct in6_addr));
+
+ return 0;
+}
+
+static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct nlattr *nla;
+
+ nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
+ if (!nla)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
+
+ return 0;
+}
+
+static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
+}
+
+static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
+
+ return 0;
+}
+
+static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ if (a->iif != b->iif)
+ return 1;
+
+ return 0;
+}
+
+static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
+
+ return 0;
+}
+
+static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ if (a->oif != b->oif)
+ return 1;
+
+ return 0;
+}
+
+struct seg6_action_param {
+ int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
+ int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
+ int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
+};
+
+static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
+ [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh,
+ .put = put_nla_srh,
+ .cmp = cmp_nla_srh },
+
+ [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table,
+ .put = put_nla_table,
+ .cmp = cmp_nla_table },
+
+ [SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4,
+ .put = put_nla_nh4,
+ .cmp = cmp_nla_nh4 },
+
+ [SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6,
+ .put = put_nla_nh6,
+ .cmp = cmp_nla_nh6 },
+
+ [SEG6_LOCAL_IIF] = { .parse = parse_nla_iif,
+ .put = put_nla_iif,
+ .cmp = cmp_nla_iif },
+
+ [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
+ .put = put_nla_oif,
+ .cmp = cmp_nla_oif },
+};
+
+static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
+{
+ struct seg6_action_param *param;
+ struct seg6_action_desc *desc;
+ int i, err;
+
+ desc = __get_action_desc(slwt->action);
+ if (!desc)
+ return -EINVAL;
+
+ if (!desc->input)
+ return -EOPNOTSUPP;
+
+ slwt->desc = desc;
+ slwt->headroom += desc->static_headroom;
+
+ for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
+ if (desc->attrs & (1 << i)) {
+ if (!attrs[i])
+ return -EINVAL;
+
+ param = &seg6_action_params[i];
+
+ err = param->parse(attrs, slwt);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int seg6_local_build_state(struct nlattr *nla, unsigned int family,
+ const void *cfg, struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[SEG6_LOCAL_MAX + 1];
+ struct lwtunnel_state *newts;
+ struct seg6_local_lwt *slwt;
+ int err;
+
+ if (family != AF_INET6)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
+ extack);
+
+ if (err < 0)
+ return err;
+
+ if (!tb[SEG6_LOCAL_ACTION])
+ return -EINVAL;
+
+ newts = lwtunnel_state_alloc(sizeof(*slwt));
+ if (!newts)
+ return -ENOMEM;
+
+ slwt = seg6_local_lwtunnel(newts);
+ slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
+
+ err = parse_nla_action(tb, slwt);
+ if (err < 0)
+ goto out_free;
+
+ newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
+ newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
+ newts->headroom = slwt->headroom;
+
+ *ts = newts;
+
+ return 0;
+
+out_free:
+ kfree(slwt->srh);
+ kfree(newts);
+ return err;
+}
+
+static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
+{
+ struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
+
+ kfree(slwt->srh);
+}
+
+static int seg6_local_fill_encap(struct sk_buff *skb,
+ struct lwtunnel_state *lwt)
+{
+ struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
+ struct seg6_action_param *param;
+ int i, err;
+
+ if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
+ return -EMSGSIZE;
+
+ for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
+ if (slwt->desc->attrs & (1 << i)) {
+ param = &seg6_action_params[i];
+ err = param->put(skb, slwt);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
+{
+ struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
+ unsigned long attrs;
+ int nlsize;
+
+ nlsize = nla_total_size(4); /* action */
+
+ attrs = slwt->desc->attrs;
+
+ if (attrs & (1 << SEG6_LOCAL_SRH))
+ nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
+
+ if (attrs & (1 << SEG6_LOCAL_TABLE))
+ nlsize += nla_total_size(4);
+
+ if (attrs & (1 << SEG6_LOCAL_NH4))
+ nlsize += nla_total_size(4);
+
+ if (attrs & (1 << SEG6_LOCAL_NH6))
+ nlsize += nla_total_size(16);
+
+ if (attrs & (1 << SEG6_LOCAL_IIF))
+ nlsize += nla_total_size(4);
+
+ if (attrs & (1 << SEG6_LOCAL_OIF))
+ nlsize += nla_total_size(4);
+
+ return nlsize;
+}
+
+static int seg6_local_cmp_encap(struct lwtunnel_state *a,
+ struct lwtunnel_state *b)
+{
+ struct seg6_local_lwt *slwt_a, *slwt_b;
+ struct seg6_action_param *param;
+ int i;
+
+ slwt_a = seg6_local_lwtunnel(a);
+ slwt_b = seg6_local_lwtunnel(b);
+
+ if (slwt_a->action != slwt_b->action)
+ return 1;
+
+ if (slwt_a->desc->attrs != slwt_b->desc->attrs)
+ return 1;
+
+ for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
+ if (slwt_a->desc->attrs & (1 << i)) {
+ param = &seg6_action_params[i];
+ if (param->cmp(slwt_a, slwt_b))
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static const struct lwtunnel_encap_ops seg6_local_ops = {
+ .build_state = seg6_local_build_state,
+ .destroy_state = seg6_local_destroy_state,
+ .input = seg6_local_input,
+ .fill_encap = seg6_local_fill_encap,
+ .get_encap_size = seg6_local_get_encap_size,
+ .cmp_encap = seg6_local_cmp_encap,
+ .owner = THIS_MODULE,
+};
+
+int __init seg6_local_init(void)
+{
+ return lwtunnel_encap_add_ops(&seg6_local_ops,
+ LWTUNNEL_ENCAP_SEG6_LOCAL);
+}
+
+void seg6_local_exit(void)
+{
+ lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
+}
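[Note: the new seg6_local encap registers with input-redirect semantics only, so the endpoint behaviors above are attached to local routes toward the SID rather than to an egress path. As a usage sketch (the keywords mirror the SEG6_LOCAL_* attributes and the matching iproute2 support; exact syntax depends on the iproute2 version, so treat this line as an assumption rather than a reference):

    ip -6 route add fc00::6/128 encap seg6local action End.DX6 nh6 2001:db8::1 dev eth0]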
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 69c50e7..6fbf8ae 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -90,6 +90,13 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "flowlabel_reflect",
+ .data = &init_net.ipv6.sysctl.flowlabel_reflect,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ }
};
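[Note: the per-netns sysctl registration duplicates this template and rebinds each entry's .data pointer by index, so the new flowlabel_reflect entry needs both the template slot above and the matching ipv6_table[9] assignment below. A minimal userspace sketch of that clone-and-rebind pattern; names are illustrative, not the kernel API:]

#include <stdio.h>
#include <string.h>

struct ctl { const char *name; int *data; };
struct ns { int flowlabel_reflect; };

int main(void)
{
	static int init_val;
	struct ctl template[] = { { "flowlabel_reflect", &init_val } };
	struct ns net = { 0 };
	struct ctl t[1];

	memcpy(t, template, sizeof(t));		/* kmemdup() analogue */
	t[0].data = &net.flowlabel_reflect;	/* rebind entry by index */
	*t[0].data = 1;
	printf("%s=%d\n", t[0].name, net.flowlabel_reflect);
	return 0;
}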
@@ -149,6 +156,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
+ ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2062101..38f76d8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -350,7 +350,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __inet6_lookup_established(net, &tcp_hashinfo,
&hdr->daddr, th->dest,
&hdr->saddr, ntohs(th->source),
- skb->dev->ifindex);
+ skb->dev->ifindex, inet6_sdif(skb));
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
@@ -918,7 +918,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
&tcp_hashinfo, NULL, 0,
&ipv6h->saddr,
th->source, &ipv6h->daddr,
- ntohs(th->source), tcp_v6_iif(skb));
+ ntohs(th->source), tcp_v6_iif(skb),
+ tcp_v6_sdif(skb));
if (!sk1)
goto out;
@@ -1296,7 +1297,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
}
}
- tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
+ tcp_rcv_established(sk, skb, tcp_hdr(skb));
if (opt_skb)
goto ipv6_pktoptions;
return 0;
@@ -1393,10 +1394,13 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
+ TCP_SKB_CB(skb)->has_rxtstamp =
+ skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
static int tcp_v6_rcv(struct sk_buff *skb)
{
+ int sdif = inet6_sdif(skb);
const struct tcphdr *th;
const struct ipv6hdr *hdr;
bool refcounted;
@@ -1430,7 +1434,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
lookup:
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
- th->source, th->dest, inet6_iif(skb),
+ th->source, th->dest, inet6_iif(skb), sdif,
&refcounted);
if (!sk)
goto no_tcp_socket;
@@ -1505,8 +1509,7 @@ process:
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
- if (!tcp_prequeue(sk, skb))
- ret = tcp_v6_do_rcv(sk, skb);
+ ret = tcp_v6_do_rcv(sk, skb);
} else if (tcp_add_backlog(sk, skb)) {
goto discard_and_relse;
}
@@ -1564,7 +1567,8 @@ do_time_wait:
skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
- ntohs(th->dest), tcp_v6_iif(skb));
+ ntohs(th->dest), tcp_v6_iif(skb),
+ sdif);
if (sk2) {
struct inet_timewait_sock *tw = inet_twsk(sk);
inet_twsk_deschedule_put(tw);
@@ -1611,7 +1615,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
&hdr->saddr, th->source,
&hdr->daddr, ntohs(th->dest),
- inet6_iif(skb));
+ inet6_iif(skb), inet6_sdif(skb));
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
@@ -1945,6 +1949,9 @@ struct proto tcpv6_prot = {
.diag_destroy = tcp_abort,
};
+/* thinking of making this const? Don't.
+ * early_demux can change based on sysctl.
+ */
static struct inet6_protocol tcpv6_protocol = {
.early_demux = tcp_v6_early_demux,
.early_demux_handler = tcp_v6_early_demux,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 56030d4..e2ecfb1 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -129,7 +129,7 @@ static void udp_v6_rehash(struct sock *sk)
static int compute_score(struct sock *sk, struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned short hnum,
- int dif, bool exact_dif)
+ int dif, int sdif, bool exact_dif)
{
int score;
struct inet_sock *inet;
@@ -161,9 +161,13 @@ static int compute_score(struct sock *sk, struct net *net,
}
if (sk->sk_bound_dev_if || exact_dif) {
- if (sk->sk_bound_dev_if != dif)
+ bool dev_match = (sk->sk_bound_dev_if == dif ||
+ sk->sk_bound_dev_if == sdif);
+
+ if (exact_dif && !dev_match)
return -1;
- score++;
+ if (sk->sk_bound_dev_if && dev_match)
+ score++;
}
if (sk->sk_incoming_cpu == raw_smp_processor_id())
@@ -175,9 +179,9 @@ static int compute_score(struct sock *sk, struct net *net,
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, unsigned int hnum, int dif,
- bool exact_dif, struct udp_hslot *hslot2,
- struct sk_buff *skb)
+ const struct in6_addr *daddr, unsigned int hnum,
+ int dif, int sdif, bool exact_dif,
+ struct udp_hslot *hslot2, struct sk_buff *skb)
{
struct sock *sk, *result;
int score, badness, matches = 0, reuseport = 0;
@@ -187,7 +191,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
badness = -1;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, exact_dif);
+ daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -214,10 +218,10 @@ static struct sock *udp6_lib_lookup2(struct net *net,
/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
- const struct in6_addr *saddr, __be16 sport,
- const struct in6_addr *daddr, __be16 dport,
- int dif, struct udp_table *udptable,
- struct sk_buff *skb)
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr, __be16 dport,
+ int dif, int sdif, struct udp_table *udptable,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
unsigned short hnum = ntohs(dport);
@@ -235,7 +239,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
goto begin;
result = udp6_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, exact_dif,
+ daddr, hnum, dif, sdif, exact_dif,
hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
@@ -250,7 +254,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
goto begin;
result = udp6_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif,
+ daddr, hnum, dif, sdif,
exact_dif, hslot2,
skb);
}
@@ -261,7 +265,7 @@ begin:
badness = -1;
sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
- exact_dif);
+ sdif, exact_dif);
if (score > badness) {
reuseport = sk->sk_reuseport;
if (reuseport) {
@@ -294,7 +298,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
- udptable, skb);
+ inet6_sdif(skb), udptable, skb);
}
struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
@@ -304,7 +308,7 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
- &udp_table, skb);
+ inet6_sdif(skb), &udp_table, skb);
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
@@ -320,7 +324,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
struct sock *sk;
sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
- dif, &udp_table, NULL);
+ dif, 0, &udp_table, NULL);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
return sk;
@@ -502,7 +506,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct net *net = dev_net(skb->dev);
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
- inet6_iif(skb), udptable, skb);
+ inet6_iif(skb), 0, udptable, skb);
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
@@ -902,7 +906,7 @@ discard:
static struct sock *__udp6_lib_demux_lookup(struct net *net,
__be16 loc_port, const struct in6_addr *loc_addr,
__be16 rmt_port, const struct in6_addr *rmt_addr,
- int dif)
+ int dif, int sdif)
{
unsigned short hnum = ntohs(loc_port);
unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
@@ -913,7 +917,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (sk->sk_state == TCP_ESTABLISHED &&
- INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+ INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif))
return sk;
/* Only check first socket in chain */
break;
@@ -928,6 +932,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
struct sock *sk;
struct dst_entry *dst;
int dif = skb->dev->ifindex;
+ int sdif = inet6_sdif(skb);
if (!pskb_may_pull(skb, skb_transport_offset(skb) +
sizeof(struct udphdr)))
@@ -939,7 +944,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
sk = __udp6_lib_demux_lookup(net, uh->dest,
&ipv6_hdr(skb)->daddr,
uh->source, &ipv6_hdr(skb)->saddr,
- dif);
+ dif, sdif);
else
return;
@@ -1475,6 +1480,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
}
#endif
+/* thinking of making this const? Don't.
+ * early_demux can change based on sysctl.
+ */
static struct inet6_protocol udpv6_protocol = {
.early_demux = udp_v6_early_demux,
.early_demux_handler = udp_v6_early_demux,
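[Note: compute_score() above changes its bound-device test: with exact_dif (strict device matching, as required in some VRF configurations) a socket on the wrong device is disqualified outright, while a bound socket matching either dif or sdif outranks a wildcard one. A userspace sketch of the rule:]

#include <stdio.h>
#include <stdbool.h>

static int device_score(int bound_dev_if, bool exact_dif, int dif, int sdif)
{
	bool dev_match = (bound_dev_if == dif || bound_dev_if == sdif);

	if (!bound_dev_if && !exact_dif)
		return 0;			/* wildcard socket */
	if (exact_dif && !dev_match)
		return -1;			/* disqualified */
	return (bound_dev_if && dev_match) ? 1 : 0;
}

int main(void)
{
	printf("%d\n", device_score(4, false, 10, 4));	/* 1 */
	printf("%d\n", device_score(0, true, 10, 4));	/* -1 */
	return 0;
}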
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index e7d378c..455fd4e 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -17,109 +17,15 @@
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
- netdev_features_t features)
+static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
- unsigned int mss;
- unsigned int unfrag_ip6hlen, unfrag_len;
- struct frag_hdr *fptr;
- u8 *packet_start, *prevhdr;
- u8 nexthdr;
- u8 frag_hdr_sz = sizeof(struct frag_hdr);
- __wsum csum;
- int tnl_hlen;
- int err;
-
- mss = skb_shinfo(skb)->gso_size;
- if (unlikely(skb->len <= mss))
- goto out;
-
- if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
- /* Packet is from an untrusted source, reset gso_segs. */
-
- skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
-
- /* Set the IPv6 fragment id if not set yet */
- if (!skb_shinfo(skb)->ip6_frag_id)
- ipv6_proxy_select_ident(dev_net(skb->dev), skb);
-
- segs = NULL;
- goto out;
- }
if (skb->encapsulation && skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
segs = skb_udp_tunnel_segment(skb, features, true);
- else {
- const struct ipv6hdr *ipv6h;
- struct udphdr *uh;
-
- if (!pskb_may_pull(skb, sizeof(struct udphdr)))
- goto out;
-
- /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
- * do checksum of UDP packets sent as multiple IP fragments.
- */
-
- uh = udp_hdr(skb);
- ipv6h = ipv6_hdr(skb);
-
- uh->check = 0;
- csum = skb_checksum(skb, 0, skb->len, 0);
- uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
- &ipv6h->daddr, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
-
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-
- /* If there is no outer header we can fake a checksum offload
- * due to the fact that we have already done the checksum in
- * software prior to segmenting the frame.
- */
- if (!skb->encap_hdr_csum)
- features |= NETIF_F_HW_CSUM;
-
- /* Check if there is enough headroom to insert fragment header. */
- tnl_hlen = skb_tnl_header_len(skb);
- if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
- if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
- goto out;
- }
-
- /* Find the unfragmentable header and shift it left by frag_hdr_sz
- * bytes to insert fragment header.
- */
- err = ip6_find_1stfragopt(skb, &prevhdr);
- if (err < 0)
- return ERR_PTR(err);
- unfrag_ip6hlen = err;
- nexthdr = *prevhdr;
- *prevhdr = NEXTHDR_FRAGMENT;
- unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
- unfrag_ip6hlen + tnl_hlen;
- packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
- memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
-
- SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
- skb->mac_header -= frag_hdr_sz;
- skb->network_header -= frag_hdr_sz;
-
- fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
- fptr->nexthdr = nexthdr;
- fptr->reserved = 0;
- if (!skb_shinfo(skb)->ip6_frag_id)
- ipv6_proxy_select_ident(dev_net(skb->dev), skb);
- fptr->identification = skb_shinfo(skb)->ip6_frag_id;
-
- /* Fragment the skb. ipv6 header and the remaining fields of the
- * fragment header are updated in ipv6_gso_segment()
- */
- segs = skb_segment(skb, features);
- }
-out:
return segs;
}
@@ -169,7 +75,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
static const struct net_offload udpv6_offload = {
.callbacks = {
- .gso_segment = udp6_ufo_fragment,
+ .gso_segment = udp6_tunnel_segment,
.gro_receive = udp6_gro_receive,
.gro_complete = udp6_gro_complete,
},
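
With software UFO deleted, udp6_tunnel_segment() keeps only the encapsulation branch of the old handler. A condensed sketch of the remaining decision, with the skb fields reduced to plain parameters (names here are illustrative, not the kernel's):

#include <stdbool.h>

#define GSO_UDP_TUNNEL		0x1
#define GSO_UDP_TUNNEL_CSUM	0x2

/* Only UDP tunnel GSO types are still segmented; plain UDP
 * fragmentation offload is gone entirely.
 */
static bool udp6_can_segment(bool encapsulation, unsigned int gso_type)
{
	return encapsulation &&
	       (gso_type & (GSO_UDP_TUNNEL | GSO_UDP_TUNNEL_CSUM));
}
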
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 3ef5d91..f95943a 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -34,6 +34,7 @@ EXPORT_SYMBOL(xfrm6_rcv_spi);
int xfrm6_transport_finish(struct sk_buff *skb, int async)
{
struct xfrm_offload *xo = xfrm_offload(skb);
+ int nhlen = skb->data - skb_network_header(skb);
skb_network_header(skb)[IP6CB(skb)->nhoff] =
XFRM_MODE_SKB_CB(skb)->protocol;
@@ -43,8 +44,9 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
return 1;
#endif
- __skb_push(skb, skb->data - skb_network_header(skb));
+ __skb_push(skb, nhlen);
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ skb_postpush_rcsum(skb, skb_network_header(skb), nhlen);
if (xo && (xo->flags & XFRM_GRO)) {
skb_mac_header_rebuild(skb);
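
The xfrm6_input.c change matters when skb->ip_summed is CHECKSUM_COMPLETE: once nhlen bytes of header are pushed back in front of skb->data, the running checksum no longer covers them unless it is extended, which is what skb_postpush_rcsum() does. A standalone illustration of the underlying arithmetic, a 16-bit one's-complement accumulator rather than the kernel helper:

#include <stddef.h>
#include <stdint.h>

static uint32_t csum_accumulate(uint32_t sum, const uint8_t *p, size_t len)
{
	while (len > 1) {
		sum += ((uint32_t)p[0] << 8) | p[1];
		p += 2;
		len -= 2;
	}
	if (len)			/* trailing odd byte */
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)		/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

After __skb_push(skb, nhlen), the newly exposed nhlen bytes must be fed through the same accumulation, or later verification will see a stale sum.
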
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 79651bc..11d1314 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -27,7 +27,8 @@
static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
- const xfrm_address_t *daddr)
+ const xfrm_address_t *daddr,
+ u32 mark)
{
struct flowi6 fl6;
struct dst_entry *dst;
@@ -36,6 +37,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
+ fl6.flowi6_mark = mark;
memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
if (saddr)
memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
@@ -52,12 +54,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
}
static int xfrm6_get_saddr(struct net *net, int oif,
- xfrm_address_t *saddr, xfrm_address_t *daddr)
+ xfrm_address_t *saddr, xfrm_address_t *daddr,
+ u32 mark)
{
struct dst_entry *dst;
struct net_device *dev;
- dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr);
+ dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark);
if (IS_ERR(dst))
return -EHOSTUNREACH;
@@ -214,14 +217,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
}
}
-static inline int xfrm6_garbage_collect(struct dst_ops *ops)
-{
- struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
-
- xfrm_garbage_collect_deferred(net);
- return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
-}
-
static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu)
{
@@ -279,14 +274,13 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static struct dst_ops xfrm6_dst_ops_template = {
.family = AF_INET6,
- .gc = xfrm6_garbage_collect,
.update_pmtu = xfrm6_update_pmtu,
.redirect = xfrm6_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm6_dst_destroy,
.ifdown = xfrm6_dst_ifdown,
.local_out = __ip6_local_out,
- .gc_thresh = INT_MAX,
+ .gc_thresh = 32768,
};
static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index c343ac6..c748e8a6 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -155,8 +155,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
seq_printf(seq,
" psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ",
psock->index,
- psock->strp.stats.rx_msgs,
- psock->strp.stats.rx_bytes,
+ psock->strp.stats.msgs,
+ psock->strp.stats.bytes,
psock->stats.tx_msgs,
psock->stats.tx_bytes,
psock->sk->sk_receive_queue.qlen,
@@ -170,22 +170,22 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
if (psock->tx_stopped)
seq_puts(seq, "TxStop ");
- if (psock->strp.rx_stopped)
+ if (psock->strp.stopped)
seq_puts(seq, "RxStop ");
if (psock->tx_kcm)
seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
- if (!psock->strp.rx_paused && !psock->ready_rx_msg) {
+ if (!psock->strp.paused && !psock->ready_rx_msg) {
if (psock->sk->sk_receive_queue.qlen) {
- if (psock->strp.rx_need_bytes)
+ if (psock->strp.need_bytes)
seq_printf(seq, "RxWait=%u ",
- psock->strp.rx_need_bytes);
+ psock->strp.need_bytes);
else
seq_printf(seq, "RxWait ");
}
} else {
- if (psock->strp.rx_paused)
+ if (psock->strp.paused)
seq_puts(seq, "RxPause ");
if (psock->ready_rx_msg)
@@ -371,20 +371,20 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,
"%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
"",
- strp_stats.rx_msgs,
- strp_stats.rx_bytes,
+ strp_stats.msgs,
+ strp_stats.bytes,
psock_stats.tx_msgs,
psock_stats.tx_bytes,
psock_stats.reserved,
psock_stats.unreserved,
- strp_stats.rx_aborts,
- strp_stats.rx_interrupted,
- strp_stats.rx_unrecov_intr,
- strp_stats.rx_mem_fail,
- strp_stats.rx_need_more_hdr,
- strp_stats.rx_bad_hdr_len,
- strp_stats.rx_msg_too_big,
- strp_stats.rx_msg_timeouts,
+ strp_stats.aborts,
+ strp_stats.interrupted,
+ strp_stats.unrecov_intr,
+ strp_stats.mem_fail,
+ strp_stats.need_more_hdr,
+ strp_stats.bad_hdr_len,
+ strp_stats.msg_too_big,
+ strp_stats.msg_timeouts,
psock_stats.tx_aborts);
return 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 4abf628..af4e76a 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -96,12 +96,12 @@ static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
struct kcm_psock *psock)
{
STRP_STATS_ADD(mux->stats.rx_bytes,
- psock->strp.stats.rx_bytes -
+ psock->strp.stats.bytes -
psock->saved_rx_bytes);
mux->stats.rx_msgs +=
- psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
- psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
- psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
+ psock->strp.stats.msgs - psock->saved_rx_msgs;
+ psock->saved_rx_msgs = psock->strp.stats.msgs;
+ psock->saved_rx_bytes = psock->strp.stats.bytes;
}
static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
@@ -1118,7 +1118,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
struct kcm_sock *kcm = kcm_sk(sk);
int err = 0;
long timeo;
- struct strp_rx_msg *rxm;
+ struct strp_msg *stm;
int copied = 0;
struct sk_buff *skb;
@@ -1132,26 +1132,26 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
/* Okay, have a message on the receive queue */
- rxm = strp_rx_msg(skb);
+ stm = strp_msg(skb);
- if (len > rxm->full_len)
- len = rxm->full_len;
+ if (len > stm->full_len)
+ len = stm->full_len;
- err = skb_copy_datagram_msg(skb, rxm->offset, msg, len);
+ err = skb_copy_datagram_msg(skb, stm->offset, msg, len);
if (err < 0)
goto out;
copied = len;
if (likely(!(flags & MSG_PEEK))) {
KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
- if (copied < rxm->full_len) {
+ if (copied < stm->full_len) {
if (sock->type == SOCK_DGRAM) {
/* Truncated message */
msg->msg_flags |= MSG_TRUNC;
goto msg_finished;
}
- rxm->offset += copied;
- rxm->full_len -= copied;
+ stm->offset += copied;
+ stm->full_len -= copied;
} else {
msg_finished:
/* Finished with message */
@@ -1175,7 +1175,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
struct sock *sk = sock->sk;
struct kcm_sock *kcm = kcm_sk(sk);
long timeo;
- struct strp_rx_msg *rxm;
+ struct strp_msg *stm;
int err = 0;
ssize_t copied;
struct sk_buff *skb;
@@ -1192,12 +1192,12 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
/* Okay, have a message on the receive queue */
- rxm = strp_rx_msg(skb);
+ stm = strp_msg(skb);
- if (len > rxm->full_len)
- len = rxm->full_len;
+ if (len > stm->full_len)
+ len = stm->full_len;
- copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
+ copied = skb_splice_bits(skb, sk, stm->offset, pipe, len, flags);
if (copied < 0) {
err = copied;
goto err_out;
@@ -1205,8 +1205,8 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
- rxm->offset += copied;
- rxm->full_len -= copied;
+ stm->offset += copied;
+ stm->full_len -= copied;
/* We have no way to return MSG_EOR. If all the bytes have been
* read we still leave the message in the receive socket buffer.
@@ -1376,7 +1376,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
struct kcm_psock *psock = NULL, *tpsock;
struct list_head *head;
int index = 0;
- struct strp_callbacks cb;
+ static const struct strp_callbacks cb = {
+ .rcv_msg = kcm_rcv_strparser,
+ .parse_msg = kcm_parse_func_strparser,
+ .read_sock_done = kcm_read_sock_done,
+ };
int err;
csk = csock->sk;
@@ -1395,11 +1399,6 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
psock->sk = csk;
psock->bpf_prog = prog;
- cb.rcv_msg = kcm_rcv_strparser;
- cb.abort_parser = NULL;
- cb.parse_msg = kcm_parse_func_strparser;
- cb.read_sock_done = kcm_read_sock_done;
-
err = strp_init(&psock->strp, csk, &cb);
if (err) {
kmem_cache_free(kcm_psockp, psock);
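
The kcm_attach() change replaces a strp_callbacks built on the stack with a single static const object: the table's contents never vary per call, and members that used to be NULLed explicitly (abort_parser) are zero by virtue of static initialization. The shape of the pattern, with hypothetical callback names:

struct callbacks {
	void (*rcv_msg)(int);
	void (*parse_msg)(int);
	void (*abort_parser)(int);
};

static void on_rcv(int n) { (void)n; }
static void on_parse(int n) { (void)n; }

static const struct callbacks cb = {
	.rcv_msg	= on_rcv,
	.parse_msg	= on_parse,
	/* .abort_parser is implicitly NULL */
};
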
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 98f4d82..a00d607 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2399,8 +2399,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
out:
xfrm_pol_put(xp);
- if (err == 0)
- xfrm_garbage_collect(net);
return err;
}
@@ -2651,8 +2649,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
out:
xfrm_pol_put(xp);
- if (delete && err == 0)
- xfrm_garbage_collect(net);
return err;
}
@@ -2752,8 +2748,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
int err, err2;
err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
- if (!err)
- xfrm_garbage_collect(net);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - old silent behavior */
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 90165a6..ee485df 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -329,13 +329,21 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
struct hlist_head *g_head;
struct hlist_head *head;
struct l2tp_net *pn;
+ int err;
head = l2tp_session_id_hash(tunnel, session->session_id);
write_lock_bh(&tunnel->hlist_lock);
+ if (!tunnel->acpt_newsess) {
+ err = -ENODEV;
+ goto err_tlock;
+ }
+
hlist_for_each_entry(session_walk, head, hlist)
- if (session_walk->session_id == session->session_id)
- goto exist;
+ if (session_walk->session_id == session->session_id) {
+ err = -EEXIST;
+ goto err_tlock;
+ }
if (tunnel->version == L2TP_HDR_VER_3) {
pn = l2tp_pernet(tunnel->l2tp_net);
@@ -343,12 +351,21 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
session->session_id);
spin_lock_bh(&pn->l2tp_session_hlist_lock);
+
hlist_for_each_entry(session_walk, g_head, global_hlist)
- if (session_walk->session_id == session->session_id)
- goto exist_glob;
+ if (session_walk->session_id == session->session_id) {
+ err = -EEXIST;
+ goto err_tlock_pnlock;
+ }
+ l2tp_tunnel_inc_refcount(tunnel);
+ sock_hold(tunnel->sock);
hlist_add_head_rcu(&session->global_hlist, g_head);
+
spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ } else {
+ l2tp_tunnel_inc_refcount(tunnel);
+ sock_hold(tunnel->sock);
}
hlist_add_head(&session->hlist, head);
@@ -356,12 +373,12 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
return 0;
-exist_glob:
+err_tlock_pnlock:
spin_unlock_bh(&pn->l2tp_session_hlist_lock);
-exist:
+err_tlock:
write_unlock_bh(&tunnel->hlist_lock);
- return -EEXIST;
+ return err;
}
/* Lookup a tunnel by id
@@ -1251,7 +1268,6 @@ static void l2tp_tunnel_destruct(struct sock *sk)
/* Remove hooks into tunnel socket */
sk->sk_destruct = tunnel->old_sk_destruct;
sk->sk_user_data = NULL;
- tunnel->sock = NULL;
/* Remove the tunnel struct from the tunnel list */
pn = l2tp_pernet(tunnel->l2tp_net);
@@ -1261,6 +1277,8 @@ static void l2tp_tunnel_destruct(struct sock *sk)
atomic_dec(&l2tp_tunnel_count);
l2tp_tunnel_closeall(tunnel);
+
+ tunnel->sock = NULL;
l2tp_tunnel_dec_refcount(tunnel);
/* Call the original destructor */
@@ -1285,6 +1303,7 @@ void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
tunnel->name);
write_lock_bh(&tunnel->hlist_lock);
+ tunnel->acpt_newsess = false;
for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
again:
hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
@@ -1581,6 +1600,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
tunnel->magic = L2TP_TUNNEL_MAGIC;
sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
rwlock_init(&tunnel->hlist_lock);
+ tunnel->acpt_newsess = true;
/* The net we belong to */
tunnel->l2tp_net = net;
@@ -1829,11 +1849,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
return ERR_PTR(err);
}
- l2tp_tunnel_inc_refcount(tunnel);
-
- /* Ensure tunnel socket isn't deleted */
- sock_hold(tunnel->sock);
-
/* Ignore management session in session count value */
if (session->session_id != 0)
atomic_inc(&l2tp_session_count);
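
The l2tp changes close a race between session creation and tunnel teardown: the new acpt_newsess flag is flipped under the same hlist_lock that l2tp_session_add_to_tunnel() takes, and the tunnel and socket references are now taken inside that critical section. A toy model of the locking scheme, using a pthread mutex in place of the kernel's rwlock:

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int accepting = 1;

static int register_session(void)
{
	int err = 0;

	pthread_mutex_lock(&lock);
	if (!accepting)
		err = -1;	/* tunnel closing: maps to -ENODEV above */
	/* else: insert into the session list and take references here */
	pthread_mutex_unlock(&lock);
	return err;
}

static void close_all(void)
{
	pthread_mutex_lock(&lock);
	accepting = 0;		/* no new sessions from this point on */
	pthread_mutex_unlock(&lock);
	/* walk and delete the existing sessions */
}

Because registration either completes before close_all() takes the lock or observes accepting == 0, no session can slip in after teardown has started.
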
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 9101297..a305e0c 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -162,6 +162,10 @@ struct l2tp_tunnel {
int magic; /* Should be L2TP_TUNNEL_MAGIC */
struct rcu_head rcu;
rwlock_t hlist_lock; /* protect session_hlist */
+ bool acpt_newsess; /* Indicates whether this
+ * tunnel accepts new sessions.
+ * Protected by hlist_lock.
+ */
struct hlist_head session_hlist[L2TP_HASH_SIZE];
/* hashed list of sessions,
* hashed by id */
@@ -197,7 +201,9 @@ struct l2tp_tunnel {
};
struct l2tp_nl_cmd_ops {
- int (*session_create)(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+ int (*session_create)(struct net *net, struct l2tp_tunnel *tunnel,
+ u32 session_id, u32 peer_session_id,
+ struct l2tp_session_cfg *cfg);
int (*session_delete)(struct l2tp_session *session);
};
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 4de2ec9..87da9ef 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -262,24 +262,19 @@ static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
dev->needed_headroom += session->hdr_len;
}
-static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
+ u32 session_id, u32 peer_session_id,
+ struct l2tp_session_cfg *cfg)
{
unsigned char name_assign_type;
struct net_device *dev;
char name[IFNAMSIZ];
- struct l2tp_tunnel *tunnel;
struct l2tp_session *session;
struct l2tp_eth *priv;
struct l2tp_eth_sess *spriv;
int rc;
struct l2tp_eth_net *pn;
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (!tunnel) {
- rc = -ENODEV;
- goto out;
- }
-
if (cfg->ifname) {
strlcpy(name, cfg->ifname, IFNAMSIZ);
name_assign_type = NET_NAME_USER;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 57427d4..7135f46 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -643,10 +643,10 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
break;
}
- ret = -EPROTONOSUPPORT;
- if (l2tp_nl_cmd_ops[cfg.pw_type]->session_create)
- ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id,
- session_id, peer_session_id, &cfg);
+ ret = l2tp_nl_cmd_ops[cfg.pw_type]->session_create(net, tunnel,
+ session_id,
+ peer_session_id,
+ &cfg);
if (ret >= 0) {
session = l2tp_session_get(net, tunnel, session_id, false);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index f0edb72..50e3ee9 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -788,25 +788,20 @@ end:
#ifdef CONFIG_L2TP_V3
-/* Called when creating sessions via the netlink interface.
- */
-static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+/* Called when creating sessions via the netlink interface. */
+static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
+ u32 session_id, u32 peer_session_id,
+ struct l2tp_session_cfg *cfg)
{
int error;
- struct l2tp_tunnel *tunnel;
struct l2tp_session *session;
struct pppol2tp_session *ps;
- tunnel = l2tp_tunnel_find(net, tunnel_id);
-
- /* Error if we can't find the tunnel */
- error = -ENOENT;
- if (tunnel == NULL)
- goto out;
-
/* Error if tunnel socket is not prepped */
- if (tunnel->sock == NULL)
+ if (!tunnel->sock) {
+ error = -ENOENT;
goto out;
+ }
/* Default MTU values. */
if (cfg->mtu == 0)
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index ea4f4818..c5b9ce4 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2479,12 +2479,12 @@ static int __init mpls_init(void)
rtnl_af_register(&mpls_af_ops);
- rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
- rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
+ rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0);
+ rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0);
rtnl_register(PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes,
- NULL);
+ 0);
rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
- mpls_netconf_dump_devconf, NULL);
+ mpls_netconf_dump_devconf, 0);
err = 0;
out:
return err;
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 1308a56..af3d636 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -180,6 +180,7 @@ struct ncsi_channel {
#define NCSI_CHANNEL_INACTIVE 1
#define NCSI_CHANNEL_ACTIVE 2
#define NCSI_CHANNEL_INVISIBLE 3
+ bool reconfigure_needed;
spinlock_t lock; /* Protect filters etc */
struct ncsi_package *package;
struct ncsi_channel_version version;
@@ -235,6 +236,9 @@ enum {
ncsi_dev_state_probe_dp,
ncsi_dev_state_config_sp = 0x0301,
ncsi_dev_state_config_cis,
+ ncsi_dev_state_config_clear_vids,
+ ncsi_dev_state_config_svf,
+ ncsi_dev_state_config_ev,
ncsi_dev_state_config_sma,
ncsi_dev_state_config_ebf,
#if IS_ENABLED(CONFIG_IPV6)
@@ -253,6 +257,12 @@ enum {
ncsi_dev_state_suspend_done
};
+struct vlan_vid {
+ struct list_head list;
+ __be16 proto;
+ u16 vid;
+};
+
struct ncsi_dev_priv {
struct ncsi_dev ndev; /* Associated NCSI device */
unsigned int flags; /* NCSI device flags */
@@ -276,6 +286,7 @@ struct ncsi_dev_priv {
struct work_struct work; /* For channel management */
struct packet_type ptype; /* NCSI packet Rx handler */
struct list_head node; /* Form NCSI device list */
+ struct list_head vlan_vids; /* List of active VLAN IDs */
};
struct ncsi_cmd_arg {
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index 5e03ed1..7567ca63 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -139,9 +139,9 @@ static int ncsi_cmd_handler_svf(struct sk_buff *skb,
struct ncsi_cmd_svf_pkt *cmd;
cmd = skb_put_zero(skb, sizeof(*cmd));
- cmd->vlan = htons(nca->words[0]);
- cmd->index = nca->bytes[2];
- cmd->enable = nca->bytes[3];
+ cmd->vlan = htons(nca->words[1]);
+ cmd->index = nca->bytes[6];
+ cmd->enable = nca->bytes[7];
ncsi_cmd_build_header(&cmd->cmd.common, nca);
return 0;
@@ -153,7 +153,7 @@ static int ncsi_cmd_handler_ev(struct sk_buff *skb,
struct ncsi_cmd_ev_pkt *cmd;
cmd = skb_put_zero(skb, sizeof(*cmd));
- cmd->mode = nca->bytes[0];
+ cmd->mode = nca->bytes[3];
ncsi_cmd_build_header(&cmd->cmd.common, nca);
return 0;
@@ -228,7 +228,7 @@ static struct ncsi_cmd_handler {
{ NCSI_PKT_CMD_AE, 8, ncsi_cmd_handler_ae },
{ NCSI_PKT_CMD_SL, 8, ncsi_cmd_handler_sl },
{ NCSI_PKT_CMD_GLS, 0, ncsi_cmd_handler_default },
- { NCSI_PKT_CMD_SVF, 4, ncsi_cmd_handler_svf },
+ { NCSI_PKT_CMD_SVF, 8, ncsi_cmd_handler_svf },
{ NCSI_PKT_CMD_EV, 4, ncsi_cmd_handler_ev },
{ NCSI_PKT_CMD_DV, 0, ncsi_cmd_handler_default },
{ NCSI_PKT_CMD_SMA, 8, ncsi_cmd_handler_sma },
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index a3bd5fa..3fd3c39 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -38,6 +38,25 @@ static inline int ncsi_filter_size(int table)
return sizes[table];
}
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
+{
+ struct ncsi_channel_filter *ncf;
+ int size;
+
+ ncf = nc->filters[table];
+ if (!ncf)
+ return NULL;
+
+ size = ncsi_filter_size(table);
+ if (size < 0)
+ return NULL;
+
+ return ncf->data + size * index;
+}
+
+/* Find the first active filter in a filter table that matches the given
+ * data parameter. If data is NULL, this returns the first active filter.
+ */
int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
{
struct ncsi_channel_filter *ncf;
@@ -58,7 +77,7 @@ int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
index = -1;
while ((index = find_next_bit(bitmap, ncf->total, index + 1))
< ncf->total) {
- if (!memcmp(ncf->data + size * index, data, size)) {
+ if (!data || !memcmp(ncf->data + size * index, data, size)) {
spin_unlock_irqrestore(&nc->lock, flags);
return index;
}
@@ -639,6 +658,95 @@ error:
nd->state = ncsi_dev_state_functional;
}
+/* Check the VLAN filter bitmap for a set filter, and construct a
+ * "Set VLAN Filter - Disable" packet if found.
+ */
+static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
+ struct ncsi_cmd_arg *nca)
+{
+ int index;
+ u32 *data;
+ u16 vid;
+
+ index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, NULL);
+ if (index < 0) {
+ /* Filter table empty */
+ return -1;
+ }
+
+ data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index);
+ if (!data) {
+ netdev_err(ndp->ndev.dev,
+ "ncsi: failed to retrieve filter %d\n", index);
+ /* Set the VLAN id to 0 - this will still disable the entry in
+ * the filter table, but we won't know what it was.
+ */
+ vid = 0;
+ } else {
+ vid = *(u16 *)data;
+ }
+
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "ncsi: removed vlan tag %u at index %d\n",
+ vid, index + 1);
+ ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index);
+
+ nca->type = NCSI_PKT_CMD_SVF;
+ nca->words[1] = vid;
+ /* HW filter index starts at 1 */
+ nca->bytes[6] = index + 1;
+ nca->bytes[7] = 0x00;
+ return 0;
+}
+
+/* Find an outstanding VLAN tag and construct a "Set VLAN Filter - Enable"
+ * packet.
+ */
+static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
+ struct ncsi_cmd_arg *nca)
+{
+ struct vlan_vid *vlan = NULL;
+ int index = 0;
+
+ list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
+ index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan->vid);
+ if (index < 0) {
+ /* New tag to add */
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "ncsi: new vlan id to set: %u\n",
+ vlan->vid);
+ break;
+ }
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "vid %u already at filter pos %d\n",
+ vlan->vid, index);
+ }
+
+ if (!vlan || index >= 0) {
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "no vlan ids left to set\n");
+ return -1;
+ }
+
+ index = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan->vid);
+ if (index < 0) {
+ netdev_err(ndp->ndev.dev,
+ "Failed to add new VLAN tag, error %d\n", index);
+ return -1;
+ }
+
+ netdev_printk(KERN_DEBUG, ndp->ndev.dev,
+ "ncsi: set vid %u in packet, index %u\n",
+ vlan->vid, index + 1);
+ nca->type = NCSI_PKT_CMD_SVF;
+ nca->words[1] = vlan->vid;
+ /* HW filter index starts at 1 */
+ nca->bytes[6] = index + 1;
+ nca->bytes[7] = 0x01;
+
+ return 0;
+}
+
static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
{
struct ncsi_dev *nd = &ndp->ndev;
@@ -683,8 +791,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
if (ret)
goto error;
- nd->state = ncsi_dev_state_config_sma;
+ nd->state = ncsi_dev_state_config_clear_vids;
break;
+ case ncsi_dev_state_config_clear_vids:
+ case ncsi_dev_state_config_svf:
+ case ncsi_dev_state_config_ev:
case ncsi_dev_state_config_sma:
case ncsi_dev_state_config_ebf:
#if IS_ENABLED(CONFIG_IPV6)
@@ -699,11 +810,40 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
nca.package = np->id;
nca.channel = nc->id;
+ /* Clear any active filters on the channel before setting */
+ if (nd->state == ncsi_dev_state_config_clear_vids) {
+ ret = clear_one_vid(ndp, nc, &nca);
+ if (ret) {
+ nd->state = ncsi_dev_state_config_svf;
+ schedule_work(&ndp->work);
+ break;
+ }
+ /* Repeat */
+ nd->state = ncsi_dev_state_config_clear_vids;
+ /* Add known VLAN tags to the filter */
+ } else if (nd->state == ncsi_dev_state_config_svf) {
+ ret = set_one_vid(ndp, nc, &nca);
+ if (ret) {
+ nd->state = ncsi_dev_state_config_ev;
+ schedule_work(&ndp->work);
+ break;
+ }
+ /* Repeat */
+ nd->state = ncsi_dev_state_config_svf;
+ /* Enable/Disable the VLAN filter */
+ } else if (nd->state == ncsi_dev_state_config_ev) {
+ if (list_empty(&ndp->vlan_vids)) {
+ nca.type = NCSI_PKT_CMD_DV;
+ } else {
+ nca.type = NCSI_PKT_CMD_EV;
+ nca.bytes[3] = NCSI_CAP_VLAN_NO;
+ }
+ nd->state = ncsi_dev_state_config_sma;
+ } else if (nd->state == ncsi_dev_state_config_sma) {
/* Use first entry in unicast filter table. Note that
* the MAC filter table starts from entry 1 instead of
* 0.
*/
- if (nd->state == ncsi_dev_state_config_sma) {
nca.type = NCSI_PKT_CMD_SMA;
for (index = 0; index < 6; index++)
nca.bytes[index] = dev->dev_addr[index];
@@ -751,6 +891,25 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
break;
case ncsi_dev_state_config_done:
spin_lock_irqsave(&nc->lock, flags);
+ if (nc->reconfigure_needed) {
+ /* This channel's configuration has been updated
+ * part-way during the config state - start the
+ * channel configuration over
+ */
+ nc->reconfigure_needed = false;
+ nc->state = NCSI_CHANNEL_INACTIVE;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ netdev_printk(KERN_DEBUG, dev,
+ "Dirty NCSI channel state reset\n");
+ ncsi_process_next_channel(ndp);
+ break;
+ }
+
if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
hot_nc = nc;
nc->state = NCSI_CHANNEL_ACTIVE;
@@ -1191,6 +1350,150 @@ static struct notifier_block ncsi_inet6addr_notifier = {
};
#endif /* CONFIG_IPV6 */
+static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
+{
+ struct ncsi_dev *nd = &ndp->ndev;
+ struct ncsi_channel *nc;
+ struct ncsi_package *np;
+ unsigned long flags;
+ unsigned int n = 0;
+
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ spin_lock_irqsave(&nc->lock, flags);
+
+ /* Channels may be busy, mark dirty instead of
+ * kicking if:
+ * a) not ACTIVE (configured)
+ * b) in the channel_queue (to be configured)
+ * c) its ndev is in the config state
+ */
+ if (nc->state != NCSI_CHANNEL_ACTIVE) {
+ if ((ndp->ndev.state & 0xff00) ==
+ ncsi_dev_state_config ||
+ !list_empty(&nc->link)) {
+ netdev_printk(KERN_DEBUG, nd->dev,
+ "ncsi: channel %p marked dirty\n",
+ nc);
+ nc->reconfigure_needed = true;
+ }
+ spin_unlock_irqrestore(&nc->lock, flags);
+ continue;
+ }
+
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ ncsi_stop_channel_monitor(nc);
+ spin_lock_irqsave(&nc->lock, flags);
+ nc->state = NCSI_CHANNEL_INACTIVE;
+ spin_unlock_irqrestore(&nc->lock, flags);
+
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ netdev_printk(KERN_DEBUG, nd->dev,
+ "ncsi: kicked channel %p\n", nc);
+ n++;
+ }
+ }
+
+ return n;
+}
+
+int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct ncsi_channel_filter *ncf;
+ struct ncsi_dev_priv *ndp;
+ unsigned int n_vids = 0;
+ struct vlan_vid *vlan;
+ struct ncsi_dev *nd;
+ bool found = false;
+
+ if (vid == 0)
+ return 0;
+
+ nd = ncsi_find_dev(dev);
+ if (!nd) {
+ netdev_warn(dev, "ncsi: No net_device?\n");
+ return 0;
+ }
+
+ ndp = TO_NCSI_DEV_PRIV(nd);
+ ncf = ndp->hot_channel->filters[NCSI_FILTER_VLAN];
+
+ /* Add the VLAN id to our internal list */
+ list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
+ n_vids++;
+ if (vlan->vid == vid) {
+ netdev_printk(KERN_DEBUG, dev,
+ "vid %u already registered\n", vid);
+ return 0;
+ }
+ }
+
+ if (n_vids >= ncf->total) {
+ netdev_info(dev,
+ "NCSI Channel supports up to %u VLAN tags but %u are already set\n",
+ ncf->total, n_vids);
+ return -EINVAL;
+ }
+
+ vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+ if (!vlan)
+ return -ENOMEM;
+
+ vlan->proto = proto;
+ vlan->vid = vid;
+ list_add_rcu(&vlan->list, &ndp->vlan_vids);
+
+ netdev_printk(KERN_DEBUG, dev, "Added new vid %u\n", vid);
+
+ found = ncsi_kick_channels(ndp) != 0;
+
+ return found ? ncsi_process_next_channel(ndp) : 0;
+}
+EXPORT_SYMBOL_GPL(ncsi_vlan_rx_add_vid);
+
+int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
+{
+ struct vlan_vid *vlan, *tmp;
+ struct ncsi_dev_priv *ndp;
+ struct ncsi_dev *nd;
+ bool found = false;
+
+ if (vid == 0)
+ return 0;
+
+ nd = ncsi_find_dev(dev);
+ if (!nd) {
+ netdev_warn(dev, "ncsi: no net_device?\n");
+ return 0;
+ }
+
+ ndp = TO_NCSI_DEV_PRIV(nd);
+
+ /* Remove the VLAN id from our internal list */
+ list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list)
+ if (vlan->vid == vid) {
+ netdev_printk(KERN_DEBUG, dev,
+ "vid %u found, removing\n", vid);
+ list_del_rcu(&vlan->list);
+ found = true;
+ kfree(vlan);
+ }
+
+ if (!found) {
+ netdev_err(dev, "ncsi: vid %u wasn't registered!\n", vid);
+ return -EINVAL;
+ }
+
+ found = ncsi_kick_channels(ndp) != 0;
+
+ return found ? ncsi_process_next_channel(ndp) : 0;
+}
+EXPORT_SYMBOL_GPL(ncsi_vlan_rx_kill_vid);
+
struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
void (*handler)(struct ncsi_dev *ndev))
{
@@ -1215,6 +1518,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
nd->handler = handler;
ndp->pending_req_num = 0;
INIT_LIST_HEAD(&ndp->channel_queue);
+ INIT_LIST_HEAD(&ndp->vlan_vids);
INIT_WORK(&ndp->work, ncsi_dev_work);
/* Initialize private NCSI device */
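
The three new config states form a small repeat-until-done machine: clear_one_vid() and set_one_vid() each handle one filter slot per pass and return nonzero once nothing is left, at which point the state advances. A self-contained sketch of that control flow, with the hardware interaction stubbed out as counters:

enum cfg_state { CLEAR_VIDS, SET_VIDS, ENABLE_VLAN, DONE };

static int to_clear = 2, to_set = 3;

static int clear_one(void) { return to_clear-- > 0 ? 0 : -1; }
static int set_one(void)   { return to_set-- > 0 ? 0 : -1; }

static enum cfg_state step(enum cfg_state s)
{
	switch (s) {
	case CLEAR_VIDS:	/* re-enter while entries remain */
		return clear_one() ? SET_VIDS : CLEAR_VIDS;
	case SET_VIDS:
		return set_one() ? ENABLE_VLAN : SET_VIDS;
	case ENABLE_VLAN:	/* one EV (or DV) command, then move on */
		return DONE;
	default:
		return DONE;
	}
}

Each pass through ncsi_configure_channel() issues at most one SVF command, mirroring the one-command-per-state-entry discipline of the existing states.
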
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 3ea49ed..91b4b66 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -104,7 +104,7 @@ struct ncsi_cmd_svf_pkt {
unsigned char index; /* VLAN table index */
unsigned char enable; /* Enable or disable */
__be32 checksum; /* Checksum */
- unsigned char pad[14];
+ unsigned char pad[18];
};
/* Enable VLAN */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 087db77..265b9a8 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -354,7 +354,8 @@ static int ncsi_rsp_handler_svf(struct ncsi_request *nr)
/* Add or remove the VLAN filter */
if (!(cmd->enable & 0x1)) {
- ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index);
+ /* HW indexes from 1 */
+ ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index - 1);
} else {
vlan = ntohs(cmd->vlan);
ret = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan);
@@ -693,7 +694,14 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
ncf->index = i;
ncf->total = cnt;
- ncf->bitmap = 0x0ul;
+ if (i == NCSI_FILTER_VLAN) {
+ /* Set VLAN filters active so they are cleared in the
+ * first configuration state
+ */
+ ncf->bitmap = U64_MAX;
+ } else {
+ ncf->bitmap = 0x0ul;
+ }
nc->filters[i] = ncf;
}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9b28864..e4a13cc 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -636,6 +636,15 @@ config NFT_FWD_NETDEV
help
This option enables packet forwarding for the "netdev" family.
+config NFT_FIB_NETDEV
+ depends on NFT_FIB_IPV4
+ depends on NFT_FIB_IPV6
+ tristate "Netfilter nf_tables netdev fib lookups support"
+ help
+ This option allows using the FIB expression from the netdev table.
+ The lookup will be delegated to the IPv4 or IPv6 FIB depending
+ on the protocol of the packet.
+
endif # NF_TABLES_NETDEV
endif # NF_TABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 9133809..d3891c9 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_NFT_REDIR) += nft_redir.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
obj-$(CONFIG_NFT_FIB) += nft_fib.o
obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
+obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 974cf2a..04fe25a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -21,7 +21,7 @@
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
-#include <linux/slab.h>
+#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -62,10 +62,182 @@ EXPORT_SYMBOL(nf_hooks_needed);
#endif
static DEFINE_MUTEX(nf_hook_mutex);
+
+/* max hooks per family/hooknum */
+#define MAX_HOOK_COUNT 1024
+
#define nf_entry_dereference(e) \
rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
-static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
+static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
+{
+ struct nf_hook_entries *e;
+ size_t alloc = sizeof(*e) +
+ sizeof(struct nf_hook_entry) * num +
+ sizeof(struct nf_hook_ops *) * num;
+
+ if (num == 0)
+ return NULL;
+
+ e = kvzalloc(alloc, GFP_KERNEL);
+ if (e)
+ e->num_hook_entries = num;
+ return e;
+}
+
+static unsigned int accept_all(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
+}
+
+static const struct nf_hook_ops dummy_ops = {
+ .hook = accept_all,
+ .priority = INT_MIN,
+};
+
+static struct nf_hook_entries *
+nf_hook_entries_grow(const struct nf_hook_entries *old,
+ const struct nf_hook_ops *reg)
+{
+ unsigned int i, alloc_entries, nhooks, old_entries;
+ struct nf_hook_ops **orig_ops = NULL;
+ struct nf_hook_ops **new_ops;
+ struct nf_hook_entries *new;
+ bool inserted = false;
+
+ alloc_entries = 1;
+ old_entries = old ? old->num_hook_entries : 0;
+
+ if (old) {
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+
+ for (i = 0; i < old_entries; i++) {
+ if (orig_ops[i] != &dummy_ops)
+ alloc_entries++;
+ }
+ }
+
+ if (alloc_entries > MAX_HOOK_COUNT)
+ return ERR_PTR(-E2BIG);
+
+ new = allocate_hook_entries_size(alloc_entries);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+
+ i = 0;
+ nhooks = 0;
+ while (i < old_entries) {
+ if (orig_ops[i] == &dummy_ops) {
+ ++i;
+ continue;
+ }
+ if (inserted || reg->priority > orig_ops[i]->priority) {
+ new_ops[nhooks] = (void *)orig_ops[i];
+ new->hooks[nhooks] = old->hooks[i];
+ i++;
+ } else {
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ inserted = true;
+ }
+ nhooks++;
+ }
+
+ if (!inserted) {
+ new_ops[nhooks] = (void *)reg;
+ new->hooks[nhooks].hook = reg->hook;
+ new->hooks[nhooks].priv = reg->priv;
+ }
+
+ return new;
+}
+
+static void hooks_validate(const struct nf_hook_entries *hooks)
+{
+#ifdef CONFIG_DEBUG_KERNEL
+ struct nf_hook_ops **orig_ops;
+ int prio = INT_MIN;
+ size_t i = 0;
+
+ orig_ops = nf_hook_entries_get_hook_ops(hooks);
+
+ for (i = 0; i < hooks->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+
+ WARN_ON(orig_ops[i]->priority < prio);
+
+ if (orig_ops[i]->priority > prio)
+ prio = orig_ops[i]->priority;
+ }
+#endif
+}
+
+/*
+ * __nf_hook_entries_try_shrink - try to shrink hook array
+ *
+ * @pp -- location of hook blob
+ *
+ * Hook unregistration must always succeed, so to-be-removed hooks
+ * are replaced by a dummy one that just moves to the next hook.
+ *
+ * This counts the current dummy hooks, attempts to allocate a new blob,
+ * copies the live hooks, then replaces and discards the old one.
+ *
+ * Returns the address to free, or NULL.
+ */
+static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
+{
+ struct nf_hook_entries *old, *new = NULL;
+ unsigned int i, j, skip = 0, hook_entries;
+ struct nf_hook_ops **orig_ops;
+ struct nf_hook_ops **new_ops;
+
+ old = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!old))
+ return NULL;
+
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ skip++;
+ }
+
+ /* if skip == hook_entries all hooks have been removed */
+ hook_entries = old->num_hook_entries;
+ if (skip == hook_entries)
+ goto out_assign;
+
+ if (WARN_ON(skip == 0))
+ return NULL;
+
+ hook_entries -= skip;
+ new = allocate_hook_entries_size(hook_entries);
+ if (!new)
+ return NULL;
+
+ new_ops = nf_hook_entries_get_hook_ops(new);
+ for (i = 0, j = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] == &dummy_ops)
+ continue;
+ new->hooks[j] = old->hooks[i];
+ new_ops[j] = (void *)orig_ops[i];
+ j++;
+ }
+ hooks_validate(new);
+out_assign:
+ rcu_assign_pointer(*pp, new);
+ return old;
+}
+
+static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
{
if (reg->pf != NFPROTO_NETDEV)
return net->nf.hooks[reg->pf]+reg->hooknum;
@@ -76,13 +248,14 @@ static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const st
return &reg->dev->nf_hooks_ingress;
}
#endif
+ WARN_ON_ONCE(1);
return NULL;
}
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry __rcu **pp;
- struct nf_hook_entry *entry, *p;
+ struct nf_hook_entries *p, *new_hooks;
+ struct nf_hook_entries __rcu **pp;
if (reg->pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
@@ -98,23 +271,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
if (!pp)
return -EINVAL;
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return -ENOMEM;
-
- nf_hook_entry_init(entry, reg);
-
mutex_lock(&nf_hook_mutex);
- /* Find the spot in the list */
- for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
- if (reg->priority < nf_hook_entry_priority(p))
- break;
- }
- rcu_assign_pointer(entry->next, p);
- rcu_assign_pointer(*pp, entry);
+ p = nf_entry_dereference(*pp);
+ new_hooks = nf_hook_entries_grow(p, reg);
+
+ if (!IS_ERR(new_hooks))
+ rcu_assign_pointer(*pp, new_hooks);
mutex_unlock(&nf_hook_mutex);
+ if (IS_ERR(new_hooks))
+ return PTR_ERR(new_hooks);
+
+ hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
@@ -122,48 +291,74 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
#ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
+ synchronize_net();
+ BUG_ON(p == new_hooks);
+ kvfree(p);
return 0;
}
EXPORT_SYMBOL(nf_register_net_hook);
-static struct nf_hook_entry *
-__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
+/*
+ * __nf_unregister_net_hook - remove a hook from blob
+ *
+ * @old: current hook blob
+ * @unreg: hook to unregister
+ *
+ * This cannot fail, hook unregistration must always succeed.
+ * Therefore replace the to-be-removed hook with a dummy hook.
+ */
+static void __nf_unregister_net_hook(struct nf_hook_entries *old,
+ const struct nf_hook_ops *unreg)
{
- struct nf_hook_entry __rcu **pp;
- struct nf_hook_entry *p;
-
- pp = nf_hook_entry_head(net, reg);
- if (WARN_ON_ONCE(!pp))
- return NULL;
+ struct nf_hook_ops **orig_ops;
+ bool found = false;
+ unsigned int i;
- mutex_lock(&nf_hook_mutex);
- for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) {
- if (nf_hook_entry_ops(p) == reg) {
- rcu_assign_pointer(*pp, p->next);
- break;
- }
- }
- mutex_unlock(&nf_hook_mutex);
- if (!p) {
- WARN(1, "nf_unregister_net_hook: hook not found!\n");
- return NULL;
+ orig_ops = nf_hook_entries_get_hook_ops(old);
+ for (i = 0; i < old->num_hook_entries; i++) {
+ if (orig_ops[i] != unreg)
+ continue;
+ WRITE_ONCE(old->hooks[i].hook, accept_all);
+ WRITE_ONCE(orig_ops[i], &dummy_ops);
+ found = true;
+ break;
}
+
+ if (found) {
#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
- net_dec_ingress_queue();
+ if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
+ net_dec_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
- static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
+ static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
#endif
-
- return p;
+ } else {
+ WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
+ }
}
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg);
+ struct nf_hook_entries __rcu **pp;
+ struct nf_hook_entries *p;
unsigned int nfq;
+ pp = nf_hook_entry_head(net, reg);
+ if (!pp)
+ return;
+
+ mutex_lock(&nf_hook_mutex);
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p)) {
+ mutex_unlock(&nf_hook_mutex);
+ return;
+ }
+
+ __nf_unregister_net_hook(p, reg);
+
+ p = __nf_hook_entries_try_shrink(pp);
+ mutex_unlock(&nf_hook_mutex);
if (!p)
return;
@@ -173,7 +368,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
nfq = nf_queue_nf_hook_drop(net);
if (nfq)
synchronize_net();
- kfree(p);
+ kvfree(p);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
@@ -200,26 +395,59 @@ EXPORT_SYMBOL(nf_register_net_hooks);
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int hookcount)
{
- struct nf_hook_entry *to_free[16];
- unsigned int i, n, nfq;
+ struct nf_hook_entries *to_free[16], *p;
+ struct nf_hook_entries __rcu **pp;
+ unsigned int i, j, n;
+
+ mutex_lock(&nf_hook_mutex);
+ for (i = 0; i < hookcount; i++) {
+ pp = nf_hook_entry_head(net, &reg[i]);
+ if (!pp)
+ continue;
+
+ p = nf_entry_dereference(*pp);
+ if (WARN_ON_ONCE(!p))
+ continue;
+ __nf_unregister_net_hook(p, &reg[i]);
+ }
+ mutex_unlock(&nf_hook_mutex);
do {
n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
- for (i = 0; i < n; i++)
- to_free[i] = __nf_unregister_net_hook(net, &reg[i]);
+ mutex_lock(&nf_hook_mutex);
- synchronize_net();
+ for (i = 0, j = 0; i < hookcount && j < n; i++) {
+ pp = nf_hook_entry_head(net, &reg[i]);
+ if (!pp)
+ continue;
+
+ p = nf_entry_dereference(*pp);
+ if (!p)
+ continue;
+
+ to_free[j] = __nf_hook_entries_try_shrink(pp);
+ if (to_free[j])
+ ++j;
+ }
+
+ mutex_unlock(&nf_hook_mutex);
+
+ if (j) {
+ unsigned int nfq;
- /* need 2nd synchronize_net() if nfqueue is used, skb
- * can get reinjected right before nf_queue_hook_drop()
- */
- nfq = nf_queue_nf_hook_drop(net);
- if (nfq)
synchronize_net();
- for (i = 0; i < n; i++)
- kfree(to_free[i]);
+ /* need 2nd synchronize_net() if nfqueue is used, skb
+ * can get reinjected right before nf_queue_hook_drop()
+ */
+ nfq = nf_queue_nf_hook_drop(net);
+ if (nfq)
+ synchronize_net();
+
+ for (i = 0; i < j; i++)
+ kvfree(to_free[i]);
+ }
reg += n;
hookcount -= n;
@@ -230,16 +458,15 @@ EXPORT_SYMBOL(nf_unregister_net_hooks);
/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry *entry)
+ const struct nf_hook_entries *e, unsigned int s)
{
unsigned int verdict;
int ret;
- do {
- verdict = nf_hook_entry_hookfn(entry, skb, state);
+ for (; s < e->num_hook_entries; s++) {
+ verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
- entry = rcu_dereference(entry->next);
break;
case NF_DROP:
kfree_skb(skb);
@@ -248,8 +475,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
ret = -EPERM;
return ret;
case NF_QUEUE:
- ret = nf_queue(skb, state, &entry, verdict);
- if (ret == 1 && entry)
+ ret = nf_queue(skb, state, e, s, verdict);
+ if (ret == 1)
continue;
return ret;
default:
@@ -258,7 +485,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
*/
return 0;
}
- } while (entry);
+ }
return 1;
}
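
The core.c rewrite replaces the per-hook linked list with a flat "blob": one allocation holding the entry array, where registration grows the blob and unregistration overwrites the victim with a dummy that simply accepts. A standalone toy of the remove-then-shrink scheme, with plain malloc in place of kvzalloc and bare function pointers in place of nf_hook_entry:

#include <stdlib.h>

typedef int (*hookfn)(void);
static int accept_all_stub(void) { return 0; }

struct entries {
	unsigned int n;
	hookfn hooks[];
};

/* Removal cannot fail: the slot is only overwritten. */
static void remove_hook(struct entries *e, hookfn fn)
{
	for (unsigned int i = 0; i < e->n; i++)
		if (e->hooks[i] == fn) {
			e->hooks[i] = accept_all_stub;
			return;
		}
}

/* Shrinking is optional: on allocation failure, keep the old blob. */
static struct entries *try_shrink(const struct entries *old)
{
	unsigned int live = 0;

	for (unsigned int i = 0; i < old->n; i++)
		if (old->hooks[i] != accept_all_stub)
			live++;

	struct entries *new = malloc(sizeof(*new) + live * sizeof(hookfn));
	if (!new)
		return NULL;

	new->n = live;
	for (unsigned int i = 0, j = 0; i < old->n; i++)
		if (old->hooks[i] != accept_all_stub)
			new->hooks[j++] = old->hooks[i];
	return new;
}

The kernel version additionally stores the nf_hook_ops pointers after the entry array (for priority-ordered insertion and for matching on unregister) and swaps blobs under RCU, freeing the old one only after synchronize_net().
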
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e31956b..5cb7cac 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -125,14 +125,12 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.inpkts++;
s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
@@ -159,14 +157,12 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
u64_stats_update_begin(&s->syncp);
s->cnt.outpkts++;
s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
u64_stats_update_begin(&s->syncp);
@@ -1222,7 +1218,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
if (!pptr)
return NULL;
- rcu_read_lock();
dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
&iph->saddr, pptr[0]);
if (dest) {
@@ -1237,7 +1232,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
pptr[0], pptr[1]);
}
}
- rcu_read_unlock();
return cp;
}
@@ -1689,11 +1683,9 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
if (dest) {
struct ip_vs_dest_dst *dest_dst;
- rcu_read_lock();
dest_dst = rcu_dereference(dest->dest_dst);
if (dest_dst)
mtu = dst_mtu(dest_dst->dst_cache);
- rcu_read_unlock();
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
@@ -2109,7 +2101,7 @@ ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
#endif
-static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
+static const struct nf_hook_ops ip_vs_ops[] = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 1fa3c23..4f940d7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -550,18 +550,15 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
/* Check for "full" addressed entries */
hash = ip_vs_rs_hashkey(af, daddr, dport);
- rcu_read_lock();
hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
if (dest->port == dport &&
dest->af == af &&
ip_vs_addr_equal(af, &dest->addr, daddr) &&
(dest->protocol == protocol || dest->vfwmark)) {
/* HIT */
- rcu_read_unlock();
return true;
}
}
- rcu_read_unlock();
return false;
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index fb780be..3e17d32 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -269,13 +269,11 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* hopefully it will succeed on the retransmitted
* packet.
*/
- rcu_read_lock();
mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
iph->ihl * 4,
start - data,
end - start,
buf, buf_len);
- rcu_read_unlock();
if (mangled) {
ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 3ffad4a..e1efa44 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -38,7 +38,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
@@ -53,7 +52,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -67,11 +65,9 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -526,12 +522,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -544,11 +538,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
-out:
+
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 12dc8d5..121a321 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -63,7 +63,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
}
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
@@ -80,7 +79,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -95,11 +93,9 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -661,12 +657,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -680,12 +674,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
- out:
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index e494e9a..30e11cd 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -53,7 +53,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
return 0;
}
- rcu_read_lock();
if (likely(!ip_vs_iph_inverse(iph)))
svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
&iph->daddr, ports[1]);
@@ -69,7 +68,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -84,11 +82,9 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
*verdict = ip_vs_leave(svc, skb, pd, iph);
else
*verdict = NF_DROP;
- rcu_read_unlock();
return 0;
}
}
- rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -410,12 +406,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport);
- rcu_read_lock();
list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -429,12 +423,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
cp->app = inc;
if (inc->init_conn)
result = inc->init_conn(inc, cp);
- goto out;
+ break;
}
}
- rcu_read_unlock();
- out:
return result;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 2eab1e0..90d3968 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -678,7 +678,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
@@ -689,14 +688,12 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -710,7 +707,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
&iph->daddr, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
@@ -720,14 +716,12 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -746,7 +740,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
__be16 _pt, *p;
@@ -815,14 +808,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
- rcu_read_unlock();
LeaveFunction(10);
return rc;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -837,7 +828,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
__be16 _pt, *p;
@@ -906,7 +896,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
- rcu_read_unlock();
LeaveFunction(10);
return rc;
@@ -914,7 +903,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
LeaveFunction(10);
kfree_skb(skb);
- rcu_read_unlock();
return NF_STOLEN;
}
#endif
@@ -1035,7 +1023,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -1043,10 +1030,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
- }
rt = skb_rtable(skb);
tdev = rt->dst.dev;
@@ -1095,7 +1080,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_local_out(net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
@@ -1104,7 +1088,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1127,7 +1110,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
&saddr, ipvsh, 1,
@@ -1136,10 +1118,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_TUNNEL);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- }
rt = (struct rt6_info *) skb_dst(skb);
tdev = rt->dst.dev;
@@ -1185,7 +1165,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ip6_local_out(cp->ipvs->net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
@@ -1194,7 +1173,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
tx_error:
if (!IS_ERR(skb))
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1213,17 +1191,14 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
- }
ip_send_check(ip_hdr(skb));
@@ -1231,14 +1206,12 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1252,7 +1225,6 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
NULL, ipvsh, 0,
@@ -1261,23 +1233,19 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_KNOWN_NH);
if (local < 0)
goto tx_error;
- if (local) {
- rcu_read_unlock();
+ if (local)
return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
- }
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1322,7 +1290,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- rcu_read_lock();
local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
NULL, iph);
if (local < 0)
@@ -1368,12 +1335,10 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
- rcu_read_unlock();
goto out;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
@@ -1414,7 +1379,6 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- rcu_read_lock();
local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
@@ -1460,12 +1424,10 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->ignore_df = 1;
rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
- rcu_read_unlock();
goto out;
tx_error:
kfree_skb(skb);
- rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 4e99cca..ecc3ab7 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -40,7 +40,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
goto out;
- rcu_read_lock();
in_dev = __in_dev_get_rcu(rt->dst.dev);
if (in_dev != NULL) {
for_primary_ifa(in_dev) {
@@ -50,7 +49,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
}
} endfor_ifa(in_dev);
}
- rcu_read_unlock();
if (mask == 0)
goto out;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 51390fe..0113039 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -56,6 +56,8 @@
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
+#include "nf_internals.h"
+
#define NF_CONNTRACK_VERSION "0.5.0"
int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
@@ -254,8 +256,8 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
u_int16_t l3num,
struct net *net, struct nf_conntrack_tuple *tuple)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
unsigned int protoff;
u_int8_t protonum;
int ret;
@@ -404,22 +406,19 @@ static void
destroy_conntrack(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
pr_debug("destroy_conntrack(%p)\n", ct);
- NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
+ WARN_ON(atomic_read(&nfct->use) != 0);
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
return;
}
- rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->destroy)
l4proto->destroy(ct);
- rcu_read_unlock();
-
local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
@@ -701,7 +700,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
@@ -763,12 +762,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* connections for unconfirmed conns. But packet copies and
* REJECT will give spurious warnings here.
*/
- /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
/* No external references means no one else could have
* confirmed us.
*/
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
@@ -1090,7 +1088,7 @@ static void gc_worker(struct work_struct *work)
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
- INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
@@ -1167,7 +1165,7 @@ void nf_conntrack_free(struct nf_conn *ct)
/* A freed object has refcnt == 0, that's
* the golden rule for SLAB_TYPESAFE_BY_RCU
*/
- NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
+ WARN_ON(atomic_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
@@ -1183,8 +1181,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto,
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff, u32 hash)
{
@@ -1295,8 +1293,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
@@ -1351,10 +1349,10 @@ unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
@@ -1421,7 +1419,7 @@ repeat:
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
- ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
+ ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, timeouts);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
@@ -1475,7 +1473,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
struct nf_conn_help *help = nfct_help(ct);
/* Should be unconfirmed, so not in hash table yet */
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
pr_debug("Altering reply tuple of %p to ", ct);
nf_ct_dump_tuple(newreply);
@@ -1497,7 +1495,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
unsigned long extra_jiffies,
int do_acct)
{
- NF_CT_ASSERT(skb);
+ WARN_ON(!skb);
/* Only update if this is not a fixed timeout */
if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
@@ -1695,6 +1693,18 @@ __nf_ct_unconfirmed_destroy(struct net *net)
}
}
+void nf_ct_unconfirmed_destroy(struct net *net)
+{
+ might_sleep();
+
+ if (atomic_read(&net->ct.count) > 0) {
+ __nf_ct_unconfirmed_destroy(net);
+ nf_queue_nf_hook_drop(net);
+ synchronize_net();
+ }
+}
+EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy);
+
void nf_ct_iterate_cleanup_net(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
@@ -1706,14 +1716,10 @@ void nf_ct_iterate_cleanup_net(struct net *net,
if (atomic_read(&net->ct.count) == 0)
return;
- __nf_ct_unconfirmed_destroy(net);
-
d.iter = iter;
d.data = data;
d.net = net;
- synchronize_net();
-
nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
@@ -1739,6 +1745,7 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
if (atomic_read(&net->ct.count) == 0)
continue;
__nf_ct_unconfirmed_destroy(net);
+ nf_queue_nf_hook_drop(net);
}
rtnl_unlock();
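
[Two behavioural points in the nf_conntrack_core.c hunks above are easy to miss. First, the garbage-collection work item becomes deferrable: a deferrable delayed work does not wake an idle CPU on its own but piggybacks on the next non-deferrable timer, which is acceptable for an opportunistic table scan. A sketch of the pattern, with hypothetical names (gc_work, my_gc):

        static struct delayed_work gc_work;

        static void my_gc(struct work_struct *work)
        {
                /* scan a slice of the table, then re-arm */
                schedule_delayed_work(&gc_work, HZ);
        }

        static void my_gc_init(void)
        {
                INIT_DEFERRABLE_WORK(&gc_work, my_gc);
                schedule_delayed_work(&gc_work, HZ);
        }

Second, the unconfirmed-list purge moves out of nf_ct_iterate_cleanup_net() into the new exported nf_ct_unconfirmed_destroy(), which also flushes packets parked in nf_queue; per-net cleanup callers that relied on the old implicit purge now have to call it explicitly.]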
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 899c2c3..64778f9 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -51,8 +51,8 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct nf_conn_help *master_help = nfct_help(exp->master);
struct net *net = nf_ct_exp_net(exp);
- NF_CT_ASSERT(master_help);
- NF_CT_ASSERT(!timer_pending(&exp->timeout));
+ WARN_ON(!master_help);
+ WARN_ON(timer_pending(&exp->timeout));
hlist_del_rcu(&exp->hnode);
net->ct.expect_count--;
@@ -368,12 +368,6 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
/* two references : one for hash insert, one for the timer */
refcount_add(2, &exp->use);
- hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
- master_help->expecting[exp->class]++;
-
- hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
- net->ct.expect_count++;
-
setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
(unsigned long)exp);
helper = rcu_dereference_protected(master_help->helper,
@@ -384,6 +378,12 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
}
add_timer(&exp->timeout);
+ hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
+ master_help->expecting[exp->class]++;
+
+ hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
+ net->ct.expect_count++;
+
NF_CT_STAT_INC(net, expect_create);
}
@@ -474,6 +474,60 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
+void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *next;
+ unsigned int i;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, next,
+ &nf_ct_expect_hash[i],
+ hnode) {
+ if (iter(exp, data) && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy);
+
+void nf_ct_expect_iterate_net(struct net *net,
+ bool (*iter)(struct nf_conntrack_expect *e, void *data),
+ void *data,
+ u32 portid, int report)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *next;
+ unsigned int i;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+
+ for (i = 0; i < nf_ct_expect_hsize; i++) {
+ hlist_for_each_entry_safe(exp, next,
+ &nf_ct_expect_hash[i],
+ hnode) {
+
+ if (!net_eq(nf_ct_exp_net(exp), net))
+ continue;
+
+ if (iter(exp, data) && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect_report(exp, portid, report);
+ nf_ct_expect_put(exp);
+ }
+ }
+ }
+
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net);
+
#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
struct seq_net_private p;
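
[The two iterators added above centralize the bucket walk that several callers previously open-coded: they take the expectation hash lock, visit every entry, and remove those for which the predicate returns true. A minimal caller sketch using the signatures from this hunk; kill_for_helper and my_helper are hypothetical:

        static bool kill_for_helper(struct nf_conntrack_expect *exp, void *data)
        {
                return exp->helper == data;  /* drop expectations owned by us */
        }

        static void my_cleanup(void)
        {
                nf_ct_expect_iterate_destroy(kill_for_helper, my_helper);
        }

nf_ct_expect_iterate_net() is the per-netns variant; it additionally filters on net_eq() and reports each removal over netlink with the given portid, as the ctnetlink conversion further below shows.]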
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 6c605e8..9fe0ddc 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -47,7 +47,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
struct nf_ct_ext_type *t;
/* Conntrack must not be confirmed to avoid races on reallocation. */
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
old = ct->ext;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 9129bb3..551a1ed 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -437,12 +437,22 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
-void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data)
{
- struct nf_conntrack_expect *exp;
- const struct hlist_node *next;
- unsigned int i;
+ struct nf_conn_help *help = nfct_help(exp->master);
+ const struct nf_conntrack_helper *me = data;
+ const struct nf_conntrack_helper *this;
+
+ if (exp->helper == me)
+ return true;
+ this = rcu_dereference_protected(help->helper,
+ lockdep_is_held(&nf_conntrack_expect_lock));
+ return this == me;
+}
+
+void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
+{
mutex_lock(&nf_ct_helper_mutex);
hlist_del_rcu(&me->hnode);
nf_ct_helper_count--;
@@ -453,21 +463,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
*/
synchronize_rcu();
- /* Get rid of expectations */
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i], hnode) {
- struct nf_conn_help *help = nfct_help(exp->master);
- if ((rcu_dereference_protected(
- help->helper,
- lockdep_is_held(&nf_conntrack_expect_lock)
- ) == me || exp->helper == me))
- nf_ct_remove_expect(exp);
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
-
+ nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me);
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index cf9ace7..397e691 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -49,11 +49,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-static void generic_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
-}
-
static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
@@ -64,10 +59,8 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
.l3proto = PF_UNSPEC,
- .name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
.get_l4proto = generic_get_l4proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7999e70..de4053d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -61,8 +61,8 @@ MODULE_LICENSE("GPL");
static char __initdata version[] = "0.93";
static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
- const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
struct nlattr *nest_parms;
@@ -86,7 +86,7 @@ nla_put_failure:
static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l3proto *l3proto)
+ const struct nf_conntrack_l3proto *l3proto)
{
int ret = 0;
struct nlattr *nest_parms;
@@ -109,9 +109,9 @@ nla_put_failure:
static int ctnetlink_dump_tuples(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
int ret;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
rcu_read_lock();
l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
@@ -163,7 +163,7 @@ nla_put_failure:
static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
{
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *nest_proto;
int ret;
@@ -535,17 +535,16 @@ nla_put_failure:
static inline size_t ctnetlink_proto_size(const struct nf_conn *ct)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
- size_t len = 0;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
+ size_t len;
- rcu_read_lock();
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
- len += l3proto->nla_size;
+ len = l3proto->nla_size;
+ len *= 3u; /* ORIG, REPLY, MASTER */
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
len += l4proto->nla_size;
- rcu_read_unlock();
return len;
}
@@ -664,7 +663,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- rcu_read_lock();
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -736,8 +734,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
&& ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
- rcu_read_unlock();
-
nlmsg_end(skb, nlh);
err = nfnetlink_send(skb, net, item->portid, group, item->report,
GFP_ATOMIC);
@@ -747,7 +743,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
return 0;
nla_put_failure:
- rcu_read_unlock();
nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
@@ -941,8 +936,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = {
static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
struct nf_conntrack_tuple *tuple)
{
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *tb[CTA_PROTO_MAX+1];
- struct nf_conntrack_l4proto *l4proto;
int ret = 0;
ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy,
@@ -1585,8 +1580,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
const struct nlattr * const cda[])
{
const struct nlattr *attr = cda[CTA_PROTOINFO];
+ const struct nf_conntrack_l4proto *l4proto;
struct nlattr *tb[CTA_PROTOINFO_MAX+1];
- struct nf_conntrack_l4proto *l4proto;
int err = 0;
err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy,
@@ -2213,7 +2208,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
- rcu_read_lock();
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -2272,11 +2266,9 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
#endif
if (ctnetlink_dump_labels(skb, ct) < 0)
goto nla_put_failure;
- rcu_read_unlock();
return 0;
nla_put_failure:
- rcu_read_unlock();
return -ENOSPC;
}
@@ -2483,11 +2475,11 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple_mask *mask)
{
- int ret;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple m;
struct nlattr *nest_parms;
+ int ret;
memset(&m, 0xFF, sizeof(m));
memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3));
@@ -2661,17 +2653,14 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
- rcu_read_lock();
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
- rcu_read_unlock();
nlmsg_end(skb, nlh);
nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC);
return 0;
nla_put_failure:
- rcu_read_unlock();
nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
@@ -2910,6 +2899,21 @@ out:
return err == -EAGAIN ? -ENOBUFS : err;
}
+static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
+{
+ const struct nf_conn_help *m_help;
+ const char *name = data;
+
+ m_help = nfct_help(exp->master);
+
+ return strcmp(m_help->helper->name, name) == 0;
+}
+
+static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data)
+{
+ return true;
+}
+
static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const cda[],
@@ -2918,10 +2922,8 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- struct hlist_node *next;
u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_zone zone;
- unsigned int i;
int err;
if (cda[CTA_EXPECT_TUPLE]) {
@@ -2961,49 +2963,15 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
nf_ct_expect_put(exp);
} else if (cda[CTA_EXPECT_HELP_NAME]) {
char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]);
- struct nf_conn_help *m_help;
-
- /* delete all expectations for this helper */
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i],
- hnode) {
-
- if (!net_eq(nf_ct_exp_net(exp), net))
- continue;
- m_help = nfct_help(exp->master);
- if (!strcmp(m_help->helper->name, name) &&
- del_timer(&exp->timeout)) {
- nf_ct_unlink_expect_report(exp,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
- nf_ct_expect_put(exp);
- }
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_iterate_net(net, expect_iter_name, name,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
} else {
/* This basically means we have to flush everything*/
- spin_lock_bh(&nf_conntrack_expect_lock);
- for (i = 0; i < nf_ct_expect_hsize; i++) {
- hlist_for_each_entry_safe(exp, next,
- &nf_ct_expect_hash[i],
- hnode) {
-
- if (!net_eq(nf_ct_exp_net(exp), net))
- continue;
-
- if (del_timer(&exp->timeout)) {
- nf_ct_unlink_expect_report(exp,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
- nf_ct_expect_put(exp);
- }
- }
- }
- spin_unlock_bh(&nf_conntrack_expect_lock);
+ nf_ct_expect_iterate_net(net, expect_iter_all, NULL,
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
}
return 0;
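
[The ctnetlink_del_expect() rework above is the payoff of the new iterators: the two long open-coded bucket walks collapse into the expect_iter_name and expect_iter_all predicates. Separately, note the corrected size estimate in ctnetlink_proto_size(): a conntrack dump can carry up to three tuples (ORIG, REPLY and MASTER), so the per-tuple l3 attribute size is now counted three times. In effect:

        /* Sketch of the new estimate; previously only one l3 tuple
         * was accounted for. */
        len = l3proto->nla_size * 3u            /* ORIG, REPLY, MASTER */
              + l4proto->nla_size;

which matches the nested CTA_TUPLE_* sections the dump path emits.]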
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 6959e93..11562f2 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -113,7 +113,6 @@ static void pptp_expectfn(struct nf_conn *ct,
/* Can you see how rusty this code is, compared with the pre-2.6.11
* one? That's what happened to my shiny newnat of 2002 ;( -HW */
- rcu_read_lock();
nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn);
if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK)
nf_nat_pptp_expectfn(ct, exp);
@@ -136,7 +135,6 @@ static void pptp_expectfn(struct nf_conn *ct,
pr_debug("not found\n");
}
}
- rcu_read_unlock();
}
static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 1dcad22..b3e489c 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -65,7 +65,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
}
#endif
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
{
if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
@@ -77,7 +77,7 @@ EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
/* this is guaranteed to always return a valid protocol helper, since
* it falls back to generic_protocol */
-struct nf_conntrack_l3proto *
+const struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto)
{
struct nf_conntrack_l3proto *p;
@@ -95,8 +95,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
int
nf_ct_l3proto_try_module_get(unsigned short l3proto)
{
+ const struct nf_conntrack_l3proto *p;
int ret;
- struct nf_conntrack_l3proto *p;
retry: p = nf_ct_l3proto_find_get(l3proto);
if (p == &nf_conntrack_l3proto_generic) {
@@ -173,10 +173,10 @@ void nf_ct_netns_put(struct net *net, u8 nfproto)
}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
-struct nf_conntrack_l4proto *
+const struct nf_conntrack_l4proto *
nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
{
- struct nf_conntrack_l4proto *p;
+ const struct nf_conntrack_l4proto *p;
rcu_read_lock();
p = __nf_ct_l4proto_find(l3num, l4num);
@@ -188,7 +188,7 @@ nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
-void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p)
+void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
{
module_put(p->me);
}
@@ -196,28 +196,28 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
static int kill_l3proto(struct nf_conn *i, void *data)
{
- return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto;
+ return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto;
}
static int kill_l4proto(struct nf_conn *i, void *data)
{
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
l4proto = data;
return nf_ct_protonum(i) == l4proto->l4proto &&
nf_ct_l3num(i) == l4proto->l3proto;
}
-int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
+int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
{
int ret = 0;
struct nf_conntrack_l3proto *old;
if (proto->l3proto >= NFPROTO_NUMPROTO)
return -EBUSY;
-
- if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ if (proto->tuple_to_nlattr && proto->nla_size == 0)
return -EINVAL;
-
+#endif
mutex_lock(&nf_ct_proto_mutex);
old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
lockdep_is_held(&nf_ct_proto_mutex));
@@ -226,9 +226,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
goto out_unlock;
}
- if (proto->nlattr_tuple_size)
- proto->nla_size = 3 * proto->nlattr_tuple_size();
-
rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
out_unlock:
@@ -238,21 +235,7 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
-#ifdef CONFIG_SYSCTL
-extern unsigned int nf_conntrack_default_on;
-
-int nf_ct_l3proto_pernet_register(struct net *net,
- struct nf_conntrack_l3proto *proto)
-{
- if (nf_conntrack_default_on == 0)
- return 0;
-
- return proto->net_ns_get ? proto->net_ns_get(net) : 0;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
-#endif
-
-void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
+void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto)
{
BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
@@ -266,27 +249,12 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
synchronize_rcu();
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_destroy(kill_l3proto, proto);
+ nf_ct_iterate_destroy(kill_l3proto, (void*)proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
-void nf_ct_l3proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l3proto *proto)
-{
- /*
- * nf_conntrack_default_on *might* have registered hooks.
- * ->net_ns_put must cope with more puts() than get(), i.e.
- * if nf_conntrack_default_on was 0 at time of
- * nf_ct_l3proto_pernet_register invocation this net_ns_put()
- * should be a noop.
- */
- if (proto->net_ns_put)
- proto->net_ns_put(net);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
-
static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
if (l4proto->get_net_proto) {
/* statically built-in protocols use static per-net */
@@ -301,7 +269,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
static
int nf_ct_l4proto_register_sysctl(struct net *net,
struct nf_proto_net *pn,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
int err = 0;
@@ -324,8 +292,8 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
static
void nf_ct_l4proto_unregister_sysctl(struct net *net,
- struct nf_proto_net *pn,
- struct nf_conntrack_l4proto *l4proto)
+ struct nf_proto_net *pn,
+ const struct nf_conntrack_l4proto *l4proto)
{
#ifdef CONFIG_SYSCTL
if (pn->ctl_table_header != NULL)
@@ -395,7 +363,7 @@ out_unlock:
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
int nf_ct_l4proto_pernet_register_one(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
struct nf_proto_net *pn = NULL;
@@ -420,7 +388,7 @@ out:
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
-static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
@@ -433,7 +401,7 @@ static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
&nf_conntrack_l4proto_generic);
}
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
{
mutex_lock(&nf_ct_proto_mutex);
__nf_ct_l4proto_unregister_one(l4proto);
@@ -444,7 +412,7 @@ void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
void nf_ct_l4proto_pernet_unregister_one(struct net *net,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
@@ -469,8 +437,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
}
if (i != num_proto) {
ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
- pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n",
- ver, l4proto[i]->name, ver);
+ pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n",
+ ver, l4proto[i]->l4proto);
nf_ct_l4proto_unregister(l4proto, i);
}
return ret;
@@ -478,7 +446,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
int nf_ct_l4proto_pernet_register(struct net *net,
- struct nf_conntrack_l4proto *l4proto[],
+ struct nf_conntrack_l4proto *const l4proto[],
unsigned int num_proto)
{
int ret = -EINVAL;
@@ -490,8 +458,8 @@ int nf_ct_l4proto_pernet_register(struct net *net,
break;
}
if (i != num_proto) {
- pr_err("nf_conntrack_%s%d: pernet registration failed\n",
- l4proto[i]->name,
+ pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n",
+ l4proto[i]->l4proto,
l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
nf_ct_l4proto_pernet_unregister(net, l4proto, i);
}
@@ -514,8 +482,8 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
void nf_ct_l4proto_pernet_unregister(struct net *net,
- struct nf_conntrack_l4proto *l4proto[],
- unsigned int num_proto)
+ struct nf_conntrack_l4proto *const l4proto[],
+ unsigned int num_proto)
{
while (num_proto-- != 0)
nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
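
[The nf_conntrack_proto.c changes are almost entirely type-level: the lookup helpers now return const pointers, registration and unregistration take const arguments, and the nla_size sanity check becomes a direct field test compiled only when netlink support is enabled. Error messages switch from the removed .name string to the numeric protocol number. Call sites are unchanged beyond const-correctness; a module using the single-protocol API exported in this hunk would still look roughly like this (my_l4proto is a hypothetical tracker definition):

        static int __init my_init(void)
        {
                return nf_ct_l4proto_register_one(&my_l4proto);
        }

        static void __exit my_exit(void)
        {
                nf_ct_l4proto_unregister_one(&my_l4proto);
        }
]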
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 4707d99..0f5a4d7 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -469,7 +469,7 @@ static unsigned int *dccp_get_timeouts(struct net *net)
static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, enum ip_conntrack_info ctinfo,
- u_int8_t pf, unsigned int hooknum,
+ u_int8_t pf,
unsigned int *timeouts)
{
struct net *net = nf_ct_net(ct);
@@ -623,18 +623,12 @@ static bool dccp_can_early_drop(const struct nf_conn *ct)
return false;
}
-static void dccp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.dccp.port),
- ntohs(tuple->dst.u.dccp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
}
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
@@ -880,7 +874,6 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.l3proto = AF_INET,
.l4proto = IPPROTO_DCCP,
- .name = "dccp",
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
@@ -888,8 +881,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
- .print_tuple = dccp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = dccp_to_nlattr,
.nlattr_size = dccp_nlattr_size,
@@ -916,7 +910,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.l3proto = AF_INET6,
.l4proto = IPPROTO_DCCP,
- .name = "dccp",
.pkt_to_tuple = dccp_pkt_to_tuple,
.invert_tuple = dccp_invert_tuple,
.new = dccp_new,
@@ -924,8 +917,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
.get_timeouts = dccp_get_timeouts,
.error = dccp_error,
.can_early_drop = dccp_can_early_drop,
- .print_tuple = dccp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
+#endif
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = dccp_to_nlattr,
.nlattr_size = dccp_nlattr_size,
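
[The DCCP hunks above establish the pattern repeated for generic, GRE, SCTP, TCP and UDP below: .print_tuple and .name are deleted (the standalone /proc code now formats tuples itself), .print_conntrack is compiled only under CONFIG_NF_CONNTRACK_PROCFS, and the unused hooknum argument is dropped from the ->packet() handler. The minimal handler shape after the change, modelled on generic_packet() in the next file (my_packet is a hypothetical name; the signature is the one introduced here):

        static int my_packet(struct nf_conn *ct, const struct sk_buff *skb,
                             unsigned int dataoff, enum ip_conntrack_info ctinfo,
                             u_int8_t pf, unsigned int *timeouts)
        {
                nf_ct_refresh_acct(ct, ctinfo, skb, *timeouts);
                return NF_ACCEPT;
        }
]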
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index d5868ba..9cd4070 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -17,22 +17,10 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
static bool nf_generic_should_process(u8 proto)
{
switch (proto) {
-#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE
- case IPPROTO_SCTP:
- return false;
-#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE
- case IPPROTO_DCCP:
- return false;
-#endif
#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
case IPPROTO_GRE:
return false;
#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE
- case IPPROTO_UDPLITE:
- return false;
-#endif
default:
return true;
}
@@ -62,12 +50,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void generic_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
-}
-
static unsigned int *generic_get_timeouts(struct net *net)
{
return &(generic_pernet(net)->timeout);
@@ -79,7 +61,6 @@ static int generic_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeout)
{
nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
@@ -187,10 +168,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
{
.l3proto = PF_UNSPEC,
.l4proto = 255,
- .name = "unknown",
.pkt_to_tuple = generic_pkt_to_tuple,
.invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
.packet = generic_packet,
.get_timeouts = generic_get_timeouts,
.new = generic_new,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 87bb40a..09a9048 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -224,15 +224,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
return true;
}
-/* print gre part of tuple */
-static void gre_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "srckey=0x%x dstkey=0x%x ",
- ntohs(tuple->src.u.gre.key),
- ntohs(tuple->dst.u.gre.key));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* print private data for conntrack */
static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
@@ -240,6 +232,7 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
(ct->proto.gre.timeout / HZ),
(ct->proto.gre.stream_timeout / HZ));
}
+#endif
static unsigned int *gre_get_timeouts(struct net *net)
{
@@ -252,7 +245,6 @@ static int gre_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
/* If we've seen traffic both ways, this is a GRE connection.
@@ -364,11 +356,11 @@ static int gre_init_net(struct net *net, u_int16_t proto)
static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
.l3proto = AF_INET,
.l4proto = IPPROTO_GRE,
- .name = "gre",
.pkt_to_tuple = gre_pkt_to_tuple,
.invert_tuple = gre_invert_tuple,
- .print_tuple = gre_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
+#endif
.get_timeouts = gre_get_timeouts,
.packet = gre_packet,
.new = gre_new,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 6eef29d..6303a88 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -174,20 +174,13 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void sctp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.sctp.port),
- ntohs(tuple->dst.u.sctp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]);
}
+#endif
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
@@ -314,7 +307,6 @@ static int sctp_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
enum sctp_conntrack new_state, old_state;
@@ -791,11 +783,11 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net)
struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
- .name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
+#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
@@ -828,11 +820,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
.l3proto = PF_INET6,
.l4proto = IPPROTO_SCTP,
- .name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
.invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
+#endif
.packet = sctp_packet,
.get_timeouts = sctp_get_timeouts,
.new = sctp_new,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9758a7d..cba1c6f 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -301,20 +301,13 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void tcp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.tcp.port),
- ntohs(tuple->dst.u.tcp.port));
-}
-
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
}
+#endif
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
{
@@ -810,7 +803,6 @@ static int tcp_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
struct net *net = nf_ct_net(ct);
@@ -1556,11 +1548,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_TCP,
- .name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
+#endif
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
@@ -1594,11 +1586,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_TCP,
- .name = "tcp",
.pkt_to_tuple = tcp_pkt_to_tuple,
.invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
+#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
+#endif
.packet = tcp_packet,
.get_timeouts = tcp_get_timeouts,
.new = tcp_new,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index f6ebce6..8af734c 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -63,15 +63,6 @@ static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
return true;
}
-/* Print out the per-protocol part of the tuple. */
-static void udp_print_tuple(struct seq_file *s,
- const struct nf_conntrack_tuple *tuple)
-{
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.udp.port),
- ntohs(tuple->dst.u.udp.port));
-}
-
static unsigned int *udp_get_timeouts(struct net *net)
{
return udp_pernet(net)->timeouts;
@@ -83,7 +74,6 @@ static int udp_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum,
unsigned int *timeouts)
{
/* If we've seen traffic both ways, this is some kind of UDP
@@ -313,11 +303,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDP,
- .name = "udp",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -347,11 +335,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
{
.l3proto = PF_INET,
.l4proto = IPPROTO_UDPLITE,
- .name = "udplite",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -381,11 +367,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDP,
- .name = "udp",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
@@ -415,11 +399,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_UDPLITE,
- .name = "udplite",
.allow_clash = true,
.pkt_to_tuple = udp_pkt_to_tuple,
.invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
.packet = udp_packet,
.get_timeouts = udp_get_timeouts,
.new = udp_new,
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index d38af42..4dbb5ba 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -884,7 +884,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
tuple.dst.u3 = *daddr;
tuple.dst.u.udp.port = port;
- rcu_read_lock();
do {
exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
@@ -918,10 +917,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
goto err1;
}
- if (skip_expect) {
- rcu_read_unlock();
+ if (skip_expect)
return NF_ACCEPT;
- }
rtp_exp = nf_ct_expect_alloc(ct);
if (rtp_exp == NULL)
@@ -952,7 +949,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
err2:
nf_ct_expect_put(rtp_exp);
err1:
- rcu_read_unlock();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index ccb5cb9..5a101ca 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -41,8 +41,62 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
{
- l3proto->print_tuple(s, tuple);
- l4proto->print_tuple(s, tuple);
+ switch (l3proto->l3proto) {
+ case NFPROTO_IPV4:
+ seq_printf(s, "src=%pI4 dst=%pI4 ",
+ &tuple->src.u3.ip, &tuple->dst.u3.ip);
+ break;
+ case NFPROTO_IPV6:
+ seq_printf(s, "src=%pI6 dst=%pI6 ",
+ tuple->src.u3.ip6, tuple->dst.u3.ip6);
+ break;
+ default:
+ break;
+ }
+
+ switch (l4proto->l4proto) {
+ case IPPROTO_ICMP:
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+ break;
+ case IPPROTO_TCP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.tcp.port),
+ ntohs(tuple->dst.u.tcp.port));
+ break;
+ case IPPROTO_UDPLITE: /* fallthrough */
+ case IPPROTO_UDP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.udp.port),
+ ntohs(tuple->dst.u.udp.port));
+
+ break;
+ case IPPROTO_DCCP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.dccp.port),
+ ntohs(tuple->dst.u.dccp.port));
+ break;
+ case IPPROTO_SCTP:
+ seq_printf(s, "sport=%hu dport=%hu ",
+ ntohs(tuple->src.u.sctp.port),
+ ntohs(tuple->dst.u.sctp.port));
+ break;
+ case IPPROTO_ICMPV6:
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+ break;
+ case IPPROTO_GRE:
+ seq_printf(s, "srckey=0x%x dstkey=0x%x ",
+ ntohs(tuple->src.u.gre.key),
+ ntohs(tuple->dst.u.gre.key));
+ break;
+ default:
+ break;
+ }
}
EXPORT_SYMBOL_GPL(print_tuple);
@@ -198,6 +252,31 @@ ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
}
#endif
+static const char* l3proto_name(u16 proto)
+{
+ switch (proto) {
+ case AF_INET: return "ipv4";
+ case AF_INET6: return "ipv6";
+ }
+
+ return "unknown";
+}
+
+static const char* l4proto_name(u16 proto)
+{
+ switch (proto) {
+ case IPPROTO_ICMP: return "icmp";
+ case IPPROTO_TCP: return "tcp";
+ case IPPROTO_UDP: return "udp";
+ case IPPROTO_DCCP: return "dccp";
+ case IPPROTO_GRE: return "gre";
+ case IPPROTO_SCTP: return "sctp";
+ case IPPROTO_UDPLITE: return "udplite";
+ }
+
+ return "unknown";
+}
+
/* return 0 on success, 1 in case of error */
static int ct_seq_show(struct seq_file *s, void *v)
{
@@ -208,7 +287,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
struct net *net = seq_file_net(s);
int ret = 0;
- NF_CT_ASSERT(ct);
+ WARN_ON(!ct);
if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
return 0;
@@ -225,14 +304,14 @@ static int ct_seq_show(struct seq_file *s, void *v)
goto release;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
- NF_CT_ASSERT(l3proto);
+ WARN_ON(!l3proto);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
- NF_CT_ASSERT(l4proto);
+ WARN_ON(!l4proto);
ret = -ENOSPC;
seq_printf(s, "%-8s %u %-8s %u %ld ",
- l3proto->name, nf_ct_l3num(ct),
- l4proto->name, nf_ct_protonum(ct),
+ l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
+ l4proto_name(l4proto->l4proto), nf_ct_protonum(ct),
nf_ct_expires(ct) / HZ);
if (l4proto->print_conntrack)
@@ -452,9 +531,6 @@ static int log_invalid_proto_max __read_mostly = 255;
/* size the user *wants to set */
static unsigned int nf_conntrack_htable_size_user __read_mostly;
-extern unsigned int nf_conntrack_default_on;
-unsigned int nf_conntrack_default_on __read_mostly = 1;
-
static int
nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -520,13 +596,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "nf_conntrack_default_on",
- .data = &nf_conntrack_default_on,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{ }
};
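
[With the per-protocol callbacks gone, nf_conntrack_standalone.c becomes the single place that knows how to render a tuple: one switch on the l3 protocol for addresses, one on the l4 protocol for ports, keys and ICMP ids, plus the l3proto_name()/l4proto_name() lookups for the leading columns. A /proc/net/nf_conntrack line produced by this code looks roughly like (illustrative values only):

        ipv4     2 tcp      6 431999 ESTABLISHED src=10.0.0.1 dst=10.0.0.2 sport=33000 dport=22 ...

The hunk also deletes the nf_conntrack_default_on sysctl together with its pernet register/unregister helpers removed from nf_conntrack_proto.c above; conntrack hooks are instead registered per netns on demand via nf_ct_netns_get()/nf_ct_netns_put().]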
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index bfa742d..49f87ec 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -5,17 +5,11 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...) printk(KERN_DEBUG format , ## args)
-#else
-#define NFDEBUG(format, args...)
-#endif
-
/* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry **entryp, unsigned int verdict);
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict);
unsigned int nf_queue_nf_hook_drop(struct net *net);
-int __init netfilter_queue_init(void);
/* nf_log.c */
int __init netfilter_log_init(void);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b1d3740..40573aa 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -414,8 +414,8 @@ nf_nat_setup_info(struct nf_conn *ct,
if (nf_ct_is_confirmed(ct))
return NF_ACCEPT;
- NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
- maniptype == NF_NAT_MANIP_DST);
+ WARN_ON(maniptype != NF_NAT_MANIP_SRC &&
+ maniptype != NF_NAT_MANIP_DST);
BUG_ON(nf_nat_initialized(ct, maniptype));
/* What we've got will look like inverse of reply. Normally
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 86067560..25b06b9 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -38,11 +38,11 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
__be32 newdst;
struct nf_nat_range newrange;
- NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING ||
- hooknum == NF_INET_LOCAL_OUT);
+ WARN_ON(hooknum != NF_INET_PRE_ROUTING &&
+ hooknum != NF_INET_LOCAL_OUT);
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
/* Local packets: make them go to loopback */
if (hooknum == NF_INET_LOCAL_OUT) {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 043850c..f7e2195 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -109,9 +109,11 @@ unsigned int nf_queue_nf_hook_drop(struct net *net)
return count;
}
+EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
- struct nf_hook_entry *hook_entry, unsigned int queuenum)
+ const struct nf_hook_entries *entries,
+ unsigned int index, unsigned int queuenum)
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
@@ -139,7 +141,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
*entry = (struct nf_queue_entry) {
.skb = skb,
.state = *state,
- .hook = hook_entry,
+ .hook_index = index,
.size = sizeof(*entry) + afinfo->route_key_size,
};
@@ -162,18 +164,16 @@ err:
/* Packets leaving via this function must come back through nf_reinject(). */
int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
- struct nf_hook_entry **entryp, unsigned int verdict)
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict)
{
- struct nf_hook_entry *entry = *entryp;
int ret;
- ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS);
+ ret = __nf_queue(skb, state, entries, index, verdict >> NF_VERDICT_QBITS);
if (ret < 0) {
if (ret == -ESRCH &&
- (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
- *entryp = rcu_dereference(entry->next);
+ (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
return 1;
- }
kfree_skb(skb);
}
@@ -182,33 +182,56 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
static unsigned int nf_iterate(struct sk_buff *skb,
struct nf_hook_state *state,
- struct nf_hook_entry **entryp)
+ const struct nf_hook_entries *hooks,
+ unsigned int *index)
{
- unsigned int verdict;
+ const struct nf_hook_entry *hook;
+ unsigned int verdict, i = *index;
- do {
+ while (i < hooks->num_hook_entries) {
+ hook = &hooks->hooks[i];
repeat:
- verdict = nf_hook_entry_hookfn((*entryp), skb, state);
+ verdict = nf_hook_entry_hookfn(hook, skb, state);
if (verdict != NF_ACCEPT) {
if (verdict != NF_REPEAT)
return verdict;
goto repeat;
}
- *entryp = rcu_dereference((*entryp)->next);
- } while (*entryp);
+ i++;
+ }
+ *index = i;
return NF_ACCEPT;
}
+/* Caller must hold rcu read-side lock */
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
- struct nf_hook_entry *hook_entry = entry->hook;
+ const struct nf_hook_entry *hook_entry;
+ const struct nf_hook_entries *hooks;
struct sk_buff *skb = entry->skb;
const struct nf_afinfo *afinfo;
+ const struct net *net;
+ unsigned int i;
int err;
+ u8 pf;
+
+ net = entry->state.net;
+ pf = entry->state.pf;
+
+ hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
nf_queue_entry_release_refs(entry);
+ i = entry->hook_index;
+ if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) {
+ kfree_skb(skb);
+ kfree(entry);
+ return;
+ }
+
+ hook_entry = &hooks->hooks[i];
+
/* Continue traversal iff userspace said ok... */
if (verdict == NF_REPEAT)
verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
@@ -220,27 +243,22 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
}
if (verdict == NF_ACCEPT) {
- hook_entry = rcu_dereference(hook_entry->next);
- if (hook_entry)
next_hook:
- verdict = nf_iterate(skb, &entry->state, &hook_entry);
+ ++i;
+ verdict = nf_iterate(skb, &entry->state, hooks, &i);
}
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_STOP:
-okfn:
local_bh_disable();
entry->state.okfn(entry->state.net, entry->state.sk, skb);
local_bh_enable();
break;
case NF_QUEUE:
- err = nf_queue(skb, &entry->state, &hook_entry, verdict);
- if (err == 1) {
- if (hook_entry)
- goto next_hook;
- goto okfn;
- }
+ err = nf_queue(skb, &entry->state, hooks, i, verdict);
+ if (err == 1)
+ goto next_hook;
break;
case NF_STOLEN:
break;
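
[The nf_queue.c rewrite is the structural core of this series: a queued packet no longer pins a pointer into an RCU-protected linked list of hooks (entry->hook) but records a plain array index (entry->hook_index) into the nf_hook_entries blob. On reinjection the current blob is looked up again and the index revalidated, so a hook table replaced while the packet sat in userspace cannot leave a dangling pointer. A condensed sketch of the revalidation performed by nf_reinject() above:

        hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
        if (entry->hook_index >= hooks->num_hook_entries) {
                kfree_skb(skb);         /* the hook went away while queued */
                kfree(entry);
                return;
        }
        hook_entry = &hooks->hooks[entry->hook_index];

nf_iterate() changes accordingly, walking indices instead of chasing ->next pointers, and nf_queue_nf_hook_drop() is exported for the conntrack cleanup paths seen earlier.]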
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index c68c1e5..d2a9e6b 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -33,7 +33,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg)
reg->set_optmin, reg->set_optmax)
|| overlap(ops->get_optmin, ops->get_optmax,
reg->get_optmin, reg->get_optmax))) {
- NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
+ pr_debug("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
ops->set_optmin, ops->set_optmax,
ops->get_optmin, ops->get_optmax,
reg->set_optmin, reg->set_optmax,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7843efa..9299271 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -726,7 +726,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (table == NULL)
goto err2;
- nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
+ table->name = nla_strdup(name, GFP_KERNEL);
+ if (table->name == NULL)
+ goto err3;
+
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
@@ -735,10 +738,12 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
- goto err3;
+ goto err4;
list_add_tail_rcu(&table->list, &afi->tables);
return 0;
+err4:
+ kfree(table->name);
err3:
kfree(table);
err2:
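
Table names move from a fixed NFT_TABLE_MAXNAMELEN array to a heap string, so the unwind path gains one more label: err4 frees the name before err3 frees the table. A user-space sketch of the same LIFO goto-unwind shape, assuming strdup() as a stand-in for nla_strdup():

#include <errno.h>
#include <stdlib.h>
#include <string.h>

struct table { char *name; };

static int newtable(const char *src, struct table **out)
{
        struct table *t;
        int err = -ENOMEM;

        t = calloc(1, sizeof(*t));
        if (!t)
                goto err1;
        t->name = strdup(src);          /* plays the role of nla_strdup() */
        if (!t->name)
                goto err2;
        /* a later failure would add "goto err3" and free t->name first */
        *out = t;
        return 0;
err2:
        free(t);
err1:
        return err;
}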
@@ -855,6 +860,10 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
if (IS_ERR(table))
return PTR_ERR(table);
+ if (nlh->nlmsg_flags & NLM_F_NONREC &&
+ table->use > 0)
+ return -EBUSY;
+
ctx.afi = afi;
ctx.table = table;
@@ -865,6 +874,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
BUG_ON(ctx->table->use > 0);
+ kfree(ctx->table->name);
kfree(ctx->table);
module_put(ctx->afi->owner);
}
@@ -1240,10 +1250,14 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
module_put(basechain->type->owner);
free_percpu(basechain->stats);
+ if (basechain->stats)
+ static_branch_dec(&nft_counters_enabled);
if (basechain->ops[0].dev != NULL)
dev_put(basechain->ops[0].dev);
+ kfree(chain->name);
kfree(basechain);
} else {
+ kfree(chain->name);
kfree(chain);
}
}
@@ -1325,155 +1339,18 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
dev_put(hook->dev);
}
-static int nf_tables_newchain(struct net *net, struct sock *nlsk,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack)
+static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+ u8 policy, bool create)
{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- const struct nlattr * uninitialized_var(name);
- struct nft_af_info *afi;
- struct nft_table *table;
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table = ctx->table;
+ struct nft_af_info *afi = ctx->afi;
+ struct nft_base_chain *basechain;
+ struct nft_stats __percpu *stats;
+ struct net *net = ctx->net;
struct nft_chain *chain;
- struct nft_base_chain *basechain = NULL;
- u8 genmask = nft_genmask_next(net);
- int family = nfmsg->nfgen_family;
- u8 policy = NF_ACCEPT;
- u64 handle = 0;
unsigned int i;
- struct nft_stats __percpu *stats;
int err;
- bool create;
- struct nft_ctx ctx;
-
- create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
-
- afi = nf_tables_afinfo_lookup(net, family, true);
- if (IS_ERR(afi))
- return PTR_ERR(afi);
-
- table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
- if (IS_ERR(table))
- return PTR_ERR(table);
-
- chain = NULL;
- name = nla[NFTA_CHAIN_NAME];
-
- if (nla[NFTA_CHAIN_HANDLE]) {
- handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
- chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
- if (IS_ERR(chain))
- return PTR_ERR(chain);
- } else {
- chain = nf_tables_chain_lookup(table, name, genmask);
- if (IS_ERR(chain)) {
- if (PTR_ERR(chain) != -ENOENT)
- return PTR_ERR(chain);
- chain = NULL;
- }
- }
-
- if (nla[NFTA_CHAIN_POLICY]) {
- if (chain != NULL &&
- !nft_is_base_chain(chain))
- return -EOPNOTSUPP;
-
- if (chain == NULL &&
- nla[NFTA_CHAIN_HOOK] == NULL)
- return -EOPNOTSUPP;
-
- policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
- switch (policy) {
- case NF_DROP:
- case NF_ACCEPT:
- break;
- default:
- return -EINVAL;
- }
- }
-
- if (chain != NULL) {
- struct nft_stats *stats = NULL;
- struct nft_trans *trans;
-
- if (nlh->nlmsg_flags & NLM_F_EXCL)
- return -EEXIST;
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
- return -EOPNOTSUPP;
-
- if (nla[NFTA_CHAIN_HOOK]) {
- struct nft_base_chain *basechain;
- struct nft_chain_hook hook;
- struct nf_hook_ops *ops;
-
- if (!nft_is_base_chain(chain))
- return -EBUSY;
-
- err = nft_chain_parse_hook(net, nla, afi, &hook,
- create);
- if (err < 0)
- return err;
-
- basechain = nft_base_chain(chain);
- if (basechain->type != hook.type) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
- }
-
- for (i = 0; i < afi->nops; i++) {
- ops = &basechain->ops[i];
- if (ops->hooknum != hook.num ||
- ops->priority != hook.priority ||
- ops->dev != hook.dev) {
- nft_chain_release_hook(&hook);
- return -EBUSY;
- }
- }
- nft_chain_release_hook(&hook);
- }
-
- if (nla[NFTA_CHAIN_HANDLE] && name) {
- struct nft_chain *chain2;
-
- chain2 = nf_tables_chain_lookup(table,
- nla[NFTA_CHAIN_NAME],
- genmask);
- if (IS_ERR(chain2))
- return PTR_ERR(chain2);
- }
-
- if (nla[NFTA_CHAIN_COUNTERS]) {
- if (!nft_is_base_chain(chain))
- return -EOPNOTSUPP;
-
- stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
- if (IS_ERR(stats))
- return PTR_ERR(stats);
- }
-
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
- trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
- sizeof(struct nft_trans_chain));
- if (trans == NULL) {
- free_percpu(stats);
- return -ENOMEM;
- }
-
- nft_trans_chain_stats(trans) = stats;
- nft_trans_chain_update(trans) = true;
-
- if (nla[NFTA_CHAIN_POLICY])
- nft_trans_chain_policy(trans) = policy;
- else
- nft_trans_chain_policy(trans) = -1;
-
- if (nla[NFTA_CHAIN_HANDLE] && name) {
- nla_strlcpy(nft_trans_chain_name(trans), name,
- NFT_CHAIN_MAXNAMELEN);
- }
- list_add_tail(&trans->list, &net->nft.commit_list);
- return 0;
- }
if (table->use == UINT_MAX)
return -EOVERFLOW;
@@ -1504,14 +1381,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return PTR_ERR(stats);
}
basechain->stats = stats;
- } else {
- stats = netdev_alloc_pcpu_stats(struct nft_stats);
- if (stats == NULL) {
- nft_chain_release_hook(&hook);
- kfree(basechain);
- return -ENOMEM;
- }
- rcu_assign_pointer(basechain->stats, stats);
+ static_branch_inc(&nft_counters_enabled);
}
hookfn = hook.type->hooks[hook.num];
@@ -1539,31 +1409,204 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (chain == NULL)
return -ENOMEM;
}
-
INIT_LIST_HEAD(&chain->rules);
chain->handle = nf_tables_alloc_handle(table);
chain->table = table;
- nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+ chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+ if (!chain->name) {
+ err = -ENOMEM;
+ goto err1;
+ }
err = nf_tables_register_hooks(net, table, chain, afi->nops);
if (err < 0)
goto err1;
- nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
- err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
+ ctx->chain = chain;
+ err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (err < 0)
goto err2;
table->use++;
list_add_tail_rcu(&chain->list, &table->chains);
+
return 0;
err2:
nf_tables_unregister_hooks(net, table, chain, afi->nops);
err1:
nf_tables_chain_destroy(chain);
+
return err;
}
+static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
+ bool create)
+{
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table = ctx->table;
+ struct nft_chain *chain = ctx->chain;
+ struct nft_af_info *afi = ctx->afi;
+ struct nft_base_chain *basechain;
+ struct nft_stats *stats = NULL;
+ struct nft_chain_hook hook;
+ const struct nlattr *name;
+ struct nf_hook_ops *ops;
+ struct nft_trans *trans;
+ int err, i;
+
+ if (nla[NFTA_CHAIN_HOOK]) {
+ if (!nft_is_base_chain(chain))
+ return -EBUSY;
+
+ err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook,
+ create);
+ if (err < 0)
+ return err;
+
+ basechain = nft_base_chain(chain);
+ if (basechain->type != hook.type) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+
+ for (i = 0; i < afi->nops; i++) {
+ ops = &basechain->ops[i];
+ if (ops->hooknum != hook.num ||
+ ops->priority != hook.priority ||
+ ops->dev != hook.dev) {
+ nft_chain_release_hook(&hook);
+ return -EBUSY;
+ }
+ }
+ nft_chain_release_hook(&hook);
+ }
+
+ if (nla[NFTA_CHAIN_HANDLE] &&
+ nla[NFTA_CHAIN_NAME]) {
+ struct nft_chain *chain2;
+
+ chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME],
+ genmask);
+ if (IS_ERR(chain2))
+ return PTR_ERR(chain2);
+ }
+
+ if (nla[NFTA_CHAIN_COUNTERS]) {
+ if (!nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
+
+ stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
+ if (IS_ERR(stats))
+ return PTR_ERR(stats);
+ }
+
+ trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
+ sizeof(struct nft_trans_chain));
+ if (trans == NULL) {
+ free_percpu(stats);
+ return -ENOMEM;
+ }
+
+ nft_trans_chain_stats(trans) = stats;
+ nft_trans_chain_update(trans) = true;
+
+ if (nla[NFTA_CHAIN_POLICY])
+ nft_trans_chain_policy(trans) = policy;
+ else
+ nft_trans_chain_policy(trans) = -1;
+
+ name = nla[NFTA_CHAIN_NAME];
+ if (nla[NFTA_CHAIN_HANDLE] && name) {
+ nft_trans_chain_name(trans) =
+ nla_strdup(name, GFP_KERNEL);
+ if (!nft_trans_chain_name(trans)) {
+ kfree(trans);
+ free_percpu(stats);
+ return -ENOMEM;
+ }
+ }
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
+static int nf_tables_newchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[],
+ struct netlink_ext_ack *extack)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nlattr * uninitialized_var(name);
+ u8 genmask = nft_genmask_next(net);
+ int family = nfmsg->nfgen_family;
+ struct nft_af_info *afi;
+ struct nft_table *table;
+ struct nft_chain *chain;
+ u8 policy = NF_ACCEPT;
+ struct nft_ctx ctx;
+ u64 handle = 0;
+ bool create;
+
+ create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+ afi = nf_tables_afinfo_lookup(net, family, true);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ chain = NULL;
+ name = nla[NFTA_CHAIN_NAME];
+
+ if (nla[NFTA_CHAIN_HANDLE]) {
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+ chain = nf_tables_chain_lookup_byhandle(table, handle, genmask);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+ } else {
+ chain = nf_tables_chain_lookup(table, name, genmask);
+ if (IS_ERR(chain)) {
+ if (PTR_ERR(chain) != -ENOENT)
+ return PTR_ERR(chain);
+ chain = NULL;
+ }
+ }
+
+ if (nla[NFTA_CHAIN_POLICY]) {
+ if (chain != NULL &&
+ !nft_is_base_chain(chain))
+ return -EOPNOTSUPP;
+
+ if (chain == NULL &&
+ nla[NFTA_CHAIN_HOOK] == NULL)
+ return -EOPNOTSUPP;
+
+ policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY]));
+ switch (policy) {
+ case NF_DROP:
+ case NF_ACCEPT:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+
+ if (chain != NULL) {
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
+
+ return nf_tables_updchain(&ctx, genmask, policy, create);
+ }
+
+ return nf_tables_addchain(&ctx, family, genmask, policy, create);
+}
+
static int nf_tables_delchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -1574,8 +1617,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
+ struct nft_rule *rule;
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
+ u32 use;
+ int err;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -1588,11 +1634,30 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (chain->use > 0)
+
+ if (nlh->nlmsg_flags & NLM_F_NONREC &&
+ chain->use > 0)
return -EBUSY;
nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
+ use = chain->use;
+ list_for_each_entry(rule, &chain->rules, list) {
+ if (!nft_is_active_next(net, rule))
+ continue;
+ use--;
+
+ err = nft_delrule(&ctx, rule);
+ if (err < 0)
+ return err;
+ }
+
+ /* There are rules and elements that are still holding references to us,
+ * so we cannot do a recursive removal in this case.
+ */
+ if (use > 0)
+ return -EBUSY;
+
return nft_delchain(&ctx);
}
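
Chain deletion now flushes the chain's still-active rules first and only reports EBUSY if other objects keep holding references; NLM_F_NONREC requests the old non-recursive behaviour. A simplified model of the flush-then-check logic (plain C, kernel list and transaction handling omitted):

#include <stdbool.h>
#include <stddef.h>

struct rule { struct rule *next; bool active; };

/* Returns 0 on success, -1 (the EBUSY analogue) if references remain. */
static int delchain(const struct rule *rules, unsigned int use, bool nonrec)
{
        const struct rule *r;

        if (nonrec && use > 0)
                return -1;      /* NLM_F_NONREC keeps the old strict behaviour */

        for (r = rules; r; r = r->next)
                if (r->active)
                        use--;  /* each flushed rule drops one reference */

        return use > 0 ? -1 : 0;  /* sets/elements still reference the chain */
}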
@@ -1977,8 +2042,8 @@ err:
}
struct nft_rule_dump_ctx {
- char table[NFT_TABLE_MAXNAMELEN];
- char chain[NFT_CHAIN_MAXNAMELEN];
+ char *table;
+ char *chain;
};
static int nf_tables_dump_rules(struct sk_buff *skb,
@@ -2002,7 +2067,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
continue;
list_for_each_entry_rcu(table, &afi->tables, list) {
- if (ctx && ctx->table[0] &&
+ if (ctx && ctx->table &&
strcmp(ctx->table, table->name) != 0)
continue;
@@ -2042,7 +2107,13 @@ done:
static int nf_tables_dump_rules_done(struct netlink_callback *cb)
{
- kfree(cb->data);
+ struct nft_rule_dump_ctx *ctx = cb->data;
+
+ if (ctx) {
+ kfree(ctx->table);
+ kfree(ctx->chain);
+ kfree(ctx);
+ }
return 0;
}
@@ -2074,12 +2145,23 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
if (!ctx)
return -ENOMEM;
- if (nla[NFTA_RULE_TABLE])
- nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE],
- sizeof(ctx->table));
- if (nla[NFTA_RULE_CHAIN])
- nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN],
- sizeof(ctx->chain));
+ if (nla[NFTA_RULE_TABLE]) {
+ ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
+ GFP_KERNEL);
+ if (!ctx->table) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
+ if (nla[NFTA_RULE_CHAIN]) {
+ ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
+ GFP_KERNEL);
+ if (!ctx->chain) {
+ kfree(ctx->table);
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
c.data = ctx;
}
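
The rule dump context now owns heap-allocated table and chain names, created when the dump starts and released in the ->done() callback, so every setup failure has to free whatever was already duplicated. A user-space sketch of that paired ownership, with dump_start()/dump_done() as hypothetical stand-ins for the netlink callbacks:

#include <stdlib.h>
#include <string.h>

struct dump_ctx { char *table; char *chain; };

static struct dump_ctx *dump_start(const char *table, const char *chain)
{
        struct dump_ctx *ctx = calloc(1, sizeof(*ctx));

        if (!ctx)
                return NULL;
        if (table && !(ctx->table = strdup(table)))
                goto fail;
        if (chain && !(ctx->chain = strdup(chain)))
                goto fail;
        return ctx;                     /* stored in cb->data in the kernel */
fail:
        free(ctx->table);
        free(ctx);
        return NULL;
}

static void dump_done(struct dump_ctx *ctx)     /* ->done() analogue */
{
        if (ctx) {
                free(ctx->table);
                free(ctx->chain);
                free(ctx);
        }
}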
@@ -2621,7 +2703,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
unsigned long *inuse;
unsigned int n = 0, min = 0;
- p = strnchr(name, NFT_SET_MAXNAMELEN, '%');
+ p = strchr(name, '%');
if (p != NULL) {
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
@@ -2652,7 +2734,10 @@ cont:
free_page((unsigned long)inuse);
}
- snprintf(set->name, sizeof(set->name), name, min + n);
+ set->name = kasprintf(GFP_KERNEL, name, min + n);
+ if (!set->name)
+ return -ENOMEM;
+
list_for_each_entry(i, &ctx->table->sets, list) {
if (!nft_is_active_next(ctx->net, i))
continue;
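
With set names heap-allocated, expanding a "%d" template name switches from snprintf() into a fixed buffer to kasprintf(), which sizes the allocation from the format itself. A portable user-space analogue (measure with vsnprintf(), then allocate and print; xasprintf() is an illustrative name):

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

static char *xasprintf(const char *fmt, ...)
{
        va_list ap;
        char *p;
        int n;

        va_start(ap, fmt);
        n = vsnprintf(NULL, 0, fmt, ap);        /* first pass: measure */
        va_end(ap);
        if (n < 0 || !(p = malloc((size_t)n + 1)))
                return NULL;

        va_start(ap, fmt);
        vsnprintf(p, (size_t)n + 1, fmt, ap);   /* second pass: format */
        va_end(ap);
        return p;                               /* e.g. xasprintf("set%d", 3) */
}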
@@ -2929,7 +3014,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
- char name[NFT_SET_MAXNAMELEN];
+ char *name;
unsigned int size;
bool create;
u64 timeout;
@@ -3075,8 +3160,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
goto err1;
}
- nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
+ name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
+ if (!name) {
+ err = -ENOMEM;
+ goto err2;
+ }
+
err = nf_tables_set_alloc_name(&ctx, set, name);
+ kfree(name);
if (err < 0)
goto err2;
@@ -3126,6 +3217,7 @@ static void nft_set_destroy(struct nft_set *set)
{
set->ops->destroy(set);
module_put(set->ops->type->owner);
+ kfree(set->name);
kvfree(set);
}
@@ -3159,7 +3251,9 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings))
+
+ if (!list_empty(&set->bindings) ||
+ (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0))
return -EBUSY;
return nft_delset(&ctx, set);
@@ -4209,7 +4303,7 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
list_for_each_entry(obj, &table->objects, list) {
if (!nla_strcmp(nla, obj->name) &&
- objtype == obj->type->type &&
+ objtype == obj->ops->type->type &&
nft_active_genmask(obj, genmask))
return obj;
}
@@ -4231,6 +4325,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
const struct nlattr *attr)
{
struct nlattr *tb[type->maxattr + 1];
+ const struct nft_object_ops *ops;
struct nft_object *obj;
int err;
@@ -4243,16 +4338,27 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx,
memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1));
}
+ if (type->select_ops) {
+ ops = type->select_ops(ctx, (const struct nlattr * const *)tb);
+ if (IS_ERR(ops)) {
+ err = PTR_ERR(ops);
+ goto err1;
+ }
+ } else {
+ ops = type->ops;
+ }
+
err = -ENOMEM;
- obj = kzalloc(sizeof(struct nft_object) + type->size, GFP_KERNEL);
+ obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL);
if (obj == NULL)
goto err1;
- err = type->init(ctx, (const struct nlattr * const *)tb, obj);
+ err = ops->init(ctx, (const struct nlattr * const *)tb, obj);
if (err < 0)
goto err2;
- obj->type = type;
+ obj->ops = ops;
+
return obj;
err2:
kfree(obj);
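
Object types are split from object ops here: an optional select_ops() callback lets one registered type pick different ops from the netlink attributes, and each instance stores obj->ops rather than obj->type. A compact model of that dispatch (names are illustrative):

#include <stddef.h>

struct obj_ops { const char *label; size_t size; };

struct obj_type {
        const struct obj_ops *ops;              /* default ops */
        const struct obj_ops *(*select_ops)(int attr);
};

static const struct obj_ops *pick_ops(const struct obj_type *t, int attr)
{
        return t->select_ops ? t->select_ops(attr) : t->ops;
}

nft_limit further down is the first user: the same NFT_OBJECT_LIMIT type serves both packet- and byte-based ops.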
@@ -4268,7 +4374,7 @@ static int nft_object_dump(struct sk_buff *skb, unsigned int attr,
nest = nla_nest_start(skb, attr);
if (!nest)
goto nla_put_failure;
- if (obj->type->dump(skb, obj, reset) < 0)
+ if (obj->ops->dump(skb, obj, reset) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
return 0;
@@ -4363,18 +4469,24 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
goto err1;
}
obj->table = table;
- nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN);
+ obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+ if (!obj->name) {
+ err = -ENOMEM;
+ goto err2;
+ }
err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj);
if (err < 0)
- goto err2;
+ goto err3;
list_add_tail_rcu(&obj->list, &table->objects);
table->use++;
return 0;
+err3:
+ kfree(obj->name);
err2:
- if (obj->type->destroy)
- obj->type->destroy(obj);
+ if (obj->ops->destroy)
+ obj->ops->destroy(obj);
kfree(obj);
err1:
module_put(type->owner);
@@ -4401,7 +4513,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
- nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) ||
+ nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
goto nla_put_failure;
@@ -4415,7 +4527,7 @@ nla_put_failure:
}
struct nft_obj_filter {
- char table[NFT_OBJ_MAXNAMELEN];
+ char *table;
u32 type;
};
@@ -4455,7 +4567,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
goto cont;
if (filter &&
filter->type != NFT_OBJECT_UNSPEC &&
- obj->type->type != filter->type)
+ obj->ops->type->type != filter->type)
goto cont;
if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid,
@@ -4480,7 +4592,10 @@ done:
static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
- kfree(cb->data);
+ struct nft_obj_filter *filter = cb->data;
+
+ kfree(filter->table);
+ kfree(filter);
return 0;
}
@@ -4494,9 +4609,13 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
if (!filter)
return ERR_PTR(-ENOMEM);
- if (nla[NFTA_OBJ_TABLE])
- nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE],
- NFT_TABLE_MAXNAMELEN);
+ if (nla[NFTA_OBJ_TABLE]) {
+ filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_KERNEL);
+ if (!filter->table) {
+ kfree(filter);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
if (nla[NFTA_OBJ_TYPE])
filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
@@ -4576,10 +4695,11 @@ err:
static void nft_obj_destroy(struct nft_object *obj)
{
- if (obj->type->destroy)
- obj->type->destroy(obj);
+ if (obj->ops->destroy)
+ obj->ops->destroy(obj);
- module_put(obj->type->owner);
+ module_put(obj->ops->type->owner);
+ kfree(obj->name);
kfree(obj);
}
@@ -4662,6 +4782,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
+ char buf[TASK_COMM_LEN];
int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
@@ -4673,7 +4794,9 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
- if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
+ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) ||
+ nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
+ nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -4842,7 +4965,7 @@ static void nft_chain_commit_update(struct nft_trans *trans)
{
struct nft_base_chain *basechain;
- if (nft_trans_chain_name(trans)[0])
+ if (nft_trans_chain_name(trans))
strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
if (!nft_is_base_chain(trans->ctx.chain))
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 65dbead..dfd0bf3 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -29,7 +29,7 @@ static const char *const comments[__NFT_TRACETYPE_MAX] = {
[NFT_TRACETYPE_RULE] = "rule",
};
-static struct nf_loginfo trace_loginfo = {
+static const struct nf_loginfo trace_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
@@ -114,6 +114,22 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
return true;
}
+DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
+
+static noinline void nft_update_chain_stats(const struct nft_chain *chain,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_stats *stats;
+
+ local_bh_disable();
+ stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats));
+ u64_stats_update_begin(&stats->syncp);
+ stats->pkts++;
+ stats->bytes += pkt->skb->len;
+ u64_stats_update_end(&stats->syncp);
+ local_bh_enable();
+}
+
struct nft_jumpstack {
const struct nft_chain *chain;
const struct nft_rule *rule;
@@ -130,7 +146,6 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
- struct nft_stats *stats;
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
struct nft_traceinfo info;
@@ -220,13 +235,8 @@ next_rule:
nft_trace_packet(&info, basechain, NULL, -1,
NFT_TRACETYPE_POLICY);
- rcu_read_lock_bh();
- stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
- u64_stats_update_begin(&stats->syncp);
- stats->pkts++;
- stats->bytes += pkt->skb->len;
- u64_stats_update_end(&stats->syncp);
- rcu_read_unlock_bh();
+ if (static_branch_unlikely(&nft_counters_enabled))
+ nft_update_chain_stats(basechain, pkt);
return nft_base_chain(basechain)->policy;
}
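
Base-chain counters are now gated by a static key: the per-packet stats update is patched out entirely unless at least one chain was created with counters, and static_branch_inc()/dec() in the chain add/destroy paths keep the key balanced. User space has no jump labels, so the sketch below models the idea with a refcounted atomic flag (illustrative only):

#include <stdatomic.h>

static atomic_int counters_enabled;     /* models nft_counters_enabled */

static void chain_counters_on(void)     /* static_branch_inc() analogue */
{
        atomic_fetch_add(&counters_enabled, 1);
}

static void chain_counters_off(void)    /* static_branch_dec() analogue */
{
        atomic_fetch_sub(&counters_enabled, 1);
}

static void count_packet(unsigned int len, unsigned long long *bytes)
{
        if (atomic_load_explicit(&counters_enabled, memory_order_relaxed))
                *bytes += len;  /* taken only when some chain has counters */
}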
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index e1b15e7..e1dc527 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -162,6 +162,27 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
NFTA_TRACE_PAD);
}
+static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
+{
+ switch (info->type) {
+ case NFT_TRACETYPE_RETURN:
+ case NFT_TRACETYPE_RULE:
+ break;
+ default:
+ return false;
+ }
+
+ switch (info->verdict->code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
void nft_trace_notify(struct nft_traceinfo *info)
{
const struct nft_pktinfo *pkt = info->pkt;
@@ -175,13 +196,12 @@ void nft_trace_notify(struct nft_traceinfo *info)
return;
size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
- nla_total_size(NFT_TABLE_MAXNAMELEN) +
- nla_total_size(NFT_CHAIN_MAXNAMELEN) +
+ nla_total_size(strlen(info->chain->table->name)) +
+ nla_total_size(strlen(info->chain->name)) +
nla_total_size_64bit(sizeof(__be64)) + /* rule handle */
nla_total_size(sizeof(__be32)) + /* trace type */
nla_total_size(0) + /* VERDICT, nested */
nla_total_size(sizeof(u32)) + /* verdict code */
- nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */
nla_total_size(sizeof(u32)) + /* id */
nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
@@ -194,6 +214,9 @@ void nft_trace_notify(struct nft_traceinfo *info)
nla_total_size(sizeof(u32)) + /* nfproto */
nla_total_size(sizeof(u32)); /* policy */
+ if (nft_trace_have_verdict_chain(info))
+ size += nla_total_size(strlen(info->verdict->chain->name)); /* jump target */
+
skb = nlmsg_new(size, GFP_ATOMIC);
if (!skb)
return;
@@ -217,14 +240,11 @@ void nft_trace_notify(struct nft_traceinfo *info)
if (trace_fill_id(skb, pkt->skb))
goto nla_put_failure;
- if (info->chain) {
- if (nla_put_string(skb, NFTA_TRACE_CHAIN,
- info->chain->name))
- goto nla_put_failure;
- if (nla_put_string(skb, NFTA_TRACE_TABLE,
- info->chain->table->name))
- goto nla_put_failure;
- }
+ if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name))
+ goto nla_put_failure;
+
+ if (nla_put_string(skb, NFTA_TRACE_TABLE, info->chain->table->name))
+ goto nla_put_failure;
if (nf_trace_fill_rule_info(skb, info))
goto nla_put_failure;
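
Because names are no longer bounded by *_MAXNAMELEN, the trace message is sized from the actual strlen() of the table and chain names, and the jump-target attribute is only budgeted when the verdict type can carry one (nft_trace_have_verdict_chain() above). A rough sketch of that exact-size budgeting, with a simplified 4-byte attribute header standing in for nla_total_size():

#include <stddef.h>
#include <string.h>

#define NLA_HDR 4       /* simplified netlink attribute header size */

static size_t attr_size(size_t payload)         /* nla_total_size() analogue */
{
        return (NLA_HDR + payload + 3) & ~(size_t)3;
}

static size_t trace_msg_size(const char *table, const char *chain,
                             const char *jump_target)
{
        size_t size = attr_size(strlen(table) + 1) +
                      attr_size(strlen(chain) + 1);

        if (jump_target)        /* budgeted only when the verdict can jump */
                size += attr_size(strlen(jump_target) + 1);
        return size;
}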
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 400e9ae..32b1c0b 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -47,7 +47,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
};
static int
-ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
+ctnl_timeout_parse_policy(void *timeouts,
+ const struct nf_conntrack_l4proto *l4proto,
struct net *net, const struct nlattr *attr)
{
int ret = 0;
@@ -74,7 +75,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
{
__u16 l3num;
__u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout, *matching = NULL;
char *name;
int ret;
@@ -158,7 +159,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
- struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
+ const struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -363,10 +364,10 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
const struct nlattr * const cda[],
struct netlink_ext_ack *extack)
{
+ const struct nf_conntrack_l4proto *l4proto;
+ unsigned int *timeouts;
__u16 l3num;
__u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
- unsigned int *timeouts;
int ret;
if (!cda[CTA_TIMEOUT_L3PROTO] ||
@@ -401,7 +402,7 @@ err:
static int
cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
u32 seq, u32 type, int event,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l4proto *l4proto)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -453,11 +454,11 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
const struct nlattr * const cda[],
struct netlink_ext_ack *extack)
{
- __u16 l3num;
- __u8 l4num;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct sk_buff *skb2;
int ret, err;
+ __u16 l3num;
+ __u8 l4num;
if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
return -EINVAL;
@@ -505,7 +506,6 @@ ctnl_timeout_find_get(struct net *net, const char *name)
{
struct ctnl_timeout *timeout, *matching = NULL;
- rcu_read_lock();
list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
@@ -521,7 +521,6 @@ ctnl_timeout_find_get(struct net *net, const char *name)
break;
}
err:
- rcu_read_unlock();
return matching;
}
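
The rcu_read_lock()/unlock() pair is hoisted out of ctnl_timeout_find_get(): its callers already run inside an RCU read-side section, so the helper now merely assumes the lock, the same caller-must-hold convention the nf_reinject() comment earlier in this patch spells out. A small model of moving that responsibility to the caller (the thread-local counter is a user-space stand-in for rcu_read_lock_held()):

#include <assert.h>
#include <stddef.h>

static _Thread_local int rcu_nesting;   /* depth of the read-side section */

static void my_rcu_read_lock(void)   { rcu_nesting++; }
static void my_rcu_read_unlock(void) { rcu_nesting--; }

/* Helper no longer locks; it asserts the caller already did. */
static const char *timeout_find(const char *name)
{
        assert(rcu_nesting > 0);        /* models rcu_read_lock_held() */
        (void)name;
        return NULL;                    /* list walk elided */
}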
@@ -572,6 +571,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
{
struct ctnl_timeout *cur, *tmp;
+ nf_ct_unconfirmed_destroy(net);
ctnl_untimeout(net, NULL);
list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index c684ba9..cad6498 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -606,7 +606,7 @@ nla_put_failure:
return -1;
}
-static struct nf_loginfo default_loginfo = {
+static const struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_ULOG,
.u = {
.ulog = {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 16fa040..c979662 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -41,6 +41,10 @@
#include "../bridge/br_private.h"
#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
#define NFQNL_QMAX_DEFAULT 1024
/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
@@ -612,6 +616,18 @@ nlmsg_failure:
return NULL;
}
+static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
+ const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
+
+ if (ct && ((ct->status & flags) == IPS_DYING))
+ return true;
+#endif
+ return false;
+}
+
static int
__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry)
@@ -628,6 +644,9 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
}
spin_lock_bh(&queue->lock);
+ if (nf_ct_drop_unconfirmed(entry))
+ goto err_out_free_nskb;
+
if (queue->queue_total >= queue->queue_maxlen) {
if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
failopen = 1;
@@ -928,7 +947,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net)
unsigned int instances = 0;
int i;
- rcu_read_lock();
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct nfqnl_instance *inst;
struct hlist_head *head = &q->instance_table[i];
@@ -938,7 +956,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net)
instances++;
}
}
- rcu_read_unlock();
return instances;
}
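
Packets whose conntrack entry is dying are now dropped before they are queued to user space, since a verdict could otherwise arrive after the conntrack has been torn down. The flag test accepts only status words where IPS_DYING is set and IPS_CONFIRMED is not; the standalone version below uses the bit positions from the nf_conntrack uapi header (treat them as illustrative):

#include <stdbool.h>

#define IPS_CONFIRMED (1UL << 3)        /* assumed bit positions, see uapi */
#define IPS_DYING     (1UL << 9)

static bool drop_unconfirmed(unsigned long status)
{
        const unsigned long flags = IPS_CONFIRMED | IPS_DYING;

        return (status & flags) == IPS_DYING;   /* dying, never confirmed */
}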
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 67a710e..eefe3b4 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -175,15 +175,21 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
-static struct nft_object_type nft_counter_obj __read_mostly = {
- .type = NFT_OBJECT_COUNTER,
+static struct nft_object_type nft_counter_obj_type;
+static const struct nft_object_ops nft_counter_obj_ops = {
+ .type = &nft_counter_obj_type,
.size = sizeof(struct nft_counter_percpu_priv),
- .maxattr = NFTA_COUNTER_MAX,
- .policy = nft_counter_policy,
.eval = nft_counter_obj_eval,
.init = nft_counter_obj_init,
.destroy = nft_counter_obj_destroy,
.dump = nft_counter_obj_dump,
+};
+
+static struct nft_object_type nft_counter_obj_type __read_mostly = {
+ .type = NFT_OBJECT_COUNTER,
+ .ops = &nft_counter_obj_ops,
+ .maxattr = NFTA_COUNTER_MAX,
+ .policy = nft_counter_policy,
.owner = THIS_MODULE,
};
@@ -271,7 +277,7 @@ static int __init nft_counter_module_init(void)
for_each_possible_cpu(cpu)
seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
- err = nft_register_obj(&nft_counter_obj);
+ err = nft_register_obj(&nft_counter_obj_type);
if (err < 0)
return err;
@@ -281,14 +287,14 @@ static int __init nft_counter_module_init(void)
return 0;
err1:
- nft_unregister_obj(&nft_counter_obj);
+ nft_unregister_obj(&nft_counter_obj_type);
return err;
}
static void __exit nft_counter_module_exit(void)
{
nft_unregister_expr(&nft_counter_type);
- nft_unregister_obj(&nft_counter_obj);
+ nft_unregister_obj(&nft_counter_obj_type);
}
module_init(nft_counter_module_init);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 1678e9e..bd0975d 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -904,15 +904,21 @@ static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
[NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
};
-static struct nft_object_type nft_ct_helper_obj __read_mostly = {
- .type = NFT_OBJECT_CT_HELPER,
+static struct nft_object_type nft_ct_helper_obj_type;
+static const struct nft_object_ops nft_ct_helper_obj_ops = {
+ .type = &nft_ct_helper_obj_type,
.size = sizeof(struct nft_ct_helper_obj),
- .maxattr = NFTA_CT_HELPER_MAX,
- .policy = nft_ct_helper_policy,
.eval = nft_ct_helper_obj_eval,
.init = nft_ct_helper_obj_init,
.destroy = nft_ct_helper_obj_destroy,
.dump = nft_ct_helper_obj_dump,
+};
+
+static struct nft_object_type nft_ct_helper_obj_type __read_mostly = {
+ .type = NFT_OBJECT_CT_HELPER,
+ .ops = &nft_ct_helper_obj_ops,
+ .maxattr = NFTA_CT_HELPER_MAX,
+ .policy = nft_ct_helper_policy,
.owner = THIS_MODULE,
};
@@ -930,7 +936,7 @@ static int __init nft_ct_module_init(void)
if (err < 0)
goto err1;
- err = nft_register_obj(&nft_ct_helper_obj);
+ err = nft_register_obj(&nft_ct_helper_obj_type);
if (err < 0)
goto err2;
@@ -945,7 +951,7 @@ err1:
static void __exit nft_ct_module_exit(void)
{
- nft_unregister_obj(&nft_ct_helper_obj);
+ nft_unregister_obj(&nft_ct_helper_obj_type);
nft_unregister_expr(&nft_notrack_type);
nft_unregister_expr(&nft_ct_type);
}
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 1ec49fe..a0a93d9 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -8,6 +8,7 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -23,6 +24,7 @@ struct nft_exthdr {
u8 len;
u8 op;
enum nft_registers dreg:8;
+ enum nft_registers sreg:8;
u8 flags;
};
@@ -61,6 +63,26 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void *
+nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
+ unsigned int len, void *buffer, unsigned int *tcphdr_len)
+{
+ struct tcphdr *tcph;
+
+ if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+ return NULL;
+
+ tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buffer);
+ if (!tcph)
+ return NULL;
+
+ *tcphdr_len = __tcp_hdrlen(tcph);
+ if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len)
+ return NULL;
+
+ return skb_header_pointer(pkt->skb, pkt->xt.thoff, *tcphdr_len, buffer);
+}
+
static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -72,18 +94,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
struct tcphdr *tcph;
u8 *opt;
- if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
- goto err;
-
- tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff);
- if (!tcph)
- goto err;
-
- tcphdr_len = __tcp_hdrlen(tcph);
- if (tcphdr_len < sizeof(*tcph))
- goto err;
-
- tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff);
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!tcph)
goto err;
@@ -115,6 +126,88 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int i, optl, tcphdr_len, offset;
+ struct tcphdr *tcph;
+ u8 *opt;
+ u32 src;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
+ if (!tcph)
+ return;
+
+ opt = (u8 *)tcph;
+ for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
+ union {
+ u8 octet;
+ __be16 v16;
+ __be32 v32;
+ } old, new;
+
+ optl = optlen(opt, i);
+
+ if (priv->type != opt[i])
+ continue;
+
+ if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
+ return;
+
+ if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len))
+ return;
+
+ tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
+ &tcphdr_len);
+ if (!tcph)
+ return;
+
+ src = regs->data[priv->sreg];
+ offset = i + priv->offset;
+
+ switch (priv->len) {
+ case 2:
+ old.v16 = get_unaligned((u16 *)(opt + offset));
+ new.v16 = src;
+
+ switch (priv->type) {
+ case TCPOPT_MSS:
+ /* increase can cause connection to stall */
+ if (ntohs(old.v16) <= ntohs(new.v16))
+ return;
+ break;
+ }
+
+ if (old.v16 == new.v16)
+ return;
+
+ put_unaligned(new.v16, (u16 *)(opt + offset));
+ inet_proto_csum_replace2(&tcph->check, pkt->skb,
+ old.v16, new.v16, false);
+ break;
+ case 4:
+ new.v32 = src;
+ old.v32 = get_unaligned((u32 *)(opt + offset));
+
+ if (old.v32 == new.v32)
+ return;
+
+ put_unaligned(new.v32, (u32 *)(opt + offset));
+ inet_proto_csum_replace4(&tcph->check, pkt->skb,
+ old.v32, new.v32, false);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return;
+ }
+}
+
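
The TCP option rewrite above validates that priv->offset and priv->len stay inside the parsed option, makes the skb writable, and patches the checksum incrementally with inet_proto_csum_replace2()/4() instead of recomputing it; for TCPOPT_MSS it refuses to raise the value, since advertising a larger MSS mid-stream can stall the connection. The incremental fix-up follows RFC 1624; a standalone 16-bit version (csum16_replace() is illustrative, not a kernel helper):

#include <stdint.h>

/* HC' = ~(~HC + ~m + m') per RFC 1624, folded back to 16 bits. */
static uint16_t csum16_replace(uint16_t check, uint16_t old, uint16_t new_val)
{
        uint32_t sum = (uint16_t)~check + (uint16_t)~old + new_val;

        sum = (sum & 0xffff) + (sum >> 16);     /* fold carries twice */
        sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
}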
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
@@ -171,12 +264,57 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, priv->len);
}
-static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
- const struct nft_exthdr *priv = nft_expr_priv(expr);
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
+ int err;
- if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
- goto nla_put_failure;
+ if (!tb[NFTA_EXTHDR_SREG] ||
+ !tb[NFTA_EXTHDR_TYPE] ||
+ !tb[NFTA_EXTHDR_OFFSET] ||
+ !tb[NFTA_EXTHDR_LEN])
+ return -EINVAL;
+
+ if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS])
+ return -EINVAL;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return err;
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+
+ if (offset < 2)
+ return -EOPNOTSUPP;
+
+ switch (len) {
+ case 2: break;
+ case 4: break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
+ if (err < 0)
+ return err;
+
+ priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+ priv->offset = offset;
+ priv->len = len;
+ priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]);
+ priv->flags = flags;
+ priv->op = op;
+
+ return nft_validate_register_load(priv->sreg, priv->len);
+}
+
+static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
+{
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
@@ -193,6 +331,26 @@ nla_put_failure:
return -1;
}
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
+ return -1;
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
+static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg))
+ return -1;
+
+ return nft_exthdr_dump_common(skb, priv);
+}
+
static struct nft_expr_type nft_exthdr_type;
static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.type = &nft_exthdr_type,
@@ -210,6 +368,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = {
.dump = nft_exthdr_dump,
};
+static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_tcp_set_eval,
+ .init = nft_exthdr_tcp_set_init,
+ .dump = nft_exthdr_dump_set,
+};
+
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -219,12 +385,21 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (!tb[NFTA_EXTHDR_OP])
return &nft_exthdr_ipv6_ops;
- op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP]));
+ if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG])
+ return ERR_PTR(-EOPNOTSUPP);
+
+ op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP]));
switch (op) {
case NFT_EXTHDR_OP_TCPOPT:
- return &nft_exthdr_tcp_ops;
+ if (tb[NFTA_EXTHDR_SREG])
+ return &nft_exthdr_tcp_set_ops;
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_tcp_ops;
+ break;
case NFT_EXTHDR_OP_IPV6:
- return &nft_exthdr_ipv6_ops;
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_ipv6_ops;
+ break;
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c
new file mode 100644
index 0000000..3997ee3
--- /dev/null
+++ b/net/netfilter/nft_fib_netdev.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017 Pablo M. Bermudo Garay <pablombg@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This code is based on net/netfilter/nft_fib_inet.c, written by
+ * Florian Westphal <fw@strlen.de>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#include <net/netfilter/nft_fib.h>
+
+static void nft_fib_netdev_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_fib *priv = nft_expr_priv(expr);
+
+ switch (ntohs(pkt->skb->protocol)) {
+ case ETH_P_IP:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib4_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib4_eval_type(expr, regs, pkt);
+ }
+ break;
+ case ETH_P_IPV6:
+ switch (priv->result) {
+ case NFT_FIB_RESULT_OIF:
+ case NFT_FIB_RESULT_OIFNAME:
+ return nft_fib6_eval(expr, regs, pkt);
+ case NFT_FIB_RESULT_ADDRTYPE:
+ return nft_fib6_eval_type(expr, regs, pkt);
+ }
+ break;
+ }
+
+ regs->verdict.code = NFT_BREAK;
+}
+
+static struct nft_expr_type nft_fib_netdev_type;
+static const struct nft_expr_ops nft_fib_netdev_ops = {
+ .type = &nft_fib_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+ .eval = nft_fib_netdev_eval,
+ .init = nft_fib_init,
+ .dump = nft_fib_dump,
+ .validate = nft_fib_validate,
+};
+
+static struct nft_expr_type nft_fib_netdev_type __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .name = "fib",
+ .ops = &nft_fib_netdev_ops,
+ .policy = nft_fib_policy,
+ .maxattr = NFTA_FIB_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fib_netdev_module_init(void)
+{
+ return nft_register_expr(&nft_fib_netdev_type);
+}
+
+static void __exit nft_fib_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_fib_netdev_type);
+}
+
+module_init(nft_fib_netdev_module_init);
+module_exit(nft_fib_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo M. Bermudo Garay <pablombg@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(5, "fib");
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 14538b1..a9fc298 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -168,9 +168,9 @@ static const struct nft_expr_ops nft_limit_pkts_ops = {
.dump = nft_limit_pkts_dump,
};
-static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static void nft_limit_bytes_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct nft_limit *priv = nft_expr_priv(expr);
u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
@@ -179,29 +179,29 @@ static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
-static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_limit_bytes_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_limit *priv = nft_expr_priv(expr);
return nft_limit_init(priv, tb);
}
-static int nft_limit_pkt_bytes_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
+static int nft_limit_bytes_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
{
const struct nft_limit *priv = nft_expr_priv(expr);
return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
}
-static const struct nft_expr_ops nft_limit_pkt_bytes_ops = {
+static const struct nft_expr_ops nft_limit_bytes_ops = {
.type = &nft_limit_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_limit)),
- .eval = nft_limit_pkt_bytes_eval,
- .init = nft_limit_pkt_bytes_init,
- .dump = nft_limit_pkt_bytes_dump,
+ .eval = nft_limit_bytes_eval,
+ .init = nft_limit_bytes_init,
+ .dump = nft_limit_bytes_dump,
};
static const struct nft_expr_ops *
@@ -215,7 +215,7 @@ nft_limit_select_ops(const struct nft_ctx *ctx,
case NFT_LIMIT_PKTS:
return &nft_limit_pkts_ops;
case NFT_LIMIT_PKT_BYTES:
- return &nft_limit_pkt_bytes_ops;
+ return &nft_limit_bytes_ops;
}
return ERR_PTR(-EOPNOTSUPP);
}
@@ -229,14 +229,133 @@ static struct nft_expr_type nft_limit_type __read_mostly = {
.owner = THIS_MODULE,
};
+static void nft_limit_obj_pkts_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit_pkts *priv = nft_obj_data(obj);
+
+ if (nft_limit_eval(&priv->limit, priv->cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_limit_pkts *priv = nft_obj_data(obj);
+ int err;
+
+ err = nft_limit_init(&priv->limit, tb);
+ if (err < 0)
+ return err;
+
+ priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate);
+ return 0;
+}
+
+static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
+ struct nft_object *obj,
+ bool reset)
+{
+ const struct nft_limit_pkts *priv = nft_obj_data(obj);
+
+ return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
+}
+
+static struct nft_object_type nft_limit_obj_type;
+static const struct nft_object_ops nft_limit_obj_pkts_ops = {
+ .type = &nft_limit_obj_type,
+ .size = sizeof(struct nft_limit_pkts),
+ .init = nft_limit_obj_pkts_init,
+ .eval = nft_limit_obj_pkts_eval,
+ .dump = nft_limit_obj_pkts_dump,
+};
+
+static void nft_limit_obj_bytes_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit *priv = nft_obj_data(obj);
+ u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
+
+ if (nft_limit_eval(priv, cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_limit *priv = nft_obj_data(obj);
+
+ return nft_limit_init(priv, tb);
+}
+
+static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
+ struct nft_object *obj,
+ bool reset)
+{
+ const struct nft_limit *priv = nft_obj_data(obj);
+
+ return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
+}
+
+static const struct nft_object_ops nft_limit_obj_bytes_ops = {
+ .type = &nft_limit_obj_type,
+ .size = sizeof(struct nft_limit),
+ .init = nft_limit_obj_bytes_init,
+ .eval = nft_limit_obj_bytes_eval,
+ .dump = nft_limit_obj_bytes_dump,
+};
+
+static const struct nft_object_ops *
+nft_limit_obj_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (!tb[NFTA_LIMIT_TYPE])
+ return &nft_limit_obj_pkts_ops;
+
+ switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) {
+ case NFT_LIMIT_PKTS:
+ return &nft_limit_obj_pkts_ops;
+ case NFT_LIMIT_PKT_BYTES:
+ return &nft_limit_obj_bytes_ops;
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static struct nft_object_type nft_limit_obj_type __read_mostly = {
+ .select_ops = nft_limit_obj_select_ops,
+ .type = NFT_OBJECT_LIMIT,
+ .maxattr = NFTA_LIMIT_MAX,
+ .policy = nft_limit_policy,
+ .owner = THIS_MODULE,
+};
+
static int __init nft_limit_module_init(void)
{
- return nft_register_expr(&nft_limit_type);
+ int err;
+
+ err = nft_register_obj(&nft_limit_obj_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_limit_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_obj(&nft_limit_obj_type);
+ return err;
}
static void __exit nft_limit_module_exit(void)
{
nft_unregister_expr(&nft_limit_type);
+ nft_unregister_obj(&nft_limit_obj_type);
}
module_init(nft_limit_module_init);
@@ -245,3 +364,4 @@ module_exit(nft_limit_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("limit");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_LIMIT);
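
limit also becomes a named stateful object here: nft_limit_obj_select_ops() picks packet or byte ops from NFTA_LIMIT_TYPE, and module init registers the object type before the expression type, unwinding in reverse if the second step fails. A sketch of that register/unwind ordering (the register_*() functions are stubs, not kernel APIs):

static int register_obj(void)    { return 0; }  /* stand-ins for the */
static int register_expr(void)   { return 0; }  /* nft_register_*() calls */
static void unregister_obj(void) { }

static int limit_module_init(void)
{
        int err = register_obj();

        if (err < 0)
                return err;

        err = register_expr();
        if (err < 0)
                unregister_obj();       /* undo step 1 when step 2 fails */
        return err;
}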
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 1dd428f..7bcdc48 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -22,7 +22,7 @@ static void nft_objref_eval(const struct nft_expr *expr,
{
struct nft_object *obj = nft_objref_priv(expr);
- obj->type->eval(obj, regs, pkt);
+ obj->ops->eval(obj, regs, pkt);
}
static int nft_objref_init(const struct nft_ctx *ctx,
@@ -54,7 +54,8 @@ static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
const struct nft_object *obj = nft_objref_priv(expr);
if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
- nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type)))
+ nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE,
+ htonl(obj->ops->type->type)))
goto nla_put_failure;
return 0;
@@ -104,7 +105,7 @@ static void nft_objref_map_eval(const struct nft_expr *expr,
return;
}
obj = *nft_set_ext_obj(ext);
- obj->type->eval(obj, regs, pkt);
+ obj->ops->eval(obj, regs, pkt);
}
static int nft_objref_map_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 7d699bb..e110b0e 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -184,7 +184,7 @@ static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff)
if (!uh)
return false;
- return uh->check;
+ return (__force bool)uh->check;
}
static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 25e3315..0ed124a 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -151,14 +151,20 @@ static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
return nft_quota_do_dump(skb, priv, reset);
}
-static struct nft_object_type nft_quota_obj __read_mostly = {
- .type = NFT_OBJECT_QUOTA,
+static struct nft_object_type nft_quota_obj_type;
+static const struct nft_object_ops nft_quota_obj_ops = {
+ .type = &nft_quota_obj_type,
.size = sizeof(struct nft_quota),
- .maxattr = NFTA_QUOTA_MAX,
- .policy = nft_quota_policy,
.init = nft_quota_obj_init,
.eval = nft_quota_obj_eval,
.dump = nft_quota_obj_dump,
+};
+
+static struct nft_object_type nft_quota_obj_type __read_mostly = {
+ .type = NFT_OBJECT_QUOTA,
+ .ops = &nft_quota_obj_ops,
+ .maxattr = NFTA_QUOTA_MAX,
+ .policy = nft_quota_policy,
.owner = THIS_MODULE,
};
@@ -209,7 +215,7 @@ static int __init nft_quota_module_init(void)
{
int err;
- err = nft_register_obj(&nft_quota_obj);
+ err = nft_register_obj(&nft_quota_obj_type);
if (err < 0)
return err;
@@ -219,14 +225,14 @@ static int __init nft_quota_module_init(void)
return 0;
err1:
- nft_unregister_obj(&nft_quota_obj);
+ nft_unregister_obj(&nft_quota_obj_type);
return err;
}
static void __exit nft_quota_module_exit(void)
{
nft_unregister_expr(&nft_quota_type);
- nft_unregister_obj(&nft_quota_obj);
+ nft_unregister_obj(&nft_quota_obj_type);
}
module_init(nft_quota_module_init);
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index c7383d8..a6b7d05 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -23,6 +23,43 @@ struct nft_rt {
enum nft_registers dreg:8;
};
+static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst)
+{
+ u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);
+ const struct sk_buff *skb = pkt->skb;
+ const struct nf_afinfo *ai;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ fl.u.ip4.daddr = ip_hdr(skb)->saddr;
+ minlen = sizeof(struct iphdr) + sizeof(struct tcphdr);
+ break;
+ case NFPROTO_IPV6:
+ fl.u.ip6.daddr = ipv6_hdr(skb)->saddr;
+ minlen = sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+ break;
+ }
+
+ ai = nf_get_afinfo(nft_pf(pkt));
+ if (ai) {
+ struct dst_entry *dst = NULL;
+
+ ai->route(nft_net(pkt), &dst, &fl, false);
+ if (dst) {
+ mtu = min(mtu, dst_mtu(dst));
+ dst_release(dst);
+ }
+ }
+
+ if (mtu <= minlen || mtu > 0xffff)
+ return TCP_MSS_DEFAULT;
+
+ return mtu - minlen;
+}
+
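
rt tcpmss derives an advertisable MSS from routing state: it takes the smaller of the packet's own path MTU and the MTU of a route back toward the source, subtracts the minimal IP-plus-TCP header size, and falls back to TCP_MSS_DEFAULT when the result is out of range. A user-space model of that arithmetic (536 mirrors TCP_MSS_DEFAULT; header sizes are the IPv4/IPv6 minima):

#include <stdint.h>

static uint16_t mss_from_mtu(uint32_t mtu, int ipv6)
{
        const uint32_t hdrs = (ipv6 ? 40 : 20) + 20;    /* IP + TCP headers */

        if (mtu <= hdrs || mtu > 0xffff)
                return 536;                     /* TCP_MSS_DEFAULT analogue */
        return (uint16_t)(mtu - hdrs);
}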
static void nft_rt_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -46,8 +83,8 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
if (nft_pf(pkt) != NFPROTO_IPV4)
goto err;
- *dest = rt_nexthop((const struct rtable *)dst,
- ip_hdr(skb)->daddr);
+ *dest = (__force u32)rt_nexthop((const struct rtable *)dst,
+ ip_hdr(skb)->daddr);
break;
case NFT_RT_NEXTHOP6:
if (nft_pf(pkt) != NFPROTO_IPV6)
@@ -57,6 +94,9 @@ static void nft_rt_get_eval(const struct nft_expr *expr,
&ipv6_hdr(skb)->daddr),
sizeof(struct in6_addr));
break;
+ case NFT_RT_TCPMSS:
+ nft_reg_store16(dest, get_tcpmss(pkt, dst));
+ break;
default:
WARN_ON(1);
goto err;
@@ -67,7 +107,7 @@ err:
regs->verdict.code = NFT_BREAK;
}
-const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
+static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
[NFTA_RT_DREG] = { .type = NLA_U32 },
[NFTA_RT_KEY] = { .type = NLA_U32 },
};
@@ -94,6 +134,9 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
case NFT_RT_NEXTHOP6:
len = sizeof(struct in6_addr);
break;
+ case NFT_RT_TCPMSS:
+ len = sizeof(u16);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -118,6 +161,29 @@ nla_put_failure:
return -1;
}
+static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ const struct nft_rt *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ switch (priv->key) {
+ case NFT_RT_NEXTHOP4:
+ case NFT_RT_NEXTHOP6:
+ case NFT_RT_CLASSID:
+ return 0;
+ case NFT_RT_TCPMSS:
+ hooks = (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
static struct nft_expr_type nft_rt_type;
static const struct nft_expr_ops nft_rt_get_ops = {
.type = &nft_rt_type,
@@ -125,6 +191,7 @@ static const struct nft_expr_ops nft_rt_get_ops = {
.eval = nft_rt_get_eval,
.init = nft_rt_get_init,
.dump = nft_rt_get_dump,
+ .validate = nft_rt_validate,
};
static struct nft_expr_type nft_rt_type __read_mostly = {
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index bce5382..d83a4ec 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -19,8 +19,9 @@
#include <net/netfilter/nf_tables.h>
struct nft_rbtree {
- rwlock_t lock;
struct rb_root root;
+ rwlock_t lock;
+ seqcount_t count;
};
struct nft_rbtree_elem {
@@ -40,8 +41,9 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
}
-static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext,
+ unsigned int seq)
{
struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
@@ -50,15 +52,17 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const void *this;
int d;
- read_lock_bh(&priv->lock);
- parent = priv->root.rb_node;
+ parent = rcu_dereference_raw(priv->root.rb_node);
while (parent != NULL) {
+ if (read_seqcount_retry(&priv->count, seq))
+ return false;
+
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
this = nft_set_ext_key(&rbe->ext);
d = memcmp(this, key, set->klen);
if (d < 0) {
- parent = parent->rb_left;
+ parent = rcu_dereference_raw(parent->rb_left);
if (interval &&
nft_rbtree_equal(set, this, interval) &&
nft_rbtree_interval_end(this) &&
@@ -66,15 +70,14 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
continue;
interval = rbe;
} else if (d > 0)
- parent = parent->rb_right;
+ parent = rcu_dereference_raw(parent->rb_right);
else {
if (!nft_set_elem_active(&rbe->ext, genmask)) {
- parent = parent->rb_left;
+ parent = rcu_dereference_raw(parent->rb_left);
continue;
}
if (nft_rbtree_interval_end(rbe))
goto out;
- read_unlock_bh(&priv->lock);
*ext = &rbe->ext;
return true;
@@ -84,15 +87,32 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_interval_end(interval)) {
- read_unlock_bh(&priv->lock);
*ext = &interval->ext;
return true;
}
out:
- read_unlock_bh(&priv->lock);
return false;
}
+static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+ unsigned int seq = read_seqcount_begin(&priv->count);
+ bool ret;
+
+ ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ if (ret || !read_seqcount_retry(&priv->count, seq))
+ return ret;
+
+ read_lock_bh(&priv->lock);
+ seq = read_seqcount_begin(&priv->count);
+ ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ read_unlock_bh(&priv->lock);
+
+ return ret;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
@@ -130,7 +150,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
}
}
- rb_link_node(&new->node, parent, p);
+ rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
}
@@ -144,7 +164,9 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
int err;
write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
err = __nft_rbtree_insert(net, set, rbe, ext);
+ write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
return err;
@@ -158,7 +180,9 @@ static void nft_rbtree_remove(const struct net *net,
struct nft_rbtree_elem *rbe = elem->priv;
write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
rb_erase(&rbe->node, &priv->root);
+ write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
}
@@ -264,6 +288,7 @@ static int nft_rbtree_init(const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
rwlock_init(&priv->lock);
+ seqcount_init(&priv->count);
priv->root = RB_ROOT;
return 0;
}
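The rbtree rework above is the standard seqcount pattern: a lockless fast path over the RCU-safe tree, with a fallback under the read lock when a writer raced. A minimal standalone sketch of the shape (tree and helper names are illustrative):

	/* reader: lockless first, locked retry on contention */
	static bool lookup(struct tree *t, const u32 *key)
	{
		unsigned int seq = read_seqcount_begin(&t->count);
		bool ret = __lookup(t, key, seq);	/* bails out early on retry */

		if (ret || !read_seqcount_retry(&t->count, seq))
			return ret;

		read_lock_bh(&t->lock);
		seq = read_seqcount_begin(&t->count);
		ret = __lookup(t, key, seq);
		read_unlock_bh(&t->lock);
		return ret;
	}

	/* writer: bump the sequence around the update so lockless readers
	 * notice the change and fall back to the locked path */
	write_lock_bh(&t->lock);
	write_seqcount_begin(&t->count);
	/* rb_link_node_rcu() / rb_erase() ... */
	write_seqcount_end(&t->count);
	write_unlock_bh(&t->lock);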
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e164823..c83a3b5 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1192,16 +1192,10 @@ xt_replace_table(struct xt_table *table,
#ifdef CONFIG_AUDIT
if (audit_enabled) {
- struct audit_buffer *ab;
-
- ab = audit_log_start(current->audit_context, GFP_KERNEL,
- AUDIT_NETFILTER_CFG);
- if (ab) {
- audit_log_format(ab, "table=%s family=%u entries=%u",
- table->name, table->af,
- private->number);
- audit_log_end(ab);
- }
+ audit_log(current->audit_context, GFP_KERNEL,
+ AUDIT_NETFILTER_CFG,
+ "table=%s family=%u entries=%u",
+ table->name, table->af, private->number);
}
#endif
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 623ef37..5a152e2 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -121,9 +121,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
{
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
+ const struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout;
struct nf_conn_timeout *timeout_ext;
- struct nf_conntrack_l4proto *l4proto;
int ret = 0;
u8 proto;
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index e45a012..58aa9dd 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -77,10 +77,10 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
struct nf_nat_range newrange;
- NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING ||
- xt_hooknum(par) == NF_INET_POST_ROUTING ||
- xt_hooknum(par) == NF_INET_LOCAL_OUT ||
- xt_hooknum(par) == NF_INET_LOCAL_IN);
+ WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING &&
+ xt_hooknum(par) != NF_INET_POST_ROUTING &&
+ xt_hooknum(par) != NF_INET_LOCAL_OUT &&
+ xt_hooknum(par) != NF_INET_LOCAL_IN);
ct = nf_ct_get(skb, &ctinfo);
netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
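The NF_CT_ASSERT conversions throughout this series are mechanical: NF_CT_ASSERT(x) is defined in terms of WARN_ON, so an assertion that a condition holds becomes a warning when it does not:

	NF_CT_ASSERT(a || b);	/* old: assert at least one hook matches */
	WARN_ON(!a && !b);	/* new: same test, negated by De Morgan */

which is why the OR-chain of permitted hooks above turns into an AND-chain of inequalities.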
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index c64aca6..9dae4d6 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -62,11 +62,9 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net,
memset(fl6, 0, sizeof(*fl6));
fl6->daddr = ipv6_hdr(skb)->saddr;
}
- rcu_read_lock();
ai = nf_get_afinfo(family);
if (ai != NULL)
ai->route(net, (struct dst_entry **)&rt, &fl, false);
- rcu_read_unlock();
if (rt != NULL) {
mtu = dst_mtu(&rt->dst);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d767e35..17d7705 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -70,13 +70,11 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
return user_laddr;
laddr = 0;
- rcu_read_lock();
indev = __in_dev_get_rcu(skb->dev);
for_primary_ifa(indev) {
laddr = ifa->ifa_local;
break;
} endfor_ifa(indev);
- rcu_read_unlock();
return laddr ? laddr : daddr;
}
@@ -125,7 +123,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
__tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
- in->ifindex);
+ in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -195,7 +193,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
- in->ifindex);
+ in->ifindex, 0);
if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
sk = NULL;
@@ -208,7 +206,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
case NFT_LOOKUP_ESTABLISHED:
sk = __inet6_lookup_established(net, &tcp_hashinfo,
saddr, sport, daddr, ntohs(dport),
- in->ifindex);
+ in->ifindex, 0);
break;
default:
BUG();
@@ -391,7 +389,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
return user_laddr;
laddr = NULL;
- rcu_read_lock();
indev = __in6_dev_get(skb->dev);
if (indev) {
read_lock_bh(&indev->lock);
@@ -404,7 +401,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
}
read_unlock_bh(&indev->lock);
}
- rcu_read_unlock();
return laddr ? laddr : daddr;
}
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index e329dab..3b2be2a 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -47,8 +47,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (dev)
flow.flowi6_oif = dev->ifindex;
- rcu_read_lock();
-
afinfo = nf_get_afinfo(NFPROTO_IPV6);
if (afinfo != NULL) {
const struct nf_ipv6_ops *v6ops;
@@ -63,7 +61,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
} else {
route_err = 1;
}
- rcu_read_unlock();
if (route_err)
return XT_ADDRTYPE_UNREACHABLE;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index b8fd4ab..ffa8eec 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -58,8 +58,7 @@ struct xt_connlimit_rb {
static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp;
struct xt_connlimit_data {
- struct rb_root climit_root4[CONNLIMIT_SLOTS];
- struct rb_root climit_root6[CONNLIMIT_SLOTS];
+ struct rb_root climit_root[CONNLIMIT_SLOTS];
};
static u_int32_t connlimit_rnd __read_mostly;
@@ -144,7 +143,6 @@ static unsigned int check_hlist(struct net *net,
unsigned int length = 0;
*addit = true;
- rcu_read_lock();
/* check the saved connections */
hlist_for_each_entry_safe(conn, n, head, node) {
@@ -179,8 +177,6 @@ static unsigned int check_hlist(struct net *net,
length++;
}
- rcu_read_unlock();
-
return length;
}
@@ -297,13 +293,11 @@ static int count_them(struct net *net,
int count;
u32 hash;
- if (family == NFPROTO_IPV6) {
+ if (family == NFPROTO_IPV6)
hash = connlimit_iphash6(addr, mask);
- root = &data->climit_root6[hash];
- } else {
+ else
hash = connlimit_iphash(addr->ip & mask->ip);
- root = &data->climit_root4[hash];
- }
+ root = &data->climit_root[hash];
spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]);
@@ -382,10 +376,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
return -ENOMEM;
}
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
- info->data->climit_root4[i] = RB_ROOT;
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
- info->data->climit_root6[i] = RB_ROOT;
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+ info->data->climit_root[i] = RB_ROOT;
return 0;
}
@@ -416,10 +408,8 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
nf_ct_netns_put(par->net, par->family);
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
- destroy_tree(&info->data->climit_root4[i]);
- for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
- destroy_tree(&info->data->climit_root6[i]);
+ for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i)
+ destroy_tree(&info->data->climit_root[i]);
kfree(info->data);
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 762e187..10d4823 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -56,6 +56,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
}
/* need to declare this at the top */
+static const struct file_operations dl_file_ops_v2;
static const struct file_operations dl_file_ops_v1;
static const struct file_operations dl_file_ops;
@@ -87,8 +88,19 @@ struct dsthash_ent {
unsigned long expires; /* precalculated expiry time */
struct {
unsigned long prev; /* last modification */
- u_int64_t credit;
- u_int64_t credit_cap, cost;
+ union {
+ struct {
+ u_int64_t credit;
+ u_int64_t credit_cap;
+ u_int64_t cost;
+ };
+ struct {
+ u_int32_t interval, prev_window;
+ u_int64_t current_rate;
+ u_int64_t rate;
+ int64_t burst;
+ };
+ };
} rateinfo;
struct rcu_head rcu;
};
@@ -99,7 +111,7 @@ struct xt_hashlimit_htable {
u_int8_t family;
bool rnd_initialized;
- struct hashlimit_cfg2 cfg; /* config */
+ struct hashlimit_cfg3 cfg; /* config */
/* used internally */
spinlock_t lock; /* lock for list_head */
@@ -116,10 +128,10 @@ struct xt_hashlimit_htable {
};
static int
-cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
+cfg_copy(struct hashlimit_cfg3 *to, const void *from, int revision)
{
if (revision == 1) {
- struct hashlimit_cfg1 *cfg = from;
+ struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from;
to->mode = cfg->mode;
to->avg = cfg->avg;
@@ -131,7 +143,19 @@ cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
to->srcmask = cfg->srcmask;
to->dstmask = cfg->dstmask;
} else if (revision == 2) {
- memcpy(to, from, sizeof(struct hashlimit_cfg2));
+ struct hashlimit_cfg2 *cfg = (struct hashlimit_cfg2 *)from;
+
+ to->mode = cfg->mode;
+ to->avg = cfg->avg;
+ to->burst = cfg->burst;
+ to->size = cfg->size;
+ to->max = cfg->max;
+ to->gc_interval = cfg->gc_interval;
+ to->expire = cfg->expire;
+ to->srcmask = cfg->srcmask;
+ to->dstmask = cfg->dstmask;
+ } else if (revision == 3) {
+ memcpy(to, from, sizeof(struct hashlimit_cfg3));
} else {
return -EINVAL;
}
@@ -240,13 +264,14 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
}
static void htable_gc(struct work_struct *work);
-static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
+static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
const char *name, u_int8_t family,
struct xt_hashlimit_htable **out_hinfo,
int revision)
{
struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
struct xt_hashlimit_htable *hinfo;
+ const struct file_operations *fops;
unsigned int size, i;
int ret;
@@ -268,7 +293,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
*out_hinfo = hinfo;
/* copy match config into hashtable config */
- ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2);
+ ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
if (ret)
return ret;
@@ -293,11 +318,21 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
}
spin_lock_init(&hinfo->lock);
+ switch (revision) {
+ case 1:
+ fops = &dl_file_ops_v1;
+ break;
+ case 2:
+ fops = &dl_file_ops_v2;
+ break;
+ default:
+ fops = &dl_file_ops;
+ }
+
hinfo->pde = proc_create_data(name, 0,
(family == NFPROTO_IPV4) ?
hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
- (revision == 1) ? &dl_file_ops_v1 : &dl_file_ops,
- hinfo);
+ fops, hinfo);
if (hinfo->pde == NULL) {
kfree(hinfo->name);
vfree(hinfo);
@@ -482,6 +517,25 @@ static u32 user2credits_byte(u32 user)
return (u32) (us >> 32);
}
+static u64 user2rate(u64 user)
+{
+ if (user != 0) {
+ return div64_u64(XT_HASHLIMIT_SCALE_v2, user);
+ } else {
+ pr_warn("invalid rate from userspace: %llu\n", user);
+ return 0;
+ }
+}
+
+static u64 user2rate_bytes(u64 user)
+{
+ u64 r;
+
+ r = user ? 0xFFFFFFFFULL / user : 0xFFFFFFFFULL;
+ r = (r - 1) << 4;
+ return r;
+}
+
static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
u32 mode, int revision)
{
@@ -491,6 +545,21 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
if (delta == 0)
return;
+ if (revision >= 3 && mode & XT_HASHLIMIT_RATE_MATCH) {
+ u64 interval = dh->rateinfo.interval * HZ;
+
+ if (delta < interval)
+ return;
+
+ dh->rateinfo.prev = now;
+ dh->rateinfo.prev_window =
+ ((dh->rateinfo.current_rate * interval) >
+ (delta * dh->rateinfo.rate));
+ dh->rateinfo.current_rate = 0;
+
+ return;
+ }
+
dh->rateinfo.prev = now;
if (mode & XT_HASHLIMIT_BYTES) {
@@ -515,7 +584,23 @@ static void rateinfo_init(struct dsthash_ent *dh,
struct xt_hashlimit_htable *hinfo, int revision)
{
dh->rateinfo.prev = jiffies;
- if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+ if (revision >= 3 && hinfo->cfg.mode & XT_HASHLIMIT_RATE_MATCH) {
+ dh->rateinfo.prev_window = 0;
+ dh->rateinfo.current_rate = 0;
+ if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+ dh->rateinfo.rate = user2rate_bytes(hinfo->cfg.avg);
+ if (hinfo->cfg.burst)
+ dh->rateinfo.burst =
+ hinfo->cfg.burst * dh->rateinfo.rate;
+ else
+ dh->rateinfo.burst = dh->rateinfo.rate;
+ } else {
+ dh->rateinfo.rate = user2rate(hinfo->cfg.avg);
+ dh->rateinfo.burst =
+ hinfo->cfg.burst + dh->rateinfo.rate;
+ }
+ dh->rateinfo.interval = hinfo->cfg.interval;
+ } else if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ;
dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg);
dh->rateinfo.credit_cap = hinfo->cfg.burst;
@@ -648,7 +733,7 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
static bool
hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
struct xt_hashlimit_htable *hinfo,
- const struct hashlimit_cfg2 *cfg, int revision)
+ const struct hashlimit_cfg3 *cfg, int revision)
{
unsigned long now = jiffies;
struct dsthash_ent *dh;
@@ -659,12 +744,12 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
goto hotdrop;
- rcu_read_lock_bh();
+ local_bh_disable();
dh = dsthash_find(hinfo, &dst);
if (dh == NULL) {
dh = dsthash_alloc_init(hinfo, &dst, &race);
if (dh == NULL) {
- rcu_read_unlock_bh();
+ local_bh_enable();
goto hotdrop;
} else if (race) {
/* Already got an entry, update expiration timeout */
@@ -680,6 +765,20 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
}
+ if (cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+ cost = (cfg->mode & XT_HASHLIMIT_BYTES) ? skb->len : 1;
+ dh->rateinfo.current_rate += cost;
+
+ if (!dh->rateinfo.prev_window &&
+ (dh->rateinfo.current_rate <= dh->rateinfo.burst)) {
+ spin_unlock(&dh->lock);
+ local_bh_enable(); /* pairs with local_bh_disable() above */
+ return !(cfg->mode & XT_HASHLIMIT_INVERT);
+ } else {
+ goto overlimit;
+ }
+ }
+
if (cfg->mode & XT_HASHLIMIT_BYTES)
cost = hashlimit_byte_cost(skb->len, dh);
else
@@ -689,12 +788,13 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
/* below the limit */
dh->rateinfo.credit -= cost;
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
return !(cfg->mode & XT_HASHLIMIT_INVERT);
}
+overlimit:
spin_unlock(&dh->lock);
- rcu_read_unlock_bh();
+ local_bh_enable();
/* default match is underlimit - so over the limit, we need to invert */
return cfg->mode & XT_HASHLIMIT_INVERT;
@@ -708,7 +808,7 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
struct xt_hashlimit_htable *hinfo = info->hinfo;
- struct hashlimit_cfg2 cfg = {};
+ struct hashlimit_cfg3 cfg = {};
int ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
@@ -720,17 +820,33 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
}
static bool
-hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
struct xt_hashlimit_htable *hinfo = info->hinfo;
+ struct hashlimit_cfg3 cfg = {};
+ int ret;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_common(skb, par, hinfo, &cfg, 2);
+}
+
+static bool
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+ struct xt_hashlimit_htable *hinfo = info->hinfo;
- return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2);
+ return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
}
static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
struct xt_hashlimit_htable **hinfo,
- struct hashlimit_cfg2 *cfg,
+ struct hashlimit_cfg3 *cfg,
const char *name, int revision)
{
struct net *net = par->net;
@@ -753,7 +869,17 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
}
/* Check for overflow. */
- if (cfg->mode & XT_HASHLIMIT_BYTES) {
+ if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+ if (cfg->avg == 0) {
+ pr_info("hashlimit invalid rate\n");
+ return -ERANGE;
+ }
+
+ if (cfg->interval == 0) {
+ pr_info("hashlimit invalid interval\n");
+ return -EINVAL;
+ }
+ } else if (cfg->mode & XT_HASHLIMIT_BYTES) {
if (user2credits_byte(cfg->avg) == 0) {
pr_info("overflow, rate too high: %llu\n", cfg->avg);
return -EINVAL;
@@ -784,7 +910,7 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
{
struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
- struct hashlimit_cfg2 cfg = {};
+ struct hashlimit_cfg3 cfg = {};
int ret;
if (info->name[sizeof(info->name) - 1] != '\0')
@@ -799,15 +925,40 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
&cfg, info->name, 1);
}
-static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
{
struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+ struct hashlimit_cfg3 cfg = {};
+ int ret;
+
+ if (info->name[sizeof(info->name) - 1] != '\0')
+ return -EINVAL;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_check_common(par, &info->hinfo,
+ &cfg, info->name, 2);
+}
+
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
if (info->name[sizeof(info->name) - 1] != '\0')
return -EINVAL;
return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
- info->name, 2);
+ info->name, 3);
+}
+
+static void hashlimit_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+ const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+ htable_put(info->hinfo);
}
static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
@@ -819,7 +970,7 @@ static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
- const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+ const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
htable_put(info->hinfo);
}
@@ -840,9 +991,20 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
.name = "hashlimit",
.revision = 2,
.family = NFPROTO_IPV4,
- .match = hashlimit_mt,
+ .match = hashlimit_mt_v2,
.matchsize = sizeof(struct xt_hashlimit_mtinfo2),
.usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+ .checkentry = hashlimit_mt_check_v2,
+ .destroy = hashlimit_mt_destroy_v2,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo3),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo3, hinfo),
.checkentry = hashlimit_mt_check,
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
@@ -863,9 +1025,20 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
.name = "hashlimit",
.revision = 2,
.family = NFPROTO_IPV6,
- .match = hashlimit_mt,
+ .match = hashlimit_mt_v2,
.matchsize = sizeof(struct xt_hashlimit_mtinfo2),
.usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+ .checkentry = hashlimit_mt_check_v2,
+ .destroy = hashlimit_mt_destroy_v2,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo3),
+ .usersize = offsetof(struct xt_hashlimit_mtinfo3, hinfo),
.checkentry = hashlimit_mt_check,
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
@@ -947,6 +1120,21 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
}
}
+static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
+ struct seq_file *s)
+{
+ const struct xt_hashlimit_htable *ht = s->private;
+
+ spin_lock(&ent->lock);
+ /* recalculate to show accurate numbers */
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+
+ dl_seq_print(ent, family, s);
+
+ spin_unlock(&ent->lock);
+ return seq_has_overflowed(s);
+}
+
static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
struct seq_file *s)
{
@@ -969,7 +1157,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
spin_lock(&ent->lock);
/* recalculate to show accurate numbers */
- rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 3);
dl_seq_print(ent, family, s);
@@ -977,6 +1165,20 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
return seq_has_overflowed(s);
}
+static int dl_seq_show_v2(struct seq_file *s, void *v)
+{
+ struct xt_hashlimit_htable *htable = s->private;
+ unsigned int *bucket = (unsigned int *)v;
+ struct dsthash_ent *ent;
+
+ if (!hlist_empty(&htable->hash[*bucket])) {
+ hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+ if (dl_seq_real_show_v2(ent, htable->family, s))
+ return -1;
+ }
+ return 0;
+}
+
static int dl_seq_show_v1(struct seq_file *s, void *v)
{
struct xt_hashlimit_htable *htable = s->private;
@@ -1012,6 +1214,13 @@ static const struct seq_operations dl_seq_ops_v1 = {
.show = dl_seq_show_v1
};
+static const struct seq_operations dl_seq_ops_v2 = {
+ .start = dl_seq_start,
+ .next = dl_seq_next,
+ .stop = dl_seq_stop,
+ .show = dl_seq_show_v2
+};
+
static const struct seq_operations dl_seq_ops = {
.start = dl_seq_start,
.next = dl_seq_next,
@@ -1019,6 +1228,18 @@ static const struct seq_operations dl_seq_ops = {
.show = dl_seq_show
};
+static int dl_proc_open_v2(struct inode *inode, struct file *file)
+{
+ int ret = seq_open(file, &dl_seq_ops_v2);
+
+ if (!ret) {
+ struct seq_file *sf = file->private_data;
+
+ sf->private = PDE_DATA(inode);
+ }
+ return ret;
+}
+
static int dl_proc_open_v1(struct inode *inode, struct file *file)
{
int ret = seq_open(file, &dl_seq_ops_v1);
@@ -1042,6 +1263,14 @@ static int dl_proc_open(struct inode *inode, struct file *file)
return ret;
}
+static const struct file_operations dl_file_ops_v2 = {
+ .owner = THIS_MODULE,
+ .open = dl_proc_open_v2,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
static const struct file_operations dl_file_ops_v1 = {
.owner = THIS_MODULE,
.open = dl_proc_open_v1,
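The rate-match mode introduced above compares the average rate observed in the current window against the configured one without integer division, by cross-multiplying (see the prev_window assignment in rateinfo_recalc()). A standalone sketch of that test, with illustrative names:

	#include <stdbool.h>
	#include <stdint.h>

	/* current_rate/delta > rate/interval, rewritten as
	 * current_rate * interval > delta * rate to stay in 64-bit
	 * integer arithmetic; extreme operands could overflow, which the
	 * configured limits are assumed to keep out of range. */
	static bool window_over_limit(uint64_t current_rate, uint64_t delta,
				      uint64_t rate, uint64_t interval)
	{
		return current_rate * interval > delta * rate;
	}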
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index 8107b3e..0fd14d1 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -58,9 +58,9 @@ xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
xt_nat_convert_range(&range, &mr->range[0]);
return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
@@ -75,8 +75,8 @@ xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
xt_nat_convert_range(&range, &mr->range[0]);
return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
@@ -90,9 +90,9 @@ xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
}
@@ -105,8 +105,8 @@ xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct != NULL &&
- (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ WARN_ON(!(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)));
return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST);
}
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 71cfa95..36e14b1 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -226,7 +226,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
sizeof(struct tcphdr), optsize, opts);
}
- rcu_read_lock();
list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
int foptsize, optnum;
@@ -340,7 +339,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
info->loglevel == XT_OSF_LOGLEVEL_FIRST)
break;
}
- rcu_read_unlock();
if (!fcount && (info->flags & XT_OSF_LOG))
nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 3f6c4fa..245fa35 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -106,7 +106,7 @@ static DEFINE_SPINLOCK(recent_lock);
static DEFINE_MUTEX(recent_mutex);
#ifdef CONFIG_PROC_FS
-static const struct file_operations recent_old_fops, recent_mt_fops;
+static const struct file_operations recent_mt_fops;
#endif
static u_int32_t hash_rnd __read_mostly;
diff --git a/net/nsh/Kconfig b/net/nsh/Kconfig
new file mode 100644
index 0000000..bafc3dd
--- /dev/null
+++ b/net/nsh/Kconfig
@@ -0,0 +1,9 @@
+menuconfig NET_NSH
+ tristate "Network Service Header (NSH) protocol"
+ default n
+ ---help---
+ Network Service Header is an implementation of Service Function
+ Chaining (RFC 7665). The current implementation in Linux supports
+ only MD type 1 and only with the openvswitch module.
+
+ If unsure, say N.
diff --git a/net/nsh/Makefile b/net/nsh/Makefile
new file mode 100644
index 0000000..c93c787
--- /dev/null
+++ b/net/nsh/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_NET_NSH) += nsh.o
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
new file mode 100644
index 0000000..58fb827
--- /dev/null
+++ b/net/nsh/nsh.c
@@ -0,0 +1,91 @@
+/*
+ * Network Service Header
+ *
+ * Copyright (c) 2017 Red Hat, Inc. -- Jiri Benc <jbenc@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/nsh.h>
+#include <net/tun_proto.h>
+
+static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ unsigned int nsh_len, mac_len;
+ __be16 proto;
+ int nhoff;
+
+ skb_reset_network_header(skb);
+
+ nhoff = skb->network_header - skb->mac_header;
+ mac_len = skb->mac_len;
+
+ if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
+ goto out;
+ nsh_len = nsh_hdr_len(nsh_hdr(skb));
+ if (unlikely(!pskb_may_pull(skb, nsh_len)))
+ goto out;
+
+ proto = tun_p_to_eth_p(nsh_hdr(skb)->np);
+ if (!proto)
+ goto out;
+
+ __skb_pull(skb, nsh_len);
+
+ skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
+ skb->protocol = proto;
+
+ features &= NETIF_F_SG;
+ segs = skb_mac_gso_segment(skb, features);
+ if (IS_ERR_OR_NULL(segs)) {
+ skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len,
+ skb->network_header - nhoff,
+ mac_len);
+ goto out;
+ }
+
+ for (skb = segs; skb; skb = skb->next) {
+ skb->protocol = htons(ETH_P_NSH);
+ __skb_push(skb, nsh_len);
+ skb_set_mac_header(skb, -nhoff);
+ skb->network_header = skb->mac_header + mac_len;
+ skb->mac_len = mac_len;
+ }
+
+out:
+ return segs;
+}
+
+static struct packet_offload nsh_packet_offload __read_mostly = {
+ .type = htons(ETH_P_NSH),
+ .priority = 15,
+ .callbacks = {
+ .gso_segment = nsh_gso_segment,
+ },
+};
+
+static int __init nsh_init_module(void)
+{
+ dev_add_offload(&nsh_packet_offload);
+ return 0;
+}
+
+static void __exit nsh_cleanup_module(void)
+{
+ dev_remove_offload(&nsh_packet_offload);
+}
+
+module_init(nsh_init_module);
+module_exit(nsh_cleanup_module);
+
+MODULE_AUTHOR("Jiri Benc <jbenc@redhat.com>");
+MODULE_DESCRIPTION("NSH protocol");
+MODULE_LICENSE("GPL v2");
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 03859e3..d558e88 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -579,8 +579,8 @@ static struct nf_conn *
ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
u8 l3num, struct sk_buff *skb, bool natted)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -1180,15 +1180,13 @@ static int parse_nat(const struct nlattr *attr,
int type = nla_type(a);
if (type > OVS_NAT_ATTR_MAX) {
- OVS_NLERR(log,
- "Unknown NAT attribute (type=%d, max=%d).\n",
+ OVS_NLERR(log, "Unknown NAT attribute (type=%d, max=%d)",
type, OVS_NAT_ATTR_MAX);
return -EINVAL;
}
if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) {
- OVS_NLERR(log,
- "NAT attribute type %d has unexpected length (%d != %d).\n",
+ OVS_NLERR(log, "NAT attribute type %d has unexpected length (%d != %d)",
type, nla_len(a),
ovs_nat_attr_lens[type][ip_vers]);
return -EINVAL;
@@ -1198,9 +1196,7 @@ static int parse_nat(const struct nlattr *attr,
case OVS_NAT_ATTR_SRC:
case OVS_NAT_ATTR_DST:
if (info->nat) {
- OVS_NLERR(log,
- "Only one type of NAT may be specified.\n"
- );
+ OVS_NLERR(log, "Only one type of NAT may be specified");
return -ERANGE;
}
info->nat |= OVS_CT_NAT;
@@ -1245,13 +1241,13 @@ static int parse_nat(const struct nlattr *attr,
break;
default:
- OVS_NLERR(log, "Unknown nat attribute (%d).\n", type);
+ OVS_NLERR(log, "Unknown nat attribute (%d)", type);
return -EINVAL;
}
}
if (rem > 0) {
- OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem);
+ OVS_NLERR(log, "NAT attribute has %d unknown bytes", rem);
return -EINVAL;
}
if (!info->nat) {
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6b44fe4..76cf273 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -335,8 +335,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
const struct dp_upcall_info *upcall_info,
uint32_t cutlen)
{
- unsigned short gso_type = skb_shinfo(skb)->gso_type;
- struct sw_flow_key later_key;
struct sk_buff *segs, *nskb;
int err;
@@ -347,21 +345,9 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
if (segs == NULL)
return -EINVAL;
- if (gso_type & SKB_GSO_UDP) {
- /* The initial flow key extracted by ovs_flow_key_extract()
- * in this case is for a first fragment, so we need to
- * properly mark later fragments.
- */
- later_key = *key;
- later_key.ip.frag = OVS_FRAG_TYPE_LATER;
- }
-
/* Queue all of the segments. */
skb = segs;
do {
- if (gso_type & SKB_GSO_UDP && skb != segs)
- key = &later_key;
-
err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
if (err)
break;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 3f76cb7..8c94cef 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -72,8 +72,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
const struct sk_buff *skb)
{
struct flow_stats *stats;
- int node = numa_node_id();
- int cpu = smp_processor_id();
+ unsigned int cpu = smp_processor_id();
int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
stats = rcu_dereference(flow->stats[cpu]);
@@ -108,7 +107,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
__GFP_THISNODE |
__GFP_NOWARN |
__GFP_NOMEMALLOC,
- node);
+ numa_node_id());
if (likely(new_stats)) {
new_stats->used = jiffies;
new_stats->packet_count = 1;
@@ -118,6 +117,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
rcu_assign_pointer(flow->stats[cpu],
new_stats);
+ cpumask_set_cpu(cpu, &flow->cpu_used_mask);
goto unlock;
}
}
@@ -145,7 +145,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
memset(ovs_stats, 0, sizeof(*ovs_stats));
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+ for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
if (stats) {
@@ -169,7 +169,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
int cpu;
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+ for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
if (stats) {
@@ -584,8 +584,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
key->ip.frag = OVS_FRAG_TYPE_LATER;
return 0;
}
- if (nh->frag_off & htons(IP_MF) ||
- skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ if (nh->frag_off & htons(IP_MF))
key->ip.frag = OVS_FRAG_TYPE_FIRST;
else
key->ip.frag = OVS_FRAG_TYPE_NONE;
@@ -701,9 +700,6 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
if (key->ip.frag == OVS_FRAG_TYPE_LATER)
return 0;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
- key->ip.frag = OVS_FRAG_TYPE_FIRST;
-
/* Transport layer. */
if (key->ip.proto == NEXTHDR_TCP) {
if (tcphdr_ok(skb)) {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a9bc1c8..1875bba 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -31,6 +31,7 @@
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/flex_array.h>
+#include <linux/cpumask.h>
#include <net/inet_ecn.h>
#include <net/ip_tunnels.h>
#include <net/dst_metadata.h>
@@ -219,6 +220,7 @@ struct sw_flow {
*/
struct sw_flow_key key;
struct sw_flow_id id;
+ struct cpumask cpu_used_mask;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
struct flow_stats __rcu *stats[]; /* One for each CPU. First one
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index f07d10a..e8eb427 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1255,7 +1255,7 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
}
if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
- OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n",
+ OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
return -EINVAL;
}
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index ea7a807..80ea2a7 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -98,6 +98,8 @@ struct sw_flow *ovs_flow_alloc(void)
RCU_INIT_POINTER(flow->stats[0], stats);
+ cpumask_set_cpu(0, &flow->cpu_used_mask);
+
return flow;
err:
kmem_cache_free(flow_cache, flow);
@@ -141,7 +143,7 @@ static void flow_free(struct sw_flow *flow)
if (flow->sf_acts)
ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask))
+ for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
(struct flow_stats __force *)flow->stats[cpu]);
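The cpu_used_mask introduced above lets the stats walkers visit only CPUs that ever touched the flow rather than every possible CPU. The loop is open-coded so that CPU 0, whose stats are preallocated in ovs_flow_alloc(), is always visited first regardless of the mask; a sketch of the iteration:

	/* visit CPU 0 unconditionally, then each CPU set in the mask */
	for (cpu = 0; cpu < nr_cpu_ids;
	     cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
		/* read, clear or free flow->stats[cpu] */
	}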
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1c61af9..c261729 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -177,8 +177,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
#define BLK_PLUS_PRIV(sz_of_priv) \
(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
-#define PGV_FROM_VMALLOC 1
-
#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt)
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 45b3af3..da754fc 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -300,15 +300,15 @@ out:
int __init phonet_netlink_register(void)
{
int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit,
- NULL, NULL);
+ NULL, 0);
if (err)
return err;
/* Further __rtnl_register() cannot fail */
- __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, NULL);
- __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, NULL);
- __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, NULL);
- __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, NULL);
- __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, NULL);
+ __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0);
+ __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0);
+ __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0);
+ __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0);
+ __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, 0);
return 0;
}
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 5586609..c2f5c13 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1081,7 +1081,7 @@ static int __init qrtr_proto_init(void)
return rc;
}
- rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL);
+ rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0);
return 0;
}
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 3a915be..75d43dc 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -40,7 +40,7 @@
static struct rhashtable bind_hash_table;
-static struct rhashtable_params ht_parms = {
+static const struct rhashtable_params ht_parms = {
.nelem_hint = 768,
.key_len = sizeof(u64),
.key_offset = offsetof(struct rds_sock, rs_bound_key),
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 50a3789..7ee2d5d 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -151,6 +151,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct rds_transport *loop_trans;
unsigned long flags;
int ret, i;
+ int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
rcu_read_lock();
conn = rds_conn_lookup(net, head, laddr, faddr, trans);
@@ -172,6 +173,12 @@ static struct rds_connection *__rds_conn_create(struct net *net,
conn = ERR_PTR(-ENOMEM);
goto out;
}
+ conn->c_path = kcalloc(npaths, sizeof(struct rds_conn_path), gfp);
+ if (!conn->c_path) {
+ kmem_cache_free(rds_conn_slab, conn);
+ conn = ERR_PTR(-ENOMEM);
+ goto out;
+ }
INIT_HLIST_NODE(&conn->c_hash_node);
conn->c_laddr = laddr;
@@ -181,6 +188,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
ret = rds_cong_get_maps(conn);
if (ret) {
+ kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
conn = ERR_PTR(ret);
goto out;
@@ -207,13 +215,14 @@ static struct rds_connection *__rds_conn_create(struct net *net,
conn->c_trans = trans;
init_waitqueue_head(&conn->c_hs_waitq);
- for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ for (i = 0; i < npaths; i++) {
__rds_conn_path_init(conn, &conn->c_path[i],
is_outgoing);
conn->c_path[i].cp_index = i;
}
ret = trans->conn_alloc(conn, gfp);
if (ret) {
+ kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
conn = ERR_PTR(ret);
goto out;
@@ -236,6 +245,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
/* Creating passive conn */
if (parent->c_passive) {
trans->conn_free(conn->c_path[0].cp_transport_data);
+ kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
conn = parent->c_passive;
} else {
@@ -252,7 +262,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct rds_conn_path *cp;
int i;
- for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ for (i = 0; i < npaths; i++) {
cp = &conn->c_path[i];
/* The ->conn_alloc invocation may have
* allocated resource for all paths, so all
@@ -261,6 +271,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
if (cp->cp_transport_data)
trans->conn_free(cp->cp_transport_data);
}
+ kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
conn = found;
} else {
@@ -374,13 +385,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
if (!cp->cp_transport_data)
return;
- rds_conn_path_drop(cp);
- flush_work(&cp->cp_down_w);
-
/* make sure lingering queued work won't try to ref the conn */
cancel_delayed_work_sync(&cp->cp_send_w);
cancel_delayed_work_sync(&cp->cp_recv_w);
+ rds_conn_path_drop(cp, true);
+ flush_work(&cp->cp_down_w);
+
/* tear down queued messages */
list_for_each_entry_safe(rm, rtmp,
&cp->cp_send_queue,
@@ -407,6 +418,7 @@ void rds_conn_destroy(struct rds_connection *conn)
unsigned long flags;
int i;
struct rds_conn_path *cp;
+ int npaths = (conn->c_trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
rdsdebug("freeing conn %p for %pI4 -> "
"%pI4\n", conn, &conn->c_laddr,
@@ -420,7 +432,7 @@ void rds_conn_destroy(struct rds_connection *conn)
synchronize_rcu();
/* shut the connection down */
- for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+ for (i = 0; i < npaths; i++) {
cp = &conn->c_path[i];
rds_conn_path_destroy(cp);
BUG_ON(!list_empty(&cp->cp_retrans));
@@ -434,6 +446,7 @@ void rds_conn_destroy(struct rds_connection *conn)
rds_cong_remove_conn(conn);
put_net(conn->c_net);
+ kfree(conn->c_path);
kmem_cache_free(rds_conn_slab, conn);
spin_lock_irqsave(&rds_conn_lock, flags);
@@ -464,8 +477,12 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
i++, head++) {
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
struct rds_conn_path *cp;
+ int npaths;
+
+ npaths = (conn->c_trans->t_mp_capable ?
+ RDS_MPATH_WORKERS : 1);
- for (j = 0; j < RDS_MPATH_WORKERS; j++) {
+ for (j = 0; j < npaths; j++) {
cp = &conn->c_path[j];
if (want_send)
list = &cp->cp_send_queue;
@@ -486,8 +503,6 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
}
spin_unlock_irqrestore(&cp->cp_lock, flags);
- if (!conn->c_trans->t_mp_capable)
- break;
}
}
}
@@ -571,15 +586,16 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len,
i++, head++) {
hlist_for_each_entry_rcu(conn, head, c_hash_node) {
struct rds_conn_path *cp;
+ int npaths;
- for (j = 0; j < RDS_MPATH_WORKERS; j++) {
+ npaths = (conn->c_trans->t_mp_capable ?
+ RDS_MPATH_WORKERS : 1);
+ for (j = 0; j < npaths; j++) {
cp = &conn->c_path[j];
/* XXX no cp_lock usage.. */
if (!visitor(cp, buffer))
continue;
- if (!conn->c_trans->t_mp_capable)
- break;
}
/* We copy as much as we can fit in the buffer,
@@ -664,9 +680,13 @@ void rds_conn_exit(void)
/*
* Force a disconnect
*/
-void rds_conn_path_drop(struct rds_conn_path *cp)
+void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
{
atomic_set(&cp->cp_state, RDS_CONN_ERROR);
+
+ if (!destroy && cp->cp_conn->c_destroy_in_prog)
+ return;
+
queue_work(rds_wq, &cp->cp_down_w);
}
EXPORT_SYMBOL_GPL(rds_conn_path_drop);
@@ -674,7 +694,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop);
void rds_conn_drop(struct rds_connection *conn)
{
WARN_ON(conn->c_trans->t_mp_capable);
- rds_conn_path_drop(&conn->c_path[0]);
+ rds_conn_path_drop(&conn->c_path[0], false);
}
EXPORT_SYMBOL_GPL(rds_conn_drop);
@@ -706,5 +726,5 @@ __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...)
vprintk(fmt, ap);
va_end(ap);
- rds_conn_path_drop(cp);
+ rds_conn_path_drop(cp, false);
}
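The new bool parameter gives rds_conn_path_drop() two distinct contracts: ordinary drops become no-ops once connection teardown has begun, while the destroy path still queues the shutdown work it is about to flush. In caller terms:

	rds_conn_path_drop(cp, false);	/* error/reconnect paths: skipped when
					 * cp->cp_conn->c_destroy_in_prog is set */
	rds_conn_path_drop(cp, true);	/* rds_conn_path_destroy() only: always
					 * queues cp->cp_down_w */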
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 516bcc8..2e0315b 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -154,7 +154,7 @@ struct rds_connection {
struct list_head c_map_item;
unsigned long c_map_queued;
- struct rds_conn_path c_path[RDS_MPATH_WORKERS];
+ struct rds_conn_path *c_path;
wait_queue_head_t c_hs_waitq; /* handshake waitq */
u32 c_my_gen_num;
@@ -700,7 +700,7 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
void rds_conn_shutdown(struct rds_conn_path *cpath);
void rds_conn_destroy(struct rds_connection *conn);
void rds_conn_drop(struct rds_connection *conn);
-void rds_conn_path_drop(struct rds_conn_path *cpath);
+void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy);
void rds_conn_connect_if_down(struct rds_connection *conn);
void rds_conn_path_connect_if_down(struct rds_conn_path *cp);
void rds_for_each_conn_info(struct socket *sock, unsigned int len,
diff --git a/net/rds/send.c b/net/rds/send.c
index 41b9f0f..058a407 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -273,7 +273,7 @@ restart:
len = ntohl(rm->m_inc.i_hdr.h_len);
if (cp->cp_unacked_packets == 0 ||
cp->cp_unacked_bytes < len) {
- __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
+ set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
cp->cp_unacked_packets =
rds_sysctl_max_unacked_packets;
@@ -829,7 +829,7 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
* throughput hits a certain threshold.
*/
if (rs->rs_snd_bytes >= rds_sk_sndbuf(rs) / 2)
- __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
+ set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 431404d..6b7ee71 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -592,7 +592,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
continue;
/* reconnect with new parameters */
- rds_conn_path_drop(tc->t_cpath);
+ rds_conn_path_drop(tc->t_cpath, false);
}
spin_unlock_irq(&rds_tcp_conn_lock);
}
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index cbe08a1..46f74da 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -69,14 +69,14 @@ void rds_tcp_state_change(struct sock *sk)
if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) &&
rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
RDS_CONN_ERROR)) {
- rds_conn_path_drop(cp);
+ rds_conn_path_drop(cp, false);
} else {
rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
}
break;
case TCP_CLOSE_WAIT:
case TCP_CLOSE:
- rds_conn_path_drop(cp);
+ rds_conn_path_drop(cp, false);
default:
break;
}
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 0d8616a..dc860d1 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -157,7 +157,7 @@ out:
"returned %d, "
"disconnecting and reconnecting\n",
&conn->c_faddr, cp->cp_index, ret);
- rds_conn_path_drop(cp);
+ rds_conn_path_drop(cp, false);
}
}
}
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 2852bc1..f121daa 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -78,7 +78,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
"current state is %d\n",
__func__,
atomic_read(&cp->cp_state));
- rds_conn_path_drop(cp);
+ rds_conn_path_drop(cp, false);
return;
}
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index a2ad448..fb17552 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -337,6 +337,75 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
EXPORT_SYMBOL(rxrpc_kernel_end_call);
/**
+ * rxrpc_kernel_check_call - Check a call's state
+ * @sock: The socket the call is on
+ * @call: The call to check
+ * @_compl: Where to store the completion state
+ * @_abort_code: Where to store any abort code
+ *
+ * Allow a kernel service to query the state of a call and find out the manner
+ * of its termination if it has completed. Returns -EINPROGRESS if the call is
+ * still going, 0 if the call finished successfully, -ECONNABORTED if the call
+ * was aborted and an appropriate error if the call failed in some other way.
+ */
+int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call,
+ enum rxrpc_call_completion *_compl, u32 *_abort_code)
+{
+ if (call->state != RXRPC_CALL_COMPLETE)
+ return -EINPROGRESS;
+ smp_rmb();
+ *_compl = call->completion;
+ *_abort_code = call->abort_code;
+ return call->error;
+}
+EXPORT_SYMBOL(rxrpc_kernel_check_call);
+
+/**
+ * rxrpc_kernel_retry_call - Allow a kernel service to retry a call
+ * @sock: The socket the call is on
+ * @call: The call to retry
+ * @srx: The address of the peer to contact
+ * @key: The security context to use (defaults to socket setting)
+ *
+ * Allow a kernel service to retry, against a new address, a client call that
+ * failed due to a network error. The Tx queue is kept intact, so there is no
+ * need to re-encrypt any request data that has already been buffered.
+ */
+int rxrpc_kernel_retry_call(struct socket *sock, struct rxrpc_call *call,
+ struct sockaddr_rxrpc *srx, struct key *key)
+{
+ struct rxrpc_conn_parameters cp;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ int ret;
+
+ _enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+
+ if (!key)
+ key = rx->key;
+ if (key && !key->payload.data[0])
+ key = NULL; /* a no-security key */
+
+ memset(&cp, 0, sizeof(cp));
+ cp.local = rx->local;
+ cp.key = key;
+ cp.security_level = 0;
+ cp.exclusive = false;
+ cp.service_id = srx->srx_service;
+
+ mutex_lock(&call->user_mutex);
+
+ ret = rxrpc_prepare_call_for_retry(rx, call);
+ if (ret == 0)
+ ret = rxrpc_retry_client_call(rx, call, &cp, srx, GFP_KERNEL);
+
+ mutex_unlock(&call->user_mutex);
+ _leave(" = %d", ret);
+ return ret;
+}
+EXPORT_SYMBOL(rxrpc_kernel_retry_call);
+
+/**
* rxrpc_kernel_new_call_notification - Get notifications of new calls
* @sock: The socket to intercept received messages on
* @notify_new_call: Function to be called when new calls appear
@@ -591,13 +660,13 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *_optlen)
{
int optlen;
-
+
if (level != SOL_RXRPC)
return -EOPNOTSUPP;
if (get_user(optlen, _optlen))
return -EFAULT;
-
+
switch (optname) {
case RXRPC_SUPPORTED_CMSG:
if (optlen < sizeof(int))
@@ -606,7 +675,7 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
put_user(sizeof(int), _optlen))
return -EFAULT;
return 0;
-
+
default:
return -EOPNOTSUPP;
}
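Taken together, the two exported helpers above let a kernel service detect a network failure and move the same call to another server address. A hedged usage sketch; everything outside the rxrpc_kernel_* API and the completion enum is illustrative:

	enum rxrpc_call_completion compl;
	u32 abort_code;
	int ret;

	ret = rxrpc_kernel_check_call(sock, call, &compl, &abort_code);
	if (ret == -EINPROGRESS)
		return;				/* call still running */
	if (ret < 0 && compl == RXRPC_CALL_NETWORK_ERROR)
		/* reuse the already-encrypted Tx queue on the next address */
		ret = rxrpc_kernel_retry_call(sock, call, &next_srx, NULL);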
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 69b9733..ea5600b 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -15,7 +15,7 @@
#include <net/netns/generic.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
-#include <rxrpc/packet.h>
+#include "protocol.h"
#if 0
#define CHECK_SLAB_OKAY(X) \
@@ -445,6 +445,7 @@ enum rxrpc_call_flag {
RXRPC_CALL_EXPOSED, /* The call was exposed to the world */
RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */
RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */
+ RXRPC_CALL_TX_LASTQ, /* Last packet has been queued */
RXRPC_CALL_SEND_PING, /* A ping will need to be sent */
RXRPC_CALL_PINGING, /* Ping in process */
RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */
@@ -482,18 +483,6 @@ enum rxrpc_call_state {
};
/*
- * Call completion condition (state == RXRPC_CALL_COMPLETE).
- */
-enum rxrpc_call_completion {
- RXRPC_CALL_SUCCEEDED, /* - Normal termination */
- RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
- RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
- RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */
- RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
- NR__RXRPC_CALL_COMPLETIONS
-};
-
-/*
* Call Tx congestion management modes.
*/
enum rxrpc_congest_mode {
@@ -687,9 +676,15 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
struct sockaddr_rxrpc *,
unsigned long, s64, gfp_t);
+int rxrpc_retry_client_call(struct rxrpc_sock *,
+ struct rxrpc_call *,
+ struct rxrpc_conn_parameters *,
+ struct sockaddr_rxrpc *,
+ gfp_t);
void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
struct sk_buff *);
void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
+int rxrpc_prepare_call_for_retry(struct rxrpc_sock *, struct rxrpc_call *);
void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
bool __rxrpc_queue_call(struct rxrpc_call *);
bool rxrpc_queue_call(struct rxrpc_call *);
@@ -830,7 +825,6 @@ void rxrpc_process_connection(struct work_struct *);
*/
extern unsigned int rxrpc_connection_expiry;
-int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
struct sk_buff *);
@@ -894,7 +888,7 @@ extern struct key_type key_type_rxrpc_s;
int rxrpc_request_key(struct rxrpc_sock *, char __user *, int);
int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int);
-int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
+int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t,
u32);
/*
@@ -1060,7 +1054,8 @@ static inline void rxrpc_sysctl_exit(void) {}
/*
* utils.c
*/
-int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
+int rxrpc_extract_addr_from_skb(struct rxrpc_local *, struct sockaddr_rxrpc *,
+ struct sk_buff *);
static inline bool before(u32 seq1, u32 seq2)
{
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index ec3383f..cbd1701 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -277,7 +277,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
* anticipation - and to save on stack space.
*/
xpeer = b->peer_backlog[peer_tail];
- if (rxrpc_extract_addr_from_skb(&xpeer->srx, skb) < 0)
+ if (rxrpc_extract_addr_from_skb(local, &xpeer->srx, skb) < 0)
return NULL;
peer = rxrpc_lookup_incoming_peer(local, xpeer);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index d7809a0..fcdd655 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -269,11 +269,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
here, NULL);
- spin_lock_bh(&call->conn->params.peer->lock);
- hlist_add_head(&call->error_link,
- &call->conn->params.peer->error_targets);
- spin_unlock_bh(&call->conn->params.peer->lock);
-
rxrpc_start_call_timer(call);
_net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
@@ -304,6 +299,48 @@ error:
}
/*
+ * Retry a call to a new address. It is expected that the Tx queue of the call
+ * will contain data previously packaged for an old call.
+ */
+int rxrpc_retry_client_call(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ struct rxrpc_conn_parameters *cp,
+ struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
+{
+ const void *here = __builtin_return_address(0);
+ int ret;
+
+ /* Set up or get a connection record and set the protocol parameters,
+ * including channel number and call ID.
+ */
+ ret = rxrpc_connect_call(call, cp, srx, gfp);
+ if (ret < 0)
+ goto error;
+
+ trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
+ here, NULL);
+
+ rxrpc_start_call_timer(call);
+
+ _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
+
+ if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+ rxrpc_queue_call(call);
+
+ _leave(" = 0");
+ return 0;
+
+error:
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ RX_CALL_DEAD, ret);
+ trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
+ here, ERR_PTR(ret));
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
* Set up an incoming call. call->conn points to the connection.
* This is called in BH context and isn't allowed to fail.
*/
@@ -471,6 +508,61 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
}
/*
+ * Prepare a kernel service call for retry.
+ */
+int rxrpc_prepare_call_for_retry(struct rxrpc_sock *rx, struct rxrpc_call *call)
+{
+ const void *here = __builtin_return_address(0);
+ int i;
+ u8 last = 0;
+
+ _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
+
+ trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage),
+ here, (const void *)call->flags);
+
+ ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+ ASSERTCMP(call->completion, !=, RXRPC_CALL_REMOTELY_ABORTED);
+ ASSERTCMP(call->completion, !=, RXRPC_CALL_LOCALLY_ABORTED);
+ ASSERT(list_empty(&call->recvmsg_link));
+
+ del_timer_sync(&call->timer);
+
+ _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, call->conn);
+
+ if (call->conn)
+ rxrpc_disconnect_call(call);
+
+ if (rxrpc_is_service_call(call) ||
+ !call->tx_phase ||
+ call->tx_hard_ack != 0 ||
+ call->rx_hard_ack != 0 ||
+ call->rx_top != 0)
+ return -EINVAL;
+
+ call->state = RXRPC_CALL_UNINITIALISED;
+ call->completion = RXRPC_CALL_SUCCEEDED;
+ call->call_id = 0;
+ call->cid = 0;
+ call->cong_cwnd = 0;
+ call->cong_extra = 0;
+ call->cong_ssthresh = 0;
+ call->cong_mode = 0;
+ call->cong_dup_acks = 0;
+ call->cong_cumul_acks = 0;
+ call->acks_lowest_nak = 0;
+
+ for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
+ last |= call->rxtx_annotations[i];
+ call->rxtx_annotations[i] &= RXRPC_TX_ANNO_LAST;
+ call->rxtx_annotations[i] |= RXRPC_TX_ANNO_RETRANS;
+ }
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
* release all the calls associated with a socket
*/
void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
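Taken together, rxrpc_prepare_call_for_retry() and rxrpc_retry_client_call() let a kernel service re-issue a completed (but unaborted, un-ACKed) call to a different address. A hypothetical caller flow, assuming the service holds call->user_mutex and treats -ECONNRESET as its retry trigger:

    if (call->error == -ECONNRESET) {
            mutex_lock(&call->user_mutex);
            ret = rxrpc_prepare_call_for_retry(rx, call);
            if (ret == 0)
                    ret = rxrpc_retry_client_call(rx, call, &cp, &srx,
                                                  GFP_KERNEL);
            mutex_unlock(&call->user_mutex);
    }

The RXRPC_TX_ANNO_RETRANS annotation applied in the preparation loop above is what causes the previously packaged Tx queue to be resent on the new connection.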
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index eb21576..5f9624b 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -555,7 +555,10 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
write_lock_bh(&call->state_lock);
- call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+ if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags))
+ call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+ else
+ call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
write_unlock_bh(&call->state_lock);
rxrpc_see_call(call);
@@ -688,15 +691,23 @@ int rxrpc_connect_call(struct rxrpc_call *call,
ret = rxrpc_get_client_conn(call, cp, srx, gfp);
if (ret < 0)
- return ret;
+ goto out;
rxrpc_animate_client_conn(rxnet, call->conn);
rxrpc_activate_channels(call->conn);
ret = rxrpc_wait_for_channel(call, gfp);
- if (ret < 0)
+ if (ret < 0) {
rxrpc_disconnect_client_call(call);
+ goto out;
+ }
+
+ spin_lock_bh(&call->conn->params.peer->lock);
+ hlist_add_head(&call->error_link,
+ &call->conn->params.peer->error_targets);
+ spin_unlock_bh(&call->conn->params.peer->lock);
+out:
_leave(" = %d", ret);
return ret;
}
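The error_targets hook-up moves from rxrpc_new_client_call() into rxrpc_connect_call() so that it also covers the retry path, and channel activation now honours RXRPC_CALL_TX_LASTQ. A condensed view of the flag's lifecycle (a sketch combining the hunk above with the sendmsg.c change later in this patch):

    /* sendmsg.c: queueing the final DATA packet */
    if (last)
            set_bit(RXRPC_CALL_TX_LASTQ, &call->flags);

    /* conn_client.c: activating the channel afterwards */
    call->state = test_bit(RXRPC_CALL_TX_LASTQ, &call->flags) ?
            RXRPC_CALL_CLIENT_AWAIT_REPLY :
            RXRPC_CALL_CLIENT_SEND_REQUEST;

Without the flag, a retried call whose entire request was already queued would be parked in SEND_REQUEST and never progress.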
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 929b50d..fe57579 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -72,7 +72,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
_enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
- if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0)
goto not_found;
k.epoch = sp->hdr.epoch;
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index e60fcd2..f6fcdb3 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -50,12 +50,11 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
else if (conn->proto.index_key > k.index_key)
p = rcu_dereference_raw(p->rb_right);
else
- goto done;
+ break;
conn = NULL;
}
} while (need_seqretry(&peer->service_conn_lock, seq));
-done:
done_seqretry(&peer->service_conn_lock, seq);
_leave(" = %d", conn ? conn->debug_id : -1);
return conn;
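Replacing goto done with break keeps the exit path inside the do-while, so need_seqretry() still validates the lockless walk even when a match was found, and a torn read is retried under the lock. The read-side pattern in isolation (a sketch; walk_tree() is a hypothetical stand-in for the rbtree loop above):

    int seq = 0;

    do {
            read_seqbegin_or_lock(&peer->service_conn_lock, &seq);
            conn = walk_tree(peer, &k);
    } while (need_seqretry(&peer->service_conn_lock, seq));
    done_seqretry(&peer->service_conn_lock, seq);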
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 5436922..e7f6b88 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -92,6 +92,7 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep,
const __be32 *xdr, unsigned int toklen)
{
struct rxrpc_key_token *token, **pptoken;
+ time64_t expiry;
size_t plen;
u32 tktlen;
@@ -158,8 +159,9 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep,
pptoken = &(*pptoken)->next)
continue;
*pptoken = token;
- if (token->kad->expiry < prep->expiry)
- prep->expiry = token->kad->expiry;
+ expiry = rxrpc_u32_to_time64(token->kad->expiry);
+ if (expiry < prep->expiry)
+ prep->expiry = expiry;
_leave(" = 0");
return 0;
@@ -433,6 +435,7 @@ static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep,
struct rxrpc_key_token *token, **pptoken;
struct rxk5_key *rxk5;
const __be32 *end_xdr = xdr + (toklen >> 2);
+ time64_t expiry;
int ret;
_enter(",{%x,%x,%x,%x},%u",
@@ -533,8 +536,9 @@ static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep,
pptoken = &(*pptoken)->next)
continue;
*pptoken = token;
- if (token->kad->expiry < prep->expiry)
- prep->expiry = token->kad->expiry;
+ expiry = rxrpc_u32_to_time64(token->k5->endtime);
+ if (expiry < prep->expiry)
+ prep->expiry = expiry;
_leave(" = 0");
return 0;
@@ -691,6 +695,7 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep)
{
const struct rxrpc_key_data_v1 *v1;
struct rxrpc_key_token *token, **pp;
+ time64_t expiry;
size_t plen;
u32 kver;
int ret;
@@ -777,8 +782,9 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep)
while (*pp)
pp = &(*pp)->next;
*pp = token;
- if (token->kad->expiry < prep->expiry)
- prep->expiry = token->kad->expiry;
+ expiry = rxrpc_u32_to_time64(token->kad->expiry);
+ if (expiry < prep->expiry)
+ prep->expiry = expiry;
token = NULL;
ret = 0;
@@ -955,7 +961,7 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
*/
int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
const void *session_key,
- time_t expiry,
+ time64_t expiry,
u32 kvno)
{
const struct cred *cred = current_cred();
@@ -982,7 +988,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
data.kver = 1;
data.v1.security_index = RXRPC_SECURITY_RXKAD;
data.v1.ticket_length = 0;
- data.v1.expiry = expiry;
+ data.v1.expiry = rxrpc_time64_to_u32(expiry);
data.v1.kvno = 0;
memcpy(&data.v1.session_key, session_key, sizeof(data.v1.session_key));
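The key expiry handling now converts between the 32-bit payload representation and time64_t at the boundaries via rxrpc_u32_to_time64() and rxrpc_time64_to_u32(). Those helpers are defined outside this excerpt; a minimal sketch consistent with how they are used here, with the clamping behaviour an assumption:

    static inline time64_t rxrpc_u32_to_time64(u32 time)
    {
            return (time64_t)time;
    }

    static inline u32 rxrpc_time64_to_u32(time64_t time)
    {
            /* Clamp into the representable 32-bit range (assumed). */
            if (time < 0)
                    return 0;
            if (time > UINT_MAX)
                    return UINT_MAX;
            return (u32)time;
    }

Note that the rxk5 hunk also corrects the expiry source: it now reads token->k5->endtime rather than the kad member of the token union.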
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 540d395..93b5d91 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -39,7 +39,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
_enter("");
- if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0)
return;
msg.msg_name = &srx.transport;
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 5bd2d0f..71e6f71 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -444,7 +444,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
sp = rxrpc_skb(skb);
- if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
+ if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) {
msg.msg_namelen = srx.transport_len;
code = htonl(skb->priority);
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 1ed9c0c..7f74950 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -37,6 +37,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
memset(&srx, 0, sizeof(srx));
srx.transport_type = local->srx.transport_type;
+ srx.transport_len = local->srx.transport_len;
srx.transport.family = local->srx.transport.family;
/* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice
@@ -45,7 +46,6 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
switch (srx.transport.family) {
case AF_INET:
srx.transport.sin.sin_port = serr->port;
- srx.transport_len = sizeof(struct sockaddr_in);
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP:
_net("Rx ICMP");
@@ -69,7 +69,6 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
#ifdef CONFIG_AF_RXRPC_IPV6
case AF_INET6:
srx.transport.sin6.sin6_port = serr->port;
- srx.transport_len = sizeof(struct sockaddr_in6);
switch (serr->ee.ee_origin) {
case SO_EE_ORIGIN_ICMP6:
_net("Rx ICMP6");
@@ -79,6 +78,9 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
break;
case SO_EE_ORIGIN_ICMP:
_net("Rx ICMP on v6 sock");
+ srx.transport.sin6.sin6_addr.s6_addr32[0] = 0;
+ srx.transport.sin6.sin6_addr.s6_addr32[1] = 0;
+ srx.transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12,
skb_network_header(skb) + serr->addr_offset,
sizeof(struct in_addr));
diff --git a/include/rxrpc/packet.h b/net/rxrpc/protocol.h
index a2dcfb8..4bddcf3 100644
--- a/include/rxrpc/packet.h
+++ b/net/rxrpc/protocol.h
@@ -187,49 +187,4 @@ struct rxkad_response {
__be32 ticket_len; /* Kerberos ticket length */
} __packed;
-/*****************************************************************************/
-/*
- * RxRPC-level abort codes
- */
-#define RX_CALL_DEAD -1 /* call/conn has been inactive and is shut down */
-#define RX_INVALID_OPERATION -2 /* invalid operation requested / attempted */
-#define RX_CALL_TIMEOUT -3 /* call timeout exceeded */
-#define RX_EOF -4 /* unexpected end of data on read op */
-#define RX_PROTOCOL_ERROR -5 /* low-level protocol error */
-#define RX_USER_ABORT -6 /* generic user abort */
-#define RX_ADDRINUSE -7 /* UDP port in use */
-#define RX_DEBUGI_BADTYPE -8 /* bad debugging packet type */
-
-/*
- * (un)marshalling abort codes (rxgen)
- */
-#define RXGEN_CC_MARSHAL -450
-#define RXGEN_CC_UNMARSHAL -451
-#define RXGEN_SS_MARSHAL -452
-#define RXGEN_SS_UNMARSHAL -453
-#define RXGEN_DECODE -454
-#define RXGEN_OPCODE -455
-#define RXGEN_SS_XDRFREE -456
-#define RXGEN_CC_XDRFREE -457
-
-/*
- * Rx kerberos security abort codes
- * - unfortunately we have no generalised security abort codes to say things
- * like "unsupported security", so we have to use these instead and hope the
- * other side understands
- */
-#define RXKADINCONSISTENCY 19270400 /* security module structure inconsistent */
-#define RXKADPACKETSHORT 19270401 /* packet too short for security challenge */
-#define RXKADLEVELFAIL 19270402 /* security level negotiation failed */
-#define RXKADTICKETLEN 19270403 /* ticket length too short or too long */
-#define RXKADOUTOFSEQUENCE 19270404 /* packet had bad sequence number */
-#define RXKADNOAUTH 19270405 /* caller not authorised */
-#define RXKADBADKEY 19270406 /* illegal key: bad parity or weak */
-#define RXKADBADTICKET 19270407 /* security object was passed a bad ticket */
-#define RXKADUNKNOWNKEY 19270408 /* ticket contained unknown key version number */
-#define RXKADEXPIRED 19270409 /* authentication expired */
-#define RXKADSEALEDINCON 19270410 /* sealed data inconsistent */
-#define RXKADDATALEN 19270411 /* user data too long */
-#define RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */
-
#endif /* _LINUX_RXRPC_PACKET_H */
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 46d1a1f..c38b3a1 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -634,8 +634,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
challenge.min_level = htonl(0);
challenge.__padding = 0;
- msg.msg_name = &conn->params.peer->srx.transport.sin;
- msg.msg_namelen = sizeof(conn->params.peer->srx.transport.sin);
+ msg.msg_name = &conn->params.peer->srx.transport;
+ msg.msg_namelen = conn->params.peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -689,8 +689,8 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
_enter("");
- msg.msg_name = &conn->params.peer->srx.transport.sin;
- msg.msg_namelen = sizeof(conn->params.peer->srx.transport.sin);
+ msg.msg_name = &conn->params.peer->srx.transport;
+ msg.msg_namelen = conn->params.peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -854,7 +854,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
struct sk_buff *skb,
void *ticket, size_t ticket_len,
struct rxrpc_crypt *_session_key,
- time_t *_expiry,
+ time64_t *_expiry,
u32 *_abort_code)
{
struct skcipher_request *req;
@@ -864,7 +864,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
struct in_addr addr;
unsigned int life;
const char *eproto;
- time_t issue, now;
+ time64_t issue, now;
bool little_endian;
int ret;
u32 abort_code;
@@ -960,15 +960,15 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
if (little_endian) {
__le32 stamp;
memcpy(&stamp, p, 4);
- issue = le32_to_cpu(stamp);
+ issue = rxrpc_u32_to_time64(le32_to_cpu(stamp));
} else {
__be32 stamp;
memcpy(&stamp, p, 4);
- issue = be32_to_cpu(stamp);
+ issue = rxrpc_u32_to_time64(be32_to_cpu(stamp));
}
p += 4;
- now = get_seconds();
- _debug("KIV ISSUE: %lx [%lx]", issue, now);
+ now = ktime_get_real_seconds();
+ _debug("KIV ISSUE: %llx [%llx]", issue, now);
/* check the ticket is in date */
if (issue > now) {
@@ -1053,7 +1053,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt session_key;
const char *eproto;
- time_t expiry;
+ time64_t expiry;
void *ticket;
u32 abort_code, version, kvno, ticket_len, level;
__be32 csum;
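The rxkad transmit paths previously sized msg_name as a fixed sockaddr_in; using the stored transport union and length keeps sendmsg() correct for both address families. The pattern, with sizes shown for illustration:

    msg.msg_name    = &conn->params.peer->srx.transport; /* sin/sin6 union */
    msg.msg_namelen = conn->params.peer->srx.transport_len;
                      /* 16 for AF_INET, 28 for AF_INET6 */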
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index b0d2cda..9ea6f97 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -61,7 +61,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
call->cong_cwnd + call->cong_extra))
break;
if (call->state >= RXRPC_CALL_COMPLETE) {
- ret = -call->error;
+ ret = call->error;
break;
}
if (signal_pending(current)) {
@@ -101,11 +101,23 @@ static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix)
}
/*
+ * Notify the owner of the call that the transmit phase has ended and the last
+ * packet has been queued.
+ */
+static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call,
+ rxrpc_notify_end_tx_t notify_end_tx)
+{
+ if (notify_end_tx)
+ notify_end_tx(&rx->sk, call, call->user_call_ID);
+}
+
+/*
* Queue a DATA packet for transmission, set the resend timeout and send the
* packet immediately
*/
-static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
- bool last)
+static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
+ struct sk_buff *skb, bool last,
+ rxrpc_notify_end_tx_t notify_end_tx)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
rxrpc_seq_t seq = sp->hdr.seq;
@@ -116,8 +128,10 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
ASSERTCMP(seq, ==, call->tx_top + 1);
- if (last)
+ if (last) {
annotation |= RXRPC_TX_ANNO_LAST;
+ set_bit(RXRPC_CALL_TX_LASTQ, &call->flags);
+ }
/* We have to set the timestamp before queueing as the retransmit
* algorithm can see the packet as soon as we queue it.
@@ -141,6 +155,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
switch (call->state) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+ rxrpc_notify_end_tx(rx, call, notify_end_tx);
break;
case RXRPC_CALL_SERVER_ACK_REQUEST:
call->state = RXRPC_CALL_SERVER_SEND_REPLY;
@@ -153,6 +168,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
break;
case RXRPC_CALL_SERVER_SEND_REPLY:
call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
+ rxrpc_notify_end_tx(rx, call, notify_end_tx);
break;
default:
break;
@@ -189,7 +205,8 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
*/
static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
- struct msghdr *msg, size_t len)
+ struct msghdr *msg, size_t len,
+ rxrpc_notify_end_tx_t notify_end_tx)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
@@ -311,11 +328,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
call->tx_total_len -= copy;
}
- /* check for the far side aborting the call or a network error
- * occurring */
- if (call->state == RXRPC_CALL_COMPLETE)
- goto call_terminated;
-
/* add the packet to the send queue if it's now full */
if (sp->remain <= 0 ||
(msg_data_left(msg) == 0 && !more)) {
@@ -350,9 +362,21 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
if (ret < 0)
goto out;
- rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
+ rxrpc_queue_packet(rx, call, skb,
+ !msg_data_left(msg) && !more,
+ notify_end_tx);
skb = NULL;
}
+
+ /* Check for the far side aborting the call or a network error
+ * occurring. If this happens, save any packet that was under
+ * construction so that in the case of a network error, the
+ * call can be retried or redirected.
+ */
+ if (call->state == RXRPC_CALL_COMPLETE) {
+ ret = call->error;
+ goto out;
+ }
} while (msg_data_left(msg) > 0);
success:
@@ -362,11 +386,6 @@ out:
_leave(" = %d", ret);
return ret;
-call_terminated:
- rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
- _leave(" = %d", -call->error);
- return -call->error;
-
maybe_error:
if (copied)
goto success;
@@ -611,7 +630,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
/* Reply phase not begun or not complete for service call. */
ret = -EPROTO;
} else {
- ret = rxrpc_send_data(rx, call, msg, len);
+ ret = rxrpc_send_data(rx, call, msg, len, NULL);
}
mutex_unlock(&call->user_mutex);
@@ -631,6 +650,7 @@ error_release_sock:
* @call: The call to send data through
* @msg: The data to send
* @len: The amount of data to send
+ * @notify_end_tx: Notification that the last packet is queued.
*
 * Allow a kernel service to send data on a call. The call must be in a state
* appropriate to sending data. No control data should be supplied in @msg,
@@ -638,7 +658,8 @@ error_release_sock:
* more data to come, otherwise this data will end the transmission phase.
*/
int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
- struct msghdr *msg, size_t len)
+ struct msghdr *msg, size_t len,
+ rxrpc_notify_end_tx_t notify_end_tx)
{
int ret;
@@ -656,11 +677,12 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_SERVER_ACK_REQUEST:
case RXRPC_CALL_SERVER_SEND_REPLY:
- ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+ ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
+ notify_end_tx);
break;
case RXRPC_CALL_COMPLETE:
read_lock_bh(&call->state_lock);
- ret = -call->error;
+ ret = call->error;
read_unlock_bh(&call->state_lock);
break;
default:
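rxrpc_notify_end_tx_t is invoked as notify_end_tx(&rx->sk, call, call->user_call_ID) above, so a kernel service's callback presumably looks like the following (the function name and body are hypothetical):

    static void my_notify_end_tx(struct sock *sk, struct rxrpc_call *call,
                                 unsigned long user_call_ID)
    {
            /* The last DATA packet has been queued; the service can
             * advance its own per-call state machine here.
             */
    }

Passing NULL, as rxrpc_do_sendmsg() now does for the userspace path, simply skips the notification.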
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index ff7af71..e801171 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -17,17 +17,28 @@
/*
* Fill out a peer address from a socket buffer containing a packet.
*/
-int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
+int rxrpc_extract_addr_from_skb(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx,
+ struct sk_buff *skb)
{
memset(srx, 0, sizeof(*srx));
switch (ntohs(skb->protocol)) {
case ETH_P_IP:
- srx->transport_type = SOCK_DGRAM;
- srx->transport_len = sizeof(srx->transport.sin);
- srx->transport.sin.sin_family = AF_INET;
- srx->transport.sin.sin_port = udp_hdr(skb)->source;
- srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ if (local->srx.transport.family == AF_INET6) {
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin6);
+ srx->transport.sin6.sin6_family = AF_INET6;
+ srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
+ srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
+ srx->transport.sin6.sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr;
+ } else {
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.sin.sin_family = AF_INET;
+ srx->transport.sin.sin_port = udp_hdr(skb)->source;
+ srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ }
return 0;
#ifdef CONFIG_AF_RXRPC_IPV6
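With the local endpoint passed in, an IPv4 source seen on an AF_INET6 transport is encoded as a v4-mapped address, matching what the peer_event.c hunk does for ICMP errors. An illustrative helper (not part of the patch) capturing the encoding:

    static void make_v4mapped(struct in6_addr *a6, __be32 a4)
    {
            memset(a6, 0, sizeof(*a6));       /* s6_addr32[0..1] = 0 */
            a6->s6_addr32[2] = htonl(0xffff);
            a6->s6_addr32[3] = a4;            /* ::ffff:a.b.c.d */
    }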
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f2e9ed3..a306974 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -70,11 +70,11 @@ static void free_tcf(struct rcu_head *head)
kfree(p);
}
-static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p)
+static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
{
- spin_lock_bh(&hinfo->lock);
- hlist_del(&p->tcfa_head);
- spin_unlock_bh(&hinfo->lock);
+ spin_lock_bh(&idrinfo->lock);
+ idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);
+ spin_unlock_bh(&idrinfo->lock);
gen_kill_estimator(&p->tcfa_rate_est);
/*
* gen_estimator est_timer() might access p->tcfa_lock
@@ -83,7 +83,7 @@ static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p)
call_rcu(&p->tcfa_rcu, free_tcf);
}
-int __tcf_hash_release(struct tc_action *p, bool bind, bool strict)
+int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
{
int ret = 0;
@@ -97,55 +97,64 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict)
if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
if (p->ops->cleanup)
p->ops->cleanup(p, bind);
- tcf_hash_destroy(p->hinfo, p);
+ tcf_idr_remove(p->idrinfo, p);
ret = ACT_P_DELETED;
}
}
return ret;
}
-EXPORT_SYMBOL(__tcf_hash_release);
+EXPORT_SYMBOL(__tcf_idr_release);
-static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
+static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
struct netlink_callback *cb)
{
- int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+ int err = 0, index = -1, s_i = 0, n_i = 0;
+ u32 act_flags = cb->args[2];
+ unsigned long jiffy_since = cb->args[3];
struct nlattr *nest;
+ struct idr *idr = &idrinfo->action_idr;
+ struct tc_action *p;
+ unsigned long id = 1;
- spin_lock_bh(&hinfo->lock);
+ spin_lock_bh(&idrinfo->lock);
s_i = cb->args[0];
- for (i = 0; i < (hinfo->hmask + 1); i++) {
- struct hlist_head *head;
- struct tc_action *p;
-
- head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
-
- hlist_for_each_entry_rcu(p, head, tcfa_head) {
- index++;
- if (index < s_i)
- continue;
-
- nest = nla_nest_start(skb, n_i);
- if (nest == NULL)
- goto nla_put_failure;
- err = tcf_action_dump_1(skb, p, 0, 0);
- if (err < 0) {
- index--;
- nlmsg_trim(skb, nest);
- goto done;
- }
- nla_nest_end(skb, nest);
- n_i++;
- if (n_i >= TCA_ACT_MAX_PRIO)
- goto done;
+ idr_for_each_entry_ext(idr, p, id) {
+ index++;
+ if (index < s_i)
+ continue;
+
+ if (jiffy_since &&
+ time_after(jiffy_since,
+ (unsigned long)p->tcfa_tm.lastuse))
+ continue;
+
+ nest = nla_nest_start(skb, n_i);
+ if (!nest)
+ goto nla_put_failure;
+ err = tcf_action_dump_1(skb, p, 0, 0);
+ if (err < 0) {
+ index--;
+ nlmsg_trim(skb, nest);
+ goto done;
}
+ nla_nest_end(skb, nest);
+ n_i++;
+ if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
+ n_i >= TCA_ACT_MAX_PRIO)
+ goto done;
}
done:
- spin_unlock_bh(&hinfo->lock);
- if (n_i)
- cb->args[0] += n_i;
+ if (index >= 0)
+ cb->args[0] = index + 1;
+
+ spin_unlock_bh(&idrinfo->lock);
+ if (n_i) {
+ if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
+ cb->args[1] = n_i;
+ }
return n_i;
nla_put_failure:
@@ -153,31 +162,29 @@ nla_put_failure:
goto done;
}
-static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
+static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
const struct tc_action_ops *ops)
{
struct nlattr *nest;
- int i = 0, n_i = 0;
+ int n_i = 0;
int ret = -EINVAL;
+ struct idr *idr = &idrinfo->action_idr;
+ struct tc_action *p;
+ unsigned long id = 1;
nest = nla_nest_start(skb, 0);
if (nest == NULL)
goto nla_put_failure;
if (nla_put_string(skb, TCA_KIND, ops->kind))
goto nla_put_failure;
- for (i = 0; i < (hinfo->hmask + 1); i++) {
- struct hlist_head *head;
- struct hlist_node *n;
- struct tc_action *p;
-
- head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
- hlist_for_each_entry_safe(p, n, head, tcfa_head) {
- ret = __tcf_hash_release(p, false, true);
- if (ret == ACT_P_DELETED) {
- module_put(p->ops->owner);
- n_i++;
- } else if (ret < 0)
- goto nla_put_failure;
+
+ idr_for_each_entry_ext(idr, p, id) {
+ ret = __tcf_idr_release(p, false, true);
+ if (ret == ACT_P_DELETED) {
+ module_put(p->ops->owner);
+ n_i++;
+ } else if (ret < 0) {
+ goto nla_put_failure;
}
}
if (nla_put_u32(skb, TCA_FCNT, n_i))
@@ -194,12 +201,12 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct netlink_callback *cb, int type,
const struct tc_action_ops *ops)
{
- struct tcf_hashinfo *hinfo = tn->hinfo;
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
if (type == RTM_DELACTION) {
- return tcf_del_walker(hinfo, skb, ops);
+ return tcf_del_walker(idrinfo, skb, ops);
} else if (type == RTM_GETACTION) {
- return tcf_dump_walker(hinfo, skb, cb);
+ return tcf_dump_walker(idrinfo, skb, cb);
} else {
WARN(1, "tcf_generic_walker: unknown action %d\n", type);
return -EINVAL;
@@ -207,40 +214,21 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
}
EXPORT_SYMBOL(tcf_generic_walker);
-static struct tc_action *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
+static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
{
struct tc_action *p = NULL;
- struct hlist_head *head;
- spin_lock_bh(&hinfo->lock);
- head = &hinfo->htab[tcf_hash(index, hinfo->hmask)];
- hlist_for_each_entry_rcu(p, head, tcfa_head)
- if (p->tcfa_index == index)
- break;
- spin_unlock_bh(&hinfo->lock);
+ spin_lock_bh(&idrinfo->lock);
+ p = idr_find_ext(&idrinfo->action_idr, index);
+ spin_unlock_bh(&idrinfo->lock);
return p;
}
-u32 tcf_hash_new_index(struct tc_action_net *tn)
-{
- struct tcf_hashinfo *hinfo = tn->hinfo;
- u32 val = hinfo->index;
-
- do {
- if (++val == 0)
- val = 1;
- } while (tcf_hash_lookup(val, hinfo));
-
- hinfo->index = val;
- return val;
-}
-EXPORT_SYMBOL(tcf_hash_new_index);
-
-int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
+int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
{
- struct tcf_hashinfo *hinfo = tn->hinfo;
- struct tc_action *p = tcf_hash_lookup(index, hinfo);
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
+ struct tc_action *p = tcf_idr_lookup(index, idrinfo);
if (p) {
*a = p;
@@ -248,15 +236,15 @@ int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
}
return 0;
}
-EXPORT_SYMBOL(tcf_hash_search);
+EXPORT_SYMBOL(tcf_idr_search);
-bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
- int bind)
+bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
+ int bind)
{
- struct tcf_hashinfo *hinfo = tn->hinfo;
- struct tc_action *p = NULL;
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
+ struct tc_action *p = tcf_idr_lookup(index, idrinfo);
- if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
+ if (index && p) {
if (bind)
p->tcfa_bindcnt++;
p->tcfa_refcnt++;
@@ -265,23 +253,25 @@ bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
}
return false;
}
-EXPORT_SYMBOL(tcf_hash_check);
+EXPORT_SYMBOL(tcf_idr_check);
-void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
+void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est)
{
if (est)
gen_kill_estimator(&a->tcfa_rate_est);
call_rcu(&a->tcfa_rcu, free_tcf);
}
-EXPORT_SYMBOL(tcf_hash_cleanup);
+EXPORT_SYMBOL(tcf_idr_cleanup);
-int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
- struct tc_action **a, const struct tc_action_ops *ops,
- int bind, bool cpustats)
+int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
+ struct tc_action **a, const struct tc_action_ops *ops,
+ int bind, bool cpustats)
{
struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
- struct tcf_hashinfo *hinfo = tn->hinfo;
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
+ struct idr *idr = &idrinfo->action_idr;
int err = -ENOMEM;
+ unsigned long idr_index;
if (unlikely(!p))
return -ENOMEM;
@@ -304,8 +294,32 @@ err2:
}
}
spin_lock_init(&p->tcfa_lock);
- INIT_HLIST_NODE(&p->tcfa_head);
- p->tcfa_index = index ? index : tcf_hash_new_index(tn);
+ /* user doesn't specify an index */
+ if (!index) {
+ idr_preload(GFP_KERNEL);
+ spin_lock_bh(&idrinfo->lock);
+ err = idr_alloc_ext(idr, NULL, &idr_index, 1, 0,
+ GFP_ATOMIC);
+ spin_unlock_bh(&idrinfo->lock);
+ idr_preload_end();
+ if (err) {
+err3:
+ free_percpu(p->cpu_qstats);
+ goto err2;
+ }
+ p->tcfa_index = idr_index;
+ } else {
+ idr_preload(GFP_KERNEL);
+ spin_lock_bh(&idrinfo->lock);
+ err = idr_alloc_ext(idr, NULL, NULL, index, index + 1,
+ GFP_ATOMIC);
+ spin_unlock_bh(&idrinfo->lock);
+ idr_preload_end();
+ if (err)
+ goto err3;
+ p->tcfa_index = index;
+ }
+
p->tcfa_tm.install = jiffies;
p->tcfa_tm.lastuse = jiffies;
p->tcfa_tm.firstuse = 0;
@@ -314,52 +328,46 @@ err2:
&p->tcfa_rate_est,
&p->tcfa_lock, NULL, est);
if (err) {
- free_percpu(p->cpu_qstats);
- goto err2;
+ goto err3;
}
}
- p->hinfo = hinfo;
+ p->idrinfo = idrinfo;
p->ops = ops;
INIT_LIST_HEAD(&p->list);
*a = p;
return 0;
}
-EXPORT_SYMBOL(tcf_hash_create);
+EXPORT_SYMBOL(tcf_idr_create);
-void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a)
+void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
{
- struct tcf_hashinfo *hinfo = tn->hinfo;
- unsigned int h = tcf_hash(a->tcfa_index, hinfo->hmask);
+ struct tcf_idrinfo *idrinfo = tn->idrinfo;
- spin_lock_bh(&hinfo->lock);
- hlist_add_head(&a->tcfa_head, &hinfo->htab[h]);
- spin_unlock_bh(&hinfo->lock);
+ spin_lock_bh(&idrinfo->lock);
+ idr_replace_ext(&idrinfo->action_idr, a, a->tcfa_index);
+ spin_unlock_bh(&idrinfo->lock);
}
-EXPORT_SYMBOL(tcf_hash_insert);
+EXPORT_SYMBOL(tcf_idr_insert);
-void tcf_hashinfo_destroy(const struct tc_action_ops *ops,
- struct tcf_hashinfo *hinfo)
+void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
+ struct tcf_idrinfo *idrinfo)
{
- int i;
-
- for (i = 0; i < hinfo->hmask + 1; i++) {
- struct tc_action *p;
- struct hlist_node *n;
-
- hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfa_head) {
- int ret;
+ struct idr *idr = &idrinfo->action_idr;
+ struct tc_action *p;
+ int ret;
+ unsigned long id = 1;
- ret = __tcf_hash_release(p, false, true);
- if (ret == ACT_P_DELETED)
- module_put(ops->owner);
- else if (ret < 0)
- return;
- }
+ idr_for_each_entry_ext(idr, p, id) {
+ ret = __tcf_idr_release(p, false, true);
+ if (ret == ACT_P_DELETED)
+ module_put(ops->owner);
+ else if (ret < 0)
+ return;
}
- kfree(hinfo->htab);
+ idr_destroy(&idrinfo->action_idr);
}
-EXPORT_SYMBOL(tcf_hashinfo_destroy);
+EXPORT_SYMBOL(tcf_idrinfo_destroy);
static LIST_HEAD(act_base);
static DEFINE_RWLOCK(act_mod_lock);
@@ -460,9 +468,10 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res)
{
- int ret = -1, i;
u32 jmp_prgcnt = 0;
u32 jmp_ttl = TCA_ACT_MAX_PRIO; /* matches actions per filter */
+ int i;
+ int ret = TC_ACT_OK;
if (skb_skip_tc_classify(skb))
return TC_ACT_OK;
@@ -510,7 +519,7 @@ int tcf_action_destroy(struct list_head *actions, int bind)
int ret = 0;
list_for_each_entry_safe(a, tmp, actions, list) {
- ret = __tcf_hash_release(a, bind, true);
+ ret = __tcf_idr_release(a, bind, true);
if (ret == ACT_P_DELETED)
module_put(a->ops->owner);
else if (ret < 0)
@@ -1068,11 +1077,18 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
return tcf_add_notify(net, n, &actions, portid);
}
+static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
+ [TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32,
+ .validation_data = &tcaa_root_flags_allowed },
+ [TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 },
+};
+
static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct nlattr *tca[TCA_ACT_MAX + 1];
+ struct nlattr *tca[TCA_ROOT_MAX + 1];
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
@@ -1080,7 +1096,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
+ ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
extack);
if (ret < 0)
return ret;
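On the requesting side, a dumper opts into the unbounded dump by sending the new TCA_ROOT_FLAGS attribute; a sketch using kernel-style netlink helpers for brevity (msecs_since_last_dump is a hypothetical variable):

    struct nla_bitfield32 flags = {
            .value    = TCA_FLAG_LARGE_DUMP_ON,
            .selector = TCA_FLAG_LARGE_DUMP_ON,
    };

    nla_put(skb, TCA_ROOT_FLAGS, sizeof(flags), &flags);
    nla_put_u32(skb, TCA_ROOT_TIME_DELTA, msecs_since_last_dump);

TCA_ROOT_TIME_DELTA lets the kernel skip actions not used since the given time, per the jiffy_since filter in tcf_dump_walker().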
@@ -1121,16 +1137,12 @@ replay:
return ret;
}
-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
+static struct nlattr *find_dump_kind(struct nlattr **nla)
{
struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
- struct nlattr *nla[TCAA_MAX + 1];
struct nlattr *kind;
- if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
- NULL, NULL) < 0)
- return NULL;
tb1 = nla[TCA_ACT_TAB];
if (tb1 == NULL)
return NULL;
@@ -1157,8 +1169,20 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
struct tc_action_ops *a_o;
int ret = 0;
struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
- struct nlattr *kind = find_dump_kind(cb->nlh);
+ struct nlattr *tb[TCA_ROOT_MAX + 1];
+ struct nlattr *count_attr = NULL;
+ unsigned long jiffy_since = 0;
+ struct nlattr *kind = NULL;
+ struct nla_bitfield32 bf;
+ u32 msecs_since = 0;
+ u32 act_count = 0;
+
+ ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
+ tcaa_policy, NULL);
+ if (ret < 0)
+ return ret;
+ kind = find_dump_kind(tb);
if (kind == NULL) {
pr_info("tc_dump_action: action bad kind\n");
return 0;
@@ -1168,14 +1192,32 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (a_o == NULL)
return 0;
+ cb->args[2] = 0;
+ if (tb[TCA_ROOT_FLAGS]) {
+ bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]);
+ cb->args[2] = bf.value;
+ }
+
+ if (tb[TCA_ROOT_TIME_DELTA]) {
+ msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]);
+ }
+
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*t), 0);
if (!nlh)
goto out_module_put;
+
+ if (msecs_since)
+ jiffy_since = jiffies - msecs_to_jiffies(msecs_since);
+
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
+ cb->args[3] = jiffy_since;
+ count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
+ if (!count_attr)
+ goto out_module_put;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
@@ -1188,6 +1230,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (ret > 0) {
nla_nest_end(skb, nest);
ret = skb->len;
+ act_count = cb->args[1];
+ memcpy(nla_data(count_attr), &act_count, sizeof(u32));
+ cb->args[1] = 0;
} else
nlmsg_trim(skb, b);
@@ -1205,10 +1250,10 @@ out_module_put:
static int __init tc_action_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
- NULL);
+ 0);
return 0;
}
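The hash-table-to-IDR conversion follows a reserve-then-publish scheme: the index is allocated with a NULL pointer in tcf_idr_create(), so concurrent lookups cannot see a half-initialised action, and the pointer is only installed by tcf_idr_insert(). A condensed lifecycle, using the calls from the hunks above:

    idr_preload(GFP_KERNEL);
    spin_lock_bh(&idrinfo->lock);
    err = idr_alloc_ext(&idrinfo->action_idr, NULL, &index, 1, 0, GFP_ATOMIC);
    spin_unlock_bh(&idrinfo->lock);
    idr_preload_end();                      /* index reserved, value NULL */

    idr_replace_ext(&idrinfo->action_idr, p, p->tcfa_index);   /* publish */
    ...
    idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);       /* retire  */

This also removes the need for tcf_hash_new_index(): the IDR hands back the next free index itself.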
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 9afe133..c0c707e 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -21,7 +21,6 @@
#include <linux/tc_act/tc_bpf.h>
#include <net/tc_act/tc_bpf.h>
-#define BPF_TAB_MASK 15
#define ACT_BPF_NAME_LEN 256
struct tcf_bpf_cfg {
@@ -295,9 +294,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
- if (!tcf_hash_check(tn, parm->index, act, bind)) {
- ret = tcf_hash_create(tn, parm->index, est, act,
- &act_bpf_ops, bind, true);
+ if (!tcf_idr_check(tn, parm->index, act, bind)) {
+ ret = tcf_idr_create(tn, parm->index, est, act,
+ &act_bpf_ops, bind, true);
if (ret < 0)
return ret;
@@ -307,7 +306,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (bind)
return 0;
- tcf_hash_release(*act, bind);
+ tcf_idr_release(*act, bind);
if (!replace)
return -EEXIST;
}
@@ -343,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
rcu_assign_pointer(prog->filter, cfg.filter);
if (res == ACT_P_CREATED) {
- tcf_hash_insert(tn, *act);
+ tcf_idr_insert(tn, *act);
} else {
/* make sure the program being replaced is no longer executing */
synchronize_rcu();
@@ -353,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
return res;
out:
if (res == ACT_P_CREATED)
- tcf_hash_cleanup(*act, est);
+ tcf_idr_cleanup(*act, est);
return ret;
}
@@ -379,7 +378,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_bpf_ops __read_mostly = {
@@ -399,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
- return tc_action_net_init(tn, &act_bpf_ops, BPF_TAB_MASK);
+ return tc_action_net_init(tn, &act_bpf_ops);
}
static void __net_exit bpf_exit_net(struct net *net)
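Every action module now calls tc_action_net_init() without a mask argument, since the IDR sizes itself. The header-side change is outside this excerpt; a presumed shape of the updated helper, consistent with the idrinfo fields used above:

    static inline int tc_action_net_init(struct tc_action_net *tn,
                                         const struct tc_action_ops *ops)
    {
            tn->idrinfo = kmalloc(sizeof(*tn->idrinfo), GFP_KERNEL);
            if (!tn->idrinfo)
                    return -ENOMEM;
            tn->ops = ops;
            spin_lock_init(&tn->idrinfo->lock);
            idr_init(&tn->idrinfo->action_idr);
            return 0;
    }

The remaining per-module hunks below are the same mechanical tcf_hash_* to tcf_idr_* rename.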
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 2155bc6..10b7a88 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -28,8 +28,6 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
-#define CONNMARK_TAB_MASK 3
-
static unsigned int connmark_net_id;
static struct tc_action_ops act_connmark_ops;
@@ -119,9 +117,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
- if (!tcf_hash_check(tn, parm->index, a, bind)) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_connmark_ops, bind, false);
+ if (!tcf_idr_check(tn, parm->index, a, bind)) {
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_connmark_ops, bind, false);
if (ret)
return ret;
@@ -130,13 +128,13 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
ci->net = net;
ci->zone = parm->zone;
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
ret = ACT_P_CREATED;
} else {
ci = to_connmark(*a);
if (bind)
return 0;
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
/* replacing action and zone */
@@ -189,7 +187,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_connmark_ops = {
@@ -208,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
- return tc_action_net_init(tn, &act_connmark_ops, CONNMARK_TAB_MASK);
+ return tc_action_net_init(tn, &act_connmark_ops);
}
static void __net_exit connmark_exit_net(struct net *net)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 3317a2f..1c40caa 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -37,8 +37,6 @@
#include <linux/tc_act/tc_csum.h>
#include <net/tc_act/tc_csum.h>
-#define CSUM_TAB_MASK 15
-
static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
[TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
};
@@ -67,16 +65,16 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_CSUM_PARMS]);
- if (!tcf_hash_check(tn, parm->index, a, bind)) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_csum_ops, bind, false);
+ if (!tcf_idr_check(tn, parm->index, a, bind)) {
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_csum_ops, bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
if (bind) /* don't override defaults */
return 0;
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -88,7 +86,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -231,9 +229,6 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl,
const struct iphdr *iph;
u16 ul;
- if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
- return 1;
-
/*
* Support both UDP and UDPLITE checksum algorithms. Don't use
* udph->len to get the real length without any protocol check,
@@ -287,9 +282,6 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl,
const struct ipv6hdr *ip6h;
u16 ul;
- if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
- return 1;
-
/*
* Support both UDP and UDPLITE checksum algorithms. Don't use
* udph->len to get the real length without any protocol check,
@@ -615,7 +607,7 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_csum_ops = {
@@ -634,7 +626,7 @@ static __net_init int csum_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
- return tc_action_net_init(tn, &act_csum_ops, CSUM_TAB_MASK);
+ return tc_action_net_init(tn, &act_csum_ops);
}
static void __net_exit csum_exit_net(struct net *net)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 99afe8b..e29a48e 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -23,8 +23,6 @@
#include <linux/tc_act/tc_gact.h>
#include <net/tc_act/tc_gact.h>
-#define GACT_TAB_MASK 15
-
static unsigned int gact_net_id;
static struct tc_action_ops act_gact_ops;
@@ -92,16 +90,16 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
}
#endif
- if (!tcf_hash_check(tn, parm->index, a, bind)) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_gact_ops, bind, true);
+ if (!tcf_idr_check(tn, parm->index, a, bind)) {
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_gact_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
if (bind) /* don't override defaults */
return 0;
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -122,7 +120,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
}
#endif
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -214,7 +212,7 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_gact_ops = {
@@ -234,7 +232,7 @@ static __net_init int gact_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
- return tc_action_net_init(tn, &act_gact_ops, GACT_TAB_MASK);
+ return tc_action_net_init(tn, &act_gact_ops);
}
static void __net_exit gact_exit_net(struct net *net)
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index c5dec30..8ccd358 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -34,8 +34,6 @@
#include <linux/etherdevice.h>
#include <net/ife.h>
-#define IFE_TAB_MASK 15
-
static unsigned int ife_net_id;
static int max_metacnt = IFE_META_MAX + 1;
static struct tc_action_ops act_ife_ops;
@@ -435,8 +433,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *tb[TCA_IFE_MAX + 1];
struct nlattr *tb2[IFE_META_MAX + 1];
struct tcf_ife_info *ife;
+ u16 ife_type = ETH_P_IFE;
struct tc_ife *parm;
- u16 ife_type = 0;
u8 *daddr = NULL;
u8 *saddr = NULL;
bool exists = false;
@@ -452,30 +450,18 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_IFE_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
- if (parm->flags & IFE_ENCODE) {
- /* Until we get issued the ethertype, we cant have
- * a default..
- **/
- if (!tb[TCA_IFE_TYPE]) {
- if (exists)
- tcf_hash_release(*a, bind);
- pr_info("You MUST pass etherype for encoding\n");
- return -EINVAL;
- }
- }
-
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a, &act_ife_ops,
- bind, false);
+ ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
+ bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -484,7 +470,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
ife->flags = parm->flags;
if (parm->flags & IFE_ENCODE) {
- ife_type = nla_get_u16(tb[TCA_IFE_TYPE]);
+ if (tb[TCA_IFE_TYPE])
+ ife_type = nla_get_u16(tb[TCA_IFE_TYPE]);
if (tb[TCA_IFE_DMAC])
daddr = nla_data(tb[TCA_IFE_DMAC]);
if (tb[TCA_IFE_SMAC])
@@ -518,7 +505,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (err) {
metadata_parse_err:
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (ret == ACT_P_CREATED)
_tcf_ife_cleanup(*a, bind);
@@ -552,7 +539,7 @@ metadata_parse_err:
spin_unlock_bh(&ife->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -811,7 +798,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_ife_ops = {
@@ -831,7 +818,7 @@ static __net_init int ife_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
- return tc_action_net_init(tn, &act_ife_ops, IFE_TAB_MASK);
+ return tc_action_net_init(tn, &act_ife_ops);
}
static void __net_exit ife_exit_net(struct net *net)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 5417078..d9e399a 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -28,8 +28,6 @@
#include <linux/netfilter_ipv4/ip_tables.h>
-#define IPT_TAB_MASK 15
-
static unsigned int ipt_net_id;
static struct tc_action_ops act_ipt_ops;
@@ -118,33 +116,33 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
if (tb[TCA_IPT_INDEX] != NULL)
index = nla_get_u32(tb[TCA_IPT_INDEX]);
- exists = tcf_hash_check(tn, index, a, bind);
+ exists = tcf_idr_check(tn, index, a, bind);
if (exists && bind)
return 0;
if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) {
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
if (!exists) {
- ret = tcf_hash_create(tn, index, est, a, ops, bind,
- false);
+ ret = tcf_idr_create(tn, index, est, a, ops, bind,
+ false);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
if (bind) /* don't override defaults */
return 0;
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
@@ -180,7 +178,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
ipt->tcfi_hook = hook;
spin_unlock_bh(&ipt->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
err3:
@@ -189,7 +187,7 @@ err2:
kfree(tname);
err1:
if (ret == ACT_P_CREATED)
- tcf_hash_cleanup(*a, est);
+ tcf_idr_cleanup(*a, est);
return err;
}
@@ -316,7 +314,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_ipt_ops = {
@@ -336,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
- return tc_action_net_init(tn, &act_ipt_ops, IPT_TAB_MASK);
+ return tc_action_net_init(tn, &act_ipt_ops);
}
static void __net_exit ipt_exit_net(struct net *net)
@@ -366,7 +364,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_xt_ops = {
@@ -386,7 +384,7 @@ static __net_init int xt_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
- return tc_action_net_init(tn, &act_xt_ops, IPT_TAB_MASK);
+ return tc_action_net_init(tn, &act_xt_ops);
}
static void __net_exit xt_exit_net(struct net *net)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1b5549a..416627c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -28,7 +28,6 @@
#include <linux/tc_act/tc_mirred.h>
#include <net/tc_act/tc_mirred.h>
-#define MIRRED_TAB_MASK 7
static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
@@ -94,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_MIRRED_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -106,14 +105,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
break;
default:
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
if (parm->ifindex) {
dev = __dev_get_by_index(net, parm->ifindex);
if (dev == NULL) {
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENODEV;
}
mac_header_xmit = dev_is_mac_header_xmit(dev);
@@ -124,13 +123,13 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
if (!exists) {
if (dev == NULL)
return -EINVAL;
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_mirred_ops, bind, true);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_mirred_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -152,7 +151,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
spin_lock_bh(&mirred_list_lock);
list_add(&m->tcfm_list, &mirred_list);
spin_unlock_bh(&mirred_list_lock);
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
}
return ret;
@@ -283,7 +282,7 @@ static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static int mirred_device_event(struct notifier_block *unused,
@@ -344,7 +343,7 @@ static __net_init int mirred_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
- return tc_action_net_init(tn, &act_mirred_ops, MIRRED_TAB_MASK);
+ return tc_action_net_init(tn, &act_mirred_ops);
}
static void __net_exit mirred_exit_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 9016ab8..c365d01 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -29,8 +29,6 @@
#include <net/udp.h>
-#define NAT_TAB_MASK 15
-
static unsigned int nat_net_id;
static struct tc_action_ops act_nat_ops;
@@ -58,16 +56,16 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
return -EINVAL;
parm = nla_data(tb[TCA_NAT_PARMS]);
- if (!tcf_hash_check(tn, parm->index, a, bind)) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_nat_ops, bind, false);
+ if (!tcf_idr_check(tn, parm->index, a, bind)) {
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_nat_ops, bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
if (bind)
return 0;
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -83,7 +81,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -290,7 +288,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_nat_ops = {
@@ -309,7 +307,7 @@ static __net_init int nat_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
- return tc_action_net_init(tn, &act_nat_ops, NAT_TAB_MASK);
+ return tc_action_net_init(tn, &act_nat_ops);
}
static void __net_exit nat_exit_net(struct net *net)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 7dc5892..491fe5d 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -24,8 +24,6 @@
#include <net/tc_act/tc_pedit.h>
#include <uapi/linux/tc_act/tc_pedit.h>
-#define PEDIT_TAB_MASK 15
-
static unsigned int pedit_net_id;
static struct tc_action_ops act_pedit_ops;
@@ -168,17 +166,17 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (IS_ERR(keys_ex))
return PTR_ERR(keys_ex);
- if (!tcf_hash_check(tn, parm->index, a, bind)) {
+ if (!tcf_idr_check(tn, parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_pedit_ops, bind, false);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_pedit_ops, bind, false);
if (ret)
return ret;
p = to_pedit(*a);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
- tcf_hash_cleanup(*a, est);
+ tcf_idr_cleanup(*a, est);
kfree(keys_ex);
return -ENOMEM;
}
@@ -186,7 +184,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
} else {
if (bind)
return 0;
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
p = to_pedit(*a);
@@ -214,7 +212,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&p->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -432,7 +430,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_pedit_ops = {
@@ -452,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
- return tc_action_net_init(tn, &act_pedit_ops, PEDIT_TAB_MASK);
+ return tc_action_net_init(tn, &act_pedit_ops);
}
static void __net_exit pedit_exit_net(struct net *net)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index b062bc8..3bb2ebf 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -40,8 +40,6 @@ struct tcf_police {
#define to_police(pc) ((struct tcf_police *)pc)
-#define POL_TAB_MASK 15
-
/* old policer structure from before tc actions */
struct tc_police_compat {
u32 index;
@@ -101,18 +99,18 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_POLICE_TBF]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, NULL, a,
- &act_police_ops, bind, false);
+ ret = tcf_idr_create(tn, parm->index, NULL, a,
+ &act_police_ops, bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -188,7 +186,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
return ret;
police->tcfp_t_c = ktime_get_ns();
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
@@ -196,7 +194,7 @@ failure:
qdisc_put_rtab(P_tab);
qdisc_put_rtab(R_tab);
if (ret == ACT_P_CREATED)
- tcf_hash_cleanup(*a, est);
+ tcf_idr_cleanup(*a, est);
return err;
}
@@ -310,7 +308,7 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
MODULE_AUTHOR("Alexey Kuznetsov");
@@ -333,7 +331,7 @@ static __net_init int police_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- return tc_action_net_init(tn, &act_police_ops, POL_TAB_MASK);
+ return tc_action_net_init(tn, &act_police_ops);
}
static void __net_exit police_exit_net(struct net *net)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 59d6645..ec986ae 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -25,7 +25,6 @@
#include <linux/if_arp.h>
-#define SAMPLE_TAB_MASK 7
static unsigned int sample_net_id;
static struct tc_action_ops act_sample_ops;
@@ -59,18 +58,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SAMPLE_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_sample_ops, bind, false);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_sample_ops, bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -82,7 +81,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
psample_group = psample_group_get(net, s->psample_group_num);
if (!psample_group) {
if (ret == ACT_P_CREATED)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENOMEM;
}
RCU_INIT_POINTER(s->psample_group, psample_group);
@@ -93,7 +92,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
}
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -221,7 +220,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_sample_ops = {
@@ -241,7 +240,7 @@ static __net_init int sample_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
- return tc_action_net_init(tn, &act_sample_ops, SAMPLE_TAB_MASK);
+ return tc_action_net_init(tn, &act_sample_ops);
}
static void __net_exit sample_exit_net(struct net *net)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 43605e7..e7b57e5 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -24,8 +24,6 @@
#include <linux/tc_act/tc_defact.h>
#include <net/tc_act/tc_defact.h>
-#define SIMP_TAB_MASK 7
-
static unsigned int simp_net_id;
static struct tc_action_ops act_simp_ops;
@@ -102,28 +100,28 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_DEF_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
if (tb[TCA_DEF_DATA] == NULL) {
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
defdata = nla_data(tb[TCA_DEF_DATA]);
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_simp_ops, bind, false);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_simp_ops, bind, false);
if (ret)
return ret;
d = to_defact(*a);
ret = alloc_defdata(d, defdata);
if (ret < 0) {
- tcf_hash_cleanup(*a, est);
+ tcf_idr_cleanup(*a, est);
return ret;
}
d->tcf_action = parm->action;
@@ -131,7 +129,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
} else {
d = to_defact(*a);
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
@@ -139,7 +137,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
}
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -183,7 +181,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_simp_ops = {
@@ -203,7 +201,7 @@ static __net_init int simp_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
- return tc_action_net_init(tn, &act_simp_ops, SIMP_TAB_MASK);
+ return tc_action_net_init(tn, &act_simp_ops);
}
static void __net_exit simp_exit_net(struct net *net)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6b3e65d..59949d6 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -27,8 +27,6 @@
#include <linux/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_skbedit.h>
-#define SKBEDIT_TAB_MASK 15
-
static unsigned int skbedit_net_id;
static struct tc_action_ops act_skbedit_ops;
@@ -118,18 +116,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
if (!flags) {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_skbedit_ops, bind, false);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_skbedit_ops, bind, false);
if (ret)
return ret;
@@ -137,7 +135,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else {
d = to_skbedit(*a);
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -163,7 +161,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&d->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -221,7 +219,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_skbedit_ops = {
@@ -240,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
- return tc_action_net_init(tn, &act_skbedit_ops, SKBEDIT_TAB_MASK);
+ return tc_action_net_init(tn, &act_skbedit_ops);
}
static void __net_exit skbedit_exit_net(struct net *net)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index a73c4bb..b642ad3 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -20,8 +20,6 @@
#include <linux/tc_act/tc_skbmod.h>
#include <net/tc_act/tc_skbmod.h>
-#define SKBMOD_TAB_MASK 15
-
static unsigned int skbmod_net_id;
static struct tc_action_ops act_skbmod_ops;
@@ -129,7 +127,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (parm->flags & SKBMOD_F_SWAPMAC)
lflags = SKBMOD_F_SWAPMAC;
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -137,14 +135,14 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
return -EINVAL;
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_skbmod_ops, bind, true);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_skbmod_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -155,7 +153,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
if (unlikely(!p)) {
if (ovr)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENOMEM;
}
@@ -182,7 +180,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
kfree_rcu(p_old, rcu);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -245,7 +243,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_skbmod_ops = {
@@ -265,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
- return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK);
+ return tc_action_net_init(tn, &act_skbmod_ops);
}
static void __net_exit skbmod_exit_net(struct net *net)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index fd7e756..30c9627 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -20,8 +20,6 @@
#include <linux/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_tunnel_key.h>
-#define TUNNEL_KEY_TAB_MASK 15
-
static unsigned int tunnel_key_net_id;
static struct tc_action_ops act_tunnel_key_ops;
@@ -100,7 +98,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
return -EINVAL;
parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -159,14 +157,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
}
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_tunnel_key_ops, bind, true);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_tunnel_key_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -177,7 +175,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
if (ret == ACT_P_CREATED)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ENOMEM;
}
@@ -193,13 +191,13 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
kfree_rcu(params_old, rcu);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
err_out:
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return ret;
}
@@ -304,7 +302,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_tunnel_key_ops = {
@@ -324,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
- return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK);
+ return tc_action_net_init(tn, &act_tunnel_key_ops);
}
static void __net_exit tunnel_key_exit_net(struct net *net)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 13ba3a8..16eb067 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -19,8 +19,6 @@
#include <linux/tc_act/tc_vlan.h>
#include <net/tc_act/tc_vlan.h>
-#define VLAN_TAB_MASK 15
-
static unsigned int vlan_net_id;
static struct tc_action_ops act_vlan_ops;
@@ -128,7 +126,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_VLAN_PARMS])
return -EINVAL;
parm = nla_data(tb[TCA_VLAN_PARMS]);
- exists = tcf_hash_check(tn, parm->index, a, bind);
+ exists = tcf_idr_check(tn, parm->index, a, bind);
if (exists && bind)
return 0;
@@ -139,13 +137,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
case TCA_VLAN_ACT_MODIFY:
if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]);
if (push_vid >= VLAN_VID_MASK) {
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -ERANGE;
}
@@ -167,20 +165,20 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
break;
default:
if (exists)
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
return -EINVAL;
}
action = parm->v_action;
if (!exists) {
- ret = tcf_hash_create(tn, parm->index, est, a,
- &act_vlan_ops, bind, false);
+ ret = tcf_idr_create(tn, parm->index, est, a,
+ &act_vlan_ops, bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
- tcf_hash_release(*a, bind);
+ tcf_idr_release(*a, bind);
if (!ovr)
return -EEXIST;
}
@@ -199,7 +197,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&v->tcf_lock);
if (ret == ACT_P_CREATED)
- tcf_hash_insert(tn, *a);
+ tcf_idr_insert(tn, *a);
return ret;
}
@@ -252,7 +250,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
- return tcf_hash_search(tn, a, index);
+ return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_vlan_ops = {
@@ -271,7 +269,7 @@ static __net_init int vlan_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
- return tc_action_net_init(tn, &act_vlan_ops, VLAN_TAB_MASK);
+ return tc_action_net_init(tn, &act_vlan_ops);
}
static void __net_exit vlan_exit_net(struct net *net)
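[Note] With every action converted, tc_action_net_init() no longer needs a hash-mask argument. A plausible sketch of its new shape (the real inline lives in include/net/act_api.h; the exact field names are an assumption):

#include <linux/idr.h>
#include <linux/slab.h>
#include <net/act_api.h>

static inline int tc_action_net_init(struct tc_action_net *tn,
				     const struct tc_action_ops *ops)
{
	tn->idrinfo = kmalloc(sizeof(*tn->idrinfo), GFP_KERNEL);
	if (!tn->idrinfo)
		return -ENOMEM;
	tn->ops = ops;
	spin_lock_init(&tn->idrinfo->lock);
	idr_init(&tn->idrinfo->idr);	/* replaces the sized bucket array */
	return 0;
}
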
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 6c5ea84..ea6c65f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -100,21 +100,6 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
-static int tfilter_notify(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n, struct tcf_proto *tp,
- unsigned long fh, int event, bool unicast);
-
-static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n,
- struct tcf_chain *chain, int event)
-{
- struct tcf_proto *tp;
-
- for (tp = rtnl_dereference(chain->filter_chain);
- tp; tp = rtnl_dereference(tp->next))
- tfilter_notify(net, oskb, n, tp, 0, event, false);
-}
-
/* Select new prio value from the range, managed by kernel. */
static inline u32 tcf_auto_prio(struct tcf_proto *tp)
@@ -415,6 +400,109 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
return tp;
}
+static int tcf_fill_node(struct net *net, struct sk_buff *skb,
+ struct tcf_proto *tp, void *fh, u32 portid,
+ u32 seq, u16 flags, int event)
+{
+ struct tcmsg *tcm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb_tail_pointer(skb);
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+ if (!nlh)
+ goto out_nlmsg_trim;
+ tcm = nlmsg_data(nlh);
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm__pad1 = 0;
+ tcm->tcm__pad2 = 0;
+ tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
+ tcm->tcm_parent = tp->classid;
+ tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
+ if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
+ goto nla_put_failure;
+ if (!fh) {
+ tcm->tcm_handle = 0;
+ } else {
+ if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
+ goto nla_put_failure;
+ }
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int tfilter_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, struct tcf_proto *tp,
+ void *fh, int event, bool unicast)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
+ n->nlmsg_flags, event) <= 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (unicast)
+ return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+}
+
+static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, struct tcf_proto *tp,
+ void *fh, bool unicast, bool *last)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ int err;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
+ n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ err = tp->ops->delete(tp, fh, last);
+ if (err) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ if (unicast)
+ return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+}
+
+static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n,
+ struct tcf_chain *chain, int event)
+{
+ struct tcf_proto *tp;
+
+ for (tp = rtnl_dereference(chain->filter_chain);
+ tp; tp = rtnl_dereference(tp->next))
+ tfilter_notify(net, oskb, n, tp, 0, event, false);
+}
+
/* Add/change/delete/get a filter node */
static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -436,7 +524,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct tcf_proto *tp;
const struct Qdisc_class_ops *cops;
unsigned long cl;
- unsigned long fh;
+ void *fh;
int err;
int tp_created;
@@ -506,7 +594,7 @@ replay:
/* Do we search for filter, attached to class? */
if (TC_H_MIN(parent)) {
- cl = cops->get(q, parent);
+ cl = cops->find(q, parent);
if (cl == 0)
return -ENOENT;
}
@@ -575,7 +663,7 @@ replay:
fh = tp->ops->get(tp, t->tcm_handle);
- if (fh == 0) {
+ if (!fh) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
tcf_chain_tp_remove(chain, &chain_info, tp);
tfilter_notify(net, skb, n, tp, fh,
@@ -603,11 +691,10 @@ replay:
}
break;
case RTM_DELTFILTER:
- err = tp->ops->delete(tp, fh, &last);
+ err = tfilter_del_notify(net, skb, n, tp, fh, false,
+ &last);
if (err)
goto errout;
- tfilter_notify(net, skb, n, tp, t->tcm_handle,
- RTM_DELTFILTER, false);
if (last) {
tcf_chain_tp_remove(chain, &chain_info, tp);
tcf_proto_destroy(tp);
@@ -637,83 +724,19 @@ replay:
errout:
if (chain)
tcf_chain_put(chain);
- if (cl)
- cops->put(q, cl);
if (err == -EAGAIN)
/* Replay the request. */
goto replay;
return err;
}
-static int tcf_fill_node(struct net *net, struct sk_buff *skb,
- struct tcf_proto *tp, unsigned long fh, u32 portid,
- u32 seq, u16 flags, int event)
-{
- struct tcmsg *tcm;
- struct nlmsghdr *nlh;
- unsigned char *b = skb_tail_pointer(skb);
-
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
- if (!nlh)
- goto out_nlmsg_trim;
- tcm = nlmsg_data(nlh);
- tcm->tcm_family = AF_UNSPEC;
- tcm->tcm__pad1 = 0;
- tcm->tcm__pad2 = 0;
- tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
- tcm->tcm_parent = tp->classid;
- tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
- if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
- goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
- goto nla_put_failure;
- tcm->tcm_handle = fh;
- if (RTM_DELTFILTER != event) {
- tcm->tcm_handle = 0;
- if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
- goto nla_put_failure;
- }
- nlh->nlmsg_len = skb_tail_pointer(skb) - b;
- return skb->len;
-
-out_nlmsg_trim:
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int tfilter_notify(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n, struct tcf_proto *tp,
- unsigned long fh, int event, bool unicast)
-{
- struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
- n->nlmsg_flags, event) <= 0) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
- if (unicast)
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
-
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
-}
-
struct tcf_dump_args {
struct tcf_walker w;
struct sk_buff *skb;
struct netlink_callback *cb;
};
-static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
- struct tcf_walker *arg)
+static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
struct tcf_dump_args *a = (void *)arg;
struct net *net = sock_net(a->skb->sk);
@@ -805,17 +828,17 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
goto out;
cops = q->ops->cl_ops;
if (!cops)
- goto errout;
+ goto out;
if (!cops->tcf_block)
- goto errout;
+ goto out;
if (TC_H_MIN(tcm->tcm_parent)) {
- cl = cops->get(q, tcm->tcm_parent);
+ cl = cops->find(q, tcm->tcm_parent);
if (cl == 0)
- goto errout;
+ goto out;
}
block = cops->tcf_block(q, cl);
if (!block)
- goto errout;
+ goto out;
index_start = cb->args[0];
index = 0;
@@ -830,9 +853,6 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[0] = index;
-errout:
- if (cl)
- cops->put(q, cl);
out:
return skb->len;
}
@@ -891,18 +911,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
}
EXPORT_SYMBOL(tcf_exts_validate);
-void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
- struct tcf_exts *src)
+void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
struct tcf_exts old = *dst;
- tcf_tree_lock(tp);
- dst->nr_actions = src->nr_actions;
- dst->actions = src->actions;
- dst->type = src->type;
- tcf_tree_unlock(tp);
-
+ *dst = *src;
tcf_exts_destroy(&old);
#endif
}
@@ -923,7 +937,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
#ifdef CONFIG_NET_CLS_ACT
struct nlattr *nest;
- if (exts->action && exts->nr_actions) {
+ if (exts->action && tcf_exts_has_actions(exts)) {
/*
* again for backward compatible mode - we want
* to work with both old and new modes of entering
@@ -980,7 +994,7 @@ int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
const struct tc_action *a;
LIST_HEAD(actions);
- if (tc_no_actions(exts))
+ if (!tcf_exts_has_actions(exts))
return -EINVAL;
tcf_exts_to_list(exts, &actions);
@@ -999,10 +1013,10 @@ EXPORT_SYMBOL(tcf_exts_get_dev);
static int __init tc_filter_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
- tc_dump_tfilter, NULL);
+ tc_dump_tfilter, 0);
return 0;
}
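[Note] Three cls_api changes interact here: filter handles become opaque void * cookies instead of unsigned long casts; RTM_DELTFILTER now serializes the netlink notification via tfilter_del_notify() before calling ->delete(), since the handle may be freed once the filter is gone; and class lookup switches from the refcounted ->get()/->put() pair to a plain ->find(), which is why the errout paths above lose their cops->put() calls. A hedged sketch of a qdisc-side ->find() under the new contract (foo_* names are hypothetical):

#include <net/sch_generic.h>

struct foo_class;					/* hypothetical */
struct foo_sched_data { unsigned long unused; };	/* hypothetical */

static struct foo_class *foo_class_lookup(struct foo_sched_data *q,
					   u32 classid);	/* hypothetical */

static unsigned long foo_find(struct Qdisc *sch, u32 classid)
{
	/* pure lookup: no reference is taken, so there is nothing for
	 * the caller to put() afterwards */
	return (unsigned long)foo_class_lookup(qdisc_priv(sch), classid);
}
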
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index c4fd63a0..d89ebaf 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -56,20 +56,18 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
}
-static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
+static void *basic_get(struct tcf_proto *tp, u32 handle)
{
- unsigned long l = 0UL;
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;
list_for_each_entry(f, &head->flist, link) {
if (f->handle == handle) {
- l = (unsigned long) f;
- break;
+ return f;
}
}
- return l;
+ return NULL;
}
static int basic_init(struct tcf_proto *tp)
@@ -106,10 +104,10 @@ static void basic_destroy(struct tcf_proto *tp)
kfree_rcu(head, rcu);
}
-static int basic_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct basic_head *head = rtnl_dereference(tp->root);
- struct basic_filter *f = (struct basic_filter *) arg;
+ struct basic_filter *f = arg;
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
@@ -129,38 +127,27 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct nlattr *est, bool ovr)
{
int err;
- struct tcf_exts e;
- struct tcf_ematch_tree t;
- err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
- if (err < 0)
- goto errout;
- err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t);
+ err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &f->ematches);
if (err < 0)
- goto errout;
+ return err;
if (tb[TCA_BASIC_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]);
tcf_bind_filter(tp, &f->res, base);
}
- tcf_exts_change(tp, &f->exts, &e);
- tcf_em_tree_change(tp, &f->ematches, &t);
f->tp = tp;
-
return 0;
-errout:
- tcf_exts_destroy(&e);
- return err;
}
static int basic_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, unsigned long *arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr)
{
int err;
struct basic_head *head = rtnl_dereference(tp->root);
@@ -213,7 +200,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- *arg = (unsigned long)fnew;
+ *arg = fnew;
if (fold) {
list_replace_rcu(&fold->link, &fnew->link);
@@ -239,7 +226,7 @@ static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+ if (arg->fn(tp, f, arg) < 0) {
arg->stop = 1;
break;
}
@@ -248,10 +235,18 @@ skip:
}
}
-static int basic_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct basic_filter *f = fh;
+
+ if (f && f->res.classid == classid)
+ f->res.class = cl;
+}
+
+static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct basic_filter *f = (struct basic_filter *) fh;
+ struct basic_filter *f = fh;
struct nlattr *nest;
if (f == NULL)
@@ -293,6 +288,7 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = {
.delete = basic_delete,
.walk = basic_walk,
.dump = basic_dump,
+ .bind_class = basic_bind_class,
.owner = THIS_MODULE,
};
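[Note] basic_set_parms() shows the pattern repeated in the classifiers below: the temporary tcf_exts plus tcf_exts_change()/tcf_em_tree_change() dance disappears because validation now writes directly into a filter that is still private to the writer and only becomes visible through RCU list insertion afterwards; the new .bind_class op, in turn, lets qdiscs re-point cached class pointers now that filters no longer hold class references via ->get()/->put(). A condensed sketch of the simplified parms shape (foo_filter and TCA_FOO_CLASSID are stand-ins):

#include <net/netlink.h>
#include <net/pkt_cls.h>

enum { TCA_FOO_CLASSID = 1 };		/* stand-in attribute id */

struct foo_filter {			/* hypothetical classifier state */
	struct tcf_exts exts;
	struct tcf_result res;
};

static int foo_set_parms(struct net *net, struct tcf_proto *tp,
			 struct foo_filter *f, unsigned long base,
			 struct nlattr **tb, struct nlattr *est, bool ovr)
{
	int err;

	/* f is not yet published, so validate straight into f->exts;
	 * on error the caller simply frees f */
	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
	if (err < 0)
		return err;

	if (tb[TCA_FOO_CLASSID]) {
		f->res.classid = nla_get_u32(tb[TCA_FOO_CLASSID]);
		tcf_bind_filter(tp, &f->res, base);
	}
	return 0;
}
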
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index f57bd53..520c502 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -147,24 +147,18 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
enum tc_clsbpf_command cmd)
{
struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_bpf_offload bpf_offload = {};
- struct tc_to_netdev offload;
+ struct tc_cls_bpf_offload cls_bpf = {};
int err;
- offload.type = TC_SETUP_CLSBPF;
- offload.cls_bpf = &bpf_offload;
-
- bpf_offload.command = cmd;
- bpf_offload.exts = &prog->exts;
- bpf_offload.prog = prog->filter;
- bpf_offload.name = prog->bpf_name;
- bpf_offload.exts_integrated = prog->exts_integrated;
- bpf_offload.gen_flags = prog->gen_flags;
-
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index,
- tp->protocol, &offload);
+ tc_cls_common_offload_init(&cls_bpf.common, tp);
+ cls_bpf.command = cmd;
+ cls_bpf.exts = &prog->exts;
+ cls_bpf.prog = prog->filter;
+ cls_bpf.name = prog->bpf_name;
+ cls_bpf.exts_integrated = prog->exts_integrated;
+ cls_bpf.gen_flags = prog->gen_flags;
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSBPF, &cls_bpf);
if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE))
prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
@@ -184,7 +178,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
(oldprog && tc_skip_sw(oldprog->gen_flags));
if (oldprog && oldprog->offloaded) {
- if (tc_should_offload(dev, tp, prog->gen_flags)) {
+ if (tc_should_offload(dev, prog->gen_flags)) {
cmd = TC_CLSBPF_REPLACE;
} else if (!tc_skip_sw(prog->gen_flags)) {
obj = oldprog;
@@ -193,7 +187,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
return -EINVAL;
}
} else {
- if (!tc_should_offload(dev, tp, prog->gen_flags))
+ if (!tc_should_offload(dev, prog->gen_flags))
return skip_sw ? -EINVAL : 0;
cmd = TC_CLSBPF_ADD;
}
@@ -276,11 +270,11 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
}
-static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
- __cls_bpf_delete(tp, (struct cls_bpf_prog *) arg);
+ __cls_bpf_delete(tp, arg);
*last = list_empty(&head->plist);
return 0;
}
@@ -296,20 +290,17 @@ static void cls_bpf_destroy(struct tcf_proto *tp)
kfree_rcu(head, rcu);
}
-static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
+static void *cls_bpf_get(struct tcf_proto *tp, u32 handle)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog;
- unsigned long ret = 0UL;
list_for_each_entry(prog, &head->plist, link) {
- if (prog->handle == handle) {
- ret = (unsigned long) prog;
- break;
- }
+ if (prog->handle == handle)
+ return prog;
}
- return ret;
+ return NULL;
}
static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
@@ -382,13 +373,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
return 0;
}
-static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
- struct cls_bpf_prog *prog,
- unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
+ struct cls_bpf_prog *prog, unsigned long base,
+ struct nlattr **tb, struct nlattr *est, bool ovr)
{
bool is_bpf, is_ebpf, have_exts = false;
- struct tcf_exts exts;
u32 gen_flags = 0;
int ret;
@@ -397,30 +386,23 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
- ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+ ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr);
if (ret < 0)
return ret;
- ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
- if (ret < 0)
- goto errout;
if (tb[TCA_BPF_FLAGS]) {
u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
- if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
- ret = -EINVAL;
- goto errout;
- }
+ if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT)
+ return -EINVAL;
have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
}
if (tb[TCA_BPF_FLAGS_GEN]) {
gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
- !tc_flags_valid(gen_flags)) {
- ret = -EINVAL;
- goto errout;
- }
+ !tc_flags_valid(gen_flags))
+ return -EINVAL;
}
prog->exts_integrated = have_exts;
@@ -429,19 +411,14 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
cls_bpf_prog_from_efd(tb, prog, tp);
if (ret < 0)
- goto errout;
+ return ret;
if (tb[TCA_BPF_CLASSID]) {
prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
tcf_bind_filter(tp, &prog->res, base);
}
- tcf_exts_change(tp, &prog->exts, &exts);
return 0;
-
-errout:
- tcf_exts_destroy(&exts);
- return ret;
}
static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
@@ -468,10 +445,10 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ void **arg, bool ovr)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
- struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg;
+ struct cls_bpf_prog *oldprog = *arg;
struct nlattr *tb[TCA_BPF_MAX + 1];
struct cls_bpf_prog *prog;
int ret;
@@ -508,8 +485,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
- ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE],
- ovr);
+ ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
if (ret < 0)
goto errout;
@@ -530,7 +506,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
list_add_rcu(&prog->link, &head->plist);
}
- *arg = (unsigned long) prog;
+ *arg = prog;
return 0;
errout:
@@ -578,10 +554,10 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
return 0;
}
-static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *tm)
{
- struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
+ struct cls_bpf_prog *prog = fh;
struct nlattr *nest;
u32 bpf_flags = 0;
int ret;
@@ -631,6 +607,14 @@ nla_put_failure:
return -1;
}
+static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct cls_bpf_prog *prog = fh;
+
+ if (prog && prog->res.classid == classid)
+ prog->res.class = cl;
+}
+
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -639,7 +623,7 @@ static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
list_for_each_entry(prog, &head->plist, link) {
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
+ if (arg->fn(tp, prog, arg) < 0) {
arg->stop = 1;
break;
}
@@ -659,6 +643,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
.delete = cls_bpf_delete,
.walk = cls_bpf_walk,
.dump = cls_bpf_dump,
+ .bind_class = cls_bpf_bind_class,
};
static int __init cls_bpf_init_mod(void)
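[Note] The offload plumbing in cls_bpf (and in flower/matchall below) drops the intermediate struct tc_to_netdev: drivers now receive the setup type and a type-specific struct directly, with tc_cls_common_offload_init() filling the shared fields. A sketch of the driver side under the new calling convention (foo_* is a hypothetical driver; the enum name is an assumption based on the TC_SETUP_CLS* constants used above):

#include <linux/netdevice.h>
#include <net/pkt_cls.h>

static int foo_setup_cls_bpf(struct net_device *dev,
			     void *type_data);		/* hypothetical */
static int foo_setup_cls_flower(struct net_device *dev,
				void *type_data);	/* hypothetical */

static int foo_ndo_setup_tc(struct net_device *dev,
			    enum tc_setup_type type, void *type_data)
{
	switch (type) {
	case TC_SETUP_CLSBPF:
		/* type_data points at a struct tc_cls_bpf_offload */
		return foo_setup_cls_bpf(dev, type_data);
	case TC_SETUP_CLSFLOWER:
		return foo_setup_cls_flower(dev, type_data);
	default:
		return -EOPNOTSUPP;	/* unhandled types are refused */
	}
}
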
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 12ce547..d48452f 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -43,9 +43,9 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return tcf_exts_exec(skb, &head->exts, res);
}
-static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
+static void *cls_cgroup_get(struct tcf_proto *tp, u32 handle)
{
- return 0UL;
+ return NULL;
}
static int cls_cgroup_init(struct tcf_proto *tp)
@@ -71,13 +71,11 @@ static void cls_cgroup_destroy_rcu(struct rcu_head *root)
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ void **arg, bool ovr)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
struct cls_cgroup_head *new;
- struct tcf_ematch_tree t;
- struct tcf_exts e;
int err;
if (!tca[TCA_OPTIONS])
@@ -103,23 +101,13 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr);
if (err < 0)
goto errout;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
- if (err < 0) {
- tcf_exts_destroy(&e);
- goto errout;
- }
- err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
- if (err < 0) {
- tcf_exts_destroy(&e);
+ err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &new->ematches);
+ if (err < 0)
goto errout;
- }
-
- tcf_exts_change(tp, &new->exts, &e);
- tcf_em_tree_change(tp, &new->ematches, &t);
rcu_assign_pointer(tp->root, new);
if (head)
@@ -140,7 +128,7 @@ static void cls_cgroup_destroy(struct tcf_proto *tp)
call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
}
-static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last)
{
return -EOPNOTSUPP;
}
@@ -152,7 +140,7 @@ static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long) head, arg) < 0) {
+ if (arg->fn(tp, head, arg) < 0) {
arg->stop = 1;
return;
}
@@ -160,7 +148,7 @@ skip:
arg->count++;
}
-static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 3065752..2a3a60e 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -382,14 +382,12 @@ static void flow_destroy_filter(struct rcu_head *head)
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ void **arg, bool ovr)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *fold, *fnew;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FLOW_MAX + 1];
- struct tcf_exts e;
- struct tcf_ematch_tree t;
unsigned int nkeys = 0;
unsigned int perturb_period = 0;
u32 baseclass = 0;
@@ -425,31 +423,27 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
return -EOPNOTSUPP;
}
- err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
- if (err < 0)
- goto err1;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
- if (err < 0)
- goto err1;
+ fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+ if (!fnew)
+ return -ENOBUFS;
- err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
+ err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches);
if (err < 0)
goto err1;
- err = -ENOBUFS;
- fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
- if (!fnew)
+ err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+ if (err < 0)
goto err2;
- err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr);
if (err < 0)
- goto err3;
+ goto err2;
- fold = (struct flow_filter *)*arg;
+ fold = *arg;
if (fold) {
err = -EINVAL;
if (fold->handle != handle && handle)
- goto err3;
+ goto err2;
/* Copy fold into fnew */
fnew->tp = fold->tp;
@@ -469,31 +463,31 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (tb[TCA_FLOW_MODE])
mode = nla_get_u32(tb[TCA_FLOW_MODE]);
if (mode != FLOW_MODE_HASH && nkeys > 1)
- goto err3;
+ goto err2;
if (mode == FLOW_MODE_HASH)
perturb_period = fold->perturb_period;
if (tb[TCA_FLOW_PERTURB]) {
if (mode != FLOW_MODE_HASH)
- goto err3;
+ goto err2;
perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
}
} else {
err = -EINVAL;
if (!handle)
- goto err3;
+ goto err2;
if (!tb[TCA_FLOW_KEYS])
- goto err3;
+ goto err2;
mode = FLOW_MODE_MAP;
if (tb[TCA_FLOW_MODE])
mode = nla_get_u32(tb[TCA_FLOW_MODE]);
if (mode != FLOW_MODE_HASH && nkeys > 1)
- goto err3;
+ goto err2;
if (tb[TCA_FLOW_PERTURB]) {
if (mode != FLOW_MODE_HASH)
- goto err3;
+ goto err2;
perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
}
@@ -511,9 +505,6 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation,
(unsigned long)fnew);
- tcf_exts_change(tp, &fnew->exts, &e);
- tcf_em_tree_change(tp, &fnew->ematches, &t);
-
netif_keep_dst(qdisc_dev(tp->q));
if (tb[TCA_FLOW_KEYS]) {
@@ -541,31 +532,29 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (perturb_period)
mod_timer(&fnew->perturb_timer, jiffies + perturb_period);
- if (*arg == 0)
+ if (!*arg)
list_add_tail_rcu(&fnew->list, &head->filters);
else
list_replace_rcu(&fold->list, &fnew->list);
- *arg = (unsigned long)fnew;
+ *arg = fnew;
if (fold)
call_rcu(&fold->rcu, flow_destroy_filter);
return 0;
-err3:
- tcf_exts_destroy(&fnew->exts);
err2:
- tcf_em_tree_destroy(&t);
- kfree(fnew);
+ tcf_exts_destroy(&fnew->exts);
+ tcf_em_tree_destroy(&fnew->ematches);
err1:
- tcf_exts_destroy(&e);
+ kfree(fnew);
return err;
}
-static int flow_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int flow_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct flow_head *head = rtnl_dereference(tp->root);
- struct flow_filter *f = (struct flow_filter *)arg;
+ struct flow_filter *f = arg;
list_del_rcu(&f->list);
call_rcu(&f->rcu, flow_destroy_filter);
@@ -597,21 +586,21 @@ static void flow_destroy(struct tcf_proto *tp)
kfree_rcu(head, rcu);
}
-static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
+static void *flow_get(struct tcf_proto *tp, u32 handle)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
list_for_each_entry(f, &head->filters, list)
if (f->handle == handle)
- return (unsigned long)f;
- return 0;
+ return f;
+ return NULL;
}
-static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct flow_filter *f = (struct flow_filter *)fh;
+ struct flow_filter *f = fh;
struct nlattr *nest;
if (f == NULL)
@@ -677,7 +666,7 @@ static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
list_for_each_entry(f, &head->filters, list) {
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ if (arg->fn(tp, f, arg) < 0) {
arg->stop = 1;
break;
}
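[Note] flow_change() is reordered so the filter is allocated first and everything else is validated into it, letting the error labels unwind in strict reverse order of construction; that is why err3 disappears and err1/err2 are relabeled. The general unwinding idiom, for reference (foo, setup_a/setup_b and teardown_a are stand-ins):

#include <linux/slab.h>

struct foo { int a, b; };

static int setup_a(struct foo *f) { f->a = 1; return 0; }	/* stand-in */
static int setup_b(struct foo *f) { f->b = 2; return 0; }	/* stand-in */
static void teardown_a(struct foo *f) { f->a = 0; }		/* stand-in */

static int foo_create(void)
{
	struct foo *f;
	int err;

	f = kzalloc(sizeof(*f), GFP_KERNEL);
	if (!f)
		return -ENOBUFS;

	err = setup_a(f);
	if (err)
		goto err1;
	err = setup_b(f);
	if (err)
		goto err2;
	return 0;

err2:
	teardown_a(f);		/* undo in reverse order of construction */
err1:
	kfree(f);
	return err;
}
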
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 7832eb9..1a267e7 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -68,7 +68,6 @@ struct cls_fl_head {
struct rhashtable ht;
struct fl_flow_mask mask;
struct flow_dissector dissector;
- u32 hgen;
bool mask_assigned;
struct list_head filters;
struct rhashtable_params ht_params;
@@ -76,6 +75,7 @@ struct cls_fl_head {
struct work_struct work;
struct rcu_head rcu;
};
+ struct idr handle_idr;
};
struct cls_fl_filter {
@@ -88,7 +88,6 @@ struct cls_fl_filter {
u32 handle;
u32 flags;
struct rcu_head rcu;
- struct tc_to_netdev tc;
struct net_device *hw_dev;
};
@@ -211,6 +210,7 @@ static int fl_init(struct tcf_proto *tp)
INIT_LIST_HEAD_RCU(&head->filters);
rcu_assign_pointer(tp->root, head);
+ idr_init(&head->handle_idr);
return 0;
}
@@ -225,22 +225,17 @@ static void fl_destroy_filter(struct rcu_head *head)
static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
{
- struct tc_cls_flower_offload offload = {0};
+ struct tc_cls_flower_offload cls_flower = {};
struct net_device *dev = f->hw_dev;
- struct tc_to_netdev *tc = &f->tc;
- if (!tc_can_offload(dev, tp))
+ if (!tc_can_offload(dev))
return;
- offload.command = TC_CLSFLOWER_DESTROY;
- offload.prio = tp->prio;
- offload.cookie = (unsigned long)f;
+ tc_cls_common_offload_init(&cls_flower.common, tp);
+ cls_flower.command = TC_CLSFLOWER_DESTROY;
+ cls_flower.cookie = (unsigned long) f;
- tc->type = TC_SETUP_CLSFLOWER;
- tc->cls_flower = &offload;
-
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index,
- tp->protocol, tc);
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower);
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -249,35 +244,31 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
struct cls_fl_filter *f)
{
struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_flower_offload offload = {0};
- struct tc_to_netdev *tc = &f->tc;
+ struct tc_cls_flower_offload cls_flower = {};
int err;
- if (!tc_can_offload(dev, tp)) {
+ if (!tc_can_offload(dev)) {
if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
- (f->hw_dev && !tc_can_offload(f->hw_dev, tp))) {
+ (f->hw_dev && !tc_can_offload(f->hw_dev))) {
f->hw_dev = dev;
return tc_skip_sw(f->flags) ? -EINVAL : 0;
}
dev = f->hw_dev;
- tc->egress_dev = true;
+ cls_flower.egress_dev = true;
} else {
f->hw_dev = dev;
}
- offload.command = TC_CLSFLOWER_REPLACE;
- offload.prio = tp->prio;
- offload.cookie = (unsigned long)f;
- offload.dissector = dissector;
- offload.mask = mask;
- offload.key = &f->mkey;
- offload.exts = &f->exts;
-
- tc->type = TC_SETUP_CLSFLOWER;
- tc->cls_flower = &offload;
+ tc_cls_common_offload_init(&cls_flower.common, tp);
+ cls_flower.command = TC_CLSFLOWER_REPLACE;
+ cls_flower.cookie = (unsigned long) f;
+ cls_flower.dissector = dissector;
+ cls_flower.mask = mask;
+ cls_flower.key = &f->mkey;
+ cls_flower.exts = &f->exts;
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index, tp->protocol, tc);
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
+ &cls_flower);
if (!err)
f->flags |= TCA_CLS_FLAGS_IN_HW;
@@ -288,27 +279,26 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
{
- struct tc_cls_flower_offload offload = {0};
+ struct tc_cls_flower_offload cls_flower = {};
struct net_device *dev = f->hw_dev;
- struct tc_to_netdev *tc = &f->tc;
- if (!tc_can_offload(dev, tp))
+ if (!tc_can_offload(dev))
return;
- offload.command = TC_CLSFLOWER_STATS;
- offload.prio = tp->prio;
- offload.cookie = (unsigned long)f;
- offload.exts = &f->exts;
-
- tc->type = TC_SETUP_CLSFLOWER;
- tc->cls_flower = &offload;
+ tc_cls_common_offload_init(&cls_flower.common, tp);
+ cls_flower.command = TC_CLSFLOWER_STATS;
+ cls_flower.cookie = (unsigned long) f;
+ cls_flower.exts = &f->exts;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index, tp->protocol, tc);
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
+ &cls_flower);
}
static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+
+ idr_remove_ext(&head->handle_idr, f->handle);
list_del_rcu(&f->list);
if (!tc_skip_hw(f->flags))
fl_hw_destroy_filter(tp, f);
@@ -341,20 +331,17 @@ static void fl_destroy(struct tcf_proto *tp)
list_for_each_entry_safe(f, next, &head->filters, list)
__fl_delete(tp, f);
+ idr_destroy(&head->handle_idr);
__module_get(THIS_MODULE);
call_rcu(&head->rcu, fl_destroy_rcu);
}
-static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
+static void *fl_get(struct tcf_proto *tp, u32 handle)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
- struct cls_fl_filter *f;
- list_for_each_entry(f, &head->filters, list)
- if (f->handle == handle)
- return (unsigned long) f;
- return 0;
+ return idr_find_ext(&head->handle_idr, handle);
}
static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
@@ -852,15 +839,11 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
- struct tcf_exts e;
int err;
- err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
- if (err < 0)
- goto errout;
if (tb[TCA_FLOWER_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
@@ -869,50 +852,25 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
err = fl_set_key(net, tb, &f->key, &mask->key);
if (err)
- goto errout;
+ return err;
fl_mask_update_range(mask);
fl_set_masked_key(&f->mkey, &f->key, mask);
- tcf_exts_change(tp, &f->exts, &e);
-
return 0;
-errout:
- tcf_exts_destroy(&e);
- return err;
-}
-
-static u32 fl_grab_new_handle(struct tcf_proto *tp,
- struct cls_fl_head *head)
-{
- unsigned int i = 0x80000000;
- u32 handle;
-
- do {
- if (++head->hgen == 0x7FFFFFFF)
- head->hgen = 1;
- } while (--i > 0 && fl_get(tp, head->hgen));
-
- if (unlikely(i == 0)) {
- pr_err("Insufficient number of handles\n");
- handle = 0;
- } else {
- handle = head->hgen;
- }
-
- return handle;
}
static int fl_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ void **arg, bool ovr)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
- struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
+ struct cls_fl_filter *fold = *arg;
struct cls_fl_filter *fnew;
struct nlattr **tb;
struct fl_flow_mask mask = {};
+ unsigned long idr_index;
int err;
if (!tca[TCA_OPTIONS])
@@ -943,13 +901,21 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
goto errout;
if (!handle) {
- handle = fl_grab_new_handle(tp, head);
- if (!handle) {
- err = -EINVAL;
+ err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+ 1, 0x80000000, GFP_KERNEL);
+ if (err)
goto errout;
- }
+ fnew->handle = idr_index;
+ }
+
+ /* user specifies a handle and it doesn't exist */
+ if (handle && !fold) {
+ err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+ handle, handle + 1, GFP_KERNEL);
+ if (err)
+ goto errout;
+ fnew->handle = idr_index;
}
- fnew->handle = handle;
if (tb[TCA_FLOWER_FLAGS]) {
fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
@@ -1000,9 +966,11 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
fl_hw_destroy_filter(tp, fold);
}
- *arg = (unsigned long) fnew;
+ *arg = fnew;
if (fold) {
+ fnew->handle = handle;
+ idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
list_replace_rcu(&fold->list, &fnew->list);
tcf_unbind_filter(tp, &fold->res);
call_rcu(&fold->rcu, fl_destroy_filter);
@@ -1021,10 +989,10 @@ errout_tb:
return err;
}
-static int fl_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int fl_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
- struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
+ struct cls_fl_filter *f = arg;
if (!tc_skip_sw(f->flags))
rhashtable_remove_fast(&head->ht, &f->ht_node,
@@ -1042,7 +1010,7 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
list_for_each_entry_rcu(f, &head->filters, list) {
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+ if (arg->fn(tp, f, arg) < 0) {
arg->stop = 1;
break;
}
@@ -1177,11 +1145,11 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
}
-static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
- struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
+ struct cls_fl_filter *f = fh;
struct nlattr *nest;
struct fl_flow_key *key, *mask;
@@ -1383,6 +1351,14 @@ nla_put_failure:
return -1;
}
+static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct cls_fl_filter *f = fh;
+
+ if (f && f->res.classid == classid)
+ f->res.class = cl;
+}
+
static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.kind = "flower",
.classify = fl_classify,
@@ -1393,6 +1369,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.delete = fl_delete,
.walk = fl_walk,
.dump = fl_dump,
+ .bind_class = fl_bind_class,
.owner = THIS_MODULE,
};
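[Note] cls_flower's hand-rolled handle generator (fl_grab_new_handle, with its up-to-2^31-iteration search loop) is replaced by a per-head IDR: fl_get() becomes a direct idr_find_ext() lookup and fl_change() allocates handles with idr_alloc_ext(), either from [1, 0x80000000) when the kernel picks the handle or pinned to exactly the user-requested value. A minimal sketch condensing the two allocation cases from fl_change() above (this helper does not exist in the patch, which open-codes the logic):

static int fl_handle_alloc(struct cls_fl_head *head,
			   struct cls_fl_filter *fnew, u32 handle)
{
	unsigned long idx;
	int err;

	if (!handle)		/* kernel picks the lowest free handle */
		err = idr_alloc_ext(&head->handle_idr, fnew, &idx,
				    1, 0x80000000, GFP_KERNEL);
	else			/* pin the user-requested handle */
		err = idr_alloc_ext(&head->handle_idr, fnew, &idx,
				    handle, handle + 1, GFP_KERNEL);
	if (!err)
		fnew->handle = idx;
	return err;
}
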
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index d388536..941245a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -95,20 +95,20 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
}
-static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
+static void *fw_get(struct tcf_proto *tp, u32 handle)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
if (head == NULL)
- return 0;
+ return NULL;
f = rtnl_dereference(head->ht[fw_hash(handle)]);
for (; f; f = rtnl_dereference(f->next)) {
if (f->id == handle)
- return (unsigned long)f;
+ return f;
}
- return 0;
+ return NULL;
}
static int fw_init(struct tcf_proto *tp)
@@ -147,10 +147,10 @@ static void fw_destroy(struct tcf_proto *tp)
kfree_rcu(head, rcu);
}
-static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int fw_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct fw_head *head = rtnl_dereference(tp->root);
- struct fw_filter *f = (struct fw_filter *)arg;
+ struct fw_filter *f = arg;
struct fw_filter __rcu **fp;
struct fw_filter *pfp;
int ret = -EINVAL;
@@ -190,22 +190,17 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
[TCA_FW_MASK] = { .type = NLA_U32 },
};
-static int
-fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
- struct nlattr **tb, struct nlattr **tca, unsigned long base,
- bool ovr)
+static int fw_set_parms(struct net *net, struct tcf_proto *tp,
+ struct fw_filter *f, struct nlattr **tb,
+ struct nlattr **tca, unsigned long base, bool ovr)
{
struct fw_head *head = rtnl_dereference(tp->root);
- struct tcf_exts e;
u32 mask;
int err;
- err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
- if (err < 0)
- goto errout;
if (tb[TCA_FW_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
@@ -216,10 +211,8 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
if (tb[TCA_FW_INDEV]) {
int ret;
ret = tcf_change_indev(net, tb[TCA_FW_INDEV]);
- if (ret < 0) {
- err = ret;
- goto errout;
- }
+ if (ret < 0)
+ return ret;
f->ifindex = ret;
}
#endif /* CONFIG_NET_CLS_IND */
@@ -228,25 +221,20 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
if (tb[TCA_FW_MASK]) {
mask = nla_get_u32(tb[TCA_FW_MASK]);
if (mask != head->mask)
- goto errout;
+ return err;
} else if (head->mask != 0xFFFFFFFF)
- goto errout;
-
- tcf_exts_change(tp, &f->exts, &e);
+ return err;
return 0;
-errout:
- tcf_exts_destroy(&e);
- return err;
}
static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
- u32 handle, struct nlattr **tca, unsigned long *arg,
+ u32 handle, struct nlattr **tca, void **arg,
bool ovr)
{
struct fw_head *head = rtnl_dereference(tp->root);
- struct fw_filter *f = (struct fw_filter *) *arg;
+ struct fw_filter *f = *arg;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_FW_MAX + 1];
int err;
@@ -282,7 +270,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return err;
}
- err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr);
+ err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr);
if (err < 0) {
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
@@ -300,7 +288,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
tcf_unbind_filter(tp, &f->res);
call_rcu(&f->rcu, fw_delete_filter);
- *arg = (unsigned long)fnew;
+ *arg = fnew;
return err;
}
@@ -330,14 +318,14 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
f->id = handle;
f->tp = tp;
- err = fw_change_attrs(net, tp, f, tb, tca, base, ovr);
+ err = fw_set_parms(net, tp, f, tb, tca, base, ovr);
if (err < 0)
goto errout;
RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]);
rcu_assign_pointer(head->ht[fw_hash(handle)], f);
- *arg = (unsigned long)f;
+ *arg = f;
return 0;
errout:
@@ -366,7 +354,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
arg->count++;
continue;
}
- if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ if (arg->fn(tp, f, arg) < 0) {
arg->stop = 1;
return;
}
@@ -375,11 +363,11 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
}
-static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct fw_head *head = rtnl_dereference(tp->root);
- struct fw_filter *f = (struct fw_filter *)fh;
+ struct fw_filter *f = fh;
struct nlattr *nest;
if (f == NULL)
@@ -387,7 +375,7 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
t->tcm_handle = f->id;
- if (!f->res.classid && !tcf_exts_is_available(&f->exts))
+ if (!f->res.classid && !tcf_exts_has_actions(&f->exts))
return skb->len;
nest = nla_nest_start(skb, TCA_OPTIONS);
@@ -424,6 +412,14 @@ nla_put_failure:
return -1;
}
+static void fw_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct fw_filter *f = fh;
+
+ if (f && f->res.classid == classid)
+ f->res.class = cl;
+}
+
static struct tcf_proto_ops cls_fw_ops __read_mostly = {
.kind = "fw",
.classify = fw_classify,
@@ -434,6 +430,7 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = {
.delete = fw_delete,
.walk = fw_walk,
.dump = fw_dump,
+ .bind_class = fw_bind_class,
.owner = THIS_MODULE,
};
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 9dc26c3..21cc45ca 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -54,19 +54,16 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
unsigned long cookie)
{
struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_to_netdev offload;
- struct tc_cls_matchall_offload mall_offload = {0};
+ struct tc_cls_matchall_offload cls_mall = {};
int err;
- offload.type = TC_SETUP_MATCHALL;
- offload.cls_mall = &mall_offload;
- offload.cls_mall->command = TC_CLSMATCHALL_REPLACE;
- offload.cls_mall->exts = &head->exts;
- offload.cls_mall->cookie = cookie;
+ tc_cls_common_offload_init(&cls_mall.common, tp);
+ cls_mall.command = TC_CLSMATCHALL_REPLACE;
+ cls_mall.exts = &head->exts;
+ cls_mall.cookie = cookie;
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index,
- tp->protocol, &offload);
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL,
+ &cls_mall);
if (!err)
head->flags |= TCA_CLS_FLAGS_IN_HW;
@@ -78,17 +75,13 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
unsigned long cookie)
{
struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_to_netdev offload;
- struct tc_cls_matchall_offload mall_offload = {0};
+ struct tc_cls_matchall_offload cls_mall = {};
- offload.type = TC_SETUP_MATCHALL;
- offload.cls_mall = &mall_offload;
- offload.cls_mall->command = TC_CLSMATCHALL_DESTROY;
- offload.cls_mall->exts = NULL;
- offload.cls_mall->cookie = cookie;
+ tc_cls_common_offload_init(&cls_mall.common, tp);
+ cls_mall.command = TC_CLSMATCHALL_DESTROY;
+ cls_mall.cookie = cookie;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index,
- tp->protocol, &offload);
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, &cls_mall);
}
static void mall_destroy(struct tcf_proto *tp)
@@ -99,15 +92,15 @@ static void mall_destroy(struct tcf_proto *tp)
if (!head)
return;
- if (tc_should_offload(dev, tp, head->flags))
+ if (tc_should_offload(dev, head->flags))
mall_destroy_hw_filter(tp, head, (unsigned long) head);
call_rcu(&head->rcu, mall_destroy_rcu);
}
-static unsigned long mall_get(struct tcf_proto *tp, u32 handle)
+static void *mall_get(struct tcf_proto *tp, u32 handle)
{
- return 0UL;
+ return NULL;
}
static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
@@ -120,33 +113,23 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
- struct tcf_exts e;
int err;
- err = tcf_exts_init(&e, TCA_MATCHALL_ACT, 0);
- if (err)
- return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr);
if (err < 0)
- goto errout;
+ return err;
if (tb[TCA_MATCHALL_CLASSID]) {
head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
tcf_bind_filter(tp, &head->res, base);
}
-
- tcf_exts_change(tp, &head->exts, &e);
-
return 0;
-errout:
- tcf_exts_destroy(&e);
- return err;
}
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ void **arg, bool ovr)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct net_device *dev = tp->q->dev_queue->dev;
@@ -189,7 +172,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto err_set_parms;
- if (tc_should_offload(dev, tp, flags)) {
+ if (tc_should_offload(dev, flags)) {
err = mall_replace_hw_filter(tp, new, (unsigned long) new);
if (err) {
if (tc_skip_sw(flags))
@@ -202,7 +185,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (!tc_in_hw(new->flags))
new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
- *arg = (unsigned long) head;
+ *arg = head;
rcu_assign_pointer(tp->root, new);
return 0;
@@ -214,7 +197,7 @@ err_exts_init:
return err;
}
-static int mall_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int mall_delete(struct tcf_proto *tp, void *arg, bool *last)
{
return -EOPNOTSUPP;
}
@@ -225,16 +208,16 @@ static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
if (arg->count < arg->skip)
goto skip;
- if (arg->fn(tp, (unsigned long) head, arg) < 0)
+ if (arg->fn(tp, head, arg) < 0)
arg->stop = 1;
skip:
arg->count++;
}
-static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct cls_mall_head *head = (struct cls_mall_head *) fh;
+ struct cls_mall_head *head = fh;
struct nlattr *nest;
if (!head)
@@ -268,6 +251,14 @@ nla_put_failure:
return -1;
}
+static void mall_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct cls_mall_head *head = fh;
+
+ if (head && head->res.classid == classid)
+ head->res.class = cl;
+}
+
static struct tcf_proto_ops cls_mall_ops __read_mostly = {
.kind = "matchall",
.classify = mall_classify,
@@ -278,6 +269,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = {
.delete = mall_delete,
.walk = mall_walk,
.dump = mall_dump,
+ .bind_class = mall_bind_class,
.owner = THIS_MODULE,
};
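
cls_matchall is converted to the new offload calling convention: the intermediate struct tc_to_netdev is dropped, and ndo_setup_tc() now receives an enum type plus a type-specific struct whose leading member carries common metadata filled in by tc_cls_common_offload_init(). A hedged userspace sketch of that dispatch shape (field names are illustrative, not the exact kernel layout):

    #include <stdio.h>

    enum tc_setup_type { TC_SETUP_CLSU32, TC_SETUP_CLSMATCHALL };

    /* Stand-in for the common offload metadata that used to travel as
     * separate ndo_setup_tc() arguments. */
    struct cls_common {
            unsigned int chain_index;
            unsigned short protocol;
            unsigned int prio;
    };

    struct cls_matchall_offload {
            struct cls_common common;       /* must stay first */
            int command;
            unsigned long cookie;
    };

    /* Driver-side dispatch: one enum plus one opaque payload instead of
     * a union-carrying struct tc_to_netdev. */
    static int setup_tc(enum tc_setup_type type, void *type_data)
    {
            switch (type) {
            case TC_SETUP_CLSMATCHALL: {
                    struct cls_matchall_offload *mall = type_data;

                    printf("matchall cmd %d on chain %u\n",
                           mall->command, mall->common.chain_index);
                    return 0;
            }
            default:
                    return -1;      /* -EOPNOTSUPP in the kernel */
            }
    }

    int main(void)
    {
            struct cls_matchall_offload mall = {
                    .common = { .chain_index = 0, .protocol = 0x0800, .prio = 1 },
                    .command = 1,
                    .cookie = 42,
            };

            return setup_tc(TC_SETUP_CLSMATCHALL, &mall);
    }
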
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index d63d550..9ddde65 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -113,7 +113,7 @@ static inline int route4_hash_wild(void)
#define ROUTE4_APPLY_RESULT() \
{ \
*res = f->res; \
- if (tcf_exts_is_available(&f->exts)) { \
+ if (tcf_exts_has_actions(&f->exts)) { \
int r = tcf_exts_exec(skb, &f->exts, res); \
if (r < 0) { \
dont_cache = 1; \
@@ -216,7 +216,7 @@ static inline u32 from_hash(u32 id)
return 16 + (id & 0xF);
}
-static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
+static void *route4_get(struct tcf_proto *tp, u32 handle)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_bucket *b;
@@ -225,11 +225,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
h1 = to_hash(handle);
if (h1 > 256)
- return 0;
+ return NULL;
h2 = from_hash(handle >> 16);
if (h2 > 32)
- return 0;
+ return NULL;
b = rtnl_dereference(head->table[h1]);
if (b) {
@@ -237,9 +237,9 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
f;
f = rtnl_dereference(f->next))
if (f->handle == handle)
- return (unsigned long)f;
+ return f;
}
- return 0;
+ return NULL;
}
static int route4_init(struct tcf_proto *tp)
@@ -294,10 +294,10 @@ static void route4_destroy(struct tcf_proto *tp)
kfree_rcu(head, rcu);
}
-static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int route4_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct route4_head *head = rtnl_dereference(tp->root);
- struct route4_filter *f = (struct route4_filter *)arg;
+ struct route4_filter *f = arg;
struct route4_filter __rcu **fp;
struct route4_filter *nf;
struct route4_bucket *b;
@@ -372,37 +372,32 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct route4_filter *fp;
unsigned int h1;
struct route4_bucket *b;
- struct tcf_exts e;
int err;
- err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
- if (err < 0)
- goto errout;
- err = -EINVAL;
if (tb[TCA_ROUTE4_TO]) {
if (new && handle & 0x8000)
- goto errout;
+ return -EINVAL;
to = nla_get_u32(tb[TCA_ROUTE4_TO]);
if (to > 0xFF)
- goto errout;
+ return -EINVAL;
nhandle = to;
}
if (tb[TCA_ROUTE4_FROM]) {
if (tb[TCA_ROUTE4_IIF])
- goto errout;
+ return -EINVAL;
id = nla_get_u32(tb[TCA_ROUTE4_FROM]);
if (id > 0xFF)
- goto errout;
+ return -EINVAL;
nhandle |= id << 16;
} else if (tb[TCA_ROUTE4_IIF]) {
id = nla_get_u32(tb[TCA_ROUTE4_IIF]);
if (id > 0x7FFF)
- goto errout;
+ return -EINVAL;
nhandle |= (id | 0x8000) << 16;
} else
nhandle |= 0xFFFF << 16;
@@ -410,27 +405,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
if (handle && new) {
nhandle |= handle & 0x7F00;
if (nhandle != handle)
- goto errout;
+ return -EINVAL;
}
h1 = to_hash(nhandle);
b = rtnl_dereference(head->table[h1]);
if (!b) {
- err = -ENOBUFS;
b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
if (b == NULL)
- goto errout;
+ return -ENOBUFS;
rcu_assign_pointer(head->table[h1], b);
} else {
unsigned int h2 = from_hash(nhandle >> 16);
- err = -EEXIST;
for (fp = rtnl_dereference(b->ht[h2]);
fp;
fp = rtnl_dereference(fp->next))
if (fp->handle == f->handle)
- goto errout;
+ return -EEXIST;
}
if (tb[TCA_ROUTE4_TO])
@@ -450,17 +443,12 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
tcf_bind_filter(tp, &f->res, base);
}
- tcf_exts_change(tp, &f->exts, &e);
-
return 0;
-errout:
- tcf_exts_destroy(&e);
- return err;
}
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, unsigned long *arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
@@ -479,7 +467,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- fold = (struct route4_filter *)*arg;
+ fold = *arg;
if (fold && handle && fold->handle != handle)
return -EINVAL;
@@ -537,7 +525,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
}
route4_reset_fastmap(head);
- *arg = (unsigned long)f;
+ *arg = f;
if (fold) {
tcf_unbind_filter(tp, &fold->res);
call_rcu(&fold->rcu, route4_delete_filter);
@@ -576,7 +564,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
arg->count++;
continue;
}
- if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ if (arg->fn(tp, f, arg) < 0) {
arg->stop = 1;
return;
}
@@ -587,10 +575,10 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
}
-static int route4_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct route4_filter *f = (struct route4_filter *)fh;
+ struct route4_filter *f = fh;
struct nlattr *nest;
u32 id;
@@ -636,6 +624,14 @@ nla_put_failure:
return -1;
}
+static void route4_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct route4_filter *f = fh;
+
+ if (f && f->res.classid == classid)
+ f->res.class = cl;
+}
+
static struct tcf_proto_ops cls_route4_ops __read_mostly = {
.kind = "route",
.classify = route4_classify,
@@ -646,6 +642,7 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = {
.delete = route4_delete,
.walk = route4_walk,
.dump = route4_dump,
+ .bind_class = route4_bind_class,
.owner = THIS_MODULE,
};
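
The classifier API change running through this whole series swaps the opaque unsigned long filter handle for void *: the 0UL sentinels become NULL and the casts at every call site disappear. A small standalone illustration of the two shapes (hypothetical types):

    #include <stddef.h>
    #include <stdio.h>

    struct route_filter { unsigned int handle; };

    /* Old style: opaque handle as unsigned long, casts on both ends. */
    static unsigned long get_old(struct route_filter *f, unsigned int handle)
    {
            return f->handle == handle ? (unsigned long)f : 0UL;
    }

    /* New style: void * carries the pointer directly, NULL means
     * "not found", and assignment to a typed pointer needs no cast. */
    static void *get_new(struct route_filter *f, unsigned int handle)
    {
            return f->handle == handle ? f : NULL;
    }

    int main(void)
    {
            struct route_filter f = { .handle = 7 };
            struct route_filter *hit;

            hit = (struct route_filter *)get_old(&f, 7);    /* cast required */
            printf("old: %p\n", (void *)hit);
            hit = get_new(&f, 7);                           /* no cast */
            printf("new: %p\n", (void *)hit);
            return 0;
    }
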
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 0d9d077..98c05db 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -248,7 +248,7 @@ static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
BUG_ON(1);
}
-static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
+static void *rsvp_get(struct tcf_proto *tp, u32 handle)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
struct rsvp_session *s;
@@ -257,17 +257,17 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
unsigned int h2 = (handle >> 8) & 0xFF;
if (h2 > 16)
- return 0;
+ return NULL;
for (s = rtnl_dereference(head->ht[h1]); s;
s = rtnl_dereference(s->next)) {
for (f = rtnl_dereference(s->ht[h2]); f;
f = rtnl_dereference(f->next)) {
if (f->handle == handle)
- return (unsigned long)f;
+ return f;
}
}
- return 0;
+ return NULL;
}
static int rsvp_init(struct tcf_proto *tp)
@@ -328,10 +328,10 @@ static void rsvp_destroy(struct tcf_proto *tp)
kfree_rcu(data, rcu);
}
-static int rsvp_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
+ struct rsvp_filter *nfp, *f = arg;
struct rsvp_filter __rcu **fp;
unsigned int h = f->handle;
struct rsvp_session __rcu **sp;
@@ -464,7 +464,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- unsigned long *arg, bool ovr)
+ void **arg, bool ovr)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
struct rsvp_filter *f, *nfp;
@@ -493,7 +493,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout2;
- f = (struct rsvp_filter *)*arg;
+ f = *arg;
if (f) {
/* Node exists: adjust only classid */
struct rsvp_filter *n;
@@ -518,7 +518,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
tcf_bind_filter(tp, &n->res, base);
}
- tcf_exts_change(tp, &n->exts, &e);
+ tcf_exts_change(&n->exts, &e);
rsvp_replace(tp, n, handle);
return 0;
}
@@ -591,7 +591,7 @@ insert:
if (f->tunnelhdr == 0)
tcf_bind_filter(tp, &f->res, base);
- tcf_exts_change(tp, &f->exts, &e);
+ tcf_exts_change(&f->exts, &e);
fp = &s->ht[h2];
for (nfp = rtnl_dereference(*fp); nfp;
@@ -604,7 +604,7 @@ insert:
RCU_INIT_POINTER(f->next, nfp);
rcu_assign_pointer(*fp, f);
- *arg = (unsigned long)f;
+ *arg = f;
return 0;
}
}
@@ -663,7 +663,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
arg->count++;
continue;
}
- if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ if (arg->fn(tp, f, arg) < 0) {
arg->stop = 1;
return;
}
@@ -674,10 +674,10 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
}
-static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct rsvp_filter *f = (struct rsvp_filter *)fh;
+ struct rsvp_filter *f = fh;
struct rsvp_session *s;
struct nlattr *nest;
struct tc_rsvp_pinfo pinfo;
@@ -723,6 +723,14 @@ nla_put_failure:
return -1;
}
+static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct rsvp_filter *f = fh;
+
+ if (f && f->res.classid == classid)
+ f->res.class = cl;
+}
+
static struct tcf_proto_ops RSVP_OPS __read_mostly = {
.kind = RSVP_ID,
.classify = rsvp_classify,
@@ -733,6 +741,7 @@ static struct tcf_proto_ops RSVP_OPS __read_mostly = {
.delete = rsvp_delete,
.walk = rsvp_walk,
.dump = rsvp_dump,
+ .bind_class = rsvp_bind_class,
.owner = THIS_MODULE,
};
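
tcf_exts_change() loses its tcf_proto argument throughout this series. A plausible reading of the two-argument form (an assumption here, not the verified kernel body) is a take-over-and-release exchange done under RTNL:

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-in for struct tcf_exts: owns an action array. */
    struct exts {
            int *actions;
            int nr;
    };

    static void exts_destroy(struct exts *e)
    {
            free(e->actions);
            e->actions = NULL;
            e->nr = 0;
    }

    /* Two-argument exchange in the spirit of tcf_exts_change(dst, src):
     * dst takes over src's actions and the old ones are released. */
    static void exts_change(struct exts *dst, struct exts *src)
    {
            struct exts old = *dst;

            *dst = *src;
            exts_destroy(&old);
    }

    int main(void)
    {
            struct exts dst = { .actions = malloc(sizeof(int)), .nr = 1 };
            struct exts src = { .actions = malloc(2 * sizeof(int)), .nr = 2 };

            exts_change(&dst, &src);
            printf("dst now has %d actions\n", dst.nr);
            exts_destroy(&dst);
            return 0;
    }
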
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 8a8a583..14a7e08 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -52,7 +52,7 @@ struct tcindex_data {
static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
{
- return tcf_exts_is_predicative(&r->exts) || r->res.classid;
+ return tcf_exts_has_actions(&r->exts) || r->res.classid;
}
static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
@@ -104,16 +104,16 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
}
-static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
+static void *tcindex_get(struct tcf_proto *tp, u32 handle)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r;
pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
if (p->perfect && handle >= p->alloc_hash)
- return 0;
+ return NULL;
r = tcindex_lookup(p, handle);
- return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL;
+ return r && tcindex_filter_is_set(r) ? r : NULL;
}
static int tcindex_init(struct tcf_proto *tp)
@@ -150,14 +150,14 @@ static void tcindex_destroy_fexts(struct rcu_head *head)
kfree(f);
}
-static int tcindex_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
+ struct tcindex_filter_result *r = arg;
struct tcindex_filter __rcu **walk;
struct tcindex_filter *f = NULL;
- pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p);
+ pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p);
if (p->perfect) {
if (!r->res.class)
return -ENOENT;
@@ -192,8 +192,7 @@ found:
}
static int tcindex_destroy_element(struct tcf_proto *tp,
- unsigned long arg,
- struct tcf_walker *walker)
+ void *arg, struct tcf_walker *walker)
{
bool last;
@@ -419,9 +418,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
if (old_r)
- tcf_exts_change(tp, &r->exts, &e);
+ tcf_exts_change(&r->exts, &e);
else
- tcf_exts_change(tp, &cr.exts, &e);
+ tcf_exts_change(&cr.exts, &e);
if (old_r && old_r != r) {
err = tcindex_filter_result_init(old_r);
@@ -439,7 +438,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
struct tcindex_filter *nfp;
struct tcindex_filter __rcu **fp;
- tcf_exts_change(tp, &f->result.exts, &r->exts);
+ tcf_exts_change(&f->result.exts, &r->exts);
fp = cp->h + (handle % cp->hash);
for (nfp = rtnl_dereference(*fp);
@@ -471,17 +470,17 @@ errout:
static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, unsigned long *arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
+ struct tcindex_filter_result *r = *arg;
int err;
pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
- "p %p,r %p,*arg 0x%lx\n",
- tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L);
+ "p %p,r %p,*arg %p\n",
+ tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL);
if (!opt)
return 0;
@@ -506,9 +505,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
if (!p->perfect[i].res.class)
continue;
if (walker->count >= walker->skip) {
- if (walker->fn(tp,
- (unsigned long) (p->perfect+i), walker)
- < 0) {
+ if (walker->fn(tp, p->perfect + i, walker) < 0) {
walker->stop = 1;
return;
}
@@ -522,8 +519,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
for (f = rtnl_dereference(p->h[i]); f; f = next) {
next = rtnl_dereference(f->next);
if (walker->count >= walker->skip) {
- if (walker->fn(tp, (unsigned long) &f->result,
- walker) < 0) {
+ if (walker->fn(tp, &f->result, walker) < 0) {
walker->stop = 1;
return;
}
@@ -548,14 +544,14 @@ static void tcindex_destroy(struct tcf_proto *tp)
}
-static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
+ struct tcindex_filter_result *r = fh;
struct nlattr *nest;
- pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p\n",
+ pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n",
tp, fh, skb, t, p, r);
pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
@@ -610,6 +606,14 @@ nla_put_failure:
return -1;
}
+static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct tcindex_filter_result *r = fh;
+
+ if (r && r->res.classid == classid)
+ r->res.class = cl;
+}
+
static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
.kind = "tcindex",
.classify = tcindex_classify,
@@ -620,6 +624,7 @@ static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
.delete = tcindex_delete,
.walk = tcindex_walk,
.dump = tcindex_dump,
+ .bind_class = tcindex_bind_class,
.owner = THIS_MODULE,
};
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 2d01195..10b8d85 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -40,6 +40,8 @@
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/bitmap.h>
+#include <linux/netdevice.h>
+#include <linux/hash.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
@@ -92,6 +94,7 @@ struct tc_u_common {
struct Qdisc *q;
int refcnt;
u32 hgenerator;
+ struct hlist_node hnode;
struct rcu_head rcu;
};
@@ -289,7 +292,7 @@ out:
}
-static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
+static void *u32_get(struct tcf_proto *tp, u32 handle)
{
struct tc_u_hnode *ht;
struct tc_u_common *tp_c = tp->data;
@@ -300,12 +303,12 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
if (!ht)
- return 0;
+ return NULL;
if (TC_U32_KEY(handle) == 0)
- return (unsigned long)ht;
+ return ht;
- return (unsigned long)u32_lookup_key(ht, handle);
+ return u32_lookup_key(ht, handle);
}
static u32 gen_new_htid(struct tc_u_common *tp_c)
@@ -323,12 +326,40 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
}
+static struct hlist_head *tc_u_common_hash;
+
+#define U32_HASH_SHIFT 10
+#define U32_HASH_SIZE (1 << U32_HASH_SHIFT)
+
+static unsigned int tc_u_hash(const struct tcf_proto *tp)
+{
+ struct net_device *dev = tp->q->dev_queue->dev;
+ u32 qhandle = tp->q->handle;
+ int ifindex = dev->ifindex;
+
+ return hash_64((u64)ifindex << 32 | qhandle, U32_HASH_SHIFT);
+}
+
+static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
+{
+ struct tc_u_common *tc;
+ unsigned int h;
+
+ h = tc_u_hash(tp);
+ hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
+ if (tc->q == tp->q)
+ return tc;
+ }
+ return NULL;
+}
+
static int u32_init(struct tcf_proto *tp)
{
struct tc_u_hnode *root_ht;
struct tc_u_common *tp_c;
+ unsigned int h;
- tp_c = tp->q->u32_node;
+ tp_c = tc_u_common_find(tp);
root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
if (root_ht == NULL)
@@ -345,7 +376,10 @@ static int u32_init(struct tcf_proto *tp)
return -ENOBUFS;
}
tp_c->q = tp->q;
- tp->q->u32_node = tp_c;
+ INIT_HLIST_NODE(&tp_c->hnode);
+
+ h = tc_u_hash(tp);
+ hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
}
tp_c->refcnt++;
@@ -431,43 +465,35 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
{
struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_u32_offload u32_offload = {0};
- struct tc_to_netdev offload;
-
- offload.type = TC_SETUP_CLSU32;
- offload.cls_u32 = &u32_offload;
-
- if (tc_should_offload(dev, tp, 0)) {
- offload.cls_u32->command = TC_CLSU32_DELETE_KNODE;
- offload.cls_u32->knode.handle = handle;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index, tp->protocol,
- &offload);
- }
+ struct tc_cls_u32_offload cls_u32 = {};
+
+ if (!tc_should_offload(dev, 0))
+ return;
+
+ tc_cls_common_offload_init(&cls_u32.common, tp);
+ cls_u32.command = TC_CLSU32_DELETE_KNODE;
+ cls_u32.knode.handle = handle;
+
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
}
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
u32 flags)
{
struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_u32_offload u32_offload = {0};
- struct tc_to_netdev offload;
+ struct tc_cls_u32_offload cls_u32 = {};
int err;
- if (!tc_should_offload(dev, tp, flags))
+ if (!tc_should_offload(dev, flags))
return tc_skip_sw(flags) ? -EINVAL : 0;
- offload.type = TC_SETUP_CLSU32;
- offload.cls_u32 = &u32_offload;
-
- offload.cls_u32->command = TC_CLSU32_NEW_HNODE;
- offload.cls_u32->hnode.divisor = h->divisor;
- offload.cls_u32->hnode.handle = h->handle;
- offload.cls_u32->hnode.prio = h->prio;
+ tc_cls_common_offload_init(&cls_u32.common, tp);
+ cls_u32.command = TC_CLSU32_NEW_HNODE;
+ cls_u32.hnode.divisor = h->divisor;
+ cls_u32.hnode.handle = h->handle;
+ cls_u32.hnode.prio = h->prio;
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index, tp->protocol,
- &offload);
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
if (tc_skip_sw(flags))
return err;
@@ -477,56 +503,47 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
{
struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_u32_offload u32_offload = {0};
- struct tc_to_netdev offload;
+ struct tc_cls_u32_offload cls_u32 = {};
- offload.type = TC_SETUP_CLSU32;
- offload.cls_u32 = &u32_offload;
+ if (!tc_should_offload(dev, 0))
+ return;
- if (tc_should_offload(dev, tp, 0)) {
- offload.cls_u32->command = TC_CLSU32_DELETE_HNODE;
- offload.cls_u32->hnode.divisor = h->divisor;
- offload.cls_u32->hnode.handle = h->handle;
- offload.cls_u32->hnode.prio = h->prio;
+ tc_cls_common_offload_init(&cls_u32.common, tp);
+ cls_u32.command = TC_CLSU32_DELETE_HNODE;
+ cls_u32.hnode.divisor = h->divisor;
+ cls_u32.hnode.handle = h->handle;
+ cls_u32.hnode.prio = h->prio;
- dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index, tp->protocol,
- &offload);
- }
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
}
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
u32 flags)
{
struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_u32_offload u32_offload = {0};
- struct tc_to_netdev offload;
+ struct tc_cls_u32_offload cls_u32 = {};
int err;
- offload.type = TC_SETUP_CLSU32;
- offload.cls_u32 = &u32_offload;
-
- if (!tc_should_offload(dev, tp, flags))
+ if (!tc_should_offload(dev, flags))
return tc_skip_sw(flags) ? -EINVAL : 0;
- offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE;
- offload.cls_u32->knode.handle = n->handle;
- offload.cls_u32->knode.fshift = n->fshift;
+ tc_cls_common_offload_init(&cls_u32.common, tp);
+ cls_u32.command = TC_CLSU32_REPLACE_KNODE;
+ cls_u32.knode.handle = n->handle;
+ cls_u32.knode.fshift = n->fshift;
#ifdef CONFIG_CLS_U32_MARK
- offload.cls_u32->knode.val = n->val;
- offload.cls_u32->knode.mask = n->mask;
+ cls_u32.knode.val = n->val;
+ cls_u32.knode.mask = n->mask;
#else
- offload.cls_u32->knode.val = 0;
- offload.cls_u32->knode.mask = 0;
+ cls_u32.knode.val = 0;
+ cls_u32.knode.mask = 0;
#endif
- offload.cls_u32->knode.sel = &n->sel;
- offload.cls_u32->knode.exts = &n->exts;
+ cls_u32.knode.sel = &n->sel;
+ cls_u32.knode.exts = &n->exts;
if (n->ht_down)
- offload.cls_u32->knode.link_handle = n->ht_down->handle;
+ cls_u32.knode.link_handle = n->ht_down->handle;
- err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
- tp->chain->index, tp->protocol,
- &offload);
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
if (!err)
n->flags |= TCA_CLS_FLAGS_IN_HW;
@@ -602,7 +619,7 @@ static void u32_destroy(struct tcf_proto *tp)
if (--tp_c->refcnt == 0) {
struct tc_u_hnode *ht;
- tp->q->u32_node = NULL;
+ hlist_del(&tp_c->hnode);
for (ht = rtnl_dereference(tp_c->hlist);
ht;
@@ -622,9 +639,9 @@ static void u32_destroy(struct tcf_proto *tp)
tp->data = NULL;
}
-static int u32_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
+static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
{
- struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
+ struct tc_u_hnode *ht = arg;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
struct tc_u_common *tp_c = tp->data;
int ret = 0;
@@ -723,29 +740,24 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
struct tc_u_knode *n, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
- struct tcf_exts e;
int err;
- err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
+ err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
- if (err < 0)
- goto errout;
- err = -EINVAL;
if (tb[TCA_U32_LINK]) {
u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
struct tc_u_hnode *ht_down = NULL, *ht_old;
if (TC_U32_KEY(handle))
- goto errout;
+ return -EINVAL;
if (handle) {
ht_down = u32_lookup_ht(ht->tp_c, handle);
if (ht_down == NULL)
- goto errout;
+ return -EINVAL;
ht_down->refcnt++;
}
@@ -765,16 +777,11 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
int ret;
ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
if (ret < 0)
- goto errout;
+ return -EINVAL;
n->ifindex = ret;
}
#endif
- tcf_exts_change(tp, &n->exts, &e);
-
return 0;
-errout:
- tcf_exts_destroy(&e);
- return err;
}
static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
@@ -858,7 +865,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, unsigned long *arg, bool ovr)
+ struct nlattr **tca, void **arg, bool ovr)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *ht;
@@ -885,7 +892,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
- n = (struct tc_u_knode *)*arg;
+ n = *arg;
if (n) {
struct tc_u_knode *new;
@@ -952,7 +959,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
RCU_INIT_POINTER(ht->next, tp_c->hlist);
rcu_assign_pointer(tp_c->hlist, ht);
- *arg = (unsigned long)ht;
+ *arg = ht;
return 0;
}
@@ -1047,7 +1054,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
RCU_INIT_POINTER(n->next, pins);
rcu_assign_pointer(*ins, n);
- *arg = (unsigned long)n;
+ *arg = n;
return 0;
}
@@ -1081,7 +1088,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
if (ht->prio != tp->prio)
continue;
if (arg->count >= arg->skip) {
- if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
+ if (arg->fn(tp, ht, arg) < 0) {
arg->stop = 1;
return;
}
@@ -1095,7 +1102,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
arg->count++;
continue;
}
- if (arg->fn(tp, (unsigned long)n, arg) < 0) {
+ if (arg->fn(tp, n, arg) < 0) {
arg->stop = 1;
return;
}
@@ -1105,10 +1112,18 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
}
-static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
+{
+ struct tc_u_knode *n = fh;
+
+ if (n && n->res.classid == classid)
+ n->res.class = cl;
+}
+
+static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t)
{
- struct tc_u_knode *n = (struct tc_u_knode *)fh;
+ struct tc_u_knode *n = fh;
struct tc_u_hnode *ht_up, *ht_down;
struct nlattr *nest;
@@ -1122,7 +1137,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
goto nla_put_failure;
if (TC_U32_KEY(n->handle) == 0) {
- struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
+ struct tc_u_hnode *ht = fh;
u32 divisor = ht->divisor + 1;
if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
@@ -1235,11 +1250,14 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
.delete = u32_delete,
.walk = u32_walk,
.dump = u32_dump,
+ .bind_class = u32_bind_class,
.owner = THIS_MODULE,
};
static int __init init_u32(void)
{
+ int i, ret;
+
pr_info("u32 classifier\n");
#ifdef CONFIG_CLS_U32_PERF
pr_info(" Performance counters on\n");
@@ -1250,12 +1268,25 @@ static int __init init_u32(void)
#ifdef CONFIG_NET_CLS_ACT
pr_info(" Actions configured\n");
#endif
- return register_tcf_proto_ops(&cls_u32_ops);
+ tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!tc_u_common_hash)
+ return -ENOMEM;
+
+ for (i = 0; i < U32_HASH_SIZE; i++)
+ INIT_HLIST_HEAD(&tc_u_common_hash[i]);
+
+ ret = register_tcf_proto_ops(&cls_u32_ops);
+ if (ret)
+ kvfree(tc_u_common_hash);
+ return ret;
}
static void __exit exit_u32(void)
{
unregister_tcf_proto_ops(&cls_u32_ops);
+ kvfree(tc_u_common_hash);
}
module_init(init_u32)
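
cls_u32 previously stored its shared tc_u_common on the qdisc itself (tp->q->u32_node); it now lives in a global hash table keyed by hash_64() over the device ifindex and qdisc handle, with tc_u_common_find() resolving collisions by still comparing tc->q. Assuming hash_64() is the kernel's golden-ratio multiplicative hash, a runnable sketch of the bucket selection:

    #include <stdint.h>
    #include <stdio.h>

    #define U32_HASH_SHIFT 10
    #define U32_HASH_SIZE  (1 << U32_HASH_SHIFT)

    /* Kernel-style multiplicative hash: multiply by a 64-bit
     * golden-ratio constant and keep the top U32_HASH_SHIFT bits. */
    static unsigned int hash_64(uint64_t val, unsigned int bits)
    {
            return (unsigned int)((val * 0x61C8864680B583EBULL) >> (64 - bits));
    }

    static unsigned int tc_u_hash(int ifindex, uint32_t qhandle)
    {
            return hash_64((uint64_t)ifindex << 32 | qhandle, U32_HASH_SHIFT);
    }

    int main(void)
    {
            /* Two u32 instances on different devices almost always land
             * in different buckets; collisions fall back to the list
             * walk in tc_u_common_find(). */
            printf("bucket(ifindex 2, 1:0) = %u\n", tc_u_hash(2, 0x10000));
            printf("bucket(ifindex 3, 1:0) = %u\n", tc_u_hash(3, 0x10000));
            return 0;
    }
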
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 4fb5a32..c6deb74 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -35,13 +35,7 @@
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-
-static int qdisc_notify(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n, u32 clid,
- struct Qdisc *old, struct Qdisc *new);
-static int tclass_notify(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n, struct Qdisc *q,
- unsigned long cl, int event);
+#include <net/pkt_cls.h>
/*
@@ -160,7 +154,7 @@ int register_qdisc(struct Qdisc_ops *qops)
if (qops->cl_ops) {
const struct Qdisc_class_ops *cops = qops->cl_ops;
- if (!(cops->get && cops->put && cops->walk && cops->leaf))
+ if (!(cops->find && cops->walk && cops->leaf))
goto out_einval;
if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
@@ -327,12 +321,11 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
if (cops == NULL)
return NULL;
- cl = cops->get(p, classid);
+ cl = cops->find(p, classid);
if (cl == 0)
return NULL;
leaf = cops->leaf(p, cl);
- cops->put(p, cl);
return leaf;
}
@@ -621,14 +614,10 @@ EXPORT_SYMBOL(qdisc_watchdog_cancel);
static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
- unsigned int size = n * sizeof(struct hlist_head), i;
struct hlist_head *h;
+ unsigned int i;
- if (size <= PAGE_SIZE)
- h = kmalloc(size, GFP_KERNEL);
- else
- h = (struct hlist_head *)
- __get_free_pages(GFP_KERNEL, get_order(size));
+ h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
if (h != NULL) {
for (i = 0; i < n; i++)
@@ -637,16 +626,6 @@ static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
return h;
}
-static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
-{
- unsigned int size = n * sizeof(struct hlist_head);
-
- if (size <= PAGE_SIZE)
- kfree(h);
- else
- free_pages((unsigned long)h, get_order(size));
-}
-
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
struct Qdisc_class_common *cl;
@@ -679,7 +658,7 @@ void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
clhash->hashmask = nmask;
sch_tree_unlock(sch);
- qdisc_class_hash_free(ohash, osize);
+ kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
@@ -699,7 +678,7 @@ EXPORT_SYMBOL(qdisc_class_hash_init);
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
- qdisc_class_hash_free(clhash->hash, clhash->hashsize);
+ kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
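
kvmalloc_array() subsumes the open-coded "kmalloc if it fits in a page, else __get_free_pages" sizing logic above, and pairs with a single kvfree() on all paths. The overflow-checked n * size part can be shown in plain C; the kmalloc-to-vmalloc fallback itself is kernel-only:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Userspace analog of kvmalloc_array(): a checked n*size
     * allocation. The kernel version additionally falls back from
     * kmalloc to vmalloc for large sizes. */
    static void *alloc_array(size_t n, size_t size)
    {
            if (size && n > (size_t)-1 / size)
                    return NULL;    /* n * size would overflow */
            return malloc(n * size);
    }

    int main(void)
    {
            size_t n = 16;
            void **h = alloc_array(n, sizeof(void *));

            if (!h)
                    return 1;
            memset(h, 0, n * sizeof(void *));  /* the INIT_HLIST_HEAD loop */
            printf("allocated %zu buckets\n", n);
            free(h);                           /* kvfree() in the kernel */
            return 0;
    }
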
@@ -749,6 +728,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
+ bool notify;
int drops;
if (n == 0 && len == 0)
@@ -761,6 +741,13 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
if (sch->flags & TCQ_F_NOPARENT)
break;
+ /* Notify parent qdisc only if child qdisc becomes empty.
+ *
+	 * If the child was already empty before the update, the backlog
+	 * counter is inconsistent and we skip the notification because
+	 * the parent class is already passive.
+ */
+ notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
@@ -768,10 +755,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
break;
}
cops = sch->ops->cl_ops;
- if (cops->qlen_notify) {
- cl = cops->get(sch, parentid);
+ if (notify && cops->qlen_notify) {
+ cl = cops->find(sch, parentid);
cops->qlen_notify(sch, cl);
- cops->put(sch, cl);
}
sch->q.qlen -= n;
sch->qstats.backlog -= len;
@@ -781,6 +767,111 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
+static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
+ u32 portid, u32 seq, u16 flags, int event)
+{
+ struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
+ struct gnet_stats_queue __percpu *cpu_qstats = NULL;
+ struct tcmsg *tcm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct gnet_dump d;
+ struct qdisc_size_table *stab;
+ __u32 qlen;
+
+ cond_resched();
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+ if (!nlh)
+ goto out_nlmsg_trim;
+ tcm = nlmsg_data(nlh);
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm__pad1 = 0;
+ tcm->tcm__pad2 = 0;
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+ tcm->tcm_parent = clid;
+ tcm->tcm_handle = q->handle;
+ tcm->tcm_info = refcount_read(&q->refcnt);
+ if (nla_put_string(skb, TCA_KIND, q->ops->id))
+ goto nla_put_failure;
+ if (q->ops->dump && q->ops->dump(q, skb) < 0)
+ goto nla_put_failure;
+ qlen = q->q.qlen;
+
+ stab = rtnl_dereference(q->stab);
+ if (stab && qdisc_dump_stab(skb, stab) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
+ NULL, &d, TCA_PAD) < 0)
+ goto nla_put_failure;
+
+ if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
+ goto nla_put_failure;
+
+ if (qdisc_is_percpu_stats(q)) {
+ cpu_bstats = q->cpu_bstats;
+ cpu_qstats = q->cpu_qstats;
+ }
+
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
+ &d, cpu_bstats, &q->bstats) < 0 ||
+ gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
+ gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_finish_copy(&d) < 0)
+ goto nla_put_failure;
+
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
+{
+ if (q->flags & TCQ_F_BUILTIN)
+ return true;
+ if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
+ return true;
+
+ return false;
+}
+
+static int qdisc_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, u32 clid,
+ struct Qdisc *old, struct Qdisc *new)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (old && !tc_qdisc_dump_ignore(old, false)) {
+ if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
+ 0, RTM_DELQDISC) < 0)
+ goto err_out;
+ }
+ if (new && !tc_qdisc_dump_ignore(new, false)) {
+ if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
+ old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+ goto err_out;
+ }
+
+ if (skb->len)
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+
+err_out:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
struct nlmsghdr *n, u32 clid,
struct Qdisc *old, struct Qdisc *new)
@@ -863,11 +954,11 @@ skip:
err = -EOPNOTSUPP;
if (cops && cops->graft) {
- unsigned long cl = cops->get(parent, classid);
- if (cl) {
+ unsigned long cl = cops->find(parent, classid);
+
+ if (cl)
err = cops->graft(parent, cl, new, &old);
- cops->put(parent, cl);
- } else
+ else
err = -ENOENT;
}
if (!err)
@@ -1348,111 +1439,6 @@ graft:
return 0;
}
-static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
- u32 portid, u32 seq, u16 flags, int event)
-{
- struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
- struct gnet_stats_queue __percpu *cpu_qstats = NULL;
- struct tcmsg *tcm;
- struct nlmsghdr *nlh;
- unsigned char *b = skb_tail_pointer(skb);
- struct gnet_dump d;
- struct qdisc_size_table *stab;
- __u32 qlen;
-
- cond_resched();
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
- if (!nlh)
- goto out_nlmsg_trim;
- tcm = nlmsg_data(nlh);
- tcm->tcm_family = AF_UNSPEC;
- tcm->tcm__pad1 = 0;
- tcm->tcm__pad2 = 0;
- tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
- tcm->tcm_parent = clid;
- tcm->tcm_handle = q->handle;
- tcm->tcm_info = refcount_read(&q->refcnt);
- if (nla_put_string(skb, TCA_KIND, q->ops->id))
- goto nla_put_failure;
- if (q->ops->dump && q->ops->dump(q, skb) < 0)
- goto nla_put_failure;
- qlen = q->q.qlen;
-
- stab = rtnl_dereference(q->stab);
- if (stab && qdisc_dump_stab(skb, stab) < 0)
- goto nla_put_failure;
-
- if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
- NULL, &d, TCA_PAD) < 0)
- goto nla_put_failure;
-
- if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
- goto nla_put_failure;
-
- if (qdisc_is_percpu_stats(q)) {
- cpu_bstats = q->cpu_bstats;
- cpu_qstats = q->cpu_qstats;
- }
-
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
- &d, cpu_bstats, &q->bstats) < 0 ||
- gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
- gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
- goto nla_put_failure;
-
- if (gnet_stats_finish_copy(&d) < 0)
- goto nla_put_failure;
-
- nlh->nlmsg_len = skb_tail_pointer(skb) - b;
- return skb->len;
-
-out_nlmsg_trim:
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
-{
- if (q->flags & TCQ_F_BUILTIN)
- return true;
- if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
- return true;
-
- return false;
-}
-
-static int qdisc_notify(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n, u32 clid,
- struct Qdisc *old, struct Qdisc *new)
-{
- struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- if (old && !tc_qdisc_dump_ignore(old, false)) {
- if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
- 0, RTM_DELQDISC) < 0)
- goto err_out;
- }
- if (new && !tc_qdisc_dump_ignore(new, false)) {
- if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
- old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
- goto err_out;
- }
-
- if (skb->len)
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
-
-err_out:
- kfree_skb(skb);
- return -EINVAL;
-}
-
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
struct netlink_callback *cb,
int *q_idx_p, int s_q_idx, bool recur,
@@ -1565,7 +1551,161 @@ done:
* Traffic classes manipulation. *
************************************************/
+static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
+ unsigned long cl,
+ u32 portid, u32 seq, u16 flags, int event)
+{
+ struct tcmsg *tcm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct gnet_dump d;
+ const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
+
+ cond_resched();
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+ if (!nlh)
+ goto out_nlmsg_trim;
+ tcm = nlmsg_data(nlh);
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm__pad1 = 0;
+ tcm->tcm__pad2 = 0;
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+ tcm->tcm_parent = q->handle;
+ tcm->tcm_handle = q->handle;
+ tcm->tcm_info = 0;
+ if (nla_put_string(skb, TCA_KIND, q->ops->id))
+ goto nla_put_failure;
+ if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
+ NULL, &d, TCA_PAD) < 0)
+ goto nla_put_failure;
+
+ if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_finish_copy(&d) < 0)
+ goto nla_put_failure;
+
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int tclass_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, struct Qdisc *q,
+ unsigned long cl, int event)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+}
+
+static int tclass_del_notify(struct net *net,
+ const struct Qdisc_class_ops *cops,
+ struct sk_buff *oskb, struct nlmsghdr *n,
+ struct Qdisc *q, unsigned long cl)
+{
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ struct sk_buff *skb;
+ int err = 0;
+
+ if (!cops->delete)
+ return -EOPNOTSUPP;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
+ RTM_DELTCLASS) < 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ err = cops->delete(q, cl);
+ if (err) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+}
+
+#ifdef CONFIG_NET_CLS
+
+struct tcf_bind_args {
+ struct tcf_walker w;
+ u32 classid;
+ unsigned long cl;
+};
+
+static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
+{
+ struct tcf_bind_args *a = (void *)arg;
+
+ if (tp->ops->bind_class) {
+ tcf_tree_lock(tp);
+ tp->ops->bind_class(n, a->classid, a->cl);
+ tcf_tree_unlock(tp);
+ }
+ return 0;
+}
+
+static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
+ unsigned long new_cl)
+{
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+ struct tcf_block *block;
+ struct tcf_chain *chain;
+ unsigned long cl;
+
+ cl = cops->find(q, portid);
+ if (!cl)
+ return;
+ block = cops->tcf_block(q, cl);
+ if (!block)
+ return;
+ list_for_each_entry(chain, &block->chain_list, list) {
+ struct tcf_proto *tp;
+
+ for (tp = rtnl_dereference(chain->filter_chain);
+ tp; tp = rtnl_dereference(tp->next)) {
+ struct tcf_bind_args arg = {};
+
+ arg.w.fn = tcf_node_bind;
+ arg.classid = clid;
+ arg.cl = new_cl;
+ tp->ops->walk(tp, &arg.w);
+ }
+ }
+}
+
+#else
+static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
+ unsigned long new_cl)
+{
+}
+
+#endif
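
tcf_bind_args embeds the generic struct tcf_walker as its first member, so tcf_node_bind() can cast the base pointer back to the containing struct and hand classid/cl to each filter's bind_class op. A userspace sketch of that embed-and-upcast idiom:

    #include <stdio.h>

    /* Generic walker, as in struct tcf_walker: just a callback slot. */
    struct walker {
            int (*fn)(void *node, struct walker *w);
    };

    /* Extended args embed the walker first, so the callback can cast
     * the base pointer back to the containing struct. */
    struct bind_args {
            struct walker w;
            unsigned int classid;
            unsigned long cl;
    };

    struct node {
            unsigned int classid;
            unsigned long class;
    };

    static int node_bind(void *n, struct walker *arg)
    {
            struct bind_args *a = (struct bind_args *)arg;  /* upcast */
            struct node *node = n;

            if (node->classid == a->classid)
                    node->class = a->cl;
            return 0;
    }

    int main(void)
    {
            struct node nodes[2] = { { 0x10, 0 }, { 0x20, 0 } };
            struct bind_args arg = { .w.fn = node_bind, .classid = 0x10, .cl = 5 };
            int i;

            for (i = 0; i < 2; i++)         /* stand-in for tp->ops->walk() */
                    arg.w.fn(&nodes[i], &arg.w);
            printf("node0 class %lu, node1 class %lu\n",
                   nodes[0].class, nodes[1].class);
            return 0;
    }
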
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
@@ -1656,7 +1796,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
clid = TC_H_MAKE(qid, clid);
if (clid)
- cl = cops->get(q, clid);
+ cl = cops->find(q, clid);
if (cl == 0) {
err = -ENOENT;
@@ -1671,12 +1811,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
goto out;
break;
case RTM_DELTCLASS:
- err = -EOPNOTSUPP;
- if (cops->delete)
- err = cops->delete(q, cl);
- if (err == 0)
- tclass_notify(net, skb, n, q, cl,
- RTM_DELTCLASS);
+ err = tclass_del_notify(net, cops, skb, n, q, cl);
+		/* Unbind the deleted class from any filters by rebinding them to 0 */
+ tc_bind_tclass(q, portid, clid, 0);
goto out;
case RTM_GETTCLASS:
err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
@@ -1691,83 +1828,16 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
err = -EOPNOTSUPP;
if (cops->change)
err = cops->change(q, clid, portid, tca, &new_cl);
- if (err == 0)
+ if (err == 0) {
tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
-
+			/* We just created a new class, so do the reverse binding. */
+ if (cl != new_cl)
+ tc_bind_tclass(q, portid, clid, new_cl);
+ }
out:
- if (cl)
- cops->put(q, cl);
-
return err;
}
-
-static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
- unsigned long cl,
- u32 portid, u32 seq, u16 flags, int event)
-{
- struct tcmsg *tcm;
- struct nlmsghdr *nlh;
- unsigned char *b = skb_tail_pointer(skb);
- struct gnet_dump d;
- const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
-
- cond_resched();
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
- if (!nlh)
- goto out_nlmsg_trim;
- tcm = nlmsg_data(nlh);
- tcm->tcm_family = AF_UNSPEC;
- tcm->tcm__pad1 = 0;
- tcm->tcm__pad2 = 0;
- tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
- tcm->tcm_parent = q->handle;
- tcm->tcm_handle = q->handle;
- tcm->tcm_info = 0;
- if (nla_put_string(skb, TCA_KIND, q->ops->id))
- goto nla_put_failure;
- if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
- goto nla_put_failure;
-
- if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
- NULL, &d, TCA_PAD) < 0)
- goto nla_put_failure;
-
- if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
- goto nla_put_failure;
-
- if (gnet_stats_finish_copy(&d) < 0)
- goto nla_put_failure;
-
- nlh->nlmsg_len = skb_tail_pointer(skb) - b;
- return skb->len;
-
-out_nlmsg_trim:
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int tclass_notify(struct net *net, struct sk_buff *oskb,
- struct nlmsghdr *n, struct Qdisc *q,
- unsigned long cl, int event)
-{
- struct sk_buff *skb;
- u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
-}
-
struct qdisc_dump_args {
struct qdisc_walker w;
struct sk_buff *skb;
@@ -1949,14 +2019,14 @@ static int __init pktsched_init(void)
register_qdisc(&mq_qdisc_ops);
register_qdisc(&noqueue_qdisc_ops);
- rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
- NULL);
- rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
+ 0);
+ rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
- NULL);
+ 0);
return 0;
}
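
tclass_del_notify() deliberately fills the netlink message before calling cops->delete(): dumping a class after it has been destroyed would touch freed memory, so the skb is built first, the class deleted, and only then the notification sent. The same snapshot-before-destroy ordering in a standalone sketch:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct class { char name[16]; };

    /* Serialize the object while it is still alive, delete it, then
     * publish the snapshot: the order tclass_del_notify() uses
     * (fill skb, cops->delete(), rtnetlink_send). */
    static int del_notify(struct class **clp, char *msg, size_t len)
    {
            if (!*clp)
                    return -1;
            snprintf(msg, len, "deleted %s", (*clp)->name); /* fill first */
            free(*clp);                                     /* then delete */
            *clp = NULL;
            puts(msg);                                      /* then notify */
            return 0;
    }

    int main(void)
    {
            struct class *cl = malloc(sizeof(*cl));
            char msg[32];

            strcpy(cl->name, "1:10");
            return del_notify(&cl, msg, sizeof(msg));
    }
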
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index c403c87..c5fcdf1 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -41,6 +41,7 @@
#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
struct atm_flow_data {
+ struct Qdisc_class_common common;
struct Qdisc *q; /* FIFO, TBF, etc. */
struct tcf_proto __rcu *filter_list;
struct tcf_block *block;
@@ -49,7 +50,6 @@ struct atm_flow_data {
struct sk_buff *skb); /* chaining */
struct atm_qdisc_data *parent; /* parent qdisc */
struct socket *sock; /* for closing */
- u32 classid; /* x:y type ID */
int ref; /* reference count */
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
@@ -75,7 +75,7 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
struct atm_flow_data *flow;
list_for_each_entry(flow, &p->flows, list) {
- if (flow->classid == classid)
+ if (flow->common.classid == classid)
return flow;
}
return NULL;
@@ -108,23 +108,29 @@ static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
return flow ? flow->q : NULL;
}
-static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid)
+static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid)
{
struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
struct atm_flow_data *flow;
- pr_debug("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
+ pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
flow = lookup_flow(sch, classid);
- if (flow)
- flow->ref++;
- pr_debug("atm_tc_get: flow %p\n", flow);
+ pr_debug("%s: flow %p\n", __func__, flow);
return (unsigned long)flow;
}
static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
unsigned long parent, u32 classid)
{
- return atm_tc_get(sch, classid);
+ struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
+ struct atm_flow_data *flow;
+
+ pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
+ flow = lookup_flow(sch, classid);
+ if (flow)
+ flow->ref++;
+ pr_debug("%s: flow %p\n", __func__, flow);
+ return (unsigned long)flow;
}
/*
@@ -234,7 +240,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
excess = NULL;
else {
excess = (struct atm_flow_data *)
- atm_tc_get(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
+ atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
if (!excess)
return -ENOENT;
}
@@ -262,10 +268,9 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
for (i = 1; i < 0x8000; i++) {
classid = TC_H_MAKE(sch->handle, 0x8000 | i);
- cl = atm_tc_get(sch, classid);
+ cl = atm_tc_find(sch, classid);
if (!cl)
break;
- atm_tc_put(sch, cl);
}
}
pr_debug("atm_tc_change: new id %x\n", classid);
@@ -293,7 +298,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
flow->old_pop = flow->vcc->pop;
flow->parent = p;
flow->vcc->pop = sch_atm_pop;
- flow->classid = classid;
+ flow->common.classid = classid;
flow->ref = 1;
flow->excess = excess;
list_add(&flow->list, &p->link.list);
@@ -305,8 +310,6 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
*arg = (unsigned long)flow;
return 0;
err_out:
- if (excess)
- atm_tc_put(sch, (unsigned long)excess);
sockfd_put(sock);
return error;
}
@@ -377,7 +380,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
result = TC_ACT_OK; /* be nice to gcc */
flow = NULL;
if (TC_H_MAJ(skb->priority) != sch->handle ||
- !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) {
+ !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) {
struct tcf_proto *fl;
list_for_each_entry(flow, &p->flows, list) {
@@ -549,7 +552,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
p->link.vcc = NULL;
p->link.sock = NULL;
- p->link.classid = sch->handle;
+ p->link.common.classid = sch->handle;
p->link.ref = 1;
tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
return 0;
@@ -596,7 +599,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
sch, p, flow, skb, tcm);
if (list_empty(&flow->list))
return -EINVAL;
- tcm->tcm_handle = flow->classid;
+ tcm->tcm_handle = flow->common.classid;
tcm->tcm_info = flow->q->handle;
nest = nla_nest_start(skb, TCA_OPTIONS);
@@ -621,7 +624,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
goto nla_put_failure;
}
if (flow->excess) {
- if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->classid))
+ if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid))
goto nla_put_failure;
} else {
if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
@@ -655,8 +658,7 @@ static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
static const struct Qdisc_class_ops atm_class_ops = {
.graft = atm_tc_graft,
.leaf = atm_tc_leaf,
- .get = atm_tc_get,
- .put = atm_tc_put,
+ .find = atm_tc_find,
.change = atm_tc_change,
.delete = atm_tc_delete,
.walk = atm_tc_walk,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 156c8a3..dcef97f 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -129,7 +129,6 @@ struct cbq_class {
struct tcf_proto __rcu *filter_list;
struct tcf_block *block;
- int refcnt;
int filters;
struct cbq_class *defaults[TC_PRIO_MAX + 1];
@@ -1162,7 +1161,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
if (err < 0)
goto put_rtab;
- q->link.refcnt = 1;
q->link.sibling = &q->link;
q->link.common.classid = sch->handle;
q->link.qdisc = sch;
@@ -1389,20 +1387,14 @@ static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
struct cbq_class *cl = (struct cbq_class *)arg;
- if (cl->q->q.qlen == 0)
- cbq_deactivate_class(cl);
+ cbq_deactivate_class(cl);
}
-static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
+static unsigned long cbq_find(struct Qdisc *sch, u32 classid)
{
struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = cbq_class_lookup(q, classid);
- if (cl) {
- cl->refcnt++;
- return (unsigned long)cl;
- }
- return 0;
+ return (unsigned long)cbq_class_lookup(q, classid);
}
static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
@@ -1448,25 +1440,6 @@ static void cbq_destroy(struct Qdisc *sch)
qdisc_class_hash_destroy(&q->clhash);
}
-static void cbq_put(struct Qdisc *sch, unsigned long arg)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- if (--cl->refcnt == 0) {
-#ifdef CONFIG_NET_CLS_ACT
- spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
- struct cbq_sched_data *q = qdisc_priv(sch);
-
- spin_lock_bh(root_lock);
- if (q->rx_class == cl)
- q->rx_class = NULL;
- spin_unlock_bh(root_lock);
-#endif
-
- cbq_destroy_class(sch, cl);
- }
-}
-
static int
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
unsigned long *arg)
@@ -1613,7 +1586,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cl->R_tab = rtab;
rtab = NULL;
- cl->refcnt = 1;
cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
if (!cl->q)
cl->q = &noop_qdisc;
@@ -1694,12 +1666,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
cbq_rmprio(q, cl);
sch_tree_unlock(sch);
- BUG_ON(--cl->refcnt == 0);
- /*
- * This shouldn't happen: we "hold" one cops->get() when called
- * from tc_ctl_tclass; the destroy method is done from cops->put().
- */
-
+ cbq_destroy_class(sch, cl);
return 0;
}
@@ -1765,8 +1732,7 @@ static const struct Qdisc_class_ops cbq_class_ops = {
.graft = cbq_graft,
.leaf = cbq_leaf,
.qlen_notify = cbq_qlen_notify,
- .get = cbq_get,
- .put = cbq_put,
+ .find = cbq_find,
.change = cbq_change_class,
.delete = cbq_delete,
.walk = cbq_walk,
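
With the control path no longer refcounting classes, get()/put() collapse into a lookup-only find(), delete() destroys the class directly, and qlen_notify() can drop its qlen == 0 guard because the core now only calls it when the child really became empty. A toy contrast of the two ownership shapes (hypothetical and heavily simplified):

    #include <stdio.h>
    #include <stdlib.h>

    struct class {
            int refcnt;     /* gone after this series; kept to contrast */
    };

    /* Old shape: every control operation took and dropped a reference,
     * and destruction hid inside put(). */
    static void put_old(struct class *cl)
    {
            if (--cl->refcnt == 0)
                    free(cl);
    }

    /* New shape: find() only looks the class up; delete() frees it
     * directly, so the lookup no longer implies ownership. */
    static void delete_new(struct class **clp)
    {
            free(*clp);
            *clp = NULL;
    }

    int main(void)
    {
            struct class *a = calloc(1, sizeof(*a));
            struct class *b = calloc(1, sizeof(*b));

            a->refcnt = 1;
            put_old(a);             /* old: destroy via refcount */
            delete_new(&b);         /* new: destroy explicitly */
            printf("b = %p\n", (void *)b);
            return 0;
    }
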
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index a413dc1..2d0e8d4 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -20,7 +20,6 @@
struct drr_class {
struct Qdisc_class_common common;
- unsigned int refcnt;
unsigned int filter_cnt;
struct gnet_stats_basic_packed bstats;
@@ -111,7 +110,6 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
- cl->refcnt = 1;
cl->common.classid = classid;
cl->quantum = quantum;
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
@@ -163,32 +161,15 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
drr_purge_queue(cl);
qdisc_class_hash_remove(&q->clhash, &cl->common);
- BUG_ON(--cl->refcnt == 0);
- /*
- * This shouldn't happen: we "hold" one cops->get() when called
- * from tc_ctl_tclass; the destroy method is done from cops->put().
- */
-
sch_tree_unlock(sch);
- return 0;
-}
-
-static unsigned long drr_get_class(struct Qdisc *sch, u32 classid)
-{
- struct drr_class *cl = drr_find_class(sch, classid);
- if (cl != NULL)
- cl->refcnt++;
-
- return (unsigned long)cl;
+ drr_destroy_class(sch, cl);
+ return 0;
}
-static void drr_put_class(struct Qdisc *sch, unsigned long arg)
+static unsigned long drr_search_class(struct Qdisc *sch, u32 classid)
{
- struct drr_class *cl = (struct drr_class *)arg;
-
- if (--cl->refcnt == 0)
- drr_destroy_class(sch, cl);
+ return (unsigned long)drr_find_class(sch, classid);
}
static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl)
@@ -246,8 +227,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg)
{
struct drr_class *cl = (struct drr_class *)arg;
- if (cl->qdisc->q.qlen == 0)
- list_del(&cl->alist);
+ list_del(&cl->alist);
}
static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
@@ -479,8 +459,7 @@ static void drr_destroy_qdisc(struct Qdisc *sch)
static const struct Qdisc_class_ops drr_class_ops = {
.change = drr_change_class,
.delete = drr_delete_class,
- .get = drr_get_class,
- .put = drr_put_class,
+ .find = drr_search_class,
.tcf_block = drr_tcf_block,
.bind_tcf = drr_bind_tcf,
.unbind_tcf = drr_unbind_tcf,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 6d94fcc..2836c80 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -85,21 +85,21 @@ static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
return p->q;
}
-static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
+static unsigned long dsmark_find(struct Qdisc *sch, u32 classid)
{
- pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
- __func__, sch, qdisc_priv(sch), classid);
-
return TC_H_MIN(classid) + 1;
}
static unsigned long dsmark_bind_filter(struct Qdisc *sch,
unsigned long parent, u32 classid)
{
- return dsmark_get(sch, classid);
+ pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
+ __func__, sch, qdisc_priv(sch), classid);
+
+ return dsmark_find(sch, classid);
}
-static void dsmark_put(struct Qdisc *sch, unsigned long cl)
+static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl)
{
}
@@ -469,14 +469,13 @@ nla_put_failure:
static const struct Qdisc_class_ops dsmark_class_ops = {
.graft = dsmark_graft,
.leaf = dsmark_leaf,
- .get = dsmark_get,
- .put = dsmark_put,
+ .find = dsmark_find,
.change = dsmark_change,
.delete = dsmark_delete,
.walk = dsmark_walk,
.tcf_block = dsmark_tcf_block,
.bind_tcf = dsmark_bind_filter,
- .unbind_tcf = dsmark_put,
+ .unbind_tcf = dsmark_unbind_filter,
.dump = dsmark_dump_class,
};
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 2c0c05f..de3b57c 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -577,7 +577,7 @@ static struct Qdisc *fq_codel_leaf(struct Qdisc *sch, unsigned long arg)
return NULL;
}
-static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid)
+static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid)
{
return 0;
}
@@ -590,7 +590,7 @@ static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent,
return 0;
}
-static void fq_codel_put(struct Qdisc *q, unsigned long cl)
+static void fq_codel_unbind(struct Qdisc *q, unsigned long cl)
{
}
@@ -681,11 +681,10 @@ static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg)
static const struct Qdisc_class_ops fq_codel_class_ops = {
.leaf = fq_codel_leaf,
- .get = fq_codel_get,
- .put = fq_codel_put,
+ .find = fq_codel_find,
.tcf_block = fq_codel_tcf_block,
.bind_tcf = fq_codel_bind,
- .unbind_tcf = fq_codel_put,
+ .unbind_tcf = fq_codel_unbind,
.dump = fq_codel_dump_class,
.dump_stats = fq_codel_dump_class_stats,
.walk = fq_codel_walk,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4ba6da5..92237e7 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -29,6 +29,7 @@
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
+#include <trace/events/qdisc.h>
/* Qdisc to use by default */
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
@@ -126,7 +127,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
q->q.qlen--;
} else
skb = NULL;
- return skb;
+ goto trace;
}
*validate = true;
skb = q->skb_bad_txq;
@@ -139,7 +140,8 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
q->q.qlen--;
goto bulk;
}
- return NULL;
+ skb = NULL;
+ goto trace;
}
if (!(q->flags & TCQ_F_ONETXQUEUE) ||
!netif_xmit_frozen_or_stopped(txq))
@@ -151,6 +153,8 @@ bulk:
else
try_bulk_dequeue_skb_slow(q, skb, packets);
}
+trace:
+ trace_qdisc_dequeue(q, txq, *packets, skb);
return skb;
}
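
The dequeue_skb() hunks above convert the early returns into jumps to a single trace label, so trace_qdisc_dequeue() fires exactly once on every exit path, including the NULL ones. A compilable sketch of the same single-exit shape (trace_dequeue() stands in for the real tracepoint):

#include <stdio.h>

static void trace_dequeue(const char *path, int got_skb)
{
	printf("dequeue via %s, skb=%d\n", path, got_skb);
}

static int toy_dequeue(int have_gso, int txq_frozen)
{
	const char *path;
	int skb;

	if (have_gso) {
		skb = 1;
		path = "gso_skb";
		goto trace;
	}
	if (txq_frozen) {
		skb = 0;		/* was a bare "return NULL" */
		path = "frozen_txq";
		goto trace;
	}
	skb = 1;
	path = "bulk_dequeue";
trace:
	trace_dequeue(path, skb);	/* fires exactly once per call */
	return skb;
}

int main(void)
{
	toy_dequeue(1, 0);
	toy_dequeue(0, 1);
	toy_dequeue(0, 0);
	return 0;
}
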
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 11ab8da..daaf214 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -110,7 +110,6 @@ enum hfsc_class_flags {
struct hfsc_class {
struct Qdisc_class_common cl_common;
- unsigned int refcnt; /* usage count */
struct gnet_stats_basic_packed bstats;
struct gnet_stats_queue qstats;
@@ -829,28 +828,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
}
}
-static void
-set_active(struct hfsc_class *cl, unsigned int len)
-{
- if (cl->cl_flags & HFSC_RSC)
- init_ed(cl, len);
- if (cl->cl_flags & HFSC_FSC)
- init_vf(cl, len);
-
-}
-
-static void
-set_passive(struct hfsc_class *cl)
-{
- if (cl->cl_flags & HFSC_RSC)
- eltree_remove(cl);
-
- /*
- * vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
- * needs to be called explicitly to remove a class from vttree.
- */
-}
-
static unsigned int
qdisc_peek_len(struct Qdisc *sch)
{
@@ -1067,7 +1044,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
hfsc_change_usc(cl, usc, 0);
cl->cl_common.classid = classid;
- cl->refcnt = 1;
cl->sched = q;
cl->cl_parent = parent;
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
@@ -1123,13 +1099,9 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
hfsc_purge_queue(sch, cl);
qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
- BUG_ON(--cl->refcnt == 0);
- /*
- * This shouldn't happen: we "hold" one cops->get() when called
- * from tc_ctl_tclass; the destroy method is done from cops->put().
- */
-
sch_tree_unlock(sch);
+
+ hfsc_destroy_class(sch, cl);
return 0;
}
@@ -1221,30 +1193,18 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
struct hfsc_class *cl = (struct hfsc_class *)arg;
- if (cl->qdisc->q.qlen == 0) {
- update_vf(cl, 0, 0);
- set_passive(cl);
- }
+ /* vttree is now handled in update_vf() so that update_vf(cl, 0, 0)
+ * needs to be called explicitly to remove a class from vttree.
+ */
+ update_vf(cl, 0, 0);
+ if (cl->cl_flags & HFSC_RSC)
+ eltree_remove(cl);
}
static unsigned long
-hfsc_get_class(struct Qdisc *sch, u32 classid)
-{
- struct hfsc_class *cl = hfsc_find_class(classid, sch);
-
- if (cl != NULL)
- cl->refcnt++;
-
- return (unsigned long)cl;
-}
-
-static void
-hfsc_put_class(struct Qdisc *sch, unsigned long arg)
+hfsc_search_class(struct Qdisc *sch, u32 classid)
{
- struct hfsc_class *cl = (struct hfsc_class *)arg;
-
- if (--cl->refcnt == 0)
- hfsc_destroy_class(sch, cl);
+ return (unsigned long)hfsc_find_class(classid, sch);
}
static unsigned long
@@ -1435,7 +1395,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
return err;
q->root.cl_common.classid = sch->handle;
- q->root.refcnt = 1;
q->root.sched = q;
q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
sch->handle);
@@ -1581,7 +1540,12 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
}
if (cl->qdisc->q.qlen == 1) {
- set_active(cl, qdisc_pkt_len(skb));
+ unsigned int len = qdisc_pkt_len(skb);
+
+ if (cl->cl_flags & HFSC_RSC)
+ init_ed(cl, len);
+ if (cl->cl_flags & HFSC_FSC)
+ init_vf(cl, len);
/*
* If this is the first packet, isolate the head so an eventual
* head drop before the first dequeue operation has no chance
@@ -1645,18 +1609,18 @@ hfsc_dequeue(struct Qdisc *sch)
if (realtime)
cl->cl_cumul += qdisc_pkt_len(skb);
- if (cl->qdisc->q.qlen != 0) {
- if (cl->cl_flags & HFSC_RSC) {
+ if (cl->cl_flags & HFSC_RSC) {
+ if (cl->qdisc->q.qlen != 0) {
/* update ed */
next_len = qdisc_peek_len(cl->qdisc);
if (realtime)
update_ed(cl, next_len);
else
update_d(cl, next_len);
+ } else {
+ /* the class becomes passive */
+ eltree_remove(cl);
}
- } else {
- /* the class becomes passive */
- set_passive(cl);
}
qdisc_bstats_update(sch, skb);
@@ -1672,8 +1636,7 @@ static const struct Qdisc_class_ops hfsc_class_ops = {
.graft = hfsc_graft_class,
.leaf = hfsc_class_leaf,
.qlen_notify = hfsc_qlen_notify,
- .get = hfsc_get_class,
- .put = hfsc_put_class,
+ .find = hfsc_search_class,
.bind_tcf = hfsc_bind_tcf,
.unbind_tcf = hfsc_unbind_tcf,
.tcf_block = hfsc_tcf_block,
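
The hfsc hunks above also fold set_active()/set_passive() into their remaining call sites, so the HFSC_RSC/HFSC_FSC flag tests now sit next to the enqueue and dequeue logic they guard. A toy version of the inlined activation path (the flags and init helpers are illustrative only):

#include <stdio.h>

#define TOY_RSC 0x1u
#define TOY_FSC 0x2u

struct toy_class { unsigned int flags; };

static void toy_init_ed(struct toy_class *cl, unsigned int len)
{
	(void)cl;
	printf("init_ed(len=%u)\n", len);
}

static void toy_init_vf(struct toy_class *cl, unsigned int len)
{
	(void)cl;
	printf("init_vf(len=%u)\n", len);
}

static void toy_enqueue_first(struct toy_class *cl, unsigned int len)
{
	/* formerly set_active(cl, len): */
	if (cl->flags & TOY_RSC)
		toy_init_ed(cl, len);
	if (cl->flags & TOY_FSC)
		toy_init_vf(cl, len);
}

int main(void)
{
	struct toy_class cl = { .flags = TOY_RSC | TOY_FSC };

	toy_enqueue_first(&cl, 1500);
	return 0;
}
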
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 5bf5177..7e14837 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -107,7 +107,6 @@ struct htb_class {
struct tcf_proto __rcu *filter_list; /* class attached filters */
struct tcf_block *block;
int filter_cnt;
- int refcnt; /* usage count of this class */
int level; /* our level (see above) */
unsigned int children;
@@ -193,6 +192,10 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
return container_of(clc, struct htb_class, common);
}
+static unsigned long htb_search(struct Qdisc *sch, u32 handle)
+{
+ return (unsigned long)htb_find(handle, sch);
+}
/**
* htb_classify - classify a packet into class
*
@@ -1187,16 +1190,7 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
struct htb_class *cl = (struct htb_class *)arg;
- if (cl->un.leaf.q->q.qlen == 0)
- htb_deactivate(qdisc_priv(sch), cl);
-}
-
-static unsigned long htb_get(struct Qdisc *sch, u32 classid)
-{
- struct htb_class *cl = htb_find(classid, sch);
- if (cl)
- cl->refcnt++;
- return (unsigned long)cl;
+ htb_deactivate(qdisc_priv(sch), cl);
}
static inline int htb_parent_last_child(struct htb_class *cl)
@@ -1318,22 +1312,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
if (last_child)
htb_parent_to_leaf(q, cl, new_q);
- BUG_ON(--cl->refcnt == 0);
- /*
- * This shouldn't happen: we "hold" one cops->get() when called
- * from tc_ctl_tclass; the destroy method is done from cops->put().
- */
-
sch_tree_unlock(sch);
- return 0;
-}
-static void htb_put(struct Qdisc *sch, unsigned long arg)
-{
- struct htb_class *cl = (struct htb_class *)arg;
-
- if (--cl->refcnt == 0)
- htb_destroy_class(sch, cl);
+ htb_destroy_class(sch, cl);
+ return 0;
}
static int htb_change_class(struct Qdisc *sch, u32 classid,
@@ -1424,7 +1406,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
}
}
- cl->refcnt = 1;
cl->children = 0;
INIT_LIST_HEAD(&cl->un.leaf.drop_list);
RB_CLEAR_NODE(&cl->pq_node);
@@ -1600,8 +1581,7 @@ static const struct Qdisc_class_ops htb_class_ops = {
.graft = htb_graft,
.leaf = htb_leaf,
.qlen_notify = htb_qlen_notify,
- .get = htb_get,
- .put = htb_put,
+ .find = htb_search,
.change = htb_change_class,
.delete = htb_delete,
.walk = htb_walk,
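
htb_search() above is nothing but a signature adapter: the existing hash lookup returns a struct htb_class pointer, while cops->find() must return an unsigned long cookie. A minimal illustration of the wrapper shape (toy names, not the htb code):

#include <assert.h>

struct toy_class { unsigned int handle; };

static struct toy_class *toy_lookup(unsigned int handle)
{
	static struct toy_class one = { .handle = 1 };

	return handle == 1 ? &one : (struct toy_class *)0;
}

static unsigned long toy_search(unsigned int handle)
{
	return (unsigned long)toy_lookup(handle);	/* pointer as cookie */
}

int main(void)
{
	assert(toy_search(1) != 0);
	assert(toy_search(2) == 0);
	return 0;
}
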
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index d8a9beb..44de4ee 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -27,23 +27,18 @@ static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
return NULL;
}
-static unsigned long ingress_get(struct Qdisc *sch, u32 classid)
+static unsigned long ingress_find(struct Qdisc *sch, u32 classid)
{
return TC_H_MIN(classid) + 1;
}
-static bool ingress_cl_offload(u32 classid)
-{
- return true;
-}
-
static unsigned long ingress_bind_filter(struct Qdisc *sch,
unsigned long parent, u32 classid)
{
- return ingress_get(sch, classid);
+ return ingress_find(sch, classid);
}
-static void ingress_put(struct Qdisc *sch, unsigned long cl)
+static void ingress_unbind_filter(struct Qdisc *sch, unsigned long cl)
{
}
@@ -99,13 +94,11 @@ nla_put_failure:
static const struct Qdisc_class_ops ingress_class_ops = {
.leaf = ingress_leaf,
- .get = ingress_get,
- .put = ingress_put,
+ .find = ingress_find,
.walk = ingress_walk,
.tcf_block = ingress_tcf_block,
- .tcf_cl_offload = ingress_cl_offload,
.bind_tcf = ingress_bind_filter,
- .unbind_tcf = ingress_put,
+ .unbind_tcf = ingress_unbind_filter,
};
static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
@@ -123,7 +116,7 @@ struct clsact_sched_data {
struct tcf_block *egress_block;
};
-static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
+static unsigned long clsact_find(struct Qdisc *sch, u32 classid)
{
switch (TC_H_MIN(classid)) {
case TC_H_MIN(TC_H_MIN_INGRESS):
@@ -134,15 +127,10 @@ static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
}
}
-static bool clsact_cl_offload(u32 classid)
-{
- return TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS);
-}
-
static unsigned long clsact_bind_filter(struct Qdisc *sch,
unsigned long parent, u32 classid)
{
- return clsact_get(sch, classid);
+ return clsact_find(sch, classid);
}
static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl)
@@ -194,13 +182,11 @@ static void clsact_destroy(struct Qdisc *sch)
static const struct Qdisc_class_ops clsact_class_ops = {
.leaf = ingress_leaf,
- .get = clsact_get,
- .put = ingress_put,
+ .find = clsact_find,
.walk = ingress_walk,
.tcf_block = clsact_tcf_block,
- .tcf_cl_offload = clsact_cl_offload,
.bind_tcf = clsact_bind_filter,
- .unbind_tcf = ingress_put,
+ .unbind_tcf = ingress_unbind_filter,
};
static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
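
Both ops tables above also lose .tcf_cl_offload, a hook each qdisc implemented as a fixed predicate; presumably the offload decision now lives with the shared tcf_block machinery these tables already point at. A hypothetical sketch of replacing such a per-object hook with a direct check (toy names, not the tc code):

#include <assert.h>
#include <stdbool.h>

#define TOY_MIN_INGRESS 0xfff2u

struct toy_ops {
	const char *name;
	/* old: bool (*tcf_cl_offload)(unsigned int classid); */
};

static bool toy_can_offload(unsigned int classid)
{
	return (classid & 0xffffu) == TOY_MIN_INGRESS;
}

int main(void)
{
	struct toy_ops ops = { .name = "clsact" };

	(void)ops;
	assert(toy_can_offload(TOY_MIN_INGRESS));
	assert(!toy_can_offload(TOY_MIN_INGRESS + 1));
	return 0;
}
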
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index cadfdd4..f3a3e50 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -165,7 +165,7 @@ static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
return dev_queue->qdisc_sleeping;
}
-static unsigned long mq_get(struct Qdisc *sch, u32 classid)
+static unsigned long mq_find(struct Qdisc *sch, u32 classid)
{
unsigned int ntx = TC_H_MIN(classid);
@@ -174,10 +174,6 @@ static unsigned long mq_get(struct Qdisc *sch, u32 classid)
return ntx;
}
-static void mq_put(struct Qdisc *sch, unsigned long cl)
-{
-}
-
static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
struct sk_buff *skb, struct tcmsg *tcm)
{
@@ -223,8 +219,7 @@ static const struct Qdisc_class_ops mq_class_ops = {
.select_queue = mq_select_queue,
.graft = mq_graft,
.leaf = mq_leaf,
- .get = mq_get,
- .put = mq_put,
+ .find = mq_find,
.walk = mq_walk,
.dump = mq_dump_class,
.dump_stats = mq_dump_class_stats,
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index e0c0272..6bcdfe6 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -39,11 +39,9 @@ static void mqprio_destroy(struct Qdisc *sch)
}
if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
- struct tc_mqprio_qopt offload = { 0 };
- struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
- { .mqprio = &offload } };
+ struct tc_mqprio_qopt mqprio = {};
- dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, 0, &tc);
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, &mqprio);
} else {
netdev_set_num_tc(dev, 0);
}
@@ -148,16 +146,14 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
* supplied and verified mapping
*/
if (qopt->hw) {
- struct tc_mqprio_qopt offload = *qopt;
- struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO,
- { .mqprio = &offload } };
+ struct tc_mqprio_qopt mqprio = *qopt;
- err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle,
- 0, 0, &tc);
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO,
+ &mqprio);
if (err)
return err;
- priv->hw_offload = offload.hw;
+ priv->hw_offload = mqprio.hw;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -281,7 +277,7 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
return dev_queue->qdisc_sleeping;
}
-static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
+static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
{
struct net_device *dev = qdisc_dev(sch);
unsigned int ntx = TC_H_MIN(classid);
@@ -291,10 +287,6 @@ static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
return ntx;
}
-static void mqprio_put(struct Qdisc *sch, unsigned long cl)
-{
-}
-
static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
struct sk_buff *skb, struct tcmsg *tcm)
{
@@ -407,8 +399,7 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
static const struct Qdisc_class_ops mqprio_class_ops = {
.graft = mqprio_graft,
.leaf = mqprio_leaf,
- .get = mqprio_get,
- .put = mqprio_put,
+ .find = mqprio_find,
.walk = mqprio_walk,
.dump = mqprio_dump_class,
.dump_stats = mqprio_dump_class_stats,
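
The mqprio hunks above show the new ndo_setup_tc() calling convention: the setup type is passed as an argument and the callback receives the type-specific struct directly, instead of digging it out of a struct tc_to_netdev wrapper. A userspace mock of the convention (toy_* types are stand-ins, not the netdev API):

#include <stdio.h>

enum toy_setup_type { TOY_SETUP_MQPRIO };

struct toy_mqprio_qopt {
	unsigned char num_tc;
	unsigned char hw;
};

static int toy_ndo_setup_tc(enum toy_setup_type type, void *type_data)
{
	struct toy_mqprio_qopt *mqprio = type_data;

	if (type != TOY_SETUP_MQPRIO)
		return -1;	/* driver: unsupported setup type */
	mqprio->hw = 1;		/* driver accepted the offload */
	return 0;
}

int main(void)
{
	struct toy_mqprio_qopt mqprio = { .num_tc = 4 };

	if (toy_ndo_setup_tc(TOY_SETUP_MQPRIO, &mqprio))
		return 1;
	printf("num_tc=%d hw=%d\n", mqprio.num_tc, mqprio.hw);
	return 0;
}
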
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 9c454f5..ff4fc3e 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -301,7 +301,7 @@ multiq_leaf(struct Qdisc *sch, unsigned long arg)
return q->queues[band];
}
-static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
+static unsigned long multiq_find(struct Qdisc *sch, u32 classid)
{
struct multiq_sched_data *q = qdisc_priv(sch);
unsigned long band = TC_H_MIN(classid);
@@ -314,11 +314,11 @@ static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
u32 classid)
{
- return multiq_get(sch, classid);
+ return multiq_find(sch, classid);
}
-static void multiq_put(struct Qdisc *q, unsigned long cl)
+static void multiq_unbind(struct Qdisc *q, unsigned long cl)
{
}
@@ -380,12 +380,11 @@ static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl)
static const struct Qdisc_class_ops multiq_class_ops = {
.graft = multiq_graft,
.leaf = multiq_leaf,
- .get = multiq_get,
- .put = multiq_put,
+ .find = multiq_find,
.walk = multiq_walk,
.tcf_block = multiq_tcf_block,
.bind_tcf = multiq_bind,
- .unbind_tcf = multiq_put,
+ .unbind_tcf = multiq_unbind,
.dump = multiq_dump_class,
.dump_stats = multiq_dump_class_stats,
};
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 14d1724..b1266e7 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -1096,15 +1096,11 @@ static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
return q->qdisc;
}
-static unsigned long netem_get(struct Qdisc *sch, u32 classid)
+static unsigned long netem_find(struct Qdisc *sch, u32 classid)
{
return 1;
}
-static void netem_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
if (!walker->stop) {
@@ -1120,8 +1116,7 @@ static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
static const struct Qdisc_class_ops netem_class_ops = {
.graft = netem_graft,
.leaf = netem_leaf,
- .get = netem_get,
- .put = netem_put,
+ .find = netem_find,
.walk = netem_walk,
.dump = netem_dump_class,
};
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index e3e364c..f31b28f 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -260,7 +260,7 @@ prio_leaf(struct Qdisc *sch, unsigned long arg)
return q->queues[band];
}
-static unsigned long prio_get(struct Qdisc *sch, u32 classid)
+static unsigned long prio_find(struct Qdisc *sch, u32 classid)
{
struct prio_sched_data *q = qdisc_priv(sch);
unsigned long band = TC_H_MIN(classid);
@@ -272,11 +272,11 @@ static unsigned long prio_get(struct Qdisc *sch, u32 classid)
static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
{
- return prio_get(sch, classid);
+ return prio_find(sch, classid);
}
-static void prio_put(struct Qdisc *q, unsigned long cl)
+static void prio_unbind(struct Qdisc *q, unsigned long cl)
{
}
@@ -338,12 +338,11 @@ static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl)
static const struct Qdisc_class_ops prio_class_ops = {
.graft = prio_graft,
.leaf = prio_leaf,
- .get = prio_get,
- .put = prio_put,
+ .find = prio_find,
.walk = prio_walk,
.tcf_block = prio_tcf_block,
.bind_tcf = prio_bind,
- .unbind_tcf = prio_put,
+ .unbind_tcf = prio_unbind,
.dump = prio_dump_class,
.dump_stats = prio_dump_class_stats,
};
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 0e16dfd..cd661a7 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -132,7 +132,6 @@ struct qfq_aggregate;
struct qfq_class {
struct Qdisc_class_common common;
- unsigned int refcnt;
unsigned int filter_cnt;
struct gnet_stats_basic_packed bstats;
@@ -477,7 +476,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
- cl->refcnt = 1;
cl->common.classid = classid;
cl->deficit = lmax;
@@ -555,32 +553,15 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
qfq_purge_queue(cl);
qdisc_class_hash_remove(&q->clhash, &cl->common);
- BUG_ON(--cl->refcnt == 0);
- /*
- * This shouldn't happen: we "hold" one cops->get() when called
- * from tc_ctl_tclass; the destroy method is done from cops->put().
- */
-
sch_tree_unlock(sch);
- return 0;
-}
-
-static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
-{
- struct qfq_class *cl = qfq_find_class(sch, classid);
- if (cl != NULL)
- cl->refcnt++;
-
- return (unsigned long)cl;
+ qfq_destroy_class(sch, cl);
+ return 0;
}
-static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
+static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid)
{
- struct qfq_class *cl = (struct qfq_class *)arg;
-
- if (--cl->refcnt == 0)
- qfq_destroy_class(sch, cl);
+ return (unsigned long)qfq_find_class(sch, classid);
}
static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl)
@@ -1428,8 +1409,7 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl = (struct qfq_class *)arg;
- if (cl->qdisc->q.qlen == 0)
- qfq_deactivate_class(q, cl);
+ qfq_deactivate_class(q, cl);
}
static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
@@ -1511,8 +1491,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
static const struct Qdisc_class_ops qfq_class_ops = {
.change = qfq_change_class,
.delete = qfq_delete_class,
- .get = qfq_get_class,
- .put = qfq_put_class,
+ .find = qfq_search_class,
.tcf_block = qfq_tcf_block,
.bind_tcf = qfq_bind_tcf,
.unbind_tcf = qfq_unbind_tcf,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 11292ad..93b9d70 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -311,15 +311,11 @@ static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
return q->qdisc;
}
-static unsigned long red_get(struct Qdisc *sch, u32 classid)
+static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
return 1;
}
-static void red_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
if (!walker->stop) {
@@ -335,8 +331,7 @@ static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
static const struct Qdisc_class_ops red_class_ops = {
.graft = red_graft,
.leaf = red_leaf,
- .get = red_get,
- .put = red_put,
+ .find = red_find,
.walk = red_walk,
.dump = red_dump_class,
};
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 11fb6ec..cc39e17 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -632,12 +632,12 @@ static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg)
return q->qdisc;
}
-static unsigned long sfb_get(struct Qdisc *sch, u32 classid)
+static unsigned long sfb_find(struct Qdisc *sch, u32 classid)
{
return 1;
}
-static void sfb_put(struct Qdisc *sch, unsigned long arg)
+static void sfb_unbind(struct Qdisc *sch, unsigned long arg)
{
}
@@ -683,14 +683,13 @@ static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
static const struct Qdisc_class_ops sfb_class_ops = {
.graft = sfb_graft,
.leaf = sfb_leaf,
- .get = sfb_get,
- .put = sfb_put,
+ .find = sfb_find,
.change = sfb_change_class,
.delete = sfb_delete,
.walk = sfb_walk,
.tcf_block = sfb_tcf_block,
.bind_tcf = sfb_bind,
- .unbind_tcf = sfb_put,
+ .unbind_tcf = sfb_unbind,
.dump = sfb_dump_class,
};
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index fc69fc5..74ea863 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -292,7 +292,7 @@ static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
slot->skblist_prev = skb;
}
-static unsigned int sfq_drop(struct Qdisc *sch)
+static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free)
{
struct sfq_sched_data *q = qdisc_priv(sch);
sfq_index x, d = q->cur_depth;
@@ -310,9 +310,8 @@ drop:
slot->backlog -= len;
sfq_dec(q, x);
sch->q.qlen--;
- qdisc_qstats_drop(sch);
qdisc_qstats_backlog_dec(sch, skb);
- kfree_skb(skb);
+ qdisc_drop(skb, sch, to_free);
return len;
}
@@ -360,7 +359,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
if (hash == 0) {
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
- kfree_skb(skb);
+ __qdisc_drop(skb, to_free);
return ret;
}
hash--;
@@ -465,7 +464,7 @@ enqueue:
return NET_XMIT_SUCCESS;
qlen = slot->qlen;
- dropped = sfq_drop(sch);
+ dropped = sfq_drop(sch, to_free);
/* Return Congestion Notification only if we dropped a packet
* from this flow.
*/
@@ -628,6 +627,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
unsigned int qlen, dropped = 0;
struct red_parms *p = NULL;
+ struct sk_buff *to_free = NULL;
+ struct sk_buff *tail = NULL;
if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
return -EINVAL;
@@ -674,8 +675,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
}
qlen = sch->q.qlen;
- while (sch->q.qlen > q->limit)
- dropped += sfq_drop(sch);
+ while (sch->q.qlen > q->limit) {
+ dropped += sfq_drop(sch, &to_free);
+ if (!tail)
+ tail = to_free;
+ }
+
+ rtnl_kfree_skbs(to_free, tail);
qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
del_timer(&q->perturb_timer);
@@ -808,7 +814,7 @@ static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
return NULL;
}
-static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
+static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
{
return 0;
}
@@ -821,7 +827,7 @@ static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
return 0;
}
-static void sfq_put(struct Qdisc *q, unsigned long cl)
+static void sfq_unbind(struct Qdisc *q, unsigned long cl)
{
}
@@ -885,11 +891,10 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
static const struct Qdisc_class_ops sfq_class_ops = {
.leaf = sfq_leaf,
- .get = sfq_get,
- .put = sfq_put,
+ .find = sfq_find,
.tcf_block = sfq_tcf_block,
.bind_tcf = sfq_bind,
- .unbind_tcf = sfq_put,
+ .unbind_tcf = sfq_unbind,
.dump = sfq_dump_class,
.dump_stats = sfq_dump_class_stats,
.walk = sfq_walk,
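
sfq_drop() above now takes a to_free list and defers freeing to the caller: drops inside sfq_change()'s resize loop are chained up and released in one rtnl_kfree_skbs() call instead of one kfree_skb() per packet. A self-contained model of the batching (toy_skb stands in for sk_buff, free_batch() for rtnl_kfree_skbs()):

#include <stdio.h>
#include <stdlib.h>

struct toy_skb {
	struct toy_skb *next;
	int id;
};

static void toy_drop(struct toy_skb *skb, struct toy_skb **to_free)
{
	skb->next = *to_free;	/* prepend, like __qdisc_drop() */
	*to_free = skb;
}

static void free_batch(struct toy_skb *head)
{
	while (head) {
		struct toy_skb *next = head->next;

		printf("freeing skb %d\n", head->id);
		free(head);
		head = next;
	}
}

int main(void)
{
	struct toy_skb *to_free = NULL;
	int i;

	for (i = 0; i < 3; i++) {	/* the "while over limit" loop */
		struct toy_skb *skb = malloc(sizeof(*skb));

		skb->id = i;
		toy_drop(skb, &to_free);
	}
	free_batch(to_free);	/* once, after the loop */
	return 0;
}

The kernel variant additionally remembers the first victim as tail, so both ends of the chain can be handed to rtnl_kfree_skbs(to_free, tail).
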
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 493270f..120f4f3 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -511,15 +511,11 @@ static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
return q->qdisc;
}
-static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
+static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
{
return 1;
}
-static void tbf_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
if (!walker->stop) {
@@ -535,8 +531,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
static const struct Qdisc_class_ops tbf_class_ops = {
.graft = tbf_graft,
.leaf = tbf_leaf,
- .get = tbf_get,
- .put = tbf_put,
+ .find = tbf_find,
.walk = tbf_walk,
.dump = tbf_dump_class,
};
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 40ec836..dfb9651 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -63,11 +63,11 @@ static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc);
/* 1st Level Abstractions. */
/* Initialize a new association from provided memory. */
-static struct sctp_association *sctp_association_init(struct sctp_association *asoc,
- const struct sctp_endpoint *ep,
- const struct sock *sk,
- sctp_scope_t scope,
- gfp_t gfp)
+static struct sctp_association *sctp_association_init(
+ struct sctp_association *asoc,
+ const struct sctp_endpoint *ep,
+ const struct sock *sk,
+ enum sctp_scope scope, gfp_t gfp)
{
struct net *net = sock_net(sk);
struct sctp_sock *sp;
@@ -301,9 +301,8 @@ fail_init:
/* Allocate and initialize a new association */
struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
- const struct sock *sk,
- sctp_scope_t scope,
- gfp_t gfp)
+ const struct sock *sk,
+ enum sctp_scope scope, gfp_t gfp)
{
struct sctp_association *asoc;
@@ -797,7 +796,7 @@ void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc,
*/
void sctp_assoc_control_transport(struct sctp_association *asoc,
struct sctp_transport *transport,
- sctp_transport_cmd_t command,
+ enum sctp_transport_cmd command,
sctp_sn_error_t error)
{
struct sctp_ulpevent *event;
@@ -1022,11 +1021,11 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
container_of(work, struct sctp_association,
base.inqueue.immediate);
struct net *net = sock_net(asoc->base.sk);
+ union sctp_subtype subtype;
struct sctp_endpoint *ep;
struct sctp_chunk *chunk;
struct sctp_inq *inqueue;
int state;
- sctp_subtype_t subtype;
int error = 0;
/* The association should be held so we should be safe. */
@@ -1564,7 +1563,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
* local endpoint and the remote peer.
*/
int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
- sctp_scope_t scope, gfp_t gfp)
+ enum sctp_scope scope, gfp_t gfp)
{
int flags;
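
From here on, the sctp changes are mostly mechanical: the private _t typedefs (sctp_scope_t, sctp_subtype_t, sctp_xmit_t, ...) are spelled out as their underlying enum/union/struct tags, per kernel coding style. A before/after toy showing that only the spelling changes:

/* old style, now removed:
 *   typedef enum { TOY_SCOPE_GLOBAL, TOY_SCOPE_PRIVATE } toy_scope_t;
 */
enum toy_scope { TOY_SCOPE_GLOBAL, TOY_SCOPE_PRIVATE };

static enum toy_scope toy_scope_of(unsigned int addr_hi_octet)
{
	return addr_hi_octet == 10 ? TOY_SCOPE_PRIVATE : TOY_SCOPE_GLOBAL;
}

int main(void)
{
	return toy_scope_of(10) == TOY_SCOPE_PRIVATE ? 0 : 1;
}
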
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index e001b01..00667c5 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -185,9 +185,9 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1,
* are called the two key vectors.
*/
static struct sctp_auth_bytes *sctp_auth_make_key_vector(
- sctp_random_param_t *random,
- sctp_chunks_param_t *chunks,
- sctp_hmac_algo_param_t *hmacs,
+ struct sctp_random_param *random,
+ struct sctp_chunks_param *chunks,
+ struct sctp_hmac_algo_param *hmacs,
gfp_t gfp)
{
struct sctp_auth_bytes *new;
@@ -226,10 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector(
gfp_t gfp)
{
return sctp_auth_make_key_vector(
- (sctp_random_param_t *)asoc->c.auth_random,
- (sctp_chunks_param_t *)asoc->c.auth_chunks,
- (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs,
- gfp);
+ (struct sctp_random_param *)asoc->c.auth_random,
+ (struct sctp_chunks_param *)asoc->c.auth_chunks,
+ (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs, gfp);
}
/* Make a key vector based on peer's parameters */
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 1ebc184..7df3704 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -45,9 +45,9 @@
#include <net/sctp/sm.h>
/* Forward declarations for internal helpers. */
-static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *,
- union sctp_addr *, sctp_scope_t scope, gfp_t gfp,
- int flags);
+static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
+ union sctp_addr *addr, enum sctp_scope scope,
+ gfp_t gfp, int flags);
static void sctp_bind_addr_clean(struct sctp_bind_addr *);
/* First Level Abstractions. */
@@ -57,7 +57,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *);
*/
int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
const struct sctp_bind_addr *src,
- sctp_scope_t scope, gfp_t gfp,
+ enum sctp_scope scope, gfp_t gfp,
int flags)
{
struct sctp_sockaddr_entry *addr;
@@ -440,9 +440,8 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
/* Copy out addresses from the global local address list. */
static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
- union sctp_addr *addr,
- sctp_scope_t scope, gfp_t gfp,
- int flags)
+ union sctp_addr *addr, enum sctp_scope scope,
+ gfp_t gfp, int flags)
{
int error = 0;
@@ -485,9 +484,10 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr)
}
/* Is 'addr' valid for 'scope'? */
-int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope)
+int sctp_in_scope(struct net *net, const union sctp_addr *addr,
+ enum sctp_scope scope)
{
- sctp_scope_t addr_scope = sctp_scope(addr);
+ enum sctp_scope addr_scope = sctp_scope(addr);
/* The unusable SCTP addresses will not be considered with
* any defined scopes.
@@ -545,7 +545,7 @@ int sctp_is_ep_boundall(struct sock *sk)
********************************************************************/
/* What is the scope of 'addr'? */
-sctp_scope_t sctp_scope(const union sctp_addr *addr)
+enum sctp_scope sctp_scope(const union sctp_addr *addr)
{
struct sctp_af *af;
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 1323d41..3afac27 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -201,7 +201,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc);
if (hmac_desc)
- max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) +
+ max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) +
hmac_desc->hmac_len);
}
@@ -221,7 +221,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
asoc->outqueue.out_qlen == 0 &&
list_empty(&asoc->outqueue.retransmit) &&
msg_len > max_data)
- first_len -= SCTP_PAD4(sizeof(sctp_sack_chunk_t));
+ first_len -= SCTP_PAD4(sizeof(struct sctp_sack_chunk));
/* Encourage Cookie-ECHO bundling. */
if (asoc->state < SCTP_STATE_COOKIE_ECHOED)
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 2e47eb2..3f619fd 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -60,7 +60,7 @@ static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = {
};
/* Lookup "chunk type" debug name. */
-const char *sctp_cname(const sctp_subtype_t cid)
+const char *sctp_cname(const union sctp_subtype cid)
{
if (cid.chunk <= SCTP_CID_BASE_MAX)
return sctp_cid_tbl[cid.chunk];
@@ -130,7 +130,7 @@ static const char *const sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = {
};
/* Lookup primitive debug name. */
-const char *sctp_pname(const sctp_subtype_t id)
+const char *sctp_pname(const union sctp_subtype id)
{
if (id.primitive <= SCTP_EVENT_PRIMITIVE_MAX)
return sctp_primitive_tbl[id.primitive];
@@ -143,7 +143,7 @@ static const char *const sctp_other_tbl[] = {
};
/* Lookup "other" debug name. */
-const char *sctp_oname(const sctp_subtype_t id)
+const char *sctp_oname(const union sctp_subtype id)
{
if (id.other <= SCTP_EVENT_OTHER_MAX)
return sctp_other_tbl[id.other];
@@ -165,7 +165,7 @@ static const char *const sctp_timer_tbl[] = {
};
/* Lookup timer debug name. */
-const char *sctp_tname(const sctp_subtype_t id)
+const char *sctp_tname(const union sctp_subtype id)
{
BUILD_BUG_ON(SCTP_EVENT_TIMEOUT_MAX + 1 != ARRAY_SIZE(sctp_timer_tbl));
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 0e86f98..ee1e601 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -73,13 +73,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
* variables. There are arrays that we encode directly
* into parameters to make the rest of the operations easier.
*/
- auth_hmacs = kzalloc(sizeof(sctp_hmac_algo_param_t) +
- sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
+ auth_hmacs = kzalloc(sizeof(*auth_hmacs) +
+ sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
if (!auth_hmacs)
goto nomem;
- auth_chunks = kzalloc(sizeof(sctp_chunks_param_t) +
- SCTP_NUM_CHUNK_TYPES, gfp);
+ auth_chunks = kzalloc(sizeof(*auth_chunks) +
+ SCTP_NUM_CHUNK_TYPES, gfp);
if (!auth_chunks)
goto nomem;
@@ -382,8 +382,8 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
struct sctp_transport *transport;
struct sctp_chunk *chunk;
struct sctp_inq *inqueue;
- sctp_subtype_t subtype;
- sctp_state_t state;
+ union sctp_subtype subtype;
+ enum sctp_state state;
int error = 0;
int first_time = 1; /* is this the first time through the loop */
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 41eb2ec..92a0714 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1111,7 +1111,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
__be16 peer_port,
struct sctp_transport **transportp)
{
- sctp_addip_chunk_t *asconf = (struct sctp_addip_chunk *)ch;
+ struct sctp_addip_chunk *asconf = (struct sctp_addip_chunk *)ch;
struct sctp_af *af;
union sctp_addr_param *param;
union sctp_addr paddr;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index a4b6ffb..51c4887 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -243,8 +243,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
union sctp_addr *daddr = &t->ipaddr;
union sctp_addr dst_saddr;
struct in6_addr *final_p, final;
+ enum sctp_scope scope;
__u8 matchlen = 0;
- sctp_scope_t scope;
memset(fl6, 0, sizeof(struct flowi6));
fl6->daddr = daddr->v6.sin6_addr;
@@ -497,7 +497,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr,
static int sctp_v6_to_addr_param(const union sctp_addr *addr,
union sctp_addr_param *param)
{
- int length = sizeof(sctp_ipv6addr_param_t);
+ int length = sizeof(struct sctp_ipv6addr_param);
param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS;
param->v6.param_hdr.length = htons(length);
@@ -626,10 +626,10 @@ static int sctp_v6_addr_valid(union sctp_addr *addr,
}
/* What is the scope of 'addr'? */
-static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
+static enum sctp_scope sctp_v6_scope(union sctp_addr *addr)
{
+ enum sctp_scope retval;
int v6scope;
- sctp_scope_t retval;
/* The IPv6 scope is really a set of bit fields.
* See IFA_* in <net/if_inet6.h>. Map to a generic SCTP scope.
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index 105ac33..aeea6da 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -57,7 +57,7 @@ SCTP_DBG_OBJCNT(keys);
/* An array to make it easy to pretty print the debug information
* to the proc fs.
*/
-static sctp_dbg_objcnt_entry_t sctp_dbg_objcnt[] = {
+static struct sctp_dbg_objcnt_entry sctp_dbg_objcnt[] = {
SCTP_DBG_OBJCNT_ENTRY(sock),
SCTP_DBG_OBJCNT_ENTRY(ep),
SCTP_DBG_OBJCNT_ENTRY(assoc),
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 9d85049..4a865cd 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -57,15 +57,15 @@
#include <net/sctp/checksum.h>
/* Forward declarations for private helpers. */
-static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
- struct sctp_chunk *chunk);
-static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
- struct sctp_chunk *chunk);
+static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
+ struct sctp_chunk *chunk);
+static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
+ struct sctp_chunk *chunk);
static void sctp_packet_append_data(struct sctp_packet *packet,
- struct sctp_chunk *chunk);
-static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
- struct sctp_chunk *chunk,
- u16 chunk_len);
+ struct sctp_chunk *chunk);
+static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
+ struct sctp_chunk *chunk,
+ u16 chunk_len);
static void sctp_packet_reset(struct sctp_packet *packet)
{
@@ -181,11 +181,11 @@ void sctp_packet_free(struct sctp_packet *packet)
* as it can fit in the packet, but any more data that does not fit in this
* packet can be sent only after receiving the COOKIE_ACK.
*/
-sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
- struct sctp_chunk *chunk,
- int one_packet, gfp_t gfp)
+enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
+ struct sctp_chunk *chunk,
+ int one_packet, gfp_t gfp)
{
- sctp_xmit_t retval;
+ enum sctp_xmit retval;
pr_debug("%s: packet:%p size:%zu chunk:%p size:%d\n", __func__,
packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1);
@@ -218,12 +218,12 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
}
/* Try to bundle an auth chunk into the packet. */
-static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt,
- struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
+ struct sctp_chunk *chunk)
{
struct sctp_association *asoc = pkt->transport->asoc;
+ enum sctp_xmit retval = SCTP_XMIT_OK;
struct sctp_chunk *auth;
- sctp_xmit_t retval = SCTP_XMIT_OK;
/* if we don't have an association, we can't do authentication */
if (!asoc)
@@ -254,10 +254,10 @@ static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt,
}
/* Try to bundle a SACK with the packet. */
-static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt,
- struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_bundle_sack(struct sctp_packet *pkt,
+ struct sctp_chunk *chunk)
{
- sctp_xmit_t retval = SCTP_XMIT_OK;
+ enum sctp_xmit retval = SCTP_XMIT_OK;
/* If sending DATA and haven't already bundled a SACK, try to
* bundle one into the packet.
@@ -299,11 +299,11 @@ out:
/* Append a chunk to the offered packet reporting back any inability to do
* so.
*/
-static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
- struct sctp_chunk *chunk)
+static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet,
+ struct sctp_chunk *chunk)
{
- sctp_xmit_t retval = SCTP_XMIT_OK;
__u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length));
+ enum sctp_xmit retval = SCTP_XMIT_OK;
/* Check to see if this chunk will fit into the packet */
retval = sctp_packet_will_fit(packet, chunk, chunk_len);
@@ -353,10 +353,10 @@ finish:
/* Append a chunk to the offered packet reporting back any inability to do
* so.
*/
-sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
- struct sctp_chunk *chunk)
+enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
+ struct sctp_chunk *chunk)
{
- sctp_xmit_t retval = SCTP_XMIT_OK;
+ enum sctp_xmit retval = SCTP_XMIT_OK;
pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk);
@@ -653,8 +653,8 @@ out:
********************************************************************/
/* This private function checks to see if a chunk can be added */
-static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
- struct sctp_chunk *chunk)
+static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet,
+ struct sctp_chunk *chunk)
{
size_t datasize, rwnd, inflight, flight_size;
struct sctp_transport *transport = packet->transport;
@@ -762,12 +762,12 @@ static void sctp_packet_append_data(struct sctp_packet *packet,
sctp_chunk_assign_ssn(chunk);
}
-static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
- struct sctp_chunk *chunk,
- u16 chunk_len)
+static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet,
+ struct sctp_chunk *chunk,
+ u16 chunk_len)
{
+ enum sctp_xmit retval = SCTP_XMIT_OK;
size_t psize, pmtu, maxsize;
- sctp_xmit_t retval = SCTP_XMIT_OK;
psize = packet->size;
if (packet->transport->asoc)
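
The output.c conversions above keep the control flow intact; the append helpers still answer "did the chunk fit, and if not, why" through the now tag-spelled enum sctp_xmit. A toy of that decision enum (values mirror the shape of the API, not the kernel's actual constants):

#include <assert.h>

enum toy_xmit { TOY_XMIT_OK, TOY_XMIT_PMTU_FULL, TOY_XMIT_RWND_FULL };

static enum toy_xmit toy_will_fit(unsigned int psize,
				  unsigned int chunk_len,
				  unsigned int pmtu)
{
	if (psize + chunk_len > pmtu)
		return TOY_XMIT_PMTU_FULL;	/* caller starts a new packet */
	return TOY_XMIT_OK;
}

int main(void)
{
	assert(toy_will_fit(1000, 400, 1500) == TOY_XMIT_OK);
	assert(toy_will_fit(1400, 400, 1500) == TOY_XMIT_PMTU_FULL);
	return 0;
}
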
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index e876270..2966ff4 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -534,7 +534,7 @@ void sctp_retransmit_mark(struct sctp_outq *q,
* one packet out.
*/
void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
- sctp_retransmit_reason_t reason)
+ enum sctp_retransmit_reason reason)
{
struct net *net = sock_net(q->asoc->base.sk);
@@ -594,14 +594,14 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
int rtx_timeout, int *start_timer)
{
- struct list_head *lqueue;
struct sctp_transport *transport = pkt->transport;
- sctp_xmit_t status;
struct sctp_chunk *chunk, *chunk1;
- int fast_rtx;
+ struct list_head *lqueue;
+ enum sctp_xmit status;
int error = 0;
int timer = 0;
int done = 0;
+ int fast_rtx;
lqueue = &q->retransmit;
fast_rtx = q->fast_rtx;
@@ -781,7 +781,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
struct sctp_transport *transport = NULL;
struct sctp_transport *new_transport;
struct sctp_chunk *chunk, *tmp;
- sctp_xmit_t status;
+ enum sctp_xmit status;
int error = 0;
int start_timer = 0;
int one_packet = 0;
@@ -1197,7 +1197,7 @@ sctp_flush_out:
static void sctp_sack_update_unack_data(struct sctp_association *assoc,
struct sctp_sackhdr *sack)
{
- sctp_sack_variable_t *frags;
+ union sctp_sack_variable *frags;
__u16 unack_data;
int i;
@@ -1224,7 +1224,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
struct sctp_transport *transport;
struct sctp_chunk *tchunk = NULL;
struct list_head *lchunk, *transport_list, *temp;
- sctp_sack_variable_t *frags = sack->variable;
+ union sctp_sack_variable *frags = sack->variable;
__u32 sack_ctsn, ctsn, tsn;
__u32 highest_tsn, highest_new_tsn;
__u32 sack_a_rwnd;
@@ -1736,10 +1736,10 @@ static void sctp_mark_missing(struct sctp_outq *q,
/* Is the given TSN acked by this packet? */
static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn)
{
- int i;
- sctp_sack_variable_t *frags;
- __u16 tsn_offset, blocks;
__u32 ctsn = ntohl(sack->cum_tsn_ack);
+ union sctp_sack_variable *frags;
+ __u16 tsn_offset, blocks;
+ int i;
if (TSN_lte(tsn, ctsn))
goto pass;
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index f0553a0..c0817f7a 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -53,8 +53,8 @@
int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \
void *arg) { \
int error = 0; \
- sctp_event_t event_type; sctp_subtype_t subtype; \
- sctp_state_t state; \
+ enum sctp_event event_type; union sctp_subtype subtype; \
+ enum sctp_state state; \
struct sctp_endpoint *ep; \
\
event_type = SCTP_EVENT_T_PRIMITIVE; \
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index 6cc2152..1280f85 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -127,12 +127,13 @@ static const struct file_operations sctpprobe_fops = {
.llseek = noop_llseek,
};
-static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition jsctp_sf_eat_sack(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sk_buff *skb = chunk->skb;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 989a900..fcd80fe 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -196,7 +196,7 @@ static void sctp_free_local_addr_list(struct net *net)
/* Copy the local addresses which are valid for 'scope' into 'bp'. */
int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
- sctp_scope_t scope, gfp_t gfp, int copy_flags)
+ enum sctp_scope scope, gfp_t gfp, int copy_flags)
{
struct sctp_sockaddr_entry *addr;
union sctp_addr laddr;
@@ -292,7 +292,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr,
static int sctp_v4_to_addr_param(const union sctp_addr *addr,
union sctp_addr_param *param)
{
- int length = sizeof(sctp_ipv4addr_param_t);
+ int length = sizeof(struct sctp_ipv4addr_param);
param->v4.param_hdr.type = SCTP_PARAM_IPV4_ADDRESS;
param->v4.param_hdr.length = htons(length);
@@ -400,9 +400,9 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
* IPv4 scoping can be controlled through sysctl option
* net.sctp.addr_scope_policy
*/
-static sctp_scope_t sctp_v4_scope(union sctp_addr *addr)
+static enum sctp_scope sctp_v4_scope(union sctp_addr *addr)
{
- sctp_scope_t retval;
+ enum sctp_scope retval;
/* Check for unusable SCTP addresses. */
if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) {
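
The sm_make_chunk.c hunks that follow pair the typedef removal with a second cleanup: sizeof(sctp_foo_t) becomes sizeof(variable), so the length arithmetic tracks the object rather than a type name and cannot silently desynchronize if the declaration changes. A small demonstration (toy_errhdr stands in for struct sctp_errhdr):

#include <assert.h>
#include <stdint.h>

struct toy_errhdr {
	uint16_t cause;
	uint16_t length;
};

static unsigned int toy_err_len(unsigned int paylen)
{
	struct toy_errhdr err;

	/* was sizeof(toy_errhdr_t) + paylen; now tracks 'err' itself */
	return (unsigned int)sizeof(err) + paylen;
}

int main(void)
{
	assert(toy_err_len(8) == sizeof(struct toy_errhdr) + 8);
	return 0;
}
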
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 6110447..ca8f196 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -69,7 +69,8 @@ static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
__u8 type, __u8 flags, int paylen,
gfp_t gfp);
-static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
+static struct sctp_cookie_param *sctp_pack_cookie(
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const struct sctp_chunk *init_chunk,
int *cookie_len,
@@ -131,17 +132,17 @@ static const struct sctp_paramhdr prsctp_param = {
* provided chunk, as most cause codes will be embedded inside an
* abort chunk.
*/
-void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
- size_t paylen)
+void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
+ size_t paylen)
{
- sctp_errhdr_t err;
+ struct sctp_errhdr err;
__u16 len;
/* Cause code constants are now defined in network order. */
err.cause = cause_code;
- len = sizeof(sctp_errhdr_t) + paylen;
+ len = sizeof(err) + paylen;
err.length = htons(len);
- chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err);
+ chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err);
}
/* A helper to initialize an op error inside a
@@ -150,21 +151,21 @@ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code,
* if there isn't enough space in the op error chunk
*/
static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
- size_t paylen)
+ size_t paylen)
{
- sctp_errhdr_t err;
+ struct sctp_errhdr err;
__u16 len;
/* Cause code constants are now defined in network order. */
err.cause = cause_code;
- len = sizeof(sctp_errhdr_t) + paylen;
+ len = sizeof(err) + paylen;
err.length = htons(len);
if (skb_tailroom(chunk->skb) < len)
return -ENOSPC;
- chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk,
- sizeof(sctp_errhdr_t),
- &err);
+
+ chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, sizeof(err), &err);
+
return 0;
}
/* 3.3.2 Initiation (INIT) (1)
@@ -212,32 +213,31 @@ static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
* Supported Address Types (Note 4) Optional 12
*/
struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
- const struct sctp_bind_addr *bp,
- gfp_t gfp, int vparam_len)
+ const struct sctp_bind_addr *bp,
+ gfp_t gfp, int vparam_len)
{
struct net *net = sock_net(asoc->base.sk);
+ struct sctp_supported_ext_param ext_param;
+ struct sctp_adaptation_ind_param aiparam;
+ struct sctp_paramhdr *auth_chunks = NULL;
+ struct sctp_paramhdr *auth_hmacs = NULL;
+ struct sctp_supported_addrs_param sat;
struct sctp_endpoint *ep = asoc->ep;
- struct sctp_inithdr init;
- union sctp_params addrs;
- size_t chunksize;
struct sctp_chunk *retval = NULL;
int num_types, addrs_len = 0;
+ struct sctp_inithdr init;
+ union sctp_params addrs;
struct sctp_sock *sp;
- sctp_supported_addrs_param_t sat;
+ __u8 extensions[4];
+ size_t chunksize;
__be16 types[2];
- sctp_adaptation_ind_param_t aiparam;
- sctp_supported_ext_param_t ext_param;
int num_ext = 0;
- __u8 extensions[4];
- struct sctp_paramhdr *auth_chunks = NULL,
- *auth_hmacs = NULL;
/* RFC 2960 3.3.2 Initiation (INIT) (1)
*
* Note 1: The INIT chunks can contain multiple addresses that
* can be IPv4 and/or IPv6 in any combination.
*/
- retval = NULL;
/* Convert the provided bind address list to raw format. */
addrs = sctp_bind_addrs_to_raw(bp, &addrs_len, gfp);
@@ -305,8 +305,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
/* If we have any extensions to report, account for that */
if (num_ext)
- chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
- num_ext);
+ chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext);
/* RFC 2960 3.3.2 Initiation (INIT) (1)
*
@@ -348,10 +347,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
*/
if (num_ext) {
ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT;
- ext_param.param_hdr.length =
- htons(sizeof(sctp_supported_ext_param_t) + num_ext);
- sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t),
- &ext_param);
+ ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext);
+ sctp_addto_chunk(retval, sizeof(ext_param), &ext_param);
sctp_addto_param(retval, num_ext, extensions);
}
@@ -382,26 +379,24 @@ nodata:
}
struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- gfp_t gfp, int unkparam_len)
+ const struct sctp_chunk *chunk,
+ gfp_t gfp, int unkparam_len)
{
+ struct sctp_supported_ext_param ext_param;
+ struct sctp_adaptation_ind_param aiparam;
+ struct sctp_paramhdr *auth_chunks = NULL;
+ struct sctp_paramhdr *auth_random = NULL;
+ struct sctp_paramhdr *auth_hmacs = NULL;
+ struct sctp_chunk *retval = NULL;
+ struct sctp_cookie_param *cookie;
struct sctp_inithdr initack;
- struct sctp_chunk *retval;
union sctp_params addrs;
struct sctp_sock *sp;
- int addrs_len;
- sctp_cookie_param_t *cookie;
- int cookie_len;
+ __u8 extensions[4];
size_t chunksize;
- sctp_adaptation_ind_param_t aiparam;
- sctp_supported_ext_param_t ext_param;
int num_ext = 0;
- __u8 extensions[4];
- struct sctp_paramhdr *auth_chunks = NULL,
- *auth_hmacs = NULL,
- *auth_random = NULL;
-
- retval = NULL;
+ int cookie_len;
+ int addrs_len;
/* Note: there may be no addresses to embed. */
addrs = sctp_bind_addrs_to_raw(&asoc->base.bind_addr, &addrs_len, gfp);
@@ -468,8 +463,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
}
if (num_ext)
- chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
- num_ext);
+ chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext);
/* Now allocate and fill out the chunk. */
retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
@@ -495,10 +489,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
if (num_ext) {
ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT;
- ext_param.param_hdr.length =
- htons(sizeof(sctp_supported_ext_param_t) + num_ext);
- sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t),
- &ext_param);
+ ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext);
+ sctp_addto_chunk(retval, sizeof(ext_param), &ext_param);
sctp_addto_param(retval, num_ext, extensions);
}
if (asoc->peer.prsctp_capable)
@@ -567,11 +559,11 @@ nomem_cookie:
* to insure interoperability.
*/
struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+ const struct sctp_chunk *chunk)
{
struct sctp_chunk *retval;
- void *cookie;
int cookie_len;
+ void *cookie;
cookie = asoc->peer.cookie;
cookie_len = asoc->peer.cookie_len;
@@ -619,7 +611,7 @@ nodata:
* Set to zero on transmit and ignored on receipt.
*/
struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+ const struct sctp_chunk *chunk)
{
struct sctp_chunk *retval;
@@ -664,15 +656,15 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
* Note: The CWR is considered a Control chunk.
*/
struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
- const __u32 lowest_tsn,
- const struct sctp_chunk *chunk)
+ const __u32 lowest_tsn,
+ const struct sctp_chunk *chunk)
{
struct sctp_chunk *retval;
- sctp_cwrhdr_t cwr;
+ struct sctp_cwrhdr cwr;
cwr.lowest_tsn = htonl(lowest_tsn);
retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0,
- sizeof(sctp_cwrhdr_t), GFP_ATOMIC);
+ sizeof(cwr), GFP_ATOMIC);
if (!retval)
goto nodata;
@@ -699,14 +691,14 @@ nodata:
/* Make an ECNE chunk. This is a congestion experienced report. */
struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
- const __u32 lowest_tsn)
+ const __u32 lowest_tsn)
{
struct sctp_chunk *retval;
- sctp_ecnehdr_t ecne;
+ struct sctp_ecnehdr ecne;
ecne.lowest_tsn = htonl(lowest_tsn);
retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0,
- sizeof(sctp_ecnehdr_t), GFP_ATOMIC);
+ sizeof(ecne), GFP_ATOMIC);
if (!retval)
goto nodata;
retval->subh.ecne_hdr =
@@ -720,9 +712,9 @@ nodata:
* parameters. However, do not populate the data payload.
*/
struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
- const struct sctp_sndrcvinfo *sinfo,
- int data_len, __u8 flags, __u16 ssn,
- gfp_t gfp)
+ const struct sctp_sndrcvinfo *sinfo,
+ int data_len, __u8 flags, __u16 ssn,
+ gfp_t gfp)
{
struct sctp_chunk *retval;
struct sctp_datahdr dp;
@@ -760,15 +752,15 @@ nodata:
*/
struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
{
- struct sctp_chunk *retval;
- struct sctp_sackhdr sack;
- int len;
- __u32 ctsn;
- __u16 num_gabs, num_dup_tsns;
- struct sctp_association *aptr = (struct sctp_association *)asoc;
struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
+ struct sctp_association *aptr = (struct sctp_association *)asoc;
struct sctp_gap_ack_block gabs[SCTP_MAX_GABS];
+ __u16 num_gabs, num_dup_tsns;
struct sctp_transport *trans;
+ struct sctp_chunk *retval;
+ struct sctp_sackhdr sack;
+ __u32 ctsn;
+ int len;
memset(gabs, 0, sizeof(gabs));
ctsn = sctp_tsnmap_get_ctsn(map);
@@ -862,15 +854,15 @@ nodata:
struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
const struct sctp_chunk *chunk)
{
+ struct sctp_shutdownhdr shut;
struct sctp_chunk *retval;
- sctp_shutdownhdr_t shut;
__u32 ctsn;
ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
shut.cum_tsn_ack = htonl(ctsn);
retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
- sizeof(sctp_shutdownhdr_t), GFP_ATOMIC);
+ sizeof(shut), GFP_ATOMIC);
if (!retval)
goto nodata;
@@ -884,7 +876,7 @@ nodata:
}
struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+ const struct sctp_chunk *chunk)
{
struct sctp_chunk *retval;
@@ -907,8 +899,8 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
}
struct sctp_chunk *sctp_make_shutdown_complete(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk)
{
struct sctp_chunk *retval;
__u8 flags = 0;
@@ -941,8 +933,8 @@ struct sctp_chunk *sctp_make_shutdown_complete(
* association, except when responding to an INIT (sctpimpguide 2.41).
*/
struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- const size_t hint)
+ const struct sctp_chunk *chunk,
+ const size_t hint)
{
struct sctp_chunk *retval;
__u8 flags = 0;
@@ -978,14 +970,15 @@ struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
/* Helper to create ABORT with a NO_USER_DATA error. */
struct sctp_chunk *sctp_make_abort_no_data(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk, __u32 tsn)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ __u32 tsn)
{
struct sctp_chunk *retval;
__be32 payload;
- retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t)
- + sizeof(tsn));
+ retval = sctp_make_abort(asoc, chunk,
+ sizeof(struct sctp_errhdr) + sizeof(tsn));
if (!retval)
goto no_mem;
@@ -1020,7 +1013,8 @@ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc,
void *payload = NULL;
int err;
- retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen);
+ retval = sctp_make_abort(asoc, NULL,
+ sizeof(struct sctp_errhdr) + paylen);
if (!retval)
goto err_chunk;
@@ -1058,8 +1052,8 @@ err_chunk:
static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
const void *data)
{
- void *target;
int chunklen = ntohs(chunk->chunk_hdr->length);
+ void *target;
target = skb_put(chunk->skb, len);
@@ -1077,16 +1071,16 @@ static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
/* Make an ABORT chunk with a PROTOCOL VIOLATION cause code. */
struct sctp_chunk *sctp_make_abort_violation(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- const __u8 *payload,
- const size_t paylen)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ const __u8 *payload,
+ const size_t paylen)
{
struct sctp_chunk *retval;
struct sctp_paramhdr phdr;
- retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen +
- sizeof(phdr));
+ retval = sctp_make_abort(asoc, chunk, sizeof(struct sctp_errhdr) +
+ paylen + sizeof(phdr));
if (!retval)
goto end;
@@ -1103,14 +1097,14 @@ end:
}
struct sctp_chunk *sctp_make_violation_paramlen(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- struct sctp_paramhdr *param)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ struct sctp_paramhdr *param)
{
- struct sctp_chunk *retval;
static const char error[] = "The following parameter had invalid length:";
- size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) +
+ size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr) +
sizeof(*param);
+ struct sctp_chunk *retval;
retval = sctp_make_abort(asoc, chunk, payload_len);
if (!retval)
@@ -1126,12 +1120,12 @@ nodata:
}
struct sctp_chunk *sctp_make_violation_max_retrans(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk)
{
- struct sctp_chunk *retval;
static const char error[] = "Association exceeded its max_retans count";
- size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t);
+ size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr);
+ struct sctp_chunk *retval;
retval = sctp_make_abort(asoc, chunk, payload_len);
if (!retval)
@@ -1146,10 +1140,10 @@ nodata:
/* Make a HEARTBEAT chunk. */
struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
- const struct sctp_transport *transport)
+ const struct sctp_transport *transport)
{
+ struct sctp_sender_hb_info hbinfo;
struct sctp_chunk *retval;
- sctp_sender_hb_info_t hbinfo;
retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0,
sizeof(hbinfo), GFP_ATOMIC);
@@ -1158,7 +1152,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
goto nodata;
hbinfo.param_hdr.type = SCTP_PARAM_HEARTBEAT_INFO;
- hbinfo.param_hdr.length = htons(sizeof(sctp_sender_hb_info_t));
+ hbinfo.param_hdr.length = htons(sizeof(hbinfo));
hbinfo.daddr = transport->ipaddr;
hbinfo.sent_at = jiffies;
hbinfo.hb_nonce = transport->hb_nonce;
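Replacing sizeof(sctp_sender_hb_info_t) with sizeof(hbinfo) is more than cosmetic: taking the size of the local object ties the length field to the declaration, so a later rename of the type cannot silently desynchronize the two. A minimal illustration of the pattern:

	struct sctp_sender_hb_info hbinfo;

	/* Follows the declaration automatically across type renames. */
	hbinfo.param_hdr.length = htons(sizeof(hbinfo));

	/* Must be edited by hand whenever the type name changes. */
	hbinfo.param_hdr.length = htons(sizeof(struct sctp_sender_hb_info));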
@@ -1175,8 +1169,9 @@ nodata:
}
struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- const void *payload, const size_t paylen)
+ const struct sctp_chunk *chunk,
+ const void *payload,
+ const size_t paylen)
{
struct sctp_chunk *retval;
@@ -1207,14 +1202,15 @@ nodata:
* This routine can be used to carry multiple error causes in the chunk.
*/
static struct sctp_chunk *sctp_make_op_error_space(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- size_t size)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk,
+ size_t size)
{
struct sctp_chunk *retval;
retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0,
- sizeof(sctp_errhdr_t) + size, GFP_ATOMIC);
+ sizeof(struct sctp_errhdr) + size,
+ GFP_ATOMIC);
if (!retval)
goto nodata;
@@ -1240,8 +1236,8 @@ nodata:
* to report all the errors, if the incoming chunk is large
*/
static inline struct sctp_chunk *sctp_make_op_error_fixed(
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk)
{
size_t size = asoc ? asoc->pathmtu : 0;
@@ -1253,9 +1249,9 @@ static inline struct sctp_chunk *sctp_make_op_error_fixed(
/* Create an Operation Error chunk. */
struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc,
- const struct sctp_chunk *chunk,
- __be16 cause_code, const void *payload,
- size_t paylen, size_t reserve_tail)
+ const struct sctp_chunk *chunk,
+ __be16 cause_code, const void *payload,
+ size_t paylen, size_t reserve_tail)
{
struct sctp_chunk *retval;
@@ -1274,9 +1270,9 @@ nodata:
struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
{
- struct sctp_chunk *retval;
- struct sctp_hmac *hmac_desc;
struct sctp_authhdr auth_hdr;
+ struct sctp_hmac *hmac_desc;
+ struct sctp_chunk *retval;
__u8 *hmac;
/* Get the first hmac that the peer told us to use */
@@ -1285,16 +1281,16 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
return NULL;
retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0,
- hmac_desc->hmac_len + sizeof(sctp_authhdr_t),
- GFP_ATOMIC);
+ hmac_desc->hmac_len + sizeof(auth_hdr),
+ GFP_ATOMIC);
if (!retval)
return NULL;
auth_hdr.hmac_id = htons(hmac_desc->hmac_id);
auth_hdr.shkey_id = htons(asoc->active_key_id);
- retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(sctp_authhdr_t),
- &auth_hdr);
+ retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr),
+ &auth_hdr);
hmac = skb_put_zero(retval->skb, hmac_desc->hmac_len);
@@ -1322,8 +1318,8 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
*
*/
struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
- const struct sctp_association *asoc,
- struct sock *sk, gfp_t gfp)
+ const struct sctp_association *asoc,
+ struct sock *sk, gfp_t gfp)
{
struct sctp_chunk *retval;
@@ -1375,11 +1371,11 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk)
* arguments, reserving enough space for a 'paylen' byte payload.
*/
static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
- __u8 type, __u8 flags, int paylen,
- gfp_t gfp)
+ __u8 type, __u8 flags, int paylen,
+ gfp_t gfp)
{
- struct sctp_chunk *retval;
struct sctp_chunkhdr *chunk_hdr;
+ struct sctp_chunk *retval;
struct sk_buff *skb;
struct sock *sk;
@@ -1473,9 +1469,9 @@ void sctp_chunk_put(struct sctp_chunk *ch)
*/
void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
{
- void *target;
int chunklen = ntohs(chunk->chunk_hdr->length);
int padlen = SCTP_PAD4(chunklen) - chunklen;
+ void *target;
skb_put_zero(chunk->skb, padlen);
target = skb_put_data(chunk->skb, data, len);
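sctp_addto_chunk() zero-pads the existing chunk to a 4-byte boundary before appending, because SCTP chunks and parameters are always 32-bit aligned on the wire. SCTP_PAD4 is the usual round-up-to-a-multiple-of-four; a self-contained sketch of the arithmetic:

	/* Equivalent to the kernel's SCTP_PAD4(): round up to 4 bytes. */
	#define PAD4(len) (((len) + 3) & ~3)

	int chunklen = 17;
	int padlen = PAD4(chunklen) - chunklen;	/* 20 - 17 = 3 pad bytes */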
@@ -1528,11 +1524,10 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len,
*/
void sctp_chunk_assign_ssn(struct sctp_chunk *chunk)
{
- struct sctp_datamsg *msg;
- struct sctp_chunk *lchunk;
struct sctp_stream *stream;
- __u16 ssn;
- __u16 sid;
+ struct sctp_chunk *lchunk;
+ struct sctp_datamsg *msg;
+ __u16 ssn, sid;
if (chunk->has_ssn)
return;
@@ -1577,12 +1572,12 @@ void sctp_chunk_assign_tsn(struct sctp_chunk *chunk)
/* Create a CLOSED association to use with an incoming packet. */
struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
- struct sctp_chunk *chunk,
- gfp_t gfp)
+ struct sctp_chunk *chunk,
+ gfp_t gfp)
{
struct sctp_association *asoc;
+ enum sctp_scope scope;
struct sk_buff *skb;
- sctp_scope_t scope;
/* Create the bare association. */
scope = sctp_scope(sctp_source(chunk));
@@ -1601,14 +1596,15 @@ nodata:
/* Build a cookie representing asoc.
* This INCLUDES the param header needed to put the cookie in the INIT ACK.
*/
-static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const struct sctp_chunk *init_chunk,
- int *cookie_len,
- const __u8 *raw_addrs, int addrs_len)
+static struct sctp_cookie_param *sctp_pack_cookie(
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *init_chunk,
+ int *cookie_len, const __u8 *raw_addrs,
+ int addrs_len)
{
- sctp_cookie_param_t *retval;
struct sctp_signed_cookie *cookie;
+ struct sctp_cookie_param *retval;
int headersize, bodysize;
/* Header size is static data prior to the actual cookie, including
@@ -1692,19 +1688,19 @@ nodata:
/* Unpack the cookie from COOKIE ECHO chunk, recreating the association. */
struct sctp_association *sctp_unpack_cookie(
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- struct sctp_chunk *chunk, gfp_t gfp,
- int *error, struct sctp_chunk **errp)
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ struct sctp_chunk *chunk, gfp_t gfp,
+ int *error, struct sctp_chunk **errp)
{
struct sctp_association *retval = NULL;
+ int headersize, bodysize, fixed_size;
struct sctp_signed_cookie *cookie;
+ struct sk_buff *skb = chunk->skb;
struct sctp_cookie *bear_cookie;
- int headersize, bodysize, fixed_size;
__u8 *digest = ep->digest;
+ enum sctp_scope scope;
unsigned int len;
- sctp_scope_t scope;
- struct sk_buff *skb = chunk->skb;
ktime_t kt;
/* Header size is static data prior to the actual cookie, including
@@ -1976,8 +1972,8 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
static int sctp_verify_ext_param(struct net *net, union sctp_params param)
{
__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
- int have_auth = 0;
int have_asconf = 0;
+ int have_auth = 0;
int i;
for (i = 0; i < num_ext; i++) {
@@ -2007,10 +2003,10 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param)
}
static void sctp_process_ext_param(struct sctp_association *asoc,
- union sctp_params param)
+ union sctp_params param)
{
- struct net *net = sock_net(asoc->base.sk);
__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
+ struct net *net = sock_net(asoc->base.sk);
int i;
for (i = 0; i < num_ext; i++) {
@@ -2067,10 +2063,11 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
* SCTP_IERROR_ERROR - stop and report an error.
* SCTP_IERROR_NOMEM - out of memory.
*/
-static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
- union sctp_params param,
- struct sctp_chunk *chunk,
- struct sctp_chunk **errp)
+static enum sctp_ierror sctp_process_unk_param(
+ const struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_chunk *chunk,
+ struct sctp_chunk **errp)
{
int retval = SCTP_IERROR_NO_ERROR;
@@ -2119,13 +2116,13 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
* SCTP_IERROR_ERROR - stop processing, trigger an ERROR
* SCTP_IERROR_NO_ERROR - continue with the chunk
*/
-static sctp_ierror_t sctp_verify_param(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- union sctp_params param,
- enum sctp_cid cid,
- struct sctp_chunk *chunk,
- struct sctp_chunk **err_chunk)
+static enum sctp_ierror sctp_verify_param(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ union sctp_params param,
+ enum sctp_cid cid,
+ struct sctp_chunk *chunk,
+ struct sctp_chunk **err_chunk)
{
struct sctp_hmac_algo_param *hmacs;
int retval = SCTP_IERROR_NO_ERROR;
@@ -2310,13 +2307,13 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
struct sctp_init_chunk *peer_init, gfp_t gfp)
{
struct net *net = sock_net(asoc->base.sk);
- union sctp_params param;
struct sctp_transport *transport;
struct list_head *pos, *temp;
- struct sctp_af *af;
+ union sctp_params param;
union sctp_addr addr;
- char *cookie;
+ struct sctp_af *af;
int src_match = 0;
+ char *cookie;
/* We must include the address that the INIT packet came from.
* This is the only address that matters for an INIT packet.
@@ -2500,16 +2497,15 @@ static int sctp_process_param(struct sctp_association *asoc,
gfp_t gfp)
{
struct net *net = sock_net(asoc->base.sk);
- union sctp_addr addr;
- int i;
- __u16 sat;
- int retval = 1;
- sctp_scope_t scope;
- u32 stale;
- struct sctp_af *af;
+ struct sctp_endpoint *ep = asoc->ep;
union sctp_addr_param *addr_param;
struct sctp_transport *t;
- struct sctp_endpoint *ep = asoc->ep;
+ enum sctp_scope scope;
+ union sctp_addr addr;
+ struct sctp_af *af;
+ int retval = 1, i;
+ u32 stale;
+ __u16 sat;
/* We maintain all INIT parameters in network byte order all the
* time. This allows us to not worry about whether the parameters
@@ -2617,7 +2613,7 @@ do_addr_param:
if (!net->sctp.addip_enable)
goto fall_through;
- addr_param = param.v + sizeof(sctp_addip_param_t);
+ addr_param = param.v + sizeof(struct sctp_addip_param);
af = sctp_get_af_specific(param_type2af(addr_param->p.type));
if (af == NULL)
@@ -2754,7 +2750,7 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
union sctp_addr *addr,
int vparam_len)
{
- sctp_addiphdr_t asconf;
+ struct sctp_addiphdr asconf;
struct sctp_chunk *retval;
int length = sizeof(asconf) + vparam_len;
union sctp_addr_param addrparam;
@@ -2807,22 +2803,20 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
*
*/
struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
- union sctp_addr *laddr,
- struct sockaddr *addrs,
- int addrcnt,
- __be16 flags)
-{
- sctp_addip_param_t param;
- struct sctp_chunk *retval;
- union sctp_addr_param addr_param;
- union sctp_addr *addr;
- void *addr_buf;
- struct sctp_af *af;
- int paramlen = sizeof(param);
- int addr_param_len = 0;
- int totallen = 0;
- int i;
- int del_pickup = 0;
+ union sctp_addr *laddr,
+ struct sockaddr *addrs,
+ int addrcnt, __be16 flags)
+{
+ union sctp_addr_param addr_param;
+ struct sctp_addip_param param;
+ int paramlen = sizeof(param);
+ struct sctp_chunk *retval;
+ int addr_param_len = 0;
+ union sctp_addr *addr;
+ int totallen = 0, i;
+ int del_pickup = 0;
+ struct sctp_af *af;
+ void *addr_buf;
/* Get total length of all the address parameters. */
addr_buf = addrs;
@@ -2898,12 +2892,12 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
union sctp_addr *addr)
{
- sctp_addip_param_t param;
- struct sctp_chunk *retval;
- int len = sizeof(param);
- union sctp_addr_param addrparam;
- int addrlen;
- struct sctp_af *af = sctp_get_af_specific(addr->v4.sin_family);
+ struct sctp_af *af = sctp_get_af_specific(addr->v4.sin_family);
+ union sctp_addr_param addrparam;
+ struct sctp_addip_param param;
+ struct sctp_chunk *retval;
+ int len = sizeof(param);
+ int addrlen;
addrlen = af->to_addr_param(addr, &addrparam);
if (!addrlen)
@@ -2947,9 +2941,9 @@ struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *asoc,
__u32 serial, int vparam_len)
{
- sctp_addiphdr_t asconf;
- struct sctp_chunk *retval;
- int length = sizeof(asconf) + vparam_len;
+ struct sctp_addiphdr asconf;
+ struct sctp_chunk *retval;
+ int length = sizeof(asconf) + vparam_len;
/* Create the chunk. */
retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length,
@@ -2967,13 +2961,14 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as
/* Add response parameters to an ASCONF_ACK chunk. */
static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id,
- __be16 err_code, sctp_addip_param_t *asconf_param)
+ __be16 err_code,
+ struct sctp_addip_param *asconf_param)
{
- sctp_addip_param_t ack_param;
- sctp_errhdr_t err_param;
- int asconf_param_len = 0;
- int err_param_len = 0;
- __be16 response_type;
+ struct sctp_addip_param ack_param;
+ struct sctp_errhdr err_param;
+ int asconf_param_len = 0;
+ int err_param_len = 0;
+ __be16 response_type;
if (SCTP_ERROR_NO_ERROR == err_code) {
response_type = SCTP_PARAM_SUCCESS_REPORT;
@@ -3008,15 +3003,15 @@ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id,
/* Process an asconf parameter. */
static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
- struct sctp_chunk *asconf,
- sctp_addip_param_t *asconf_param)
+ struct sctp_chunk *asconf,
+ struct sctp_addip_param *asconf_param)
{
+ union sctp_addr_param *addr_param;
struct sctp_transport *peer;
- struct sctp_af *af;
union sctp_addr addr;
- union sctp_addr_param *addr_param;
+ struct sctp_af *af;
- addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t);
+ addr_param = (void *)asconf_param + sizeof(*asconf_param);
if (asconf_param->param_hdr.type != SCTP_PARAM_ADD_IP &&
asconf_param->param_hdr.type != SCTP_PARAM_DEL_IP &&
@@ -3141,10 +3136,11 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
struct sctp_chunk *chunk, bool addr_param_needed,
struct sctp_paramhdr **errp)
{
- sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr;
- union sctp_params param;
+ struct sctp_addip_chunk *addip;
bool addr_param_seen = false;
+ union sctp_params param;
+ addip = (struct sctp_addip_chunk *)chunk->chunk_hdr;
sctp_walk_params(param, addip, addip_hdr.params) {
size_t length = ntohs(param.p->length);
@@ -3153,7 +3149,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
case SCTP_PARAM_ERR_CAUSE:
break;
case SCTP_PARAM_IPV4_ADDRESS:
- if (length != sizeof(sctp_ipv4addr_param_t))
+ if (length != sizeof(struct sctp_ipv4addr_param))
return false;
/* ensure there is only one addr param and it's in the
* beginning of addip_hdr params, or we reject it.
@@ -3163,7 +3159,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
addr_param_seen = true;
break;
case SCTP_PARAM_IPV6_ADDRESS:
- if (length != sizeof(sctp_ipv6addr_param_t))
+ if (length != sizeof(struct sctp_ipv6addr_param))
return false;
if (param.v != addip->addip_hdr.params)
return false;
@@ -3176,13 +3172,13 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
if (addr_param_needed && !addr_param_seen)
return false;
length = ntohs(param.addip->param_hdr.length);
- if (length < sizeof(sctp_addip_param_t) +
+ if (length < sizeof(struct sctp_addip_param) +
sizeof(**errp))
return false;
break;
case SCTP_PARAM_SUCCESS_REPORT:
case SCTP_PARAM_ADAPTATION_LAYER_IND:
- if (length != sizeof(sctp_addip_param_t))
+ if (length != sizeof(struct sctp_addip_param))
return false;
break;
default:
@@ -3208,24 +3204,24 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
struct sctp_chunk *asconf)
{
- sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr;
+ union sctp_addr_param *addr_param;
+ struct sctp_addip_chunk *addip;
+ struct sctp_chunk *asconf_ack;
bool all_param_pass = true;
+ struct sctp_addiphdr *hdr;
+ int length = 0, chunk_len;
union sctp_params param;
- sctp_addiphdr_t *hdr;
- union sctp_addr_param *addr_param;
- struct sctp_chunk *asconf_ack;
- __be16 err_code;
- int length = 0;
- int chunk_len;
- __u32 serial;
+ __be16 err_code;
+ __u32 serial;
+ addip = (struct sctp_addip_chunk *)asconf->chunk_hdr;
chunk_len = ntohs(asconf->chunk_hdr->length) -
sizeof(struct sctp_chunkhdr);
- hdr = (sctp_addiphdr_t *)asconf->skb->data;
+ hdr = (struct sctp_addiphdr *)asconf->skb->data;
serial = ntohl(hdr->serial);
/* Skip the addiphdr and store a pointer to the address parameter. */
- length = sizeof(sctp_addiphdr_t);
+ length = sizeof(*hdr);
addr_param = (union sctp_addr_param *)(asconf->skb->data + length);
chunk_len -= length;
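sctp_process_asconf() is a standard TLV walk: strip the fixed addip header, latch the serial number, then step through the variable-length parameters until the chunk is consumed. A sketch of the walk (error handling omitted; an outline, not the kernel code itself):

	unsigned char *p = asconf->skb->data;
	struct sctp_addiphdr *hdr = (struct sctp_addiphdr *)p;
	__u32 serial = ntohl(hdr->serial);
	int remaining = chunk_len;

	p += sizeof(*hdr);
	remaining -= sizeof(*hdr);

	while (remaining > 0) {
		struct sctp_paramhdr *param = (struct sctp_paramhdr *)p;
		int plen = SCTP_PAD4(ntohs(param->length));

		/* ...dispatch on param->type, build the ACK response... */
		p += plen;
		remaining -= plen;
	}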
@@ -3291,16 +3287,16 @@ done:
/* Process an asconf parameter that was successfully acked. */
static void sctp_asconf_param_success(struct sctp_association *asoc,
- sctp_addip_param_t *asconf_param)
+ struct sctp_addip_param *asconf_param)
{
- struct sctp_af *af;
- union sctp_addr addr;
struct sctp_bind_addr *bp = &asoc->base.bind_addr;
union sctp_addr_param *addr_param;
- struct sctp_transport *transport;
struct sctp_sockaddr_entry *saddr;
+ struct sctp_transport *transport;
+ union sctp_addr addr;
+ struct sctp_af *af;
- addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t);
+ addr_param = (void *)asconf_param + sizeof(*asconf_param);
/* We have checked the packet before, so we do not check again. */
af = sctp_get_af_specific(param_type2af(addr_param->p.type));
@@ -3351,14 +3347,14 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
* specific success indication is present for the parameter.
*/
static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
- sctp_addip_param_t *asconf_param,
- int no_err)
+ struct sctp_addip_param *asconf_param,
+ int no_err)
{
- sctp_addip_param_t *asconf_ack_param;
- sctp_errhdr_t *err_param;
- int length;
- int asconf_ack_len;
- __be16 err_code;
+ struct sctp_addip_param *asconf_ack_param;
+ struct sctp_errhdr *err_param;
+ int asconf_ack_len;
+ __be16 err_code;
+ int length;
if (no_err)
err_code = SCTP_ERROR_NO_ERROR;
@@ -3371,9 +3367,9 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
/* Skip the addiphdr from the asconf_ack chunk and store a pointer to
* the first asconf_ack parameter.
*/
- length = sizeof(sctp_addiphdr_t);
- asconf_ack_param = (sctp_addip_param_t *)(asconf_ack->skb->data +
- length);
+ length = sizeof(struct sctp_addiphdr);
+ asconf_ack_param = (struct sctp_addip_param *)(asconf_ack->skb->data +
+ length);
asconf_ack_len -= length;
while (asconf_ack_len > 0) {
@@ -3382,7 +3378,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
case SCTP_PARAM_SUCCESS_REPORT:
return SCTP_ERROR_NO_ERROR;
case SCTP_PARAM_ERR_CAUSE:
- length = sizeof(sctp_addip_param_t);
+ length = sizeof(*asconf_ack_param);
err_param = (void *)asconf_ack_param + length;
asconf_ack_len -= length;
if (asconf_ack_len > 0)
@@ -3407,20 +3403,20 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
int sctp_process_asconf_ack(struct sctp_association *asoc,
struct sctp_chunk *asconf_ack)
{
- struct sctp_chunk *asconf = asoc->addip_last_asconf;
- union sctp_addr_param *addr_param;
- sctp_addip_param_t *asconf_param;
- int length = 0;
- int asconf_len = asconf->skb->len;
- int all_param_pass = 0;
- int no_err = 1;
- int retval = 0;
- __be16 err_code = SCTP_ERROR_NO_ERROR;
+ struct sctp_chunk *asconf = asoc->addip_last_asconf;
+ struct sctp_addip_param *asconf_param;
+ __be16 err_code = SCTP_ERROR_NO_ERROR;
+ union sctp_addr_param *addr_param;
+ int asconf_len = asconf->skb->len;
+ int all_param_pass = 0;
+ int length = 0;
+ int no_err = 1;
+ int retval = 0;
/* Skip the chunkhdr and addiphdr from the last asconf sent and store
* a pointer to the address parameter.
*/
- length = sizeof(sctp_addip_chunk_t);
+ length = sizeof(struct sctp_addip_chunk);
addr_param = (union sctp_addr_param *)(asconf->skb->data + length);
asconf_len -= length;
@@ -3436,7 +3432,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
* failures are indicated, then all request(s) are considered
* successful.
*/
- if (asconf_ack->skb->len == sizeof(sctp_addiphdr_t))
+ if (asconf_ack->skb->len == sizeof(struct sctp_addiphdr))
all_param_pass = 1;
/* Process the TLVs contained in the last sent ASCONF chunk. */
@@ -3542,9 +3538,8 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
* \ \
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
-static struct sctp_chunk *sctp_make_reconf(
- const struct sctp_association *asoc,
- int length)
+static struct sctp_chunk *sctp_make_reconf(const struct sctp_association *asoc,
+ int length)
{
struct sctp_reconf_chunk *reconf;
struct sctp_chunk *retval;
@@ -3595,9 +3590,9 @@ static struct sctp_chunk *sctp_make_reconf(
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
struct sctp_chunk *sctp_make_strreset_req(
- const struct sctp_association *asoc,
- __u16 stream_num, __u16 *stream_list,
- bool out, bool in)
+ const struct sctp_association *asoc,
+ __u16 stream_num, __u16 *stream_list,
+ bool out, bool in)
{
struct sctp_strreset_outreq outreq;
__u16 stream_len = stream_num * 2;
@@ -3649,7 +3644,7 @@ struct sctp_chunk *sctp_make_strreset_req(
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
struct sctp_chunk *sctp_make_strreset_tsnreq(
- const struct sctp_association *asoc)
+ const struct sctp_association *asoc)
{
struct sctp_strreset_tsnreq tsnreq;
__u16 length = sizeof(tsnreq);
@@ -3680,8 +3675,8 @@ struct sctp_chunk *sctp_make_strreset_tsnreq(
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
struct sctp_chunk *sctp_make_strreset_addstrm(
- const struct sctp_association *asoc,
- __u16 out, __u16 in)
+ const struct sctp_association *asoc,
+ __u16 out, __u16 in)
{
struct sctp_strreset_addstrm addstrm;
__u16 size = sizeof(addstrm);
@@ -3725,9 +3720,8 @@ struct sctp_chunk *sctp_make_strreset_addstrm(
* | Result |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
-struct sctp_chunk *sctp_make_strreset_resp(
- const struct sctp_association *asoc,
- __u32 result, __u32 sn)
+struct sctp_chunk *sctp_make_strreset_resp(const struct sctp_association *asoc,
+ __u32 result, __u32 sn)
{
struct sctp_strreset_resp resp;
__u16 length = sizeof(resp);
@@ -3762,10 +3756,10 @@ struct sctp_chunk *sctp_make_strreset_resp(
* | Receiver's Next TSN (optional) |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
-struct sctp_chunk *sctp_make_strreset_tsnresp(
- struct sctp_association *asoc,
- __u32 result, __u32 sn,
- __u32 sender_tsn, __u32 receiver_tsn)
+struct sctp_chunk *sctp_make_strreset_tsnresp(struct sctp_association *asoc,
+ __u32 result, __u32 sn,
+ __u32 sender_tsn,
+ __u32 receiver_tsn)
{
struct sctp_strreset_resptsn tsnresp;
__u16 length = sizeof(tsnresp);
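Nearly everything above in sm_make_chunk.c is one mechanical conversion: the old sctp_*_t typedefs are dropped in favor of bare struct and enum tags, in line with the kernel coding-style advice against typedeffing structures. The shape of the change, on a made-up header type:

	/* Before: the typedef hides that callers are handling a struct. */
	typedef struct sctp_foohdr {
		__be16 type;
		__be16 length;
	} sctp_foohdr_t;

	/* After: the tag is spelled out at every declaration and sizeof. */
	struct sctp_foohdr {
		__be16 type;
		__be16 length;
	};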
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index d6e5e9e..e6a2974 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -51,22 +51,23 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
-static int sctp_cmd_interpreter(sctp_event_t event_type,
- sctp_subtype_t subtype,
- sctp_state_t state,
+static int sctp_cmd_interpreter(enum sctp_event event_type,
+ union sctp_subtype subtype,
+ enum sctp_state state,
struct sctp_endpoint *ep,
struct sctp_association *asoc,
void *event_arg,
- sctp_disposition_t status,
- sctp_cmd_seq_t *commands,
+ enum sctp_disposition status,
+ struct sctp_cmd_seq *commands,
gfp_t gfp);
-static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
- sctp_state_t state,
+static int sctp_side_effects(enum sctp_event event_type,
+ union sctp_subtype subtype,
+ enum sctp_state state,
struct sctp_endpoint *ep,
struct sctp_association **asoc,
void *event_arg,
- sctp_disposition_t status,
- sctp_cmd_seq_t *commands,
+ enum sctp_disposition status,
+ struct sctp_cmd_seq *commands,
gfp_t gfp);
/********************************************************************
@@ -96,8 +97,8 @@ static void sctp_do_ecn_ce_work(struct sctp_association *asoc,
* that was originally marked with the CE bit.
*/
static struct sctp_chunk *sctp_do_ecn_ecne_work(struct sctp_association *asoc,
- __u32 lowest_tsn,
- struct sctp_chunk *chunk)
+ __u32 lowest_tsn,
+ struct sctp_chunk *chunk)
{
struct sctp_chunk *repl;
@@ -149,11 +150,11 @@ static void sctp_do_ecn_cwr_work(struct sctp_association *asoc,
/* Generate SACK if necessary. We call this at the end of a packet. */
static int sctp_gen_sack(struct sctp_association *asoc, int force,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
+ struct sctp_transport *trans = asoc->peer.last_data_from;
__u32 ctsn, max_tsn_seen;
struct sctp_chunk *sack;
- struct sctp_transport *trans = asoc->peer.last_data_from;
int error = 0;
if (force ||
@@ -243,11 +244,11 @@ nomem:
*/
void sctp_generate_t3_rtx_event(unsigned long peer)
{
- int error;
struct sctp_transport *transport = (struct sctp_transport *) peer;
struct sctp_association *asoc = transport->asoc;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
+ int error;
/* Check whether a task is in the sock. */
@@ -280,7 +281,7 @@ out_unlock:
* for timeouts which use the association as their parameter.
*/
static void sctp_generate_timeout_event(struct sctp_association *asoc,
- sctp_event_timeout_t timeout_type)
+ enum sctp_event_timeout timeout_type)
{
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
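These handlers use the old-style kernel timer calling convention visible throughout this file: the callback receives an opaque unsigned long and casts it back to the object that was stashed when the timer was armed. A sketch of that convention (my_timeout is a hypothetical name; the real handlers are the sctp_generate_*_event functions in these hunks):

	static void my_timeout(unsigned long data)
	{
		struct sctp_transport *t = (struct sctp_transport *)data;

		/* take the socket lock, inject the timeout event, rearm */
	}

	/* Armed elsewhere with the pointer squeezed into the cookie: */
	setup_timer(&t->hb_timer, my_timeout, (unsigned long)t);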
@@ -360,12 +361,12 @@ static void sctp_generate_autoclose_event(unsigned long data)
*/
void sctp_generate_heartbeat_event(unsigned long data)
{
- int error = 0;
struct sctp_transport *transport = (struct sctp_transport *) data;
struct sctp_association *asoc = transport->asoc;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
u32 elapsed, timeout;
+ int error = 0;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
@@ -405,7 +406,7 @@ out_unlock:
*/
void sctp_generate_proto_unreach_event(unsigned long data)
{
- struct sctp_transport *transport = (struct sctp_transport *) data;
+ struct sctp_transport *transport = (struct sctp_transport *)data;
struct sctp_association *asoc = transport->asoc;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
@@ -471,7 +472,7 @@ out_unlock:
/* Inject a SACK Timeout event into the state machine. */
static void sctp_generate_sack_event(unsigned long data)
{
- struct sctp_association *asoc = (struct sctp_association *) data;
+ struct sctp_association *asoc = (struct sctp_association *)data;
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK);
}
@@ -505,7 +506,7 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
* notification SHOULD be sent to the upper layer.
*
*/
-static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
+static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands,
struct sctp_association *asoc,
struct sctp_transport *transport,
int is_hb)
@@ -577,7 +578,7 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
}
/* Worker routine to handle INIT command failure. */
-static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
+static void sctp_cmd_init_failed(struct sctp_cmd_seq *commands,
struct sctp_association *asoc,
unsigned int error)
{
@@ -600,15 +601,16 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
}
/* Worker routine to handle SCTP_CMD_ASSOC_FAILED. */
-static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
+static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands,
struct sctp_association *asoc,
- sctp_event_t event_type,
- sctp_subtype_t subtype,
+ enum sctp_event event_type,
+ union sctp_subtype subtype,
struct sctp_chunk *chunk,
unsigned int error)
{
struct sctp_ulpevent *event;
struct sctp_chunk *abort;
+
/* Cancel any partial delivery in progress. */
sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
@@ -644,7 +646,7 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
* since all other cases use "temporary" associations and can do all
* their work in statefuns directly.
*/
-static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
+static int sctp_cmd_process_init(struct sctp_cmd_seq *commands,
struct sctp_association *asoc,
struct sctp_chunk *chunk,
struct sctp_init_chunk *peer_init,
@@ -666,7 +668,7 @@ static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
}
/* Helper function to break out starting up of heartbeat timers. */
-static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_hb_timers_start(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc)
{
struct sctp_transport *t;
@@ -679,7 +681,7 @@ static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds,
sctp_transport_reset_hb_timer(t);
}
-static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_hb_timers_stop(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc)
{
struct sctp_transport *t;
@@ -694,7 +696,7 @@ static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds,
}
/* Helper function to stop any pending T3-RTX timers */
-static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_t3_rtx_timers_stop(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc)
{
struct sctp_transport *t;
@@ -708,12 +710,12 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds,
/* Helper function to handle the reception of an HEARTBEAT ACK. */
-static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_transport_on(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
struct sctp_transport *t,
struct sctp_chunk *chunk)
{
- sctp_sender_hb_info_t *hbinfo;
+ struct sctp_sender_hb_info *hbinfo;
int was_unconfirmed = 0;
/* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the
@@ -767,7 +769,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
if (t->rto_pending == 0)
t->rto_pending = 1;
- hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
+ hbinfo = (struct sctp_sender_hb_info *)chunk->skb->data;
sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at));
/* Update the heartbeat timer. */
@@ -779,7 +781,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
/* Helper function to process the process SACK command. */
-static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
+static int sctp_cmd_process_sack(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
struct sctp_chunk *chunk)
{
@@ -801,7 +803,7 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
/* Helper function to set the timeout value for T2-SHUTDOWN timer and to set
* the transport for a shutdown chunk.
*/
-static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_setup_t2(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
struct sctp_chunk *chunk)
{
@@ -818,7 +820,7 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
}
-static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_assoc_update(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
struct sctp_association *new)
{
@@ -828,7 +830,7 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
if (!sctp_assoc_update(asoc, new))
return;
- abort = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t));
+ abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr));
if (abort) {
sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
@@ -841,9 +843,9 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds,
}
/* Helper function to change the state of an association. */
-static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
- sctp_state_t state)
+ enum sctp_state state)
{
struct sock *sk = asoc->base.sk;
@@ -901,7 +903,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds,
}
/* Helper function to delete an association. */
-static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_delete_tcb(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc)
{
struct sock *sk = asoc->base.sk;
@@ -923,9 +925,9 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds,
* destination address (we use active path instead of primary path just
* because the primary path may be inactive).
*/
-static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds,
- struct sctp_association *asoc,
- struct sctp_chunk *chunk)
+static void sctp_cmd_setup_t4(struct sctp_cmd_seq *cmds,
+ struct sctp_association *asoc,
+ struct sctp_chunk *chunk)
{
struct sctp_transport *t;
@@ -935,7 +937,7 @@ static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds,
}
/* Process an incoming Operation Error Chunk. */
-static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds,
+static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
struct sctp_chunk *chunk)
{
@@ -990,6 +992,7 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
struct sctp_chunk *chunk)
{
struct sctp_fwdtsn_skip *skip;
+
/* Walk through all the skipped SSNs */
sctp_walk_fwdtsn(skip, chunk) {
sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
@@ -1002,8 +1005,8 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
static void sctp_cmd_del_non_primary(struct sctp_association *asoc)
{
struct sctp_transport *t;
- struct list_head *pos;
struct list_head *temp;
+ struct list_head *pos;
list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) {
t = list_entry(pos, struct sctp_transport, transports);
@@ -1024,9 +1027,9 @@ static void sctp_cmd_set_sk_err(struct sctp_association *asoc, int error)
}
/* Helper function to generate an association change event */
-static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands,
- struct sctp_association *asoc,
- u8 state)
+static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands,
+ struct sctp_association *asoc,
+ u8 state)
{
struct sctp_ulpevent *ev;
@@ -1039,7 +1042,7 @@ static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands,
}
/* Helper function to generate an adaptation indication event */
-static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands,
+static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands,
struct sctp_association *asoc)
{
struct sctp_ulpevent *ev;
@@ -1052,8 +1055,8 @@ static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands,
static void sctp_cmd_t1_timer_update(struct sctp_association *asoc,
- sctp_event_timeout_t timer,
- char *name)
+ enum sctp_event_timeout timer,
+ char *name)
{
struct sctp_transport *t;
@@ -1139,22 +1142,20 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
* If you want to understand all of lksctp, this is a
* good place to start.
*/
-int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
- sctp_state_t state,
- struct sctp_endpoint *ep,
- struct sctp_association *asoc,
- void *event_arg,
- gfp_t gfp)
+int sctp_do_sm(struct net *net, enum sctp_event event_type,
+ union sctp_subtype subtype, enum sctp_state state,
+ struct sctp_endpoint *ep, struct sctp_association *asoc,
+ void *event_arg, gfp_t gfp)
{
- sctp_cmd_seq_t commands;
- const sctp_sm_table_entry_t *state_fn;
- sctp_disposition_t status;
- int error = 0;
- typedef const char *(printfn_t)(sctp_subtype_t);
+ typedef const char *(printfn_t)(union sctp_subtype);
static printfn_t *table[] = {
NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname,
};
printfn_t *debug_fn __attribute__ ((unused)) = table[event_type];
+ const struct sctp_sm_table_entry *state_fn;
+ struct sctp_cmd_seq commands;
+ enum sctp_disposition status;
+ int error = 0;
/* Look up the state function, run it, and then process the
* side effects. These three steps are the heart of lksctp.
@@ -1178,13 +1179,14 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
/*****************************************************************
* This is the master state function side effect processing function.
*****************************************************************/
-static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
- sctp_state_t state,
+static int sctp_side_effects(enum sctp_event event_type,
+ union sctp_subtype subtype,
+ enum sctp_state state,
struct sctp_endpoint *ep,
struct sctp_association **asoc,
void *event_arg,
- sctp_disposition_t status,
- sctp_cmd_seq_t *commands,
+ enum sctp_disposition status,
+ struct sctp_cmd_seq *commands,
gfp_t gfp)
{
int error;
@@ -1263,29 +1265,27 @@ bail:
********************************************************************/
/* This is the side-effect interpreter. */
-static int sctp_cmd_interpreter(sctp_event_t event_type,
- sctp_subtype_t subtype,
- sctp_state_t state,
+static int sctp_cmd_interpreter(enum sctp_event event_type,
+ union sctp_subtype subtype,
+ enum sctp_state state,
struct sctp_endpoint *ep,
struct sctp_association *asoc,
void *event_arg,
- sctp_disposition_t status,
- sctp_cmd_seq_t *commands,
+ enum sctp_disposition status,
+ struct sctp_cmd_seq *commands,
gfp_t gfp)
{
- struct sock *sk = ep->base.sk;
- struct sctp_sock *sp = sctp_sk(sk);
- int error = 0;
- int force;
- sctp_cmd_t *cmd;
- struct sctp_chunk *new_obj;
- struct sctp_chunk *chunk = NULL;
+ struct sctp_sock *sp = sctp_sk(ep->base.sk);
+ struct sctp_chunk *chunk = NULL, *new_obj;
struct sctp_packet *packet;
+ struct sctp_sackhdr sackh;
struct timer_list *timer;
- unsigned long timeout;
struct sctp_transport *t;
- struct sctp_sackhdr sackh;
+ unsigned long timeout;
+ struct sctp_cmd *cmd;
int local_cork = 0;
+ int error = 0;
+ int force;
if (SCTP_EVENT_T_TIMEOUT != event_type)
chunk = event_arg;
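sctp_do_sm() is the single entry point of the lksctp state machine: it looks up a state function keyed by (event type, subtype, state), runs it, and hands the returned disposition plus the queued commands to the side-effect processor above. In outline (a sketch of the flow, not the verbatim function body):

	const struct sctp_sm_table_entry *state_fn;
	struct sctp_cmd_seq commands;
	enum sctp_disposition status;
	int error;

	state_fn = sctp_sm_lookup_event(net, event_type, state, subtype);

	sctp_init_cmd_seq(&commands);
	status = state_fn->fn(net, ep, asoc, subtype, event_arg, &commands);
	error = sctp_side_effects(event_type, subtype, state, ep, &asoc,
				  event_arg, status, &commands, gfp);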
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index b2a74c3..8f8ccde 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -59,103 +59,110 @@
#include <net/sctp/sm.h>
#include <net/sctp/structs.h>
-static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- struct sctp_chunk *chunk,
- const void *payload,
- size_t paylen);
+static struct sctp_packet *sctp_abort_pkt_new(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ struct sctp_chunk *chunk,
+ const void *payload, size_t paylen);
static int sctp_eat_data(const struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands);
-static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk);
+ struct sctp_cmd_seq *commands);
+static struct sctp_packet *sctp_ootb_pkt_new(
+ struct net *net,
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk);
static void sctp_send_stale_cookie_err(struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands,
+ struct sctp_cmd_seq *commands,
struct sctp_chunk *err_chunk);
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands);
-static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands);
-static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+static enum sctp_disposition sctp_sf_do_5_2_6_stale(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands);
+static enum sctp_disposition sctp_sf_shut_8_4_5(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands);
+static enum sctp_disposition sctp_sf_tabort_8_4_8(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands);
+ struct sctp_cmd_seq *commands);
static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk);
-static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
- sctp_cmd_seq_t *commands,
- __be16 error, int sk_err,
- const struct sctp_association *asoc,
- struct sctp_transport *transport);
+static enum sctp_disposition sctp_stop_t1_and_abort(
+ struct net *net,
+ struct sctp_cmd_seq *commands,
+ __be16 error, int sk_err,
+ const struct sctp_association *asoc,
+ struct sctp_transport *transport);
-static sctp_disposition_t sctp_sf_abort_violation(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- void *arg,
- sctp_cmd_seq_t *commands,
- const __u8 *payload,
- const size_t paylen);
+static enum sctp_disposition sctp_sf_abort_violation(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ void *arg,
+ struct sctp_cmd_seq *commands,
+ const __u8 *payload,
+ const size_t paylen);
-static sctp_disposition_t sctp_sf_violation_chunklen(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands);
+static enum sctp_disposition sctp_sf_violation_chunklen(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands);
-static sctp_disposition_t sctp_sf_violation_paramlen(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg, void *ext,
- sctp_cmd_seq_t *commands);
+static enum sctp_disposition sctp_sf_violation_paramlen(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, void *ext,
+ struct sctp_cmd_seq *commands);
-static sctp_disposition_t sctp_sf_violation_ctsn(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands);
+static enum sctp_disposition sctp_sf_violation_ctsn(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands);
-static sctp_disposition_t sctp_sf_violation_chunk(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands);
+static enum sctp_disposition sctp_sf_violation_chunk(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands);
-static sctp_ierror_t sctp_sf_authenticate(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- struct sctp_chunk *chunk);
+static enum sctp_ierror sctp_sf_authenticate(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ struct sctp_chunk *chunk);
-static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+static enum sctp_disposition __sctp_sf_do_9_1_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands);
+ struct sctp_cmd_seq *commands);
/* Small helper function that checks if the chunk is of the
 * appropriate length. The 'required_length' argument
@@ -164,8 +171,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
* false = Invalid length
*
*/
-static inline bool
-sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length)
+static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk,
+ __u16 required_length)
{
__u16 chunk_length = ntohs(chunk->chunk_hdr->length);
@@ -213,12 +220,11 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length)
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_4_C(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_4_C(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sctp_ulpevent *ev;
@@ -299,19 +305,17 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
- struct sctp_chunk *repl;
+ struct sctp_chunk *chunk = arg, *repl, *err_chunk;
+ struct sctp_unrecognized_param *unk_param;
struct sctp_association *new_asoc;
- struct sctp_chunk *err_chunk;
struct sctp_packet *packet;
- sctp_unrecognized_param_t *unk_param;
int len;
/* 6.10 Bundling
@@ -435,7 +439,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
* construct the parameters in INIT ACK by copying the
* ERROR causes over.
*/
- unk_param = (sctp_unrecognized_param_t *)
+ unk_param = (struct sctp_unrecognized_param *)
((__u8 *)(err_chunk->chunk_hdr) +
sizeof(struct sctp_chunkhdr));
/* Replace the cause code with the "Unrecognized parameter"
@@ -495,15 +499,15 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
struct sctp_init_chunk *initchunk;
+ struct sctp_chunk *chunk = arg;
struct sctp_chunk *err_chunk;
struct sctp_packet *packet;
@@ -518,7 +522,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the INIT-ACK chunk has a valid length */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_initack_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Grab the INIT header. */
@@ -530,7 +534,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
(struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
&err_chunk)) {
- sctp_error_t error = SCTP_ERROR_NO_RESOURCE;
+ enum sctp_error error = SCTP_ERROR_NO_RESOURCE;
/* This chunk contains fatal error. It is to be discarded.
* Send an ABORT, with causes. If there are no causes,
@@ -645,20 +649,21 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type, void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
+ struct sctp_ulpevent *ev, *ai_ev = NULL;
struct sctp_association *new_asoc;
struct sctp_init_chunk *peer_init;
- struct sctp_chunk *repl;
- struct sctp_ulpevent *ev, *ai_ev = NULL;
- int error = 0;
+ struct sctp_chunk *chunk = arg;
struct sctp_chunk *err_chk_p;
+ struct sctp_chunk *repl;
struct sock *sk;
+ int error = 0;
/* If the packet is an OOTB packet which is temporarily on the
* control endpoint, respond with an ABORT.
@@ -758,7 +763,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
*/
if (chunk->auth_chunk) {
struct sctp_chunk auth;
- sctp_ierror_t ret;
+ enum sctp_ierror ret;
/* Make sure that we and the peer are AUTH capable */
if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) {
@@ -872,11 +877,12 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type, void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sctp_ulpevent *ev;
@@ -950,11 +956,12 @@ nomem:
}
/* Generate and sendout a heartbeat packet. */
-static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_heartbeat(
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_transport *transport = (struct sctp_transport *) arg;
struct sctp_chunk *reply;
@@ -975,12 +982,12 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
}
/* Generate a HEARTBEAT packet on the given transport. */
-sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_sendbeat_8_3(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_transport *transport = (struct sctp_transport *) arg;
@@ -1023,11 +1030,12 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
}
/* resend asoc strreset_chunk. */
-sctp_disposition_t sctp_sf_send_reconf(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type, void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_send_reconf(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_transport *transport = arg;
@@ -1074,12 +1082,11 @@ sctp_disposition_t sctp_sf_send_reconf(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_beat_8_3(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
struct sctp_paramhdr *param_hdr;
struct sctp_chunk *chunk = arg;
@@ -1090,7 +1097,8 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the HEARTBEAT chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk,
+ sizeof(struct sctp_heartbeat_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -1098,7 +1106,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
* respond with a HEARTBEAT ACK that contains the Heartbeat
* Information field copied from the received HEARTBEAT chunk.
*/
- chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data;
+ chunk->subh.hb_hdr = (struct sctp_heartbeathdr *)chunk->skb->data;
param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr;
paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr);
@@ -1148,34 +1156,32 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
+ struct sctp_sender_hb_info *hbinfo;
struct sctp_chunk *chunk = arg;
- union sctp_addr from_addr;
struct sctp_transport *link;
- sctp_sender_hb_info_t *hbinfo;
unsigned long max_interval;
+ union sctp_addr from_addr;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the HEARTBEAT-ACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr) +
- sizeof(sctp_sender_hb_info_t)))
+ sizeof(*hbinfo)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
- hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
+ hbinfo = (struct sctp_sender_hb_info *)chunk->skb->data;
/* Make sure that the length of the parameter is what we expect */
- if (ntohs(hbinfo->param_hdr.length) !=
- sizeof(sctp_sender_hb_info_t)) {
+ if (ntohs(hbinfo->param_hdr.length) != sizeof(*hbinfo))
return SCTP_DISPOSITION_DISCARD;
- }
from_addr = hbinfo->daddr;
link = sctp_assoc_lookup_paddr(asoc, &from_addr);
@@ -1227,15 +1233,15 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
*/
static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
struct sctp_chunk *init,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
- int len;
- struct sctp_packet *pkt;
+ struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family);
union sctp_addr_param *addrparm;
struct sctp_errhdr *errhdr;
+ char buffer[sizeof(*errhdr) + sizeof(*addrparm)];
struct sctp_endpoint *ep;
- char buffer[sizeof(struct sctp_errhdr)+sizeof(union sctp_addr_param)];
- struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family);
+ struct sctp_packet *pkt;
+ int len;
	/* Build the error on the stack.  We are way too malloc crazy
* throughout the code today.
@@ -1245,7 +1251,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
/* Copy into a parm format. */
len = af->to_addr_param(ssa, addrparm);
- len += sizeof(sctp_errhdr_t);
+ len += sizeof(*errhdr);
errhdr->cause = SCTP_ERROR_RESTART;
errhdr->length = htons(len);
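The error cause here is assembled in a stack buffer sized for exactly one error header plus one address parameter, as the comment above says, avoiding an allocation on the abort path. The layout trick, sketched with hypothetical TLV types:

struct demo_errhdr  { __be16 cause; __be16 length; };
struct demo_addrprm { __be16 type;  __be16 len; __be32 addr; };

static void demo_build(char *out /* >= sizeof(buffer) */)
{
        char buffer[sizeof(struct demo_errhdr) + sizeof(struct demo_addrprm)];
        struct demo_errhdr *eh = (struct demo_errhdr *)buffer;
        /* The parameter starts immediately after the fixed header. */
        struct demo_addrprm *ap = (struct demo_addrprm *)(eh + 1);

        ap->type   = htons(5);              /* placeholder param type */
        ap->len    = htons(sizeof(*ap));
        ap->addr   = htonl(0);              /* placeholder address    */
        eh->cause  = htons(1);              /* placeholder cause code */
        eh->length = htons(sizeof(buffer)); /* header + parameter     */
        memcpy(out, buffer, sizeof(buffer));
}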
@@ -1292,7 +1298,7 @@ static bool list_has_sctp_addr(const struct list_head *list,
static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc,
const struct sctp_association *asoc,
struct sctp_chunk *init,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
struct net *net = sock_net(new_asoc->base.sk);
struct sctp_transport *new_addr;
@@ -1412,20 +1418,19 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc,
/* Common helper routine for both duplicate and simultaneous INIT
* chunk handling.
*/
-static sctp_disposition_t sctp_sf_do_unexpected_init(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg, sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_do_unexpected_init(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- sctp_disposition_t retval;
- struct sctp_chunk *chunk = arg;
- struct sctp_chunk *repl;
+ struct sctp_chunk *chunk = arg, *repl, *err_chunk;
+ struct sctp_unrecognized_param *unk_param;
struct sctp_association *new_asoc;
- struct sctp_chunk *err_chunk;
+ enum sctp_disposition retval;
struct sctp_packet *packet;
- sctp_unrecognized_param_t *unk_param;
int len;
/* 6.10 Bundling
@@ -1555,7 +1560,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
* construct the parameters in INIT ACK by copying the
* ERROR causes over.
*/
- unk_param = (sctp_unrecognized_param_t *)
+ unk_param = (struct sctp_unrecognized_param *)
((__u8 *)(err_chunk->chunk_hdr) +
sizeof(struct sctp_chunkhdr));
/* Replace the cause code with the "Unrecognized parameter"
@@ -1626,12 +1631,13 @@ cleanup:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_2_1_siminit(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
	/* Call helper to do the real work for both simultaneous and
* duplicate INIT chunk handling.
@@ -1680,12 +1686,13 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
+enum sctp_disposition sctp_sf_do_5_2_2_dupinit(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
	/* Call helper to do the real work for both simultaneous and
* duplicate INIT chunk handling.
@@ -1703,11 +1710,13 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
* An unexpected INIT ACK usually indicates the processing of an old or
* duplicated INIT chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg, sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_5_2_3_initack(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* Per the above section, we'll discard the chunk if we have an
* endpoint. If this is an OOTB INIT-ACK, treat it as such.
@@ -1723,18 +1732,19 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
* Section 5.2.4
* A) In this case, the peer may have restarted.
*/
-static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_a(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands,
+ struct sctp_cmd_seq *commands,
struct sctp_association *new_asoc)
{
struct sctp_init_chunk *peer_init;
+ enum sctp_disposition disposition;
struct sctp_ulpevent *ev;
struct sctp_chunk *repl;
struct sctp_chunk *err;
- sctp_disposition_t disposition;
/* new_asoc is a brand-new association, so these are not yet
* side effects--it is safe to run them here.
@@ -1838,11 +1848,12 @@ nomem:
* after responding to the local endpoint's INIT
*/
/* This case represents an initialization collision. */
-static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_b(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands,
+ struct sctp_cmd_seq *commands,
struct sctp_association *new_asoc)
{
struct sctp_init_chunk *peer_init;
@@ -1909,11 +1920,12 @@ nomem:
* but a new tag of its own.
*/
/* This case represents an initialization collision. */
-static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_c(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands,
+ struct sctp_cmd_seq *commands,
struct sctp_association *new_asoc)
{
/* The cookie should be silently discarded.
@@ -1931,11 +1943,12 @@ static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net,
* enter the ESTABLISHED state, if it has not already done so.
*/
/* This case represents an initialization collision. */
-static sctp_disposition_t sctp_sf_do_dupcook_d(struct net *net,
+static enum sctp_disposition sctp_sf_do_dupcook_d(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands,
+ struct sctp_cmd_seq *commands,
struct sctp_association *new_asoc)
{
struct sctp_ulpevent *ev = NULL, *ai_ev = NULL;
@@ -2026,19 +2039,20 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
+enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
- sctp_disposition_t retval;
- struct sctp_chunk *chunk = arg;
struct sctp_association *new_asoc;
+ struct sctp_chunk *chunk = arg;
+ enum sctp_disposition retval;
+ struct sctp_chunk *err_chk_p;
int error = 0;
char action;
- struct sctp_chunk *err_chk_p;
/* Make sure that the chunk has a valid length from the protocol
* perspective. In this case check to make sure we have at least
@@ -2144,13 +2158,13 @@ nomem:
*
* See sctp_sf_do_9_1_abort().
*/
-sctp_disposition_t sctp_sf_shutdown_pending_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_pending_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -2167,7 +2181,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
* as we do not know its true length. So, to be safe, discard the
* packet.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* ADD-IP: Special case for ABORT chunks
@@ -2187,12 +2201,13 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
*
* See sctp_sf_do_9_1_abort().
*/
-sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
+enum sctp_disposition sctp_sf_shutdown_sent_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -2209,7 +2224,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
* as we do not know its true length. So, to be safe, discard the
* packet.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* ADD-IP: Special case for ABORT chunks
@@ -2237,13 +2252,13 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
*
* See sctp_sf_do_9_1_abort().
*/
-sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_ack_sent_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* The same T2 timer, so we should be able to use
* common function with the SHUTDOWN-SENT state.
@@ -2265,15 +2280,16 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
+enum sctp_disposition sctp_sf_cookie_echoed_err(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
- sctp_errhdr_t *err;
+ struct sctp_errhdr *err;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2281,7 +2297,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
/* Make sure that the ERROR chunk has a valid length.
* The parameter walking depends on this as well.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -2329,20 +2345,20 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_do_5_2_6_stale(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
- u32 stale;
- sctp_cookie_preserve_param_t bht;
- sctp_errhdr_t *err;
- struct sctp_chunk *reply;
- struct sctp_bind_addr *bp;
int attempts = asoc->init_err_counter + 1;
+ struct sctp_chunk *chunk = arg, *reply;
+ struct sctp_cookie_preserve_param bht;
+ struct sctp_bind_addr *bp;
+ struct sctp_errhdr *err;
+ u32 stale;
if (attempts > asoc->max_init_attempts) {
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
@@ -2352,7 +2368,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
return SCTP_DISPOSITION_DELETE_TCB;
}
- err = (sctp_errhdr_t *)(chunk->skb->data);
+ err = (struct sctp_errhdr *)(chunk->skb->data);
/* When calculating the time extension, an implementation
* SHOULD use the RTT information measured based on the
@@ -2368,7 +2384,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
* to give ample time to retransmit the new cookie and thus
* yield a higher probability of success on the reattempt.
*/
- stale = ntohl(*(__be32 *)((u8 *)err + sizeof(sctp_errhdr_t)));
+ stale = ntohl(*(__be32 *)((u8 *)err + sizeof(*err)));
stale = (stale * 2) / 1000;
bht.param_hdr.type = SCTP_PARAM_COOKIE_PRESERVATIVE;
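A note on units: RFC 4960 reports the Measure of Staleness in microseconds, while the Cookie Preservative carries a lifetime extension in milliseconds, so the code divides by 1000 and doubles the result for retry headroom, as the comment above explains. The conversion in isolation, as a hypothetical helper:

static __u32 demo_cookie_extension(__be32 reported_staleness /* usec */)
{
        /* usec -> msec, doubled to raise the odds of the reattempt. */
        return (ntohl(reported_staleness) * 2) / 1000;
}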
@@ -2452,12 +2468,13 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
+enum sctp_disposition sctp_sf_do_9_1_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -2474,7 +2491,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
* as we do not know its true length. So, to be safe, discard the
* packet.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* ADD-IP: Special case for ABORT chunks
@@ -2489,27 +2506,29 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
-static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+static enum sctp_disposition __sctp_sf_do_9_1_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
+ __be16 error = SCTP_ERROR_NO_ERROR;
struct sctp_chunk *chunk = arg;
unsigned int len;
- __be16 error = SCTP_ERROR_NO_ERROR;
/* See if we have an error cause code in the chunk. */
len = ntohs(chunk->chunk_hdr->length);
if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) {
-		sctp_errhdr_t *err;
+		struct sctp_errhdr *err;
sctp_walk_errors(err, chunk->chunk_hdr);
if ((void *)err != (void *)chunk->chunk_end)
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg,
+ commands);
- error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
+ error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
}
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
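sctp_walk_errors() advances a cursor over the variable-length error causes; the check above rejects the chunk when the walk does not land exactly on chunk_end, i.e. when some embedded length field is inconsistent. A standalone sketch of that walk-and-verify pattern over generic TLVs (hypothetical types, kernel context assumed):

struct demo_tlv { __be16 type; __be16 len; };  /* len includes header */

static bool demo_tlvs_sane(void *start, void *end)
{
        struct demo_tlv *t = start;

        while ((void *)(t + 1) <= end) {
                __u16 len = ntohs(t->len);

                if (len < sizeof(*t) || (void *)t + len > end)
                        return false;
                /* Parameters are padded to 4-byte boundaries. */
                t = (void *)t + ((len + 3) & ~3);
        }
        return (void *)t == end;  /* must land exactly on the end */
}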
@@ -2526,16 +2545,17 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
*
* See sctp_sf_do_9_1_abort() above.
*/
-sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_wait_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
+ __be16 error = SCTP_ERROR_NO_ERROR;
struct sctp_chunk *chunk = arg;
unsigned int len;
- __be16 error = SCTP_ERROR_NO_ERROR;
if (!sctp_vtag_verify_either(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2550,13 +2570,13 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
* as we do not know its true length. So, to be safe, discard the
* packet.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk)))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* See if we have an error cause code in the chunk. */
len = ntohs(chunk->chunk_hdr->length);
if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
- error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
+ error = ((struct sctp_errhdr *)chunk->skb->data)->cause;
return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc,
chunk->transport);
@@ -2565,12 +2585,13 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
/*
* Process an incoming ICMP as an ABORT. (COOKIE-WAIT state)
*/
-sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
+enum sctp_disposition sctp_sf_cookie_wait_icmp_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
return sctp_stop_t1_and_abort(net, commands, SCTP_ERROR_NO_ERROR,
ENOPROTOOPT, asoc,
@@ -2580,12 +2601,13 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
/*
* Process an ABORT. (COOKIE-ECHOED state)
*/
-sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_echoed_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* There is a single T1 timer, so we should be able to use
* common function with the COOKIE-WAIT state.
@@ -2598,11 +2620,12 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
*
* This is common code called by several sctp_sf_*_abort() functions above.
*/
-static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
- sctp_cmd_seq_t *commands,
- __be16 error, int sk_err,
- const struct sctp_association *asoc,
- struct sctp_transport *transport)
+static enum sctp_disposition sctp_stop_t1_and_abort(
+ struct net *net,
+ struct sctp_cmd_seq *commands,
+ __be16 error, int sk_err,
+ const struct sctp_association *asoc,
+ struct sctp_transport *transport)
{
pr_debug("%s: ABORT received (INIT)\n", __func__);
@@ -2652,16 +2675,17 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_shutdown(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
+ enum sctp_disposition disposition;
struct sctp_chunk *chunk = arg;
- sctp_shutdownhdr_t *sdh;
- sctp_disposition_t disposition;
+ struct sctp_shutdownhdr *sdh;
struct sctp_ulpevent *ev;
__u32 ctsn;
@@ -2669,14 +2693,13 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk,
- sizeof(struct sctp_shutdown_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Convert the elaborate header. */
- sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
- skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t));
+ sdh = (struct sctp_shutdownhdr *)chunk->skb->data;
+ skb_pull(chunk->skb, sizeof(*sdh));
chunk->subh.shutdown_hdr = sdh;
ctsn = ntohl(sdh->cum_tsn_ack);
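The cumulative TSN extracted here is later compared with TSN_lt(), which (like the ADDIP serial comparisons further down) is serial-number arithmetic, so comparisons stay correct across 32-bit wraparound. The usual kernel formulation, sketched:

/* Serial-number comparison: correct across 2^32 wraparound as long
 * as the two values are within 2^31 of each other.
 */
static inline bool demo_tsn_lt(__u32 a, __u32 b)
{
        return (__s32)(a - b) < 0;
}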
@@ -2742,27 +2765,27 @@ out:
* The Cumulative TSN Ack of the received SHUTDOWN chunk
* MUST be processed.
*/
-sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_shut_ctsn(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
- sctp_shutdownhdr_t *sdh;
+ struct sctp_shutdownhdr *sdh;
__u32 ctsn;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk,
- sizeof(struct sctp_shutdown_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
- sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
+ sdh = (struct sctp_shutdownhdr *)chunk->skb->data;
ctsn = ntohl(sdh->cum_tsn_ack);
if (TSN_lt(ctsn, asoc->ctsn_ack_point)) {
@@ -2796,14 +2819,15 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
* that belong to this association, it should discard the INIT chunk and
* retransmit the SHUTDOWN ACK chunk.
*/
-sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_reshutack(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = (struct sctp_chunk *) arg;
+ struct sctp_chunk *chunk = arg;
struct sctp_chunk *reply;
/* Make sure that the chunk has a valid length */
@@ -2860,26 +2884,26 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_ecn_cwr(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- sctp_cwrhdr_t *cwr;
struct sctp_chunk *chunk = arg;
+ struct sctp_cwrhdr *cwr;
u32 lowest_tsn;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_ecne_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
- cwr = (sctp_cwrhdr_t *) chunk->skb->data;
- skb_pull(chunk->skb, sizeof(sctp_cwrhdr_t));
+ cwr = (struct sctp_cwrhdr *)chunk->skb->data;
+ skb_pull(chunk->skb, sizeof(*cwr));
lowest_tsn = ntohl(cwr->lowest_tsn);
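This is the companion half of the overlay idiom: once the header fields are captured, skb_pull() advances skb->data past the header so downstream code sees only the payload, and sizeof(*cwr) keeps the pull width tied to the cast above. The pattern in isolation, reusing the real wire header (a sketch, not the handler itself):

#include <linux/sctp.h>      /* wire-format headers, incl. sctp_cwrhdr */
#include <linux/skbuff.h>

static __u32 demo_consume_cwr(struct sk_buff *skb)
{
        struct sctp_cwrhdr *cwr = (struct sctp_cwrhdr *)skb->data;
        __u32 lowest = ntohl(cwr->lowest_tsn);

        skb_pull(skb, sizeof(*cwr));  /* payload now starts at data */
        return lowest;
}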
@@ -2916,25 +2940,24 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_ecne(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_ecne(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
- sctp_ecnehdr_t *ecne;
struct sctp_chunk *chunk = arg;
+ struct sctp_ecnehdr *ecne;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_ecne_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
- ecne = (sctp_ecnehdr_t *) chunk->skb->data;
- skb_pull(chunk->skb, sizeof(sctp_ecnehdr_t));
+ ecne = (struct sctp_ecnehdr *)chunk->skb->data;
+ skb_pull(chunk->skb, sizeof(*ecne));
/* If this is a newer ECNE than the last CWR packet we sent out */
sctp_add_cmd_sf(commands, SCTP_CMD_ECN_ECNE,
@@ -2973,15 +2996,15 @@ sctp_disposition_t sctp_sf_do_ecne(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
+ union sctp_arg force = SCTP_NOFORCE();
struct sctp_chunk *chunk = arg;
- sctp_arg_t force = SCTP_NOFORCE();
int error;
if (!sctp_vtag_verify(chunk, asoc)) {
@@ -3093,12 +3116,13 @@ discard_noforce:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_data_fast_4_4(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
int error;
@@ -3184,22 +3208,22 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
- sctp_sackhdr_t *sackh;
+ struct sctp_sackhdr *sackh;
__u32 ctsn;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SACK chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_sack_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
@@ -3258,12 +3282,13 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+static enum sctp_disposition sctp_sf_tabort_8_4_8(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
struct sctp_packet *packet = NULL;
struct sctp_chunk *chunk = arg;
@@ -3308,21 +3333,21 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_operr_notify(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_operr_notify(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
- sctp_errhdr_t *err;
+ struct sctp_errhdr *err;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ERROR chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
sctp_walk_errors(err, chunk->chunk_hdr);
@@ -3346,12 +3371,12 @@ sctp_disposition_t sctp_sf_operr_notify(struct net *net,
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_final(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sctp_chunk *reply;
@@ -3429,20 +3454,19 @@ nomem:
* receiver of the OOTB packet shall discard the OOTB packet and take
* no further action.
*/
-sctp_disposition_t sctp_sf_ootb(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_ootb(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sk_buff *skb = chunk->skb;
struct sctp_chunkhdr *ch;
- sctp_errhdr_t *err;
- __u8 *ch_end;
- int ootb_shut_ack = 0;
+ struct sctp_errhdr *err;
int ootb_cookie_ack = 0;
+ int ootb_shut_ack = 0;
+ __u8 *ch_end;
SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
@@ -3518,16 +3542,17 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
* (endpoint, asoc, type, arg, commands)
*
* Outputs
- * (sctp_disposition_t)
+ * (enum sctp_disposition)
*
* The return value is the disposition of the chunk.
*/
-static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_shut_8_4_5(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_packet *packet = NULL;
struct sctp_chunk *chunk = arg;
@@ -3584,12 +3609,12 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
* chunks. --piggy ]
*
*/
-sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -3609,17 +3634,18 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
}
/* ADDIP Section 4.2 Upon reception of an ASCONF Chunk. */
-sctp_disposition_t sctp_sf_do_asconf(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type, void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_asconf(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
- struct sctp_chunk *asconf_ack = NULL;
- struct sctp_paramhdr *err_param = NULL;
- sctp_addiphdr_t *hdr;
- __u32 serial;
+ struct sctp_paramhdr *err_param = NULL;
+ struct sctp_chunk *asconf_ack = NULL;
+ struct sctp_chunk *chunk = arg;
+ struct sctp_addiphdr *hdr;
+ __u32 serial;
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
@@ -3634,14 +3660,15 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
* described in [I-D.ietf-tsvwg-sctp-auth].
*/
if (!net->sctp.addip_noauth && !chunk->auth)
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
+ commands);
/* Make sure that the ASCONF ADDIP chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t)))
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
- hdr = (sctp_addiphdr_t *)chunk->skb->data;
+ hdr = (struct sctp_addiphdr *)chunk->skb->data;
serial = ntohl(hdr->serial);
/* Verify the ASCONF chunk before processing it. */
@@ -3725,18 +3752,19 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
* When building TLV parameters for the ASCONF Chunk that will add or
* delete IP addresses the D0 to D13 rules should be applied:
*/
-sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type, void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *asconf_ack = arg;
- struct sctp_chunk *last_asconf = asoc->addip_last_asconf;
- struct sctp_chunk *abort;
- struct sctp_paramhdr *err_param = NULL;
- sctp_addiphdr_t *addip_hdr;
- __u32 sent_serial, rcvd_serial;
+ struct sctp_chunk *last_asconf = asoc->addip_last_asconf;
+ struct sctp_paramhdr *err_param = NULL;
+ struct sctp_chunk *asconf_ack = arg;
+ struct sctp_addiphdr *addip_hdr;
+ __u32 sent_serial, rcvd_serial;
+ struct sctp_chunk *abort;
if (!sctp_vtag_verify(asconf_ack, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
@@ -3751,14 +3779,16 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
* described in [I-D.ietf-tsvwg-sctp-auth].
*/
if (!net->sctp.addip_noauth && !asconf_ack->auth)
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
+ commands);
/* Make sure that the ADDIP chunk has a valid length. */
- if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t)))
+ if (!sctp_chunk_length_valid(asconf_ack,
+ sizeof(struct sctp_addip_chunk)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
- addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data;
+ addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data;
rcvd_serial = ntohl(addip_hdr->serial);
/* Verify the ASCONF-ACK chunk before processing it. */
@@ -3767,7 +3797,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
(void *)err_param, commands);
if (last_asconf) {
- addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr;
+ addip_hdr = (struct sctp_addiphdr *)last_asconf->subh.addip_hdr;
sent_serial = ntohl(addip_hdr->serial);
} else {
sent_serial = asoc->addip_serial - 1;
@@ -3782,7 +3812,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) &&
!(asoc->addip_last_asconf)) {
abort = sctp_make_abort(asoc, asconf_ack,
- sizeof(sctp_errhdr_t));
+ sizeof(struct sctp_errhdr));
if (abort) {
sctp_init_cause(abort, SCTP_ERROR_ASCONF_ACK, 0);
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -3818,7 +3848,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
}
abort = sctp_make_abort(asoc, asconf_ack,
- sizeof(sctp_errhdr_t));
+ sizeof(struct sctp_errhdr));
if (abort) {
sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -3841,11 +3871,12 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
}
/* RE-CONFIG Section 5.2 Upon reception of an RECONF Chunk. */
-sctp_disposition_t sctp_sf_do_reconf(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type, void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_reconf(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_paramhdr *err_param = NULL;
struct sctp_chunk *chunk = arg;
@@ -3917,15 +3948,15 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
struct sctp_fwdtsn_hdr *fwdtsn_hdr;
+ struct sctp_chunk *chunk = arg;
struct sctp_fwdtsn_skip *skip;
__u16 len;
__u32 tsn;
@@ -3987,16 +4018,16 @@ discard_noforce:
return SCTP_DISPOSITION_DISCARD;
}
-sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_fwd_tsn_fast(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
struct sctp_fwdtsn_hdr *fwdtsn_hdr;
+ struct sctp_chunk *chunk = arg;
struct sctp_fwdtsn_skip *skip;
__u16 len;
__u32 tsn;
@@ -4079,23 +4110,23 @@ gen_shutdown:
*
* The return value is the disposition of the chunk.
*/
-static sctp_ierror_t sctp_sf_authenticate(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- struct sctp_chunk *chunk)
+static enum sctp_ierror sctp_sf_authenticate(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ struct sctp_chunk *chunk)
{
struct sctp_authhdr *auth_hdr;
+ __u8 *save_digest, *digest;
struct sctp_hmac *hmac;
unsigned int sig_len;
__u16 key_id;
- __u8 *save_digest;
- __u8 *digest;
/* Pull in the auth header, so we can do some more verification */
auth_hdr = (struct sctp_authhdr *)chunk->skb->data;
chunk->subh.auth_hdr = auth_hdr;
- skb_pull(chunk->skb, sizeof(struct sctp_authhdr));
+ skb_pull(chunk->skb, sizeof(*auth_hdr));
/* Make sure that we support the HMAC algorithm from the auth
* chunk.
@@ -4114,7 +4145,8 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net,
/* Make sure that the length of the signature matches what
* we expect.
*/
- sig_len = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_auth_chunk_t);
+ sig_len = ntohs(chunk->chunk_hdr->length) -
+ sizeof(struct sctp_auth_chunk);
hmac = sctp_auth_get_hmac(ntohs(auth_hdr->hmac_id));
if (sig_len != hmac->hmac_len)
return SCTP_IERROR_PROTO_VIOLATION;
@@ -4136,8 +4168,8 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net,
memset(digest, 0, sig_len);
sctp_auth_calculate_hmac(asoc, chunk->skb,
- (struct sctp_auth_chunk *)chunk->chunk_hdr,
- GFP_ATOMIC);
+ (struct sctp_auth_chunk *)chunk->chunk_hdr,
+ GFP_ATOMIC);
/* Discard the packet if the digests do not match */
if (memcmp(save_digest, digest, sig_len)) {
@@ -4153,17 +4185,16 @@ nomem:
return SCTP_IERROR_NOMEM;
}
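The digest check above follows a save/zero/recompute/compare sequence: the received HMAC is copied aside, the in-packet digest field is zeroed (the peer computed it that way), the HMAC is recomputed over the wire image, and the two are compared. Sketched below with a hypothetical compute_hmac() standing in for the real sctp_auth_calculate_hmac() machinery:

static bool demo_verify(__u8 *digest_in_pkt, unsigned int dlen,
                        const void *pkt, const void *key)
{
        __u8 saved[64];                      /* assumes dlen <= 64      */

        memcpy(saved, digest_in_pkt, dlen);  /* keep what the peer sent */
        memset(digest_in_pkt, 0, dlen);      /* field is zero while...  */
        compute_hmac(key, pkt, digest_in_pkt); /* ...the MAC is made    */

        return memcmp(saved, digest_in_pkt, dlen) == 0;
}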
-sctp_disposition_t sctp_sf_eat_auth(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_eat_auth(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
- struct sctp_authhdr *auth_hdr;
struct sctp_chunk *chunk = arg;
+ struct sctp_authhdr *auth_hdr;
struct sctp_chunk *err_chunk;
- sctp_ierror_t error;
+ enum sctp_ierror error;
	/* Make sure that the peer is AUTH capable */
if (!asoc->peer.auth_capable)
@@ -4250,12 +4281,12 @@ sctp_disposition_t sctp_sf_eat_auth(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_unk_chunk(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *unk_chunk = arg;
struct sctp_chunk *err_chunk;
@@ -4330,12 +4361,12 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_discard_chunk(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -4370,12 +4401,11 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_pdiscard(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_pdiscard(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
@@ -4398,12 +4428,12 @@ sctp_disposition_t sctp_sf_pdiscard(struct net *net,
* We simply tag the chunk as a violation. The state machine will log
* the violation and continue.
*/
-sctp_disposition_t sctp_sf_violation(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_violation(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -4418,14 +4448,14 @@ sctp_disposition_t sctp_sf_violation(struct net *net,
/*
* Common function to handle a protocol violation.
*/
-static sctp_disposition_t sctp_sf_abort_violation(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- void *arg,
- sctp_cmd_seq_t *commands,
- const __u8 *payload,
- const size_t paylen)
+static enum sctp_disposition sctp_sf_abort_violation(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ void *arg,
+ struct sctp_cmd_seq *commands,
+ const __u8 *payload,
+ const size_t paylen)
{
struct sctp_packet *packet = NULL;
struct sctp_chunk *chunk = arg;
@@ -4454,11 +4484,10 @@ static sctp_disposition_t sctp_sf_abort_violation(
/* Treat INIT-ACK as a special case during COOKIE-WAIT. */
if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK &&
!asoc->peer.i.init_tag) {
- sctp_initack_chunk_t *initack;
+ struct sctp_initack_chunk *initack;
- initack = (sctp_initack_chunk_t *)chunk->chunk_hdr;
- if (!sctp_chunk_length_valid(chunk,
- sizeof(sctp_initack_chunk_t)))
+ initack = (struct sctp_initack_chunk *)chunk->chunk_hdr;
+ if (!sctp_chunk_length_valid(chunk, sizeof(*initack)))
abort->chunk_hdr->flags |= SCTP_CHUNK_FLAG_T;
else {
unsigned int inittag;
@@ -4521,7 +4550,7 @@ nomem:
* Handle a protocol violation when the chunk length is invalid.
* "Invalid" length is identified as smaller than the minimal length a
* given chunk can be. For example, a SACK chunk has invalid length
- * if its length is set to be smaller than the size of sctp_sack_chunk_t.
+ * if its length is set to be smaller than the size of struct sctp_sack_chunk.
*
* We inform the other end by sending an ABORT with a Protocol Violation
* error code.
@@ -4536,18 +4565,18 @@ nomem:
*
* Generate an ABORT chunk and terminate the association.
*/
-static sctp_disposition_t sctp_sf_violation_chunklen(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_violation_chunklen(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
static const char err_str[] = "The following chunk had invalid length:";
return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
- sizeof(err_str));
+ sizeof(err_str));
}
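All of the violation handlers funnel a static reason string into sctp_sf_abort_violation(); note that sizeof(err_str) is used rather than strlen(), so the trailing NUL is counted and the ABORT's error cause carries a terminated string. In miniature:

static const char err_str[] = "example violation reason:";

static size_t demo_paylen(void)
{
        /* sizeof counts the trailing NUL: strlen(err_str) + 1 bytes. */
        return sizeof(err_str);
}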
/*
@@ -4556,17 +4585,17 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
* or accumulated length in multi parameters exceeds the end of the chunk,
* the length is considered as invalid.
*/
-static sctp_disposition_t sctp_sf_violation_paramlen(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg, void *ext,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_violation_paramlen(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, void *ext,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = arg;
struct sctp_paramhdr *param = ext;
struct sctp_chunk *abort = NULL;
+ struct sctp_chunk *chunk = arg;
if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
goto discard;
@@ -4599,18 +4628,18 @@ nomem:
* We inform the other end by sending an ABORT with a Protocol Violation
* error code.
*/
-static sctp_disposition_t sctp_sf_violation_ctsn(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_violation_ctsn(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
static const char err_str[] = "The cumulative tsn ack beyond the max tsn currently sent:";
return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
- sizeof(err_str));
+ sizeof(err_str));
}
/* Handle protocol violation of an invalid chunk bundling. For example,
@@ -4619,13 +4648,13 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
* statement from the specs. Additionally, there might be an attacker
* on the path and we may not want to continue this communication.
*/
-static sctp_disposition_t sctp_sf_violation_chunk(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+static enum sctp_disposition sctp_sf_violation_chunk(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
static const char err_str[] = "The following chunk violates protocol:";
@@ -4633,7 +4662,7 @@ static sctp_disposition_t sctp_sf_violation_chunk(
return sctp_sf_violation(net, ep, asoc, type, arg, commands);
return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
- sizeof(err_str));
+ sizeof(err_str));
}
/***************************************************************************
* These are the state functions for handling primitive (Section 10) events.
@@ -4695,15 +4724,15 @@ static sctp_disposition_t sctp_sf_violation_chunk(
*
* The return value is a disposition.
*/
-sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_prm_asoc(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *repl;
struct sctp_association *my_asoc;
+ struct sctp_chunk *repl;
/* The comment below says that we enter COOKIE-WAIT AFTER
* sending the INIT, but that doesn't actually work in our
@@ -4807,12 +4836,12 @@ nomem:
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_prm_send(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_datamsg *msg = arg;
@@ -4846,15 +4875,15 @@ sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_prm_shutdown(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- int disposition;
+ enum sctp_disposition disposition;
/* From 9.2 Shutdown of an Association
* Upon receipt of the SHUTDOWN primitive from its upper
@@ -4872,6 +4901,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
arg, commands);
}
+
return disposition;
}
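One subtler change in this hunk: the local 'disposition' loses its int type in favor of enum sctp_disposition. Beyond readability, an enum-typed local lets the compiler check conversions and flag unhandled enumerators when the value feeds a switch. A sketch with hypothetical names:

enum demo_disp { DEMO_CONSUME, DEMO_DISCARD, DEMO_DELETE_TCB };

static void demo_dispatch(enum demo_disp d)
{
        switch (d) {            /* -Wswitch flags missing enumerators */
        case DEMO_CONSUME:
        case DEMO_DISCARD:
                break;
        case DEMO_DELETE_TCB:
                break;
        }
}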
@@ -4902,13 +4932,13 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_9_1_prm_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_1_prm_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* From 9.1 Abort of an Association
* Upon receipt of the ABORT primitive from its upper
@@ -4940,12 +4970,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
}
/* We tried an illegal operation on an association which is closed. */
-sctp_disposition_t sctp_sf_error_closed(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_error_closed(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR, SCTP_ERROR(-EINVAL));
return SCTP_DISPOSITION_CONSUME;
@@ -4954,12 +4984,13 @@ sctp_disposition_t sctp_sf_error_closed(struct net *net,
/* We tried an illegal operation on an association which is shutting
* down.
*/
-sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_error_shutdown(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR,
SCTP_ERROR(-ESHUTDOWN));
@@ -4980,13 +5011,13 @@ sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_wait_prm_shutdown(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
@@ -5015,12 +5046,13 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg, sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_echoed_prm_shutdown(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* There is a single T1 timer, so we should be able to use
* common function with the COOKIE-WAIT state.
@@ -5042,13 +5074,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_wait_prm_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *abort = arg;
@@ -5091,13 +5123,13 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_cookie_echoed_prm_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* There is a single T1 timer, so we should be able to use
* common function with the COOKIE-WAIT state.
@@ -5117,13 +5149,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_pending_prm_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* Stop the T5-shutdown guard timer. */
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -5144,13 +5176,13 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_sent_prm_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* Stop the T2-shutdown timer. */
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -5175,13 +5207,13 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
* Outputs
* (timers)
*/
-sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_shutdown_ack_sent_prm_abort(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
/* The same T2 timer, so we should be able to use
* common function with the SHUTDOWN-SENT state.
@@ -5211,13 +5243,13 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
* o destination transport address - the transport address of the
* association on which a heartbeat should be issued.
*/
-sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
+enum sctp_disposition sctp_sf_do_prm_requestheartbeat(
struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
- const sctp_subtype_t type,
+ const union sctp_subtype type,
void *arg,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
if (SCTP_DISPOSITION_NOMEM == sctp_sf_heartbeat(ep, asoc, type,
(struct sctp_transport *)arg, commands))
@@ -5244,12 +5276,12 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
* When an endpoint has an ASCONF signaled change to be sent to the
* remote endpoint it should do A1 to A9
*/
-sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_prm_asconf(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -5261,11 +5293,12 @@ sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
}
/* RE-CONFIG Section 5.1 RECONF Chunk Procedures */
-sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg, sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_prm_reconf(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
@@ -5278,13 +5311,13 @@ sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net,
*
* The return value is the disposition of the primitive.
*/
-sctp_disposition_t sctp_sf_ignore_primitive(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_ignore_primitive(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
pr_debug("%s: primitive type:%d is ignored\n", __func__,
type.primitive);
@@ -5302,13 +5335,13 @@ sctp_disposition_t sctp_sf_ignore_primitive(
* subscribes to this event, if there is no data to be sent or
* retransmit, the stack will immediately send up this notification.
*/
-sctp_disposition_t sctp_sf_do_no_pending_tsn(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_no_pending_tsn(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_ulpevent *event;
@@ -5334,13 +5367,13 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn(
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_start_shutdown(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *reply;
@@ -5404,15 +5437,15 @@ nomem:
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_9_2_shutdown_ack(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *chunk = (struct sctp_chunk *) arg;
+ struct sctp_chunk *chunk = arg;
struct sctp_chunk *reply;
/* There are 2 ways of getting here:
@@ -5424,12 +5457,14 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
*/
if (chunk) {
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg,
+ commands);
/* Make sure that the SHUTDOWN chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
+ if (!sctp_chunk_length_valid(
+ chunk, sizeof(struct sctp_shutdown_chunk)))
+ return sctp_sf_violation_chunklen(net, ep, asoc, type,
+ arg, commands);
}
/* If it has no more outstanding DATA chunks, the SHUTDOWN receiver
@@ -5476,12 +5511,12 @@ nomem:
*
* The return value is the disposition of the event.
*/
-sctp_disposition_t sctp_sf_ignore_other(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_ignore_other(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
pr_debug("%s: the event other type:%d is ignored\n",
__func__, type.other);
@@ -5504,12 +5539,12 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_6_3_3_rtx(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_transport *transport = arg;
@@ -5592,12 +5627,12 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
* allow. However, an SCTP transmitter MUST NOT be more aggressive than
* the following algorithms allow.
*/
-sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_do_6_2_sack(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS);
sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
@@ -5623,16 +5658,17 @@ sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
* (timers, events)
*
*/
-sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t1_init_timer_expire(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
+ int attempts = asoc->init_err_counter + 1;
struct sctp_chunk *repl = NULL;
struct sctp_bind_addr *bp;
- int attempts = asoc->init_err_counter + 1;
pr_debug("%s: timer T1 expired (INIT)\n", __func__);
@@ -5687,15 +5723,16 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
* (timers, events)
*
*/
-sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t1_cookie_timer_expire(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- struct sctp_chunk *repl = NULL;
int attempts = asoc->init_err_counter + 1;
+ struct sctp_chunk *repl = NULL;
pr_debug("%s: timer T1 expired (COOKIE-ECHO)\n", __func__);
@@ -5737,12 +5774,13 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
* the T2-Shutdown timer, giving its peer ample opportunity to transmit
* all of its queued DATA chunks that have not yet been sent.
*/
-sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t2_timer_expire(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *reply = NULL;
@@ -5807,13 +5845,13 @@ nomem:
 * ADDIP Section 4.1 ASCONF Chunk Procedures
* If the T4 RTO timer expires the endpoint should do B1 to B5
*/
-sctp_disposition_t sctp_sf_t4_timer_expire(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t4_timer_expire(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = asoc->addip_last_asconf;
struct sctp_transport *transport = chunk->transport;
@@ -5879,12 +5917,13 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
* At the expiration of this timer the sender SHOULD abort the association
* by sending an ABORT chunk.
*/
-sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_t5_timer_expire(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *reply = NULL;
@@ -5915,15 +5954,15 @@ nomem:
 * The work that needs to be done is the same as when SHUTDOWN is initiated by
 * the user. So this routine looks the same as sctp_sf_do_9_2_prm_shutdown().
*/
-sctp_disposition_t sctp_sf_autoclose_timer_expire(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_autoclose_timer_expire(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
- int disposition;
+ enum sctp_disposition disposition;
SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS);
@@ -5943,6 +5982,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
arg, commands);
}
+
return disposition;
}
@@ -5958,12 +5998,11 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_not_impl(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_not_impl(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
return SCTP_DISPOSITION_NOT_IMPL;
}
@@ -5976,12 +6015,11 @@ sctp_disposition_t sctp_sf_not_impl(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_bug(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_bug(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg, struct sctp_cmd_seq *commands)
{
return SCTP_DISPOSITION_BUG;
}
@@ -5997,12 +6035,12 @@ sctp_disposition_t sctp_sf_bug(struct net *net,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const sctp_subtype_t type,
- void *arg,
- sctp_cmd_seq_t *commands)
+enum sctp_disposition sctp_sf_timer_ignore(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
{
pr_debug("%s: timer %d ignored\n", __func__, type.chunk);
@@ -6017,9 +6055,9 @@ sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk)
{
struct sctp_sackhdr *sack;
+ __u16 num_dup_tsns;
unsigned int len;
__u16 num_blocks;
- __u16 num_dup_tsns;
/* Protect ourselves from reading too far into
* the skb from a bogus sender.
@@ -6041,12 +6079,12 @@ static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk)
/* Create an ABORT packet to be sent as a response, with the specified
* error causes.
*/
-static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- struct sctp_chunk *chunk,
- const void *payload,
- size_t paylen)
+static struct sctp_packet *sctp_abort_pkt_new(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ struct sctp_chunk *chunk,
+ const void *payload, size_t paylen)
{
struct sctp_packet *packet;
struct sctp_chunk *abort;
@@ -6083,14 +6121,14 @@ static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
}
 /* Allocate a packet for responding in OOTB conditions. */
-static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
- const struct sctp_association *asoc,
- const struct sctp_chunk *chunk)
+static struct sctp_packet *sctp_ootb_pkt_new(
+ struct net *net,
+ const struct sctp_association *asoc,
+ const struct sctp_chunk *chunk)
{
- struct sctp_packet *packet;
struct sctp_transport *transport;
- __u16 sport;
- __u16 dport;
+ struct sctp_packet *packet;
+ __u16 sport, dport;
__u32 vtag;
/* Get the source and destination port from the inbound packet. */
@@ -6107,9 +6145,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
switch (chunk->chunk_hdr->type) {
case SCTP_CID_INIT_ACK:
{
- sctp_initack_chunk_t *initack;
+ struct sctp_initack_chunk *initack;
- initack = (sctp_initack_chunk_t *)chunk->chunk_hdr;
+ initack = (struct sctp_initack_chunk *)chunk->chunk_hdr;
vtag = ntohl(initack->init_hdr.init_tag);
break;
}
@@ -6168,7 +6206,7 @@ static void sctp_send_stale_cookie_err(struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands,
+ struct sctp_cmd_seq *commands,
struct sctp_chunk *err_chunk)
{
struct sctp_packet *packet;
@@ -6197,20 +6235,19 @@ static void sctp_send_stale_cookie_err(struct net *net,
/* Process a data chunk */
static int sctp_eat_data(const struct sctp_association *asoc,
struct sctp_chunk *chunk,
- sctp_cmd_seq_t *commands)
+ struct sctp_cmd_seq *commands)
{
- struct sctp_datahdr *data_hdr;
- struct sctp_chunk *err;
- size_t datalen;
- sctp_verb_t deliver;
- int tmp;
- __u32 tsn;
struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
- u16 ssn;
- u16 sid;
+ struct sctp_datahdr *data_hdr;
+ struct sctp_chunk *err;
+ enum sctp_verb deliver;
+ size_t datalen;
u8 ordered = 0;
+ u16 ssn, sid;
+ __u32 tsn;
+ int tmp;
data_hdr = (struct sctp_datahdr *)chunk->skb->data;
chunk->subh.data_hdr = data_hdr;
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 3e958c1..79b6bee 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -45,26 +45,27 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES];
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES];
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
- enum sctp_cid cid,
- sctp_state_t state);
+static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
+ struct net *net,
+ enum sctp_cid cid,
+ enum sctp_state state);
-static const sctp_sm_table_entry_t bug = {
+static const struct sctp_sm_table_entry bug = {
.fn = sctp_sf_bug,
.name = "sctp_sf_bug"
};
#define DO_LOOKUP(_max, _type, _table) \
({ \
- const sctp_sm_table_entry_t *rtn; \
+ const struct sctp_sm_table_entry *rtn; \
\
if ((event_subtype._type > (_max))) { \
pr_warn("table %p possible attack: event %d exceeds max %d\n", \
@@ -76,10 +77,11 @@ static const sctp_sm_table_entry_t bug = {
rtn; \
})
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
- sctp_event_t event_type,
- sctp_state_t state,
- sctp_subtype_t event_subtype)
+const struct sctp_sm_table_entry *sctp_sm_lookup_event(
+ struct net *net,
+ enum sctp_event event_type,
+ enum sctp_state state,
+ union sctp_subtype event_subtype)
{
switch (event_type) {
case SCTP_EVENT_T_CHUNK:
@@ -392,7 +394,8 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
*
* For base protocol (RFC 2960).
*/
-static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_DATA,
TYPE_SCTP_INIT,
TYPE_SCTP_INIT_ACK,
@@ -451,7 +454,8 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][
/* The primary index for this table is the chunk type.
* The secondary index for this table is the state.
*/
-static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_ASCONF,
TYPE_SCTP_ASCONF_ACK,
}; /*state_fn_t addip_chunk_event_table[][] */
@@ -478,7 +482,8 @@ static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_
/* The primary index for this table is the chunk type.
* The secondary index for this table is the state.
*/
-static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_FWD_TSN,
}; /*state_fn_t prsctp_chunk_event_table[][] */
@@ -504,7 +509,8 @@ static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUN
/* The primary index for this table is the chunk type.
* The secondary index for this table is the state.
*/
-static const sctp_sm_table_entry_t reconf_chunk_event_table[SCTP_NUM_RECONF_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+reconf_chunk_event_table[SCTP_NUM_RECONF_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_RECONF,
}; /*state_fn_t reconf_chunk_event_table[][] */
@@ -530,11 +536,12 @@ static const sctp_sm_table_entry_t reconf_chunk_event_table[SCTP_NUM_RECONF_CHUN
/* The primary index for this table is the chunk type.
* The secondary index for this table is the state.
*/
-static const sctp_sm_table_entry_t auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_AUTH,
}; /*state_fn_t auth_chunk_event_table[][] */
-static const sctp_sm_table_entry_t
+static const struct sctp_sm_table_entry
chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
/* SCTP_STATE_CLOSED */
TYPE_SCTP_FUNC(sctp_sf_ootb),
@@ -691,7 +698,8 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
/* The primary index for this table is the primitive type.
* The secondary index for this table is the state.
*/
-static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_PRIMITIVE_ASSOCIATE,
TYPE_SCTP_PRIMITIVE_SHUTDOWN,
TYPE_SCTP_PRIMITIVE_ABORT,
@@ -739,7 +747,8 @@ static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPE
TYPE_SCTP_FUNC(sctp_sf_ignore_other), \
}
-static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_OTHER_NO_PENDING_TSN,
TYPE_SCTP_OTHER_ICMP_PROTO_UNREACH,
};
@@ -953,7 +962,8 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_
TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
-static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
+static const struct sctp_sm_table_entry
+timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_EVENT_TIMEOUT_NONE,
TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE,
TYPE_SCTP_EVENT_TIMEOUT_T1_INIT,
@@ -967,9 +977,10 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
};
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
- enum sctp_cid cid,
- sctp_state_t state)
+static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
+ struct net *net,
+ enum sctp_cid cid,
+ enum sctp_state state)
{
if (state > SCTP_STATE_MAX)
return &bug;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 8d76086..1b00a1e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -100,8 +100,9 @@ static int sctp_send_asconf(struct sctp_association *asoc,
struct sctp_chunk *chunk);
static int sctp_do_bind(struct sock *, union sctp_addr *, int);
static int sctp_autobind(struct sock *sk);
-static void sctp_sock_migrate(struct sock *, struct sock *,
- struct sctp_association *, sctp_socket_type_t);
+static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
+ struct sctp_association *assoc,
+ enum sctp_socket_type type);
static unsigned long sctp_memory_pressure;
static atomic_long_t sctp_memory_allocated;
@@ -1055,7 +1056,7 @@ static int __sctp_connect(struct sock *sk,
struct sctp_association *asoc2;
struct sctp_transport *transport;
union sctp_addr to;
- sctp_scope_t scope;
+ enum sctp_scope scope;
long timeo;
int err = 0;
int addrcnt = 0;
@@ -1593,7 +1594,8 @@ static int sctp_error(struct sock *sk, int flags, int err)
*/
/* BUG: We do not implement the equivalent of sk_stream_wait_memory(). */
-static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
+static int sctp_msghdr_parse(const struct msghdr *msg,
+ struct sctp_cmsgs *cmsgs);
static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
{
@@ -1609,8 +1611,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
struct sctp_sndrcvinfo *sinfo;
struct sctp_initmsg *sinit;
sctp_assoc_t associd = 0;
- sctp_cmsgs_t cmsgs = { NULL };
- sctp_scope_t scope;
+ struct sctp_cmsgs cmsgs = { NULL };
+ enum sctp_scope scope;
bool fill_sinfo_ttl = false, wait_connect = false;
struct sctp_datamsg *datamsg;
int msg_flags = msg->msg_flags;
@@ -7444,10 +7446,10 @@ static int sctp_autobind(struct sock *sk)
* msg_control
* points here
*/
-static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
+static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs)
{
- struct cmsghdr *cmsg;
struct msghdr *my_msg = (struct msghdr *)msg;
+ struct cmsghdr *cmsg;
for_each_cmsghdr(cmsg, my_msg) {
if (!CMSG_OK(my_msg, cmsg))
@@ -8084,7 +8086,7 @@ static inline void sctp_copy_descendant(struct sock *sk_to,
*/
static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
struct sctp_association *assoc,
- sctp_socket_type_t type)
+ enum sctp_socket_type type)
{
struct sctp_sock *oldsp = sctp_sk(oldsk);
struct sctp_sock *newsp = sctp_sk(newsk);
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 0e732f6..ef7ca44 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -46,7 +46,7 @@ static int timer_max = 86400000; /* ms in one day */
static int int_max = INT_MAX;
static int sack_timer_min = 1;
static int sack_timer_max = 500;
-static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */
+static int addr_scope_max = SCTP_SCOPE_POLICY_MAX;
static int rwnd_scale_max = 16;
static int rto_alpha_min = 0;
static int rto_beta_min = 0;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 80a97c8..2d9bd37 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -490,7 +490,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
* detected.
*/
void sctp_transport_lower_cwnd(struct sctp_transport *transport,
- sctp_lower_cwnd_t reason)
+ enum sctp_lower_cwnd reason)
{
struct sctp_association *asoc = transport->asoc;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 5f86c50..67abc01 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -371,19 +371,19 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
struct sctp_chunk *chunk, __u16 flags,
gfp_t gfp)
{
- struct sctp_ulpevent *event;
struct sctp_remote_error *sre;
+ struct sctp_ulpevent *event;
+ struct sctp_errhdr *ch;
struct sk_buff *skb;
- sctp_errhdr_t *ch;
__be16 cause;
int elen;
- ch = (sctp_errhdr_t *)(chunk->skb->data);
+ ch = (struct sctp_errhdr *)(chunk->skb->data);
cause = ch->cause;
- elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
+ elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(*ch);
/* Pull off the ERROR header. */
- skb_pull(chunk->skb, sizeof(sctp_errhdr_t));
+ skb_pull(chunk->skb, sizeof(*ch));
/* Copy the skb to a new skb with room for us to prepend
* notification with.
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index 3395485..c717ef0 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -8,10 +8,6 @@ config SMC
The Linux implementation of the SMC-R solution is designed as
 a separate socket family, SMC.
- Warning: SMC will expose all memory for remote reads and writes
- once a connection is established. Don't enable this option except
- for tightly controlled lab environment.
-
 Select this option if you want to run SMC socket applications.
config SMC_DIAG
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6793d73..8c6d24b 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -338,6 +338,12 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
return SMC_CLC_DECL_INTERR;
smc_wr_remember_qp_attr(link);
+
+ rc = smc_wr_reg_send(link,
+ smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
+ if (rc)
+ return SMC_CLC_DECL_INTERR;
+
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link,
link->smcibdev->mac[link->ibport - 1],
@@ -430,12 +436,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
smc_conn_save_peer_info(smc, &aclc);
- rc = smc_sndbuf_create(smc);
- if (rc) {
- reason_code = SMC_CLC_DECL_MEM;
- goto decline_rdma_unlock;
- }
- rc = smc_rmb_create(smc);
+ /* create send buffer and rmb */
+ rc = smc_buf_create(smc);
if (rc) {
reason_code = SMC_CLC_DECL_MEM;
goto decline_rdma_unlock;
@@ -459,7 +461,20 @@ static int smc_connect_rdma(struct smc_sock *smc)
reason_code = SMC_CLC_DECL_INTERR;
goto decline_rdma_unlock;
}
+ } else {
+ struct smc_buf_desc *buf_desc = smc->conn.rmb_desc;
+
+ if (!buf_desc->reused) {
+ /* register memory region for new rmb */
+ rc = smc_wr_reg_send(link,
+ buf_desc->mr_rx[SMC_SINGLE_LINK]);
+ if (rc) {
+ reason_code = SMC_CLC_DECL_INTERR;
+ goto decline_rdma_unlock;
+ }
+ }
}
+ smc_rmb_sync_sg_for_device(&smc->conn);
rc = smc_clc_send_confirm(smc);
if (rc)
@@ -692,6 +707,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
int rc;
link = &lgr->lnk[SMC_SINGLE_LINK];
+
+ rc = smc_wr_reg_send(link,
+ smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
+ if (rc)
+ return SMC_CLC_DECL_INTERR;
+
/* send CONFIRM LINK request to client over the RoCE fabric */
rc = smc_llc_send_confirm_link(link,
link->smcibdev->mac[link->ibport - 1],
@@ -779,11 +800,6 @@ static void smc_listen_work(struct work_struct *work)
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
smcibdev, ibport, &pclc.lcl, 0);
- if (local_contact == SMC_REUSE_CONTACT)
- /* lock no longer needed, free it due to following
- * smc_clc_wait_msg() call
- */
- mutex_unlock(&smc_create_lgr_pending);
if (local_contact < 0) {
rc = local_contact;
if (rc == -ENOMEM)
@@ -794,12 +810,8 @@ static void smc_listen_work(struct work_struct *work)
}
link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
- rc = smc_sndbuf_create(new_smc);
- if (rc) {
- reason_code = SMC_CLC_DECL_MEM;
- goto decline_rdma;
- }
- rc = smc_rmb_create(new_smc);
+ /* create send buffer and rmb */
+ rc = smc_buf_create(new_smc);
if (rc) {
reason_code = SMC_CLC_DECL_MEM;
goto decline_rdma;
@@ -808,6 +820,21 @@ static void smc_listen_work(struct work_struct *work)
smc_close_init(new_smc);
smc_rx_init(new_smc);
+ if (local_contact != SMC_FIRST_CONTACT) {
+ struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc;
+
+ if (!buf_desc->reused) {
+ /* register memory region for new rmb */
+ rc = smc_wr_reg_send(link,
+ buf_desc->mr_rx[SMC_SINGLE_LINK]);
+ if (rc) {
+ reason_code = SMC_CLC_DECL_INTERR;
+ goto decline_rdma;
+ }
+ }
+ }
+ smc_rmb_sync_sg_for_device(&new_smc->conn);
+
rc = smc_clc_send_accept(new_smc, local_contact);
if (rc)
goto out_err;
@@ -853,8 +880,7 @@ out_connected:
if (newsmcsk->sk_state == SMC_INIT)
newsmcsk->sk_state = SMC_ACTIVE;
enqueue:
- if (local_contact == SMC_FIRST_CONTACT)
- mutex_unlock(&smc_create_lgr_pending);
+ mutex_unlock(&smc_create_lgr_pending);
lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
if (lsmc->sk.sk_state == SMC_LISTEN) {
smc_accept_enqueue(&lsmc->sk, newsmcsk);
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 03ec058..3934913 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -204,13 +204,13 @@ int smc_clc_send_confirm(struct smc_sock *smc)
memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
hton24(cclc.qpn, link->roce_qp->qp_num);
cclc.rmb_rkey =
- htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
+ htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
cclc.rmbe_alert_token = htonl(conn->alert_token_local);
cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
cclc.rmbe_size = conn->rmbe_size_short;
- cclc.rmb_dma_addr =
- cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
+ cclc.rmb_dma_addr = cpu_to_be64(
+ (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
hton24(cclc.psn, link->psn_initial);
memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
@@ -256,13 +256,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
hton24(aclc.qpn, link->roce_qp->qp_num);
aclc.rmb_rkey =
- htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]);
+ htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */
aclc.rmbe_alert_token = htonl(conn->alert_token_local);
aclc.qp_mtu = link->path_mtu;
 aclc.rmbe_size = conn->rmbe_size_short;
- aclc.rmb_dma_addr =
- cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]);
+ aclc.rmb_dma_addr = cpu_to_be64(
+ (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
hton24(aclc.psn, link->psn_initial);
memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3ac09a6..1a16d51 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -175,7 +175,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
rc = smc_wr_alloc_link_mem(lnk);
if (rc)
goto free_lgr;
- init_waitqueue_head(&lnk->wr_tx_wait);
rc = smc_ib_create_protection_domain(lnk);
if (rc)
goto free_link_mem;
@@ -207,17 +206,14 @@ out:
return rc;
}
-static void smc_sndbuf_unuse(struct smc_connection *conn)
+static void smc_buf_unuse(struct smc_connection *conn)
{
if (conn->sndbuf_desc) {
conn->sndbuf_desc->used = 0;
conn->sndbuf_size = 0;
}
-}
-
-static void smc_rmb_unuse(struct smc_connection *conn)
-{
if (conn->rmb_desc) {
+ conn->rmb_desc->reused = true;
conn->rmb_desc->used = 0;
conn->rmbe_size = 0;
}
@@ -232,8 +228,7 @@ void smc_conn_free(struct smc_connection *conn)
return;
smc_cdc_tx_dismiss_slots(conn);
smc_lgr_unregister_conn(conn);
- smc_rmb_unuse(conn);
- smc_sndbuf_unuse(conn);
+ smc_buf_unuse(conn);
}
static void smc_link_clear(struct smc_link *lnk)
@@ -246,48 +241,57 @@ static void smc_link_clear(struct smc_link *lnk)
smc_wr_free_link_mem(lnk);
}
-static void smc_lgr_free_sndbufs(struct smc_link_group *lgr)
+static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk,
+ bool is_rmb)
{
- struct smc_buf_desc *sndbuf_desc, *bf_desc;
- int i;
-
- for (i = 0; i < SMC_RMBE_SIZES; i++) {
- list_for_each_entry_safe(sndbuf_desc, bf_desc, &lgr->sndbufs[i],
- list) {
- list_del(&sndbuf_desc->list);
- smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- smc_uncompress_bufsize(i),
- sndbuf_desc, DMA_TO_DEVICE);
- kfree(sndbuf_desc->cpu_addr);
- kfree(sndbuf_desc);
- }
+ if (is_rmb) {
+ if (buf_desc->mr_rx[SMC_SINGLE_LINK])
+ smc_ib_put_memory_region(
+ buf_desc->mr_rx[SMC_SINGLE_LINK]);
+ smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
+ DMA_FROM_DEVICE);
+ } else {
+ smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
+ DMA_TO_DEVICE);
}
+ sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
+ if (buf_desc->cpu_addr)
+ free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order);
+ kfree(buf_desc);
}
-static void smc_lgr_free_rmbs(struct smc_link_group *lgr)
+static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
- struct smc_buf_desc *rmb_desc, *bf_desc;
struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ struct smc_buf_desc *buf_desc, *bf_desc;
+ struct list_head *buf_list;
int i;
for (i = 0; i < SMC_RMBE_SIZES; i++) {
- list_for_each_entry_safe(rmb_desc, bf_desc, &lgr->rmbs[i],
+ if (is_rmb)
+ buf_list = &lgr->rmbs[i];
+ else
+ buf_list = &lgr->sndbufs[i];
+ list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
list) {
- list_del(&rmb_desc->list);
- smc_ib_buf_unmap(lnk->smcibdev,
- smc_uncompress_bufsize(i),
- rmb_desc, DMA_FROM_DEVICE);
- kfree(rmb_desc->cpu_addr);
- kfree(rmb_desc);
+ list_del(&buf_desc->list);
+ smc_buf_free(buf_desc, lnk, is_rmb);
}
}
}
+static void smc_lgr_free_bufs(struct smc_link_group *lgr)
+{
+ /* free send buffers */
+ __smc_lgr_free_bufs(lgr, false);
+ /* free rmbs */
+ __smc_lgr_free_bufs(lgr, true);
+}
+
/* remove a link group */
void smc_lgr_free(struct smc_link_group *lgr)
{
- smc_lgr_free_rmbs(lgr);
- smc_lgr_free_sndbufs(lgr);
+ smc_lgr_free_bufs(lgr);
smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
kfree(lgr);
}
@@ -452,45 +456,25 @@ out:
return rc ? rc : local_contact;
}
-/* try to reuse a sndbuf description slot of the sndbufs list for a certain
- * buf_size; if not available, return NULL
+/* try to reuse a sndbuf or rmb description slot for a certain
+ * buffer size; if not available, return NULL
*/
static inline
-struct smc_buf_desc *smc_sndbuf_get_slot(struct smc_link_group *lgr,
- int compressed_bufsize)
+struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr,
+ int compressed_bufsize,
+ rwlock_t *lock,
+ struct list_head *buf_list)
{
- struct smc_buf_desc *sndbuf_slot;
-
- read_lock_bh(&lgr->sndbufs_lock);
- list_for_each_entry(sndbuf_slot, &lgr->sndbufs[compressed_bufsize],
- list) {
- if (cmpxchg(&sndbuf_slot->used, 0, 1) == 0) {
- read_unlock_bh(&lgr->sndbufs_lock);
- return sndbuf_slot;
- }
- }
- read_unlock_bh(&lgr->sndbufs_lock);
- return NULL;
-}
+ struct smc_buf_desc *buf_slot;
-/* try to reuse an rmb description slot of the rmbs list for a certain
- * rmbe_size; if not available, return NULL
- */
-static inline
-struct smc_buf_desc *smc_rmb_get_slot(struct smc_link_group *lgr,
- int compressed_bufsize)
-{
- struct smc_buf_desc *rmb_slot;
-
- read_lock_bh(&lgr->rmbs_lock);
- list_for_each_entry(rmb_slot, &lgr->rmbs[compressed_bufsize],
- list) {
- if (cmpxchg(&rmb_slot->used, 0, 1) == 0) {
- read_unlock_bh(&lgr->rmbs_lock);
- return rmb_slot;
+ read_lock_bh(lock);
+ list_for_each_entry(buf_slot, buf_list, list) {
+ if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
+ read_unlock_bh(lock);
+ return buf_slot;
}
}
- read_unlock_bh(&lgr->rmbs_lock);
+ read_unlock_bh(lock);
return NULL;
}
@@ -503,136 +487,186 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
-/* create the tx buffer for an SMC socket */
-int smc_sndbuf_create(struct smc_sock *smc)
+static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
+ bool is_rmb, int bufsize)
{
- struct smc_connection *conn = &smc->conn;
- struct smc_link_group *lgr = conn->lgr;
- int tmp_bufsize, tmp_bufsize_short;
- struct smc_buf_desc *sndbuf_desc;
+ struct smc_buf_desc *buf_desc;
+ struct smc_link *lnk;
int rc;
- /* use socket send buffer size (w/o overhead) as start value */
- for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2);
- tmp_bufsize_short >= 0; tmp_bufsize_short--) {
- tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short);
- /* check for reusable sndbuf_slot in the link group */
- sndbuf_desc = smc_sndbuf_get_slot(lgr, tmp_bufsize_short);
- if (sndbuf_desc) {
- memset(sndbuf_desc->cpu_addr, 0, tmp_bufsize);
- break; /* found reusable slot */
- }
- /* try to alloc a new send buffer */
- sndbuf_desc = kzalloc(sizeof(*sndbuf_desc), GFP_KERNEL);
- if (!sndbuf_desc)
- break; /* give up with -ENOMEM */
- sndbuf_desc->cpu_addr = kzalloc(tmp_bufsize,
- GFP_KERNEL | __GFP_NOWARN |
- __GFP_NOMEMALLOC |
- __GFP_NORETRY);
- if (!sndbuf_desc->cpu_addr) {
- kfree(sndbuf_desc);
- sndbuf_desc = NULL;
- /* if send buffer allocation has failed,
- * try a smaller one
- */
- continue;
- }
- rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- tmp_bufsize, sndbuf_desc,
- DMA_TO_DEVICE);
+ /* try to alloc a new buffer */
+ buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+ if (!buf_desc)
+ return ERR_PTR(-ENOMEM);
+
+ buf_desc->cpu_addr =
+ (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN |
+ __GFP_NOMEMALLOC |
+ __GFP_NORETRY | __GFP_ZERO,
+ get_order(bufsize));
+ if (!buf_desc->cpu_addr) {
+ kfree(buf_desc);
+ return ERR_PTR(-EAGAIN);
+ }
+ buf_desc->order = get_order(bufsize);
+
+ /* build the sg table from the pages */
+ lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
+ GFP_KERNEL);
+ if (rc) {
+ smc_buf_free(buf_desc, lnk, is_rmb);
+ return ERR_PTR(rc);
+ }
+ sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
+ buf_desc->cpu_addr, bufsize);
+
+ /* map sg table to DMA address */
+ rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
+ is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ /* SMC protocol depends on mapping to one DMA address only */
+ if (rc != 1) {
+ smc_buf_free(buf_desc, lnk, is_rmb);
+ return ERR_PTR(-EAGAIN);
+ }
+
+ /* create a new memory region for the RMB */
+ if (is_rmb) {
+ rc = smc_ib_get_memory_region(lnk->roce_pd,
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_LOCAL_WRITE,
+ buf_desc);
if (rc) {
- kfree(sndbuf_desc->cpu_addr);
- kfree(sndbuf_desc);
- sndbuf_desc = NULL;
- continue; /* if mapping failed, try smaller one */
+ smc_buf_free(buf_desc, lnk, is_rmb);
+ return ERR_PTR(rc);
}
- sndbuf_desc->used = 1;
- write_lock_bh(&lgr->sndbufs_lock);
- list_add(&sndbuf_desc->list,
- &lgr->sndbufs[tmp_bufsize_short]);
- write_unlock_bh(&lgr->sndbufs_lock);
- break;
- }
- if (sndbuf_desc && sndbuf_desc->cpu_addr) {
- conn->sndbuf_desc = sndbuf_desc;
- conn->sndbuf_size = tmp_bufsize;
- smc->sk.sk_sndbuf = tmp_bufsize * 2;
- atomic_set(&conn->sndbuf_space, tmp_bufsize);
- return 0;
- } else {
- return -ENOMEM;
}
+
+ return buf_desc;
}
-/* create the RMB for an SMC socket (even though the SMC protocol
- * allows more than one RMB-element per RMB, the Linux implementation
- * uses just one RMB-element per RMB, i.e. uses an extra RMB for every
- * connection in a link group
- */
-int smc_rmb_create(struct smc_sock *smc)
+static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
{
struct smc_connection *conn = &smc->conn;
struct smc_link_group *lgr = conn->lgr;
- int tmp_bufsize, tmp_bufsize_short;
- struct smc_buf_desc *rmb_desc;
- int rc;
+ struct smc_buf_desc *buf_desc = NULL;
+ struct list_head *buf_list;
+ int bufsize, bufsize_short;
+ int sk_buf_size;
+ rwlock_t *lock;
+
+ if (is_rmb)
+ /* use socket recv buffer size (w/o overhead) as start value */
+ sk_buf_size = smc->sk.sk_rcvbuf / 2;
+ else
+ /* use socket send buffer size (w/o overhead) as start value */
+ sk_buf_size = smc->sk.sk_sndbuf / 2;
+
+	for (bufsize_short = smc_compress_bufsize(sk_buf_size);
+ bufsize_short >= 0; bufsize_short--) {
+
+ if (is_rmb) {
+ lock = &lgr->rmbs_lock;
+ buf_list = &lgr->rmbs[bufsize_short];
+ } else {
+ lock = &lgr->sndbufs_lock;
+ buf_list = &lgr->sndbufs[bufsize_short];
+ }
+ bufsize = smc_uncompress_bufsize(bufsize_short);
+ if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
+ continue;
- /* use socket recv buffer size (w/o overhead) as start value */
- for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_rcvbuf / 2);
- tmp_bufsize_short >= 0; tmp_bufsize_short--) {
- tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short);
- /* check for reusable rmb_slot in the link group */
- rmb_desc = smc_rmb_get_slot(lgr, tmp_bufsize_short);
- if (rmb_desc) {
- memset(rmb_desc->cpu_addr, 0, tmp_bufsize);
+ /* check for reusable slot in the link group */
+ buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list);
+ if (buf_desc) {
+ memset(buf_desc->cpu_addr, 0, bufsize);
break; /* found reusable slot */
}
- /* try to alloc a new RMB */
- rmb_desc = kzalloc(sizeof(*rmb_desc), GFP_KERNEL);
- if (!rmb_desc)
- break; /* give up with -ENOMEM */
- rmb_desc->cpu_addr = kzalloc(tmp_bufsize,
- GFP_KERNEL | __GFP_NOWARN |
- __GFP_NOMEMALLOC |
- __GFP_NORETRY);
- if (!rmb_desc->cpu_addr) {
- kfree(rmb_desc);
- rmb_desc = NULL;
- /* if RMB allocation has failed,
- * try a smaller one
- */
+
+ buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
+ if (PTR_ERR(buf_desc) == -ENOMEM)
+ break;
+ if (IS_ERR(buf_desc))
continue;
- }
- rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- tmp_bufsize, rmb_desc,
- DMA_FROM_DEVICE);
- if (rc) {
- kfree(rmb_desc->cpu_addr);
- kfree(rmb_desc);
- rmb_desc = NULL;
- continue; /* if mapping failed, try smaller one */
- }
- rmb_desc->rkey[SMC_SINGLE_LINK] =
- lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey;
- rmb_desc->used = 1;
- write_lock_bh(&lgr->rmbs_lock);
- list_add(&rmb_desc->list,
- &lgr->rmbs[tmp_bufsize_short]);
- write_unlock_bh(&lgr->rmbs_lock);
- break;
+
+ buf_desc->used = 1;
+ write_lock_bh(lock);
+ list_add(&buf_desc->list, buf_list);
+ write_unlock_bh(lock);
+ break; /* found */
}
- if (rmb_desc && rmb_desc->cpu_addr) {
- conn->rmb_desc = rmb_desc;
- conn->rmbe_size = tmp_bufsize;
- conn->rmbe_size_short = tmp_bufsize_short;
- smc->sk.sk_rcvbuf = tmp_bufsize * 2;
+
+ if (IS_ERR(buf_desc))
+ return -ENOMEM;
+
+ if (is_rmb) {
+ conn->rmb_desc = buf_desc;
+ conn->rmbe_size = bufsize;
+ conn->rmbe_size_short = bufsize_short;
+ smc->sk.sk_rcvbuf = bufsize * 2;
atomic_set(&conn->bytes_to_rcv, 0);
- conn->rmbe_update_limit = smc_rmb_wnd_update_limit(tmp_bufsize);
- return 0;
+ conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
} else {
- return -ENOMEM;
+ conn->sndbuf_desc = buf_desc;
+ conn->sndbuf_size = bufsize;
+ smc->sk.sk_sndbuf = bufsize * 2;
+ atomic_set(&conn->sndbuf_space, bufsize);
}
+ return 0;
+}
+
+void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
+{
+ struct smc_link_group *lgr = conn->lgr;
+
+ smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+ conn->sndbuf_desc, DMA_TO_DEVICE);
+}
+
+void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
+{
+ struct smc_link_group *lgr = conn->lgr;
+
+ smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+ conn->sndbuf_desc, DMA_TO_DEVICE);
+}
+
+void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
+{
+ struct smc_link_group *lgr = conn->lgr;
+
+ smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+ conn->rmb_desc, DMA_FROM_DEVICE);
+}
+
+void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
+{
+ struct smc_link_group *lgr = conn->lgr;
+
+ smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+ conn->rmb_desc, DMA_FROM_DEVICE);
+}
+
+/* create the send and receive buffers for an SMC socket;
+ * receive buffers are called RMBs
+ * (even though the SMC protocol allows more than one RMB-element per RMB,
+ * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
+ * extra RMB for every connection in a link group)
+ */
+int smc_buf_create(struct smc_sock *smc)
+{
+ int rc;
+
+ /* create send buffer */
+ rc = __smc_buf_create(smc, false);
+ if (rc)
+ return rc;
+ /* create rmb */
+ rc = __smc_buf_create(smc, true);
+ if (rc)
+ smc_buf_free(smc->conn.sndbuf_desc,
+ &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false);
+ return rc;
}
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index b013cb4..19c44bf 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -37,6 +37,14 @@ struct smc_wr_buf {
u8 raw[SMC_WR_BUF_SIZE];
};
+#define SMC_WR_REG_MR_WAIT_TIME (5 * HZ) /* wait time for ib_wr_reg_mr result */
+
+enum smc_wr_reg_state {
+ POSTED, /* ib_wr_reg_mr request posted */
+ CONFIRMED, /* ib_wr_reg_mr response: successful */
+ FAILED /* ib_wr_reg_mr response: failure */
+};
+
struct smc_link {
struct smc_ib_device *smcibdev; /* ib-device */
u8 ibport; /* port - values 1 | 2 */
@@ -65,6 +73,10 @@ struct smc_link {
u64 wr_rx_id; /* seq # of last recv WR */
u32 wr_rx_cnt; /* number of WR recv buffers */
+ struct ib_reg_wr wr_reg; /* WR register memory region */
+ wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
+ enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */
+
union ib_gid gid; /* gid matching used vlan id */
u32 peer_qpn; /* QP number of peer */
enum ib_mtu path_mtu; /* used mtu */
@@ -90,14 +102,15 @@ struct smc_link {
/* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */
struct smc_buf_desc {
struct list_head list;
- u64 dma_addr[SMC_LINKS_PER_LGR_MAX];
- /* mapped address of buffer */
void *cpu_addr; /* virtual address of buffer */
- u32 rkey[SMC_LINKS_PER_LGR_MAX];
- /* for rmb only:
- * rkey provided to peer
+ struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
+ struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
+ /* for rmb only: memory region
+ * incl. rkey provided to peer
*/
+ u32 order; /* allocation order */
u32 used; /* currently used / unused */
+ bool reused; /* new created / reused */
};
struct smc_rtoken { /* address/key of remote RMB */
@@ -173,9 +186,11 @@ struct smc_clc_msg_accept_confirm;
void smc_lgr_free(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
-int smc_sndbuf_create(struct smc_sock *smc);
-int smc_rmb_create(struct smc_sock *smc);
+int smc_buf_create(struct smc_sock *smc);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
struct smc_clc_msg_accept_confirm *clc);
-
+void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
+void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
+void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
+void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
#endif
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index b317155..547e0e1 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -13,6 +13,7 @@
#include <linux/random.h>
#include <linux/workqueue.h>
+#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>
#include "smc_pnet.h"
@@ -192,8 +193,7 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
{
int rc;
- lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev,
- IB_PD_UNSAFE_GLOBAL_RKEY);
+ lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0);
rc = PTR_ERR_OR_ZERO(lnk->roce_pd);
if (IS_ERR(lnk->roce_pd))
lnk->roce_pd = NULL;
@@ -232,10 +232,10 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
.recv_cq = lnk->smcibdev->roce_cq_recv,
.srq = NULL,
.cap = {
- .max_send_wr = SMC_WR_BUF_CNT,
/* include unsolicited rdma_writes as well,
* there are max. 2 RDMA_WRITE per 1 WR_SEND
*/
+ .max_send_wr = SMC_WR_BUF_CNT * 3,
.max_recv_wr = SMC_WR_BUF_CNT * 3,
.max_send_sge = SMC_IB_MAX_SEND_SGE,
.max_recv_sge = 1,
@@ -254,33 +254,117 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
return rc;
}
-/* map a new TX or RX buffer to DMA */
-int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
- struct smc_buf_desc *buf_slot,
- enum dma_data_direction data_direction)
+void smc_ib_put_memory_region(struct ib_mr *mr)
{
- int rc = 0;
+ ib_dereg_mr(mr);
+}
- if (buf_slot->dma_addr[SMC_SINGLE_LINK])
- return rc; /* already mapped */
- buf_slot->dma_addr[SMC_SINGLE_LINK] =
- ib_dma_map_single(smcibdev->ibdev, buf_slot->cpu_addr,
- buf_size, data_direction);
- if (ib_dma_mapping_error(smcibdev->ibdev,
- buf_slot->dma_addr[SMC_SINGLE_LINK]))
- rc = -EIO;
- return rc;
+static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
+{
+ unsigned int offset = 0;
+ int sg_num;
+
+ /* map the largest prefix of a dma mapped SG list */
+ sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
+ buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+ buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+ &offset, PAGE_SIZE);
+
+ return sg_num;
+}
+
+/* Allocate a memory region and map the dma mapped SG list of buf_slot */
+int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+ struct smc_buf_desc *buf_slot)
+{
+ if (buf_slot->mr_rx[SMC_SINGLE_LINK])
+ return 0; /* already done */
+
+ buf_slot->mr_rx[SMC_SINGLE_LINK] =
+ ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
+ if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) {
+ int rc;
+
+ rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]);
+ buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
+ return rc;
+ }
+
+ if (smc_ib_map_mr_sg(buf_slot) != 1)
+ return -EINVAL;
+
+ return 0;
}
-void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int buf_size,
+/* synchronize buffer usage for cpu access */
+void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+ struct smc_buf_desc *buf_slot,
+ enum dma_data_direction data_direction)
+{
+ struct scatterlist *sg;
+ unsigned int i;
+
+ /* for now there is just one DMA address */
+ for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
+ buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+ if (!sg_dma_len(sg))
+ break;
+ ib_dma_sync_single_for_cpu(smcibdev->ibdev,
+ sg_dma_address(sg),
+ sg_dma_len(sg),
+ data_direction);
+ }
+}
+
+/* synchronize buffer usage for device access */
+void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+ struct smc_buf_desc *buf_slot,
+ enum dma_data_direction data_direction)
+{
+ struct scatterlist *sg;
+ unsigned int i;
+
+ /* for now there is just one DMA address */
+ for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
+ buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+ if (!sg_dma_len(sg))
+ break;
+ ib_dma_sync_single_for_device(smcibdev->ibdev,
+ sg_dma_address(sg),
+ sg_dma_len(sg),
+ data_direction);
+ }
+}
+
+/* Map a new TX or RX buffer SG-table to DMA */
+int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
- if (!buf_slot->dma_addr[SMC_SINGLE_LINK])
+ int mapped_nents;
+
+ mapped_nents = ib_dma_map_sg(smcibdev->ibdev,
+ buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+ buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+ data_direction);
+ if (!mapped_nents)
+ return -ENOMEM;
+
+ return mapped_nents;
+}
+
+void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+ struct smc_buf_desc *buf_slot,
+ enum dma_data_direction data_direction)
+{
+ if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address)
return; /* already unmapped */
- ib_dma_unmap_single(smcibdev->ibdev, *buf_slot->dma_addr, buf_size,
- data_direction);
- buf_slot->dma_addr[SMC_SINGLE_LINK] = 0;
+
+ ib_dma_unmap_sg(smcibdev->ibdev,
+ buf_slot->sgt[SMC_SINGLE_LINK].sgl,
+ buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+ data_direction);
+ buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
}
static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index b567152..9b927a3 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -51,12 +51,12 @@ int smc_ib_register_client(void) __init;
void smc_ib_unregister_client(void);
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport);
-int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size,
- struct smc_buf_desc *buf_slot,
- enum dma_data_direction data_direction);
-void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int bufsize,
+int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
+void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+ struct smc_buf_desc *buf_slot,
+ enum dma_data_direction data_direction);
void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
int smc_ib_create_protection_domain(struct smc_link *lnk);
void smc_ib_destroy_queue_pair(struct smc_link *lnk);
@@ -65,6 +65,13 @@ int smc_ib_ready_link(struct smc_link *lnk);
int smc_ib_modify_qp_rts(struct smc_link *lnk);
int smc_ib_modify_qp_reset(struct smc_link *lnk);
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
-
-
+int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
+ struct smc_buf_desc *buf_slot);
+void smc_ib_put_memory_region(struct ib_mr *mr);
+void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+ struct smc_buf_desc *buf_slot,
+ enum dma_data_direction data_direction);
+void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+ struct smc_buf_desc *buf_slot,
+ enum dma_data_direction data_direction);
#endif
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index f0c8b08..b17a333 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -170,6 +170,7 @@ copy:
copylen, conn->rmbe_size - cons.count);
chunk_len_sum = chunk_len;
chunk_off = cons.count;
+ smc_rmb_sync_sg_for_cpu(conn);
for (chunk = 0; chunk < 2; chunk++) {
if (!(flags & MSG_TRUNC)) {
rc = memcpy_to_msg(msg, rcvbuf_base + chunk_off,
@@ -177,6 +178,7 @@ copy:
if (rc) {
if (!read_done)
read_done = -EFAULT;
+ smc_rmb_sync_sg_for_device(conn);
goto out;
}
}
@@ -190,6 +192,7 @@ copy:
chunk_len_sum += chunk_len;
chunk_off = 0; /* modulo offset in recv ring buffer */
}
+ smc_rmb_sync_sg_for_device(conn);
/* update cursors */
if (!(flags & MSG_PEEK)) {
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 21ec183..3c656be 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -174,10 +174,12 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
copylen, conn->sndbuf_size - tx_cnt_prep);
chunk_len_sum = chunk_len;
chunk_off = tx_cnt_prep;
+ smc_sndbuf_sync_sg_for_cpu(conn);
for (chunk = 0; chunk < 2; chunk++) {
rc = memcpy_from_msg(sndbuf_base + chunk_off,
msg, chunk_len);
if (rc) {
+ smc_sndbuf_sync_sg_for_device(conn);
if (send_done)
return send_done;
goto out_err;
@@ -192,6 +194,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
chunk_len_sum += chunk_len;
chunk_off = 0; /* modulo offset in send ring buffer */
}
+ smc_sndbuf_sync_sg_for_device(conn);
/* update cursors */
smc_curs_add(conn->sndbuf_size, &prep, copylen);
smc_curs_write(&conn->tx_curs_prep,
@@ -277,6 +280,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
struct smc_link_group *lgr = conn->lgr;
int to_send, rmbespace;
struct smc_link *link;
+ dma_addr_t dma_addr;
int num_sges;
int rc;
@@ -334,12 +338,11 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
src_len = conn->sndbuf_size - sent.count;
}
src_len_sum = src_len;
+ dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
num_sges = 0;
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
- sges[srcchunk].addr =
- conn->sndbuf_desc->dma_addr[SMC_SINGLE_LINK] +
- src_off;
+ sges[srcchunk].addr = dma_addr + src_off;
sges[srcchunk].length = src_len;
sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
num_sges++;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 874ee9f..ab56bda 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -68,6 +68,16 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
int i;
link = wc->qp->qp_context;
+
+ if (wc->opcode == IB_WC_REG_MR) {
+ if (wc->status)
+ link->wr_reg_state = FAILED;
+ else
+ link->wr_reg_state = CONFIRMED;
+ wake_up(&link->wr_reg_wait);
+ return;
+ }
+
pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
if (pnd_snd_idx == link->wr_tx_cnt)
return;
@@ -243,6 +253,52 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
return rc;
}
+/* Register a memory region and wait for result. */
+int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
+{
+ struct ib_send_wr *failed_wr = NULL;
+ int rc;
+
+ ib_req_notify_cq(link->smcibdev->roce_cq_send,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+ link->wr_reg_state = POSTED;
+ link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
+ link->wr_reg.mr = mr;
+ link->wr_reg.key = mr->rkey;
+ failed_wr = &link->wr_reg.wr;
+ rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr);
+ WARN_ON(failed_wr != &link->wr_reg.wr);
+ if (rc)
+ return rc;
+
+ rc = wait_event_interruptible_timeout(link->wr_reg_wait,
+ (link->wr_reg_state != POSTED),
+ SMC_WR_REG_MR_WAIT_TIME);
+ if (!rc) {
+ /* timeout - terminate connections */
+ struct smc_link_group *lgr;
+
+ lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+ smc_lgr_terminate(lgr);
+ return -EPIPE;
+ }
+ if (rc == -ERESTARTSYS)
+ return -EINTR;
+ switch (link->wr_reg_state) {
+ case CONFIRMED:
+ rc = 0;
+ break;
+ case FAILED:
+ rc = -EIO;
+ break;
+ case POSTED:
+ rc = -EPIPE;
+ break;
+ }
+ return rc;
+}
+
void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
@@ -458,6 +514,11 @@ static void smc_wr_init_sge(struct smc_link *lnk)
lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
lnk->wr_rx_ibs[i].num_sge = 1;
}
+ lnk->wr_reg.wr.next = NULL;
+ lnk->wr_reg.wr.num_sge = 0;
+ lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
+ lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
+ lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
}
void smc_wr_free_link(struct smc_link *lnk)
@@ -602,6 +663,8 @@ int smc_wr_create_link(struct smc_link *lnk)
smc_wr_init_sge(lnk);
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
+ init_waitqueue_head(&lnk->wr_tx_wait);
+ init_waitqueue_head(&lnk->wr_reg_wait);
return rc;
dma_unmap:
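
A caller registering a buffer's memory region would combine the new helpers roughly as follows (a sketch; the mr_rx field name and the surrounding error handling are assumptions of this example, real callers live in the link-group code):

    rc = smc_ib_get_memory_region(lnk->roce_pd,
                                  IB_ACCESS_LOCAL_WRITE |
                                  IB_ACCESS_REMOTE_WRITE,
                                  buf_slot);
    if (rc)
        return rc;
    /* posts IB_WR_REG_MR and sleeps until the CQE arrives:
     * 0 on CONFIRMED, -EIO on FAILED, -EINTR if interrupted,
     * -EPIPE on timeout (the link group is terminated first)
     */
    rc = smc_wr_reg_send(lnk, buf_slot->mr_rx[SMC_SINGLE_LINK]);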
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 0b9beed..45eb538 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -102,5 +102,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
int smc_wr_rx_post_init(struct smc_link *link);
void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
+int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr);
#endif /* SMC_WR_H */
diff --git a/net/socket.c b/net/socket.c
index ad22df1..c729625 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -652,6 +652,20 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL(kernel_sendmsg);
+int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
+ struct kvec *vec, size_t num, size_t size)
+{
+ struct socket *sock = sk->sk_socket;
+
+ if (!sock->ops->sendmsg_locked)
+ return sock_no_sendmsg_locked(sk, msg, size);
+
+ iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+
+ return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
+}
+EXPORT_SYMBOL(kernel_sendmsg_locked);
+
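
Unlike kernel_sendmsg(), this variant expects the caller to already hold the socket lock. A hypothetical in-kernel user would drive it like this sketch:

    struct kvec iov = { .iov_base = buf, .iov_len = len };
    struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
    int ret;

    lock_sock(sk);
    ret = kernel_sendmsg_locked(sk, &msg, &iov, 1, len);
    release_sock(sk);

Protocols that don't provide a sendmsg_locked operation fall back to sock_no_sendmsg_locked(), which fails cleanly rather than deadlocking on the already-held lock.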
static bool skb_is_err_queue(const struct sk_buff *skb)
{
/* pkt_type of skbs enqueued on the error queue are set to
@@ -3376,6 +3390,19 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
}
EXPORT_SYMBOL(kernel_sendpage);
+int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
+{
+ struct socket *sock = sk->sk_socket;
+
+ if (sock->ops->sendpage_locked)
+ return sock->ops->sendpage_locked(sk, page, offset, size,
+ flags);
+
+ return sock_no_sendpage_locked(sk, page, offset, size, flags);
+}
+EXPORT_SYMBOL(kernel_sendpage_locked);
+
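
kernel_sendpage_locked() follows the same contract for page-based sends: the socket lock is held by the caller and the protocol's sendpage_locked operation is used when one is provided, e.g.:

    lock_sock(sk);
    ret = kernel_sendpage_locked(sk, page, offset, size, MSG_DONTWAIT);
    release_sock(sk);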
int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
{
mm_segment_t oldfs = get_fs();
@@ -3405,7 +3432,6 @@ u32 kernel_sock_ip_overhead(struct sock *sk)
struct inet_sock *inet;
struct ip_options_rcu *opt;
u32 overhead = 0;
- bool owned_by_user;
#if IS_ENABLED(CONFIG_IPV6)
struct ipv6_pinfo *np;
struct ipv6_txoptions *optv6 = NULL;
@@ -3414,13 +3440,12 @@ u32 kernel_sock_ip_overhead(struct sock *sk)
if (!sk)
return overhead;
- owned_by_user = sock_owned_by_user(sk);
switch (sk->sk_family) {
case AF_INET:
inet = inet_sk(sk);
overhead += sizeof(struct iphdr);
opt = rcu_dereference_protected(inet->inet_opt,
- owned_by_user);
+ sock_owned_by_user(sk));
if (opt)
overhead += opt->opt.optlen;
return overhead;
@@ -3430,7 +3455,7 @@ u32 kernel_sock_ip_overhead(struct sock *sk)
overhead += sizeof(struct ipv6hdr);
if (np)
optv6 = rcu_dereference_protected(np->opt,
- owned_by_user);
+ sock_owned_by_user(sk));
if (optv6)
overhead += (optv6->opt_flen + optv6->opt_nflen);
return overhead;
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index b5c279b..d4ea46a 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -29,44 +29,46 @@
static struct workqueue_struct *strp_wq;
-struct _strp_rx_msg {
- /* Internal cb structure. struct strp_rx_msg must be first for passing
+struct _strp_msg {
+ /* Internal cb structure. struct strp_msg must be first for passing
* to upper layer.
*/
- struct strp_rx_msg strp;
+ struct strp_msg strp;
int accum_len;
int early_eaten;
};
-static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb)
+static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
{
- return (struct _strp_rx_msg *)((void *)skb->cb +
+ return (struct _strp_msg *)((void *)skb->cb +
offsetof(struct qdisc_skb_cb, data));
}
/* Lower lock held */
-static void strp_abort_rx_strp(struct strparser *strp, int err)
+static void strp_abort_strp(struct strparser *strp, int err)
{
- struct sock *csk = strp->sk;
-
/* Unrecoverable error in receive */
- del_timer(&strp->rx_msg_timer);
+ del_timer(&strp->msg_timer);
- if (strp->rx_stopped)
+ if (strp->stopped)
return;
- strp->rx_stopped = 1;
+ strp->stopped = 1;
+
+ if (strp->sk) {
+ struct sock *sk = strp->sk;
- /* Report an error on the lower socket */
- csk->sk_err = err;
- csk->sk_error_report(csk);
+ /* Report an error on the lower socket */
+ sk->sk_err = err;
+ sk->sk_error_report(sk);
+ }
}
-static void strp_start_rx_timer(struct strparser *strp)
+static void strp_start_timer(struct strparser *strp, long timeo)
{
- if (strp->sk->sk_rcvtimeo)
- mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo);
+ if (timeo)
+ mod_timer(&strp->msg_timer, timeo);
}
/* Lower lock held */
@@ -74,46 +76,55 @@ static void strp_parser_err(struct strparser *strp, int err,
read_descriptor_t *desc)
{
desc->error = err;
- kfree_skb(strp->rx_skb_head);
- strp->rx_skb_head = NULL;
+ kfree_skb(strp->skb_head);
+ strp->skb_head = NULL;
strp->cb.abort_parser(strp, err);
}
static inline int strp_peek_len(struct strparser *strp)
{
- struct socket *sock = strp->sk->sk_socket;
+ if (strp->sk) {
+ struct socket *sock = strp->sk->sk_socket;
+
+ return sock->ops->peek_len(sock);
+ }
- return sock->ops->peek_len(sock);
+ /* If we don't have an associated socket, there's nothing to peek.
+ * Return INT_MAX to avoid stopping the strparser.
+ */
+
+ return INT_MAX;
}
/* Lower socket lock held */
-static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
- unsigned int orig_offset, size_t orig_len)
+static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+ unsigned int orig_offset, size_t orig_len,
+ size_t max_msg_size, long timeo)
{
struct strparser *strp = (struct strparser *)desc->arg.data;
- struct _strp_rx_msg *rxm;
+ struct _strp_msg *stm;
struct sk_buff *head, *skb;
size_t eaten = 0, cand_len;
ssize_t extra;
int err;
bool cloned_orig = false;
- if (strp->rx_paused)
+ if (strp->paused)
return 0;
- head = strp->rx_skb_head;
+ head = strp->skb_head;
if (head) {
/* Message already in progress */
- rxm = _strp_rx_msg(head);
- if (unlikely(rxm->early_eaten)) {
+ stm = _strp_msg(head);
+ if (unlikely(stm->early_eaten)) {
/* Already some number of bytes on the receive sock
- * data saved in rx_skb_head, just indicate they
+ * data saved in skb_head, just indicate they
* are consumed.
*/
- eaten = orig_len <= rxm->early_eaten ?
- orig_len : rxm->early_eaten;
- rxm->early_eaten -= eaten;
+ eaten = orig_len <= stm->early_eaten ?
+ orig_len : stm->early_eaten;
+ stm->early_eaten -= eaten;
return eaten;
}
@@ -126,12 +137,12 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
*/
orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
if (!orig_skb) {
- STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ STRP_STATS_INCR(strp->stats.mem_fail);
desc->error = -ENOMEM;
return 0;
}
if (!pskb_pull(orig_skb, orig_offset)) {
- STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ STRP_STATS_INCR(strp->stats.mem_fail);
kfree_skb(orig_skb);
desc->error = -ENOMEM;
return 0;
@@ -140,13 +151,13 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
orig_offset = 0;
}
- if (!strp->rx_skb_nextp) {
+ if (!strp->skb_nextp) {
/* We are going to append to the frags_list of head.
* Need to unshare the frag_list.
*/
err = skb_unclone(head, GFP_ATOMIC);
if (err) {
- STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ STRP_STATS_INCR(strp->stats.mem_fail);
desc->error = err;
return 0;
}
@@ -165,20 +176,20 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
skb = alloc_skb(0, GFP_ATOMIC);
if (!skb) {
- STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ STRP_STATS_INCR(strp->stats.mem_fail);
desc->error = -ENOMEM;
return 0;
}
skb->len = head->len;
skb->data_len = head->len;
skb->truesize = head->truesize;
- *_strp_rx_msg(skb) = *_strp_rx_msg(head);
- strp->rx_skb_nextp = &head->next;
+ *_strp_msg(skb) = *_strp_msg(head);
+ strp->skb_nextp = &head->next;
skb_shinfo(skb)->frag_list = head;
- strp->rx_skb_head = skb;
+ strp->skb_head = skb;
head = skb;
} else {
- strp->rx_skb_nextp =
+ strp->skb_nextp =
&skb_shinfo(head)->frag_list;
}
}
@@ -188,112 +199,112 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
/* Always clone since we will consume something */
skb = skb_clone(orig_skb, GFP_ATOMIC);
if (!skb) {
- STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ STRP_STATS_INCR(strp->stats.mem_fail);
desc->error = -ENOMEM;
break;
}
cand_len = orig_len - eaten;
- head = strp->rx_skb_head;
+ head = strp->skb_head;
if (!head) {
head = skb;
- strp->rx_skb_head = head;
- /* Will set rx_skb_nextp on next packet if needed */
- strp->rx_skb_nextp = NULL;
- rxm = _strp_rx_msg(head);
- memset(rxm, 0, sizeof(*rxm));
- rxm->strp.offset = orig_offset + eaten;
+ strp->skb_head = head;
+ /* Will set skb_nextp on next packet if needed */
+ strp->skb_nextp = NULL;
+ stm = _strp_msg(head);
+ memset(stm, 0, sizeof(*stm));
+ stm->strp.offset = orig_offset + eaten;
} else {
/* Unclone since we may be appending to an skb that we
* already share a frag_list with.
*/
err = skb_unclone(skb, GFP_ATOMIC);
if (err) {
- STRP_STATS_INCR(strp->stats.rx_mem_fail);
+ STRP_STATS_INCR(strp->stats.mem_fail);
desc->error = err;
break;
}
- rxm = _strp_rx_msg(head);
- *strp->rx_skb_nextp = skb;
- strp->rx_skb_nextp = &skb->next;
+ stm = _strp_msg(head);
+ *strp->skb_nextp = skb;
+ strp->skb_nextp = &skb->next;
head->data_len += skb->len;
head->len += skb->len;
head->truesize += skb->truesize;
}
- if (!rxm->strp.full_len) {
+ if (!stm->strp.full_len) {
ssize_t len;
len = (*strp->cb.parse_msg)(strp, head);
if (!len) {
/* Need more header to determine length */
- if (!rxm->accum_len) {
+ if (!stm->accum_len) {
/* Start RX timer for new message */
- strp_start_rx_timer(strp);
+ strp_start_timer(strp, timeo);
}
- rxm->accum_len += cand_len;
+ stm->accum_len += cand_len;
eaten += cand_len;
- STRP_STATS_INCR(strp->stats.rx_need_more_hdr);
+ STRP_STATS_INCR(strp->stats.need_more_hdr);
WARN_ON(eaten != orig_len);
break;
} else if (len < 0) {
- if (len == -ESTRPIPE && rxm->accum_len) {
+ if (len == -ESTRPIPE && stm->accum_len) {
len = -ENODATA;
- strp->rx_unrecov_intr = 1;
+ strp->unrecov_intr = 1;
} else {
- strp->rx_interrupted = 1;
+ strp->interrupted = 1;
}
strp_parser_err(strp, len, desc);
break;
- } else if (len > strp->sk->sk_rcvbuf) {
+ } else if (len > max_msg_size) {
/* Message length exceeds maximum allowed */
- STRP_STATS_INCR(strp->stats.rx_msg_too_big);
+ STRP_STATS_INCR(strp->stats.msg_too_big);
strp_parser_err(strp, -EMSGSIZE, desc);
break;
} else if (len <= (ssize_t)head->len -
- skb->len - rxm->strp.offset) {
+ skb->len - stm->strp.offset) {
/* Length must be into new skb (and also
* greater than zero)
*/
- STRP_STATS_INCR(strp->stats.rx_bad_hdr_len);
+ STRP_STATS_INCR(strp->stats.bad_hdr_len);
strp_parser_err(strp, -EPROTO, desc);
break;
}
- rxm->strp.full_len = len;
+ stm->strp.full_len = len;
}
- extra = (ssize_t)(rxm->accum_len + cand_len) -
- rxm->strp.full_len;
+ extra = (ssize_t)(stm->accum_len + cand_len) -
+ stm->strp.full_len;
if (extra < 0) {
/* Message not complete yet. */
- if (rxm->strp.full_len - rxm->accum_len >
+ if (stm->strp.full_len - stm->accum_len >
strp_peek_len(strp)) {
- /* Don't have the whole messages in the socket
- * buffer. Set strp->rx_need_bytes to wait for
+ /* Don't have the whole message in the socket
+ * buffer. Set strp->need_bytes to wait for
* the rest of the message. Also, set "early
* eaten" since we've already buffered the skb
* but don't consume yet per strp_read_sock.
*/
- if (!rxm->accum_len) {
+ if (!stm->accum_len) {
/* Start RX timer for new message */
- strp_start_rx_timer(strp);
+ strp_start_timer(strp, timeo);
}
- strp->rx_need_bytes = rxm->strp.full_len -
- rxm->accum_len;
- rxm->accum_len += cand_len;
- rxm->early_eaten = cand_len;
- STRP_STATS_ADD(strp->stats.rx_bytes, cand_len);
+ strp->need_bytes = stm->strp.full_len -
+ stm->accum_len;
+ stm->accum_len += cand_len;
+ stm->early_eaten = cand_len;
+ STRP_STATS_ADD(strp->stats.bytes, cand_len);
desc->count = 0; /* Stop reading socket */
break;
}
- rxm->accum_len += cand_len;
+ stm->accum_len += cand_len;
eaten += cand_len;
WARN_ON(eaten != orig_len);
break;
@@ -308,14 +319,14 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
eaten += (cand_len - extra);
/* Hurray, we have a new message! */
- del_timer(&strp->rx_msg_timer);
- strp->rx_skb_head = NULL;
- STRP_STATS_INCR(strp->stats.rx_msgs);
+ del_timer(&strp->msg_timer);
+ strp->skb_head = NULL;
+ STRP_STATS_INCR(strp->stats.msgs);
/* Give skb to upper layer */
strp->cb.rcv_msg(strp, head);
- if (unlikely(strp->rx_paused)) {
+ if (unlikely(strp->paused)) {
/* Upper layer paused strp */
break;
}
@@ -324,11 +335,33 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
if (cloned_orig)
kfree_skb(orig_skb);
- STRP_STATS_ADD(strp->stats.rx_bytes, eaten);
+ STRP_STATS_ADD(strp->stats.bytes, eaten);
return eaten;
}
+int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
+ unsigned int orig_offset, size_t orig_len,
+ size_t max_msg_size, long timeo)
+{
+ read_descriptor_t desc; /* Dummy arg to strp_recv */
+
+ desc.arg.data = strp;
+
+ return __strp_recv(&desc, orig_skb, orig_offset, orig_len,
+ max_msg_size, timeo);
+}
+EXPORT_SYMBOL_GPL(strp_process);
+
+static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+ unsigned int orig_offset, size_t orig_len)
+{
+ struct strparser *strp = (struct strparser *)desc->arg.data;
+
+ return __strp_recv(desc, orig_skb, orig_offset, orig_len,
+ strp->sk->sk_rcvbuf, strp->sk->sk_rcvtimeo);
+}
+
static int default_read_sock_done(struct strparser *strp, int err)
{
return err;
@@ -340,6 +373,9 @@ static int strp_read_sock(struct strparser *strp)
struct socket *sock = strp->sk->sk_socket;
read_descriptor_t desc;
+ if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+ return -EBUSY;
+
desc.arg.data = strp;
desc.error = 0;
desc.count = 1; /* give more than one skb per call */
@@ -355,101 +391,124 @@ static int strp_read_sock(struct strparser *strp)
/* Lower sock lock held */
void strp_data_ready(struct strparser *strp)
{
- if (unlikely(strp->rx_stopped))
+ if (unlikely(strp->stopped))
return;
- /* This check is needed to synchronize with do_strp_rx_work.
- * do_strp_rx_work acquires a process lock (lock_sock) whereas
+ /* This check is needed to synchronize with do_strp_work.
+ * do_strp_work acquires a process lock (lock_sock) whereas
* the lock held here is bh_lock_sock. The two locks can be
* held by different threads at the same time, but bh_lock_sock
* allows a thread in BH context to safely check if the process
* lock is held. In this case, if the lock is held, queue work.
*/
if (sock_owned_by_user(strp->sk)) {
- queue_work(strp_wq, &strp->rx_work);
+ queue_work(strp_wq, &strp->work);
return;
}
- if (strp->rx_paused)
+ if (strp->paused)
return;
- if (strp->rx_need_bytes) {
- if (strp_peek_len(strp) >= strp->rx_need_bytes)
- strp->rx_need_bytes = 0;
+ if (strp->need_bytes) {
+ if (strp_peek_len(strp) >= strp->need_bytes)
+ strp->need_bytes = 0;
else
return;
}
if (strp_read_sock(strp) == -ENOMEM)
- queue_work(strp_wq, &strp->rx_work);
+ queue_work(strp_wq, &strp->work);
}
EXPORT_SYMBOL_GPL(strp_data_ready);
-static void do_strp_rx_work(struct strparser *strp)
+static void do_strp_work(struct strparser *strp)
{
read_descriptor_t rd_desc;
- struct sock *csk = strp->sk;
/* We need the read lock to synchronize with strp_data_ready. We
* need the socket lock for calling strp_read_sock.
*/
- lock_sock(csk);
+ strp->cb.lock(strp);
- if (unlikely(strp->rx_stopped))
+ if (unlikely(strp->stopped))
goto out;
- if (strp->rx_paused)
+ if (strp->paused)
goto out;
rd_desc.arg.data = strp;
if (strp_read_sock(strp) == -ENOMEM)
- queue_work(strp_wq, &strp->rx_work);
+ queue_work(strp_wq, &strp->work);
out:
- release_sock(csk);
+ strp->cb.unlock(strp);
}
-static void strp_rx_work(struct work_struct *w)
+static void strp_work(struct work_struct *w)
{
- do_strp_rx_work(container_of(w, struct strparser, rx_work));
+ do_strp_work(container_of(w, struct strparser, work));
}
-static void strp_rx_msg_timeout(unsigned long arg)
+static void strp_msg_timeout(unsigned long arg)
{
struct strparser *strp = (struct strparser *)arg;
/* Message assembly timed out */
- STRP_STATS_INCR(strp->stats.rx_msg_timeouts);
- lock_sock(strp->sk);
+ STRP_STATS_INCR(strp->stats.msg_timeouts);
+ strp->cb.lock(strp);
strp->cb.abort_parser(strp, ETIMEDOUT);
+ strp->cb.unlock(strp);
+}
+
+static void strp_sock_lock(struct strparser *strp)
+{
+ lock_sock(strp->sk);
+}
+
+static void strp_sock_unlock(struct strparser *strp)
+{
release_sock(strp->sk);
}
-int strp_init(struct strparser *strp, struct sock *csk,
- struct strp_callbacks *cb)
+int strp_init(struct strparser *strp, struct sock *sk,
+ const struct strp_callbacks *cb)
{
- struct socket *sock = csk->sk_socket;
if (!cb || !cb->rcv_msg || !cb->parse_msg)
return -EINVAL;
- if (!sock->ops->read_sock || !sock->ops->peek_len)
- return -EAFNOSUPPORT;
-
- memset(strp, 0, sizeof(*strp));
+ /* The sk (sock) arg determines the mode of the stream parser.
+ *
+ * If the sock is set then the strparser is in receive callback mode.
+ * The upper layer calls strp_data_ready to kick receive processing
+ * and strparser calls the read_sock function on the socket to
+ * get packets.
+ *
+ * If the sock is not set then the strparser is in general mode.
+ * The upper layer calls strp_process for each skb to be parsed.
+ */
- strp->sk = csk;
+ if (!sk) {
+ if (!cb->lock || !cb->unlock)
+ return -EINVAL;
+ }
- setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout,
- (unsigned long)strp);
+ memset(strp, 0, sizeof(*strp));
- INIT_WORK(&strp->rx_work, strp_rx_work);
+ strp->sk = sk;
+ strp->cb.lock = cb->lock ? : strp_sock_lock;
+ strp->cb.unlock = cb->unlock ? : strp_sock_unlock;
strp->cb.rcv_msg = cb->rcv_msg;
strp->cb.parse_msg = cb->parse_msg;
strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
- strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp;
+ strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp;
+
+ setup_timer(&strp->msg_timer, strp_msg_timeout,
+ (unsigned long)strp);
+
+ INIT_WORK(&strp->work, strp_work);
return 0;
}
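
A general-mode user thus supplies its own lock/unlock callbacks and pushes skbs in by hand; a minimal sketch, assuming a mutex-protected context (all my_* names are hypothetical):

    static DEFINE_MUTEX(my_mutex);

    static void my_lock(struct strparser *strp)
    {
        mutex_lock(&my_mutex);
    }

    static void my_unlock(struct strparser *strp)
    {
        mutex_unlock(&my_mutex);
    }

    static const struct strp_callbacks my_cb = {
        .rcv_msg   = my_rcv_msg,
        .parse_msg = my_parse_msg,
        .lock      = my_lock,
        .unlock    = my_unlock,
    };

    strp_init(&my_strp, NULL, &my_cb);      /* no socket: general mode */
    /* then, for each incoming skb, with caller-chosen limits: */
    strp_process(&my_strp, skb, 0, skb->len, MY_MAX_MSG_SIZE, MY_TIMEOUT);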
@@ -457,12 +516,12 @@ EXPORT_SYMBOL_GPL(strp_init);
void strp_unpause(struct strparser *strp)
{
- strp->rx_paused = 0;
+ strp->paused = 0;
- /* Sync setting rx_paused with RX work */
+ /* Sync setting paused with RX work */
smp_mb();
- queue_work(strp_wq, &strp->rx_work);
+ queue_work(strp_wq, &strp->work);
}
EXPORT_SYMBOL_GPL(strp_unpause);
@@ -471,27 +530,27 @@ EXPORT_SYMBOL_GPL(strp_unpause);
*/
void strp_done(struct strparser *strp)
{
- WARN_ON(!strp->rx_stopped);
+ WARN_ON(!strp->stopped);
- del_timer_sync(&strp->rx_msg_timer);
- cancel_work_sync(&strp->rx_work);
+ del_timer_sync(&strp->msg_timer);
+ cancel_work_sync(&strp->work);
- if (strp->rx_skb_head) {
- kfree_skb(strp->rx_skb_head);
- strp->rx_skb_head = NULL;
+ if (strp->skb_head) {
+ kfree_skb(strp->skb_head);
+ strp->skb_head = NULL;
}
}
EXPORT_SYMBOL_GPL(strp_done);
void strp_stop(struct strparser *strp)
{
- strp->rx_stopped = 1;
+ strp->stopped = 1;
}
EXPORT_SYMBOL_GPL(strp_stop);
void strp_check_rcv(struct strparser *strp)
{
- queue_work(strp_wq, &strp->rx_work);
+ queue_work(strp_wq, &strp->work);
}
EXPORT_SYMBOL_GPL(strp_check_rcv);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 25dc67ef..0531b41 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -343,8 +343,6 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj)
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_VLAN:
return sizeof(struct switchdev_obj_port_vlan);
- case SWITCHDEV_OBJ_ID_PORT_FDB:
- return sizeof(struct switchdev_obj_port_fdb);
case SWITCHDEV_OBJ_ID_PORT_MDB:
return sizeof(struct switchdev_obj_port_mdb);
default:
@@ -534,43 +532,6 @@ int switchdev_port_obj_del(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
-/**
- * switchdev_port_obj_dump - Dump port objects
- *
- * @dev: port device
- * @id: object ID
- * @obj: object to dump
- * @cb: function to call with a filled object
- *
- * rtnl_lock must be held.
- */
-int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
- switchdev_obj_dump_cb_t *cb)
-{
- const struct switchdev_ops *ops = dev->switchdev_ops;
- struct net_device *lower_dev;
- struct list_head *iter;
- int err = -EOPNOTSUPP;
-
- ASSERT_RTNL();
-
- if (ops && ops->switchdev_port_obj_dump)
- return ops->switchdev_port_obj_dump(dev, obj, cb);
-
- /* Switch device port(s) may be stacked under
- * bond/team/vlan dev, so recurse down to dump objects on
- * first port at bottom of stack.
- */
-
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = switchdev_port_obj_dump(lower_dev, obj, cb);
- break;
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
-
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
/**
@@ -613,486 +574,6 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
-struct switchdev_vlan_dump {
- struct switchdev_obj_port_vlan vlan;
- struct sk_buff *skb;
- u32 filter_mask;
- u16 flags;
- u16 begin;
- u16 end;
-};
-
-static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
-{
- struct bridge_vlan_info vinfo;
-
- vinfo.flags = dump->flags;
-
- if (dump->begin == 0 && dump->end == 0) {
- return 0;
- } else if (dump->begin == dump->end) {
- vinfo.vid = dump->begin;
- if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
- sizeof(vinfo), &vinfo))
- return -EMSGSIZE;
- } else {
- vinfo.vid = dump->begin;
- vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
- if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
- sizeof(vinfo), &vinfo))
- return -EMSGSIZE;
- vinfo.vid = dump->end;
- vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
- vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
- if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
- sizeof(vinfo), &vinfo))
- return -EMSGSIZE;
- }
-
- return 0;
-}
-
-static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj)
-{
- struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
- struct switchdev_vlan_dump *dump =
- container_of(vlan, struct switchdev_vlan_dump, vlan);
- int err = 0;
-
- if (vlan->vid_begin > vlan->vid_end)
- return -EINVAL;
-
- if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
- dump->flags = vlan->flags;
- for (dump->begin = dump->end = vlan->vid_begin;
- dump->begin <= vlan->vid_end;
- dump->begin++, dump->end++) {
- err = switchdev_port_vlan_dump_put(dump);
- if (err)
- return err;
- }
- } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
- if (dump->begin > vlan->vid_begin &&
- dump->begin >= vlan->vid_end) {
- if ((dump->begin - 1) == vlan->vid_end &&
- dump->flags == vlan->flags) {
- /* prepend */
- dump->begin = vlan->vid_begin;
- } else {
- err = switchdev_port_vlan_dump_put(dump);
- dump->flags = vlan->flags;
- dump->begin = vlan->vid_begin;
- dump->end = vlan->vid_end;
- }
- } else if (dump->end <= vlan->vid_begin &&
- dump->end < vlan->vid_end) {
- if ((dump->end + 1) == vlan->vid_begin &&
- dump->flags == vlan->flags) {
- /* append */
- dump->end = vlan->vid_end;
- } else {
- err = switchdev_port_vlan_dump_put(dump);
- dump->flags = vlan->flags;
- dump->begin = vlan->vid_begin;
- dump->end = vlan->vid_end;
- }
- } else {
- err = -EINVAL;
- }
- }
-
- return err;
-}
-
-static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
- u32 filter_mask)
-{
- struct switchdev_vlan_dump dump = {
- .vlan.obj.orig_dev = dev,
- .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- .skb = skb,
- .filter_mask = filter_mask,
- };
- int err = 0;
-
- if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
- (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
- err = switchdev_port_obj_dump(dev, &dump.vlan.obj,
- switchdev_port_vlan_dump_cb);
- if (err)
- goto err_out;
- if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
- /* last one */
- err = switchdev_port_vlan_dump_put(&dump);
- }
-
-err_out:
- return err == -EOPNOTSUPP ? 0 : err;
-}
-
-/**
- * switchdev_port_bridge_getlink - Get bridge port attributes
- *
- * @dev: port device
- *
- * Called for SELF on rtnl_bridge_getlink to get bridge port
- * attributes.
- */
-int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
- struct net_device *dev, u32 filter_mask,
- int nlflags)
-{
- struct switchdev_attr attr = {
- .orig_dev = dev,
- .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
- };
- u16 mode = BRIDGE_MODE_UNDEF;
- u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD;
- int err;
-
- if (!netif_is_bridge_port(dev))
- return -EOPNOTSUPP;
-
- err = switchdev_port_attr_get(dev, &attr);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
- attr.u.brport_flags, mask, nlflags,
- filter_mask, switchdev_port_vlan_fill);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
-
-static int switchdev_port_br_setflag(struct net_device *dev,
- struct nlattr *nlattr,
- unsigned long brport_flag)
-{
- struct switchdev_attr attr = {
- .orig_dev = dev,
- .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
- };
- u8 flag = nla_get_u8(nlattr);
- int err;
-
- err = switchdev_port_attr_get(dev, &attr);
- if (err)
- return err;
-
- if (flag)
- attr.u.brport_flags |= brport_flag;
- else
- attr.u.brport_flags &= ~brport_flag;
-
- return switchdev_port_attr_set(dev, &attr);
-}
-
-static const struct nla_policy
-switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
- [IFLA_BRPORT_STATE] = { .type = NLA_U8 },
- [IFLA_BRPORT_COST] = { .type = NLA_U32 },
- [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 },
- [IFLA_BRPORT_MODE] = { .type = NLA_U8 },
- [IFLA_BRPORT_GUARD] = { .type = NLA_U8 },
- [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 },
- [IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 },
- [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 },
- [IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 },
- [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
-};
-
-static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
- struct nlattr *protinfo)
-{
- struct nlattr *attr;
- int rem;
- int err;
-
- err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
- switchdev_port_bridge_policy, NULL);
- if (err)
- return err;
-
- nla_for_each_nested(attr, protinfo, rem) {
- switch (nla_type(attr)) {
- case IFLA_BRPORT_LEARNING:
- err = switchdev_port_br_setflag(dev, attr,
- BR_LEARNING);
- break;
- case IFLA_BRPORT_LEARNING_SYNC:
- err = switchdev_port_br_setflag(dev, attr,
- BR_LEARNING_SYNC);
- break;
- case IFLA_BRPORT_UNICAST_FLOOD:
- err = switchdev_port_br_setflag(dev, attr, BR_FLOOD);
- break;
- default:
- err = -EOPNOTSUPP;
- break;
- }
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int switchdev_port_br_afspec(struct net_device *dev,
- struct nlattr *afspec,
- int (*f)(struct net_device *dev,
- const struct switchdev_obj *obj))
-{
- struct nlattr *attr;
- struct bridge_vlan_info *vinfo;
- struct switchdev_obj_port_vlan vlan = {
- .obj.orig_dev = dev,
- .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- };
- int rem;
- int err;
-
- nla_for_each_nested(attr, afspec, rem) {
- if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
- continue;
- if (nla_len(attr) != sizeof(struct bridge_vlan_info))
- return -EINVAL;
- vinfo = nla_data(attr);
- if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
- return -EINVAL;
- vlan.flags = vinfo->flags;
- if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
- if (vlan.vid_begin)
- return -EINVAL;
- vlan.vid_begin = vinfo->vid;
- /* don't allow range of pvids */
- if (vlan.flags & BRIDGE_VLAN_INFO_PVID)
- return -EINVAL;
- } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
- if (!vlan.vid_begin)
- return -EINVAL;
- vlan.vid_end = vinfo->vid;
- if (vlan.vid_end <= vlan.vid_begin)
- return -EINVAL;
- err = f(dev, &vlan.obj);
- if (err)
- return err;
- vlan.vid_begin = 0;
- } else {
- if (vlan.vid_begin)
- return -EINVAL;
- vlan.vid_begin = vinfo->vid;
- vlan.vid_end = vinfo->vid;
- err = f(dev, &vlan.obj);
- if (err)
- return err;
- vlan.vid_begin = 0;
- }
- }
-
- return 0;
-}
-
-/**
- * switchdev_port_bridge_setlink - Set bridge port attributes
- *
- * @dev: port device
- * @nlh: netlink header
- * @flags: netlink flags
- *
- * Called for SELF on rtnl_bridge_setlink to set bridge port
- * attributes.
- */
-int switchdev_port_bridge_setlink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags)
-{
- struct nlattr *protinfo;
- struct nlattr *afspec;
- int err = 0;
-
- if (!netif_is_bridge_port(dev))
- return -EOPNOTSUPP;
-
- protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
- IFLA_PROTINFO);
- if (protinfo) {
- err = switchdev_port_br_setlink_protinfo(dev, protinfo);
- if (err)
- return err;
- }
-
- afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
- IFLA_AF_SPEC);
- if (afspec)
- err = switchdev_port_br_afspec(dev, afspec,
- switchdev_port_obj_add);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
-
-/**
- * switchdev_port_bridge_dellink - Set bridge port attributes
- *
- * @dev: port device
- * @nlh: netlink header
- * @flags: netlink flags
- *
- * Called for SELF on rtnl_bridge_dellink to set bridge port
- * attributes.
- */
-int switchdev_port_bridge_dellink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags)
-{
- struct nlattr *afspec;
-
- if (!netif_is_bridge_port(dev))
- return -EOPNOTSUPP;
-
- afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
- IFLA_AF_SPEC);
- if (afspec)
- return switchdev_port_br_afspec(dev, afspec,
- switchdev_port_obj_del);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
-
-/**
- * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
- *
- * @ndmsg: netlink hdr
- * @nlattr: netlink attributes
- * @dev: port device
- * @addr: MAC address to add
- * @vid: VLAN to add
- *
- * Add FDB entry to switch device.
- */
-int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev, const unsigned char *addr,
- u16 vid, u16 nlm_flags)
-{
- struct switchdev_obj_port_fdb fdb = {
- .obj.orig_dev = dev,
- .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
- .vid = vid,
- };
-
- ether_addr_copy(fdb.addr, addr);
- return switchdev_port_obj_add(dev, &fdb.obj);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
-
-/**
- * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
- *
- * @ndmsg: netlink hdr
- * @nlattr: netlink attributes
- * @dev: port device
- * @addr: MAC address to delete
- * @vid: VLAN to delete
- *
- * Delete FDB entry from switch device.
- */
-int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev, const unsigned char *addr,
- u16 vid)
-{
- struct switchdev_obj_port_fdb fdb = {
- .obj.orig_dev = dev,
- .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
- .vid = vid,
- };
-
- ether_addr_copy(fdb.addr, addr);
- return switchdev_port_obj_del(dev, &fdb.obj);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
-
-struct switchdev_fdb_dump {
- struct switchdev_obj_port_fdb fdb;
- struct net_device *dev;
- struct sk_buff *skb;
- struct netlink_callback *cb;
- int idx;
-};
-
-static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
-{
- struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
- struct switchdev_fdb_dump *dump =
- container_of(fdb, struct switchdev_fdb_dump, fdb);
- u32 portid = NETLINK_CB(dump->cb->skb).portid;
- u32 seq = dump->cb->nlh->nlmsg_seq;
- struct nlmsghdr *nlh;
- struct ndmsg *ndm;
-
- if (dump->idx < dump->cb->args[2])
- goto skip;
-
- nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
- sizeof(*ndm), NLM_F_MULTI);
- if (!nlh)
- return -EMSGSIZE;
-
- ndm = nlmsg_data(nlh);
- ndm->ndm_family = AF_BRIDGE;
- ndm->ndm_pad1 = 0;
- ndm->ndm_pad2 = 0;
- ndm->ndm_flags = NTF_SELF;
- ndm->ndm_type = 0;
- ndm->ndm_ifindex = dump->dev->ifindex;
- ndm->ndm_state = fdb->ndm_state;
-
- if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
- goto nla_put_failure;
-
- if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
- goto nla_put_failure;
-
- nlmsg_end(dump->skb, nlh);
-
-skip:
- dump->idx++;
- return 0;
-
-nla_put_failure:
- nlmsg_cancel(dump->skb, nlh);
- return -EMSGSIZE;
-}
-
-/**
- * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
- *
- * @skb: netlink skb
- * @cb: netlink callback
- * @dev: port device
- * @filter_dev: filter device
- * @idx:
- *
- * Dump FDB entries from switch device.
- */
-int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
- struct net_device *dev,
- struct net_device *filter_dev, int *idx)
-{
- struct switchdev_fdb_dump dump = {
- .fdb.obj.orig_dev = dev,
- .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
- .dev = dev,
- .skb = skb,
- .cb = cb,
- .idx = *idx,
- };
- int err;
-
- err = switchdev_port_obj_dump(dev, &dump.fdb.obj,
- switchdev_port_fdb_dump_cb);
- *idx = dump.idx;
- return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
-
bool switchdev_port_same_parent_id(struct net_device *a,
struct net_device *b)
{
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 89cd061..ac1d66d 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -367,30 +367,6 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
return 0;
}
-/* tipc_bearer_reset_all - reset all links on all bearers
- */
-void tipc_bearer_reset_all(struct net *net)
-{
- struct tipc_bearer *b;
- int i;
-
- for (i = 0; i < MAX_BEARERS; i++) {
- b = bearer_get(net, i);
- if (b)
- clear_bit_unlock(0, &b->up);
- }
- for (i = 0; i < MAX_BEARERS; i++) {
- b = bearer_get(net, i);
- if (b)
- tipc_reset_bearer(net, b);
- }
- for (i = 0; i < MAX_BEARERS; i++) {
- b = bearer_get(net, i);
- if (b)
- test_and_set_bit_lock(0, &b->up);
- }
-}
-
/**
* bearer_disable
*
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index e07a55a..42d6eee 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -212,7 +212,6 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest);
struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name);
int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id);
struct tipc_media *tipc_media_find(const char *name);
-void tipc_bearer_reset_all(struct net *net);
int tipc_bearer_setup(void);
void tipc_bearer_cleanup(void);
void tipc_bearer_stop(struct net *net);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 60820dc..ac0144f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -978,15 +978,15 @@ static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb)
struct tipc_msg *hdr = buf_msg(skb);
pr_warn("Retransmission failure on link <%s>\n", l->name);
- link_print(l, "Resetting link ");
+ link_print(l, "State of link ");
pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr));
pr_info("sqno %u, prev: %x, src: %x\n",
msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
}
-int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
- struct sk_buff_head *xmitq)
+int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker,
+ u16 from, u16 to, struct sk_buff_head *xmitq)
{
struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
struct tipc_msg *hdr;
@@ -997,11 +997,14 @@ int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to,
return 0;
/* Detect repeated retransmit failures on same packet */
- if (likely(l->last_retransm != buf_seqno(skb))) {
- l->last_retransm = buf_seqno(skb);
- l->stale_count = 1;
- } else if (++l->stale_count > 100) {
+ if (nacker->last_retransm != buf_seqno(skb)) {
+ nacker->last_retransm = buf_seqno(skb);
+ nacker->stale_count = 1;
+ } else if (++nacker->stale_count > 100) {
link_retransmit_failure(l, skb);
+ nacker->stale_count = 0;
+ if (link_is_bc_sndlink(l))
+ return TIPC_LINK_DOWN_EVT;
return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
@@ -1528,7 +1531,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
/* If NACK, retransmit will now start at right position */
if (gap) {
- rc = tipc_link_retrans(l, ack + 1, ack + gap, xmitq);
+ rc = tipc_link_retrans(l, l, ack + 1, ack + gap, xmitq);
l->stats.recv_nacks++;
}
@@ -1680,7 +1683,7 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
return rc;
if (link_bc_retr_eval(snd_l, &from, &to))
- rc = tipc_link_retrans(snd_l, from, to, xmitq);
+ rc = tipc_link_retrans(snd_l, l, from, to, xmitq);
l->snd_nxt = peers_snd_nxt;
if (link_bc_rcv_gap(l))
@@ -1775,7 +1778,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
if (dnode == tipc_own_addr(l->net)) {
tipc_link_bc_ack_rcv(l, acked, xmitq);
- rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq);
+ rc = tipc_link_retrans(l->bc_sndlink, l, from, to, xmitq);
l->stats.recv_nacks++;
return rc;
}
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 750949d..e48f0b2 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1217,7 +1217,7 @@ send:
return err;
}
-static struct genl_ops tipc_genl_compat_ops[] = {
+static const struct genl_ops tipc_genl_compat_ops[] = {
{
.cmd = TIPC_GENL_CMD,
.doit = tipc_nl_compat_recv,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 7dd2233..198dbc7 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1284,7 +1284,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
if (rc & TIPC_LINK_DOWN_EVT) {
- tipc_bearer_reset_all(n->net);
+ tipc_node_reset_links(n);
return;
}
@@ -1351,15 +1351,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
if (!skb_queue_empty(&be->inputq1))
tipc_node_mcast_rcv(n);
- if (rc & TIPC_LINK_DOWN_EVT) {
- /* Reception reassembly failure => reset all links to peer */
- if (!tipc_link_is_up(be->link))
- tipc_node_reset_links(n);
-
- /* Retransmission failure => reset all links to all peers */
- if (!tipc_link_is_up(tipc_bc_sndlink(net)))
- tipc_bearer_reset_all(net);
- }
+ /* If reassembly or retransmission failure => reset all links to peer */
+ if (rc & TIPC_LINK_DOWN_EVT)
+ tipc_node_reset_links(n);
tipc_node_put(n);
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index be8982b..7f46bab 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1528,26 +1528,13 @@ static inline bool too_many_unix_fds(struct task_struct *p)
return false;
}
-#define MAX_RECURSION_LEVEL 4
-
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
- unsigned char max_level = 0;
if (too_many_unix_fds(current))
return -ETOOMANYREFS;
- for (i = scm->fp->count - 1; i >= 0; i--) {
- struct sock *sk = unix_get_socket(scm->fp->fp[i]);
-
- if (sk)
- max_level = max(max_level,
- unix_sk(sk)->recursion_level);
- }
- if (unlikely(max_level > MAX_RECURSION_LEVEL))
- return -ETOOMANYREFS;
-
/*
* Need to duplicate file references for the sake of garbage
* collection. Otherwise a socket in the fps might become a
@@ -1559,7 +1546,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
for (i = scm->fp->count - 1; i >= 0; i--)
unix_inflight(scm->fp->user, scm->fp->fp[i]);
- return max_level;
+ return 0;
}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1649,7 +1636,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
struct sk_buff *skb;
long timeo;
struct scm_cookie scm;
- int max_level;
int data_len = 0;
int sk_locked;
@@ -1701,7 +1687,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
err = unix_scm_to_skb(&scm, skb, true);
if (err < 0)
goto out_free;
- max_level = err + 1;
skb_put(skb, len - data_len);
skb->data_len = data_len;
@@ -1819,8 +1804,6 @@ restart_locked:
__net_timestamp(skb);
maybe_add_creds(skb, sock, other);
skb_queue_tail(&other->sk_receive_queue, skb);
- if (max_level > unix_sk(other)->recursion_level)
- unix_sk(other)->recursion_level = max_level;
unix_state_unlock(other);
other->sk_data_ready(other);
sock_put(other);
@@ -1855,7 +1838,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
int sent = 0;
struct scm_cookie scm;
bool fds_sent = false;
- int max_level;
int data_len;
wait_for_unix_gc();
@@ -1905,7 +1887,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
kfree_skb(skb);
goto out_err;
}
- max_level = err + 1;
fds_sent = true;
skb_put(skb, size - data_len);
@@ -1925,8 +1906,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
maybe_add_creds(skb, sock, other);
skb_queue_tail(&other->sk_receive_queue, skb);
- if (max_level > unix_sk(other)->recursion_level)
- unix_sk(other)->recursion_level = max_level;
unix_state_unlock(other);
other->sk_data_ready(other);
sent += size;
@@ -2321,7 +2300,6 @@ redo:
last_len = last ? last->len : 0;
again:
if (skb == NULL) {
- unix_sk(sk)->recursion_level = 0;
if (copied >= target)
goto unlock;
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index 8831e7c..a24369d 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -46,3 +46,15 @@ config VIRTIO_VSOCKETS_COMMON
This option is selected by any driver which needs to access
the virtio_vsock. The module will be called
vmw_vsock_virtio_transport_common.
+
+config HYPERV_VSOCKETS
+ tristate "Hyper-V transport for Virtual Sockets"
+ depends on VSOCKETS && HYPERV
+ help
+ This module implements a Hyper-V transport for Virtual Sockets.
+
+ Enable this transport if your Virtual Machine host supports Virtual
+ Sockets over Hyper-V VMBus.
+
+ To compile this driver as a module, choose M here: the module will be
+ called hv_sock. If unsure, say N.
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index 09fc2eb..e63d574 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_VSOCKETS) += vsock.o
obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
+obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o
vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
@@ -11,3 +12,5 @@ vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
vmw_vsock_virtio_transport-y += virtio_transport.o
vmw_vsock_virtio_transport_common-y += virtio_transport_common.o
+
+hv_sock-y += hyperv_transport.o
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
new file mode 100644
index 0000000..14ed5a3
--- /dev/null
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -0,0 +1,904 @@
+/*
+ * Hyper-V transport for vsock
+ *
+ * Hyper-V Sockets provides a byte-stream based communication mechanism
+ * between the host and the VM. This driver implements the necessary
+ * support in the VM by introducing a new vsock transport.
+ *
+ * Copyright (c) 2017, Microsoft Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/hyperv.h>
+#include <net/sock.h>
+#include <net/af_vsock.h>
+
+/* The host side's design of the feature requires exactly six 4KB pages
+ * each for the recv and send rings -- this is suboptimal in terms of
+ * memory consumption, but unfortunately we have to live with it until
+ * the host comes up with a better design.
+ */
+#define PAGE_SIZE_4K 4096
+#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
+#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
+
+/* The MTU is 16KB per the host side's design */
+#define HVS_MTU_SIZE (1024 * 16)
+
+struct vmpipe_proto_header {
+ u32 pkt_type;
+ u32 data_size;
+};
+
+/* For recv, we use the VMBus in-place packet iterator APIs to directly copy
+ * data from the ringbuffer into the userspace buffer.
+ */
+struct hvs_recv_buf {
+ /* The header before the payload data */
+ struct vmpipe_proto_header hdr;
+
+ /* The payload */
+ u8 data[HVS_MTU_SIZE];
+};
+
+/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
+ * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
+ * buffer, because tests show there is no significant performance difference.
+ *
+ * Note: the buffer can be eliminated in the future when we add new VMBus
+ * ringbuffer APIs that allow us to directly copy data from userspace buffer
+ * to VMBus ringbuffer.
+ */
+#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header))
+
+struct hvs_send_buf {
+ /* The header before the payload data */
+ struct vmpipe_proto_header hdr;
+
+ /* The payload */
+ u8 data[HVS_SEND_BUF_SIZE];
+};
+
+#define HVS_HEADER_LEN (sizeof(struct vmpacket_descriptor) + \
+ sizeof(struct vmpipe_proto_header))
+
+/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write(), and
+ * __hv_pkt_iter_next().
+ */
+#define VMBUS_PKT_TRAILER_SIZE (sizeof(u64))
+
+#define HVS_PKT_LEN(payload_len) (HVS_HEADER_LEN + \
+ ALIGN((payload_len), 8) + \
+ VMBUS_PKT_TRAILER_SIZE)
+
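
Working the arithmetic through, and assuming the usual 16-byte struct vmpacket_descriptor, HVS_HEADER_LEN is 16 + 8 = 24 bytes; a zero-length FIN packet then occupies HVS_PKT_LEN(0) = 24 + 0 + 8 = 32 bytes of ring space, and a one-byte payload HVS_PKT_LEN(1) = 24 + 8 + 8 = 40 bytes.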
+union hvs_service_id {
+ uuid_le srv_id;
+
+ struct {
+ unsigned int svm_port;
+ unsigned char b[sizeof(uuid_le) - sizeof(unsigned int)];
+ };
+};
+
+/* Per-socket state (accessed via vsk->trans) */
+struct hvsock {
+ struct vsock_sock *vsk;
+
+ uuid_le vm_srv_id;
+ uuid_le host_srv_id;
+
+ struct vmbus_channel *chan;
+ struct vmpacket_descriptor *recv_desc;
+
+ /* The length of the payload not delivered to userland yet */
+ u32 recv_data_len;
+ /* The offset of the payload */
+ u32 recv_data_off;
+
+ /* Have we sent the zero-length packet (FIN)? */
+ bool fin_sent;
+};
+
+/* In the VM, we support Hyper-V Sockets with AF_VSOCK, and the endpoint is
+ * <cid, port> (see struct sockaddr_vm). Note: cid is not really used here:
+ * when we write apps to connect to the host, we can only use VMADDR_CID_ANY
+ * or VMADDR_CID_HOST (both are equivalent) as the remote cid, and when we
+ * write apps to bind() & listen() in the VM, we can only use VMADDR_CID_ANY
+ * as the local cid.
+ *
+ * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
+ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
+ * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
+ * the below sockaddr:
+ *
+ * struct SOCKADDR_HV
+ * {
+ * ADDRESS_FAMILY Family;
+ * USHORT Reserved;
+ * GUID VmId;
+ * GUID ServiceId;
+ * };
+ * Note: VmId is not used by the Linux VM, and it actually isn't transmitted
+ * via VMBus, because the host and the VM can easily identify each other.
+ * Though VmId is useful on the host, especially in the case of Windows
+ * containers, the Linux VM doesn't need it at all.
+ *
+ * To make use of the AF_VSOCK infrastructure in Linux VM, we have to limit
+ * the available GUID space of SOCKADDR_HV so that we can create a mapping
+ * between AF_VSOCK port and SOCKADDR_HV Service GUID. The rule of writing
+ * Hyper-V Sockets apps on the host and in Linux VM is:
+ *
+ ****************************************************************************
+ * The only valid Service GUIDs, from the perspectives of both the host and *
+ * Linux VM, that can be connected by the other end, must conform to this *
+ * format: <port>-facb-11e6-bd58-64006a7986d3, and the "port" must be in *
+ * this range [0, 0x7FFFFFFF]. *
+ ****************************************************************************
+ *
+ * When we write apps on the host to connect(), the GUID ServiceID is used.
+ * When we write apps in Linux VM to connect(), we only need to specify the
+ * port and the driver will form the GUID and use that to request the host.
+ *
+ * From the perspective of Linux VM:
+ * 1. the local ephemeral port (i.e. the local auto-bound port when we call
+ * connect() without explicit bind()) is generated by __vsock_bind_stream(),
+ * and the range is [1024, 0xFFFFFFFF).
+ * 2. the remote ephemeral port (i.e. the auto-generated remote port for
+ * a connect request initiated by the host's connect()) is generated by
+ * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF).
+ */
+
+#define MAX_LISTEN_PORT ((u32)0x7FFFFFFF)
+#define MAX_VM_LISTEN_PORT MAX_LISTEN_PORT
+#define MAX_HOST_LISTEN_PORT MAX_LISTEN_PORT
+#define MIN_HOST_EPHEMERAL_PORT (MAX_HOST_LISTEN_PORT + 1)
+
+/* 00000000-facb-11e6-bd58-64006a7986d3 */
+static const uuid_le srv_id_template =
+ UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
+ 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
+
+static bool is_valid_srv_id(const uuid_le *id)
+{
+ return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4);
+}
+
+static unsigned int get_port_by_srv_id(const uuid_le *svr_id)
+{
+ return *((unsigned int *)svr_id);
+}
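
For example, AF_VSOCK port 0x26 corresponds to the service GUID 00000026-facb-11e6-bd58-64006a7986d3: the first four bytes carry the port, and everything from byte 4 on must match the template, which is exactly what is_valid_srv_id() compares. A round-trip sketch using the union above:

    union hvs_service_id id;

    id.srv_id = srv_id_template;
    id.svm_port = 0x26;                               /* port -> GUID */
    WARN_ON(!is_valid_srv_id(&id.srv_id));
    WARN_ON(get_port_by_srv_id(&id.srv_id) != 0x26);  /* GUID -> port */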
+
+static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id)
+{
+ unsigned int port = get_port_by_srv_id(svr_id);
+
+ vsock_addr_init(addr, VMADDR_CID_ANY, port);
+}
+
+static void hvs_remote_addr_init(struct sockaddr_vm *remote,
+ struct sockaddr_vm *local)
+{
+ static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
+ struct sock *sk;
+
+ vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+
+ while (1) {
+ /* Wrap around? */
+ if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT ||
+ host_ephemeral_port == VMADDR_PORT_ANY)
+ host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
+
+ remote->svm_port = host_ephemeral_port++;
+
+ sk = vsock_find_connected_socket(remote, local);
+ if (!sk) {
+ /* Found an available ephemeral port */
+ return;
+ }
+
+ /* Release refcnt got in vsock_find_connected_socket */
+ sock_put(sk);
+ }
+}
+
+static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
+{
+ set_channel_pending_send_size(chan,
+ HVS_PKT_LEN(HVS_SEND_BUF_SIZE));
+
+ /* See hvs_stream_has_space(): we must make sure the host has seen
+ * the new pending send size, before we can re-check the writable
+ * bytes.
+ */
+ virt_mb();
+}
+
+static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan)
+{
+ set_channel_pending_send_size(chan, 0);
+
+ /* Ditto */
+ virt_mb();
+}
+
+static bool hvs_channel_readable(struct vmbus_channel *chan)
+{
+ u32 readable = hv_get_bytes_to_read(&chan->inbound);
+
+ /* 0-size payload means FIN */
+ return readable >= HVS_PKT_LEN(0);
+}
+
+static int hvs_channel_readable_payload(struct vmbus_channel *chan)
+{
+ u32 readable = hv_get_bytes_to_read(&chan->inbound);
+
+ if (readable > HVS_PKT_LEN(0)) {
+ /* At least we have 1 byte to read. We don't need to return
+ * the exact readable bytes: see vsock_stream_recvmsg() ->
+ * vsock_stream_has_data().
+ */
+ return 1;
+ }
+
+ if (readable == HVS_PKT_LEN(0)) {
+ /* 0-size payload means FIN */
+ return 0;
+ }
+
+ /* No payload or FIN */
+ return -1;
+}
+
+static size_t hvs_channel_writable_bytes(struct vmbus_channel *chan)
+{
+ u32 writeable = hv_get_bytes_to_write(&chan->outbound);
+ size_t ret;
+
+ /* The ringbuffer mustn't be 100% full, and we should reserve a
+ * zero-length-payload packet for the FIN: see hv_ringbuffer_write()
+ * and hvs_shutdown().
+ */
+ if (writeable <= HVS_PKT_LEN(1) + HVS_PKT_LEN(0))
+ return 0;
+
+ ret = writeable - HVS_PKT_LEN(1) - HVS_PKT_LEN(0);
+
+ return round_down(ret, 8);
+}
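
Concretely, with the 16-byte vmpacket_descriptor assumed earlier (HVS_PKT_LEN(1) = 40, HVS_PKT_LEN(0) = 32): if the ring reports 100 writable bytes, the 72 reserved bytes keep room for a final FIN and stop the ring from ever being 100% full, so the caller is told 100 - 72 = 28, rounded down to 24, bytes are writable.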
+
+static int hvs_send_data(struct vmbus_channel *chan,
+ struct hvs_send_buf *send_buf, size_t to_write)
+{
+ send_buf->hdr.pkt_type = 1;
+ send_buf->hdr.data_size = to_write;
+ return vmbus_sendpacket(chan, &send_buf->hdr,
+ sizeof(send_buf->hdr) + to_write,
+ 0, VM_PKT_DATA_INBAND, 0);
+}
+
+static void hvs_channel_cb(void *ctx)
+{
+ struct sock *sk = (struct sock *)ctx;
+ struct vsock_sock *vsk = vsock_sk(sk);
+ struct hvsock *hvs = vsk->trans;
+ struct vmbus_channel *chan = hvs->chan;
+
+ if (hvs_channel_readable(chan))
+ sk->sk_data_ready(sk);
+
+ /* See hvs_stream_has_space(): when we reach here, the writable bytes
+ * may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE).
+ */
+ if (hv_get_bytes_to_write(&chan->outbound) > 0)
+ sk->sk_write_space(sk);
+}
+
+static void hvs_close_connection(struct vmbus_channel *chan)
+{
+ struct sock *sk = get_per_channel_state(chan);
+ struct vsock_sock *vsk = vsock_sk(sk);
+
+ sk->sk_state = SS_UNCONNECTED;
+ sock_set_flag(sk, SOCK_DONE);
+ vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
+
+ sk->sk_state_change(sk);
+}
+
+static void hvs_open_connection(struct vmbus_channel *chan)
+{
+ uuid_le *if_instance, *if_type;
+ unsigned char conn_from_host;
+
+ struct sockaddr_vm addr;
+ struct sock *sk, *new = NULL;
+ struct vsock_sock *vnew;
+ struct hvsock *hvs, *hvs_new;
+ int ret;
+
+ if_type = &chan->offermsg.offer.if_type;
+ if_instance = &chan->offermsg.offer.if_instance;
+ conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];
+
+ /* The host or the VM should only listen on a port in
+ * [0, MAX_LISTEN_PORT]
+ */
+ if (!is_valid_srv_id(if_type) ||
+ get_port_by_srv_id(if_type) > MAX_LISTEN_PORT)
+ return;
+
+ hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
+ sk = vsock_find_bound_socket(&addr);
+ if (!sk)
+ return;
+
+ if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) ||
+ (!conn_from_host && sk->sk_state != SS_CONNECTING))
+ goto out;
+
+ if (conn_from_host) {
+ if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog)
+ goto out;
+
+ new = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
+ sk->sk_type, 0);
+ if (!new)
+ goto out;
+
+ new->sk_state = SS_CONNECTING;
+ vnew = vsock_sk(new);
+ hvs_new = vnew->trans;
+ hvs_new->chan = chan;
+ } else {
+ hvs = vsock_sk(sk)->trans;
+ hvs->chan = chan;
+ }
+
+ set_channel_read_mode(chan, HV_CALL_DIRECT);
+ ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
+ RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
+ hvs_channel_cb, conn_from_host ? new : sk);
+ if (ret != 0) {
+ if (conn_from_host) {
+ hvs_new->chan = NULL;
+ sock_put(new);
+ } else {
+ hvs->chan = NULL;
+ }
+ goto out;
+ }
+
+ set_per_channel_state(chan, conn_from_host ? new : sk);
+ vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
+
+ if (conn_from_host) {
+ new->sk_state = SS_CONNECTED;
+ sk->sk_ack_backlog++;
+
+ hvs_addr_init(&vnew->local_addr, if_type);
+ hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
+
+ hvs_new->vm_srv_id = *if_type;
+ hvs_new->host_srv_id = *if_instance;
+
+ vsock_insert_connected(vnew);
+
+ lock_sock(sk);
+ vsock_enqueue_accept(sk, new);
+ release_sock(sk);
+ } else {
+ sk->sk_state = SS_CONNECTED;
+ sk->sk_socket->state = SS_CONNECTED;
+
+ vsock_insert_connected(vsock_sk(sk));
+ }
+
+ sk->sk_state_change(sk);
+
+out:
+ /* Release refcnt obtained when we called vsock_find_bound_socket() */
+ sock_put(sk);
+}
+
+static u32 hvs_get_local_cid(void)
+{
+ return VMADDR_CID_ANY;
+}
+
+static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
+{
+ struct hvsock *hvs;
+
+ hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
+ if (!hvs)
+ return -ENOMEM;
+
+ vsk->trans = hvs;
+ hvs->vsk = vsk;
+
+ return 0;
+}
+
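+/* Derive the VM and host service IDs from the shared template, folding the
+ * vsock ports into them, then ask the VMBus host to set up the channel.
+ */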
+static int hvs_connect(struct vsock_sock *vsk)
+{
+ union hvs_service_id vm, host;
+ struct hvsock *h = vsk->trans;
+
+ vm.srv_id = srv_id_template;
+ vm.svm_port = vsk->local_addr.svm_port;
+ h->vm_srv_id = vm.srv_id;
+
+ host.srv_id = srv_id_template;
+ host.svm_port = vsk->remote_addr.svm_port;
+ h->host_srv_id = host.srv_id;
+
+ return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
+}
+
+static int hvs_shutdown(struct vsock_sock *vsk, int mode)
+{
+ struct sock *sk = sk_vsock(vsk);
+ struct vmpipe_proto_header hdr;
+ struct hvs_send_buf *send_buf;
+ struct hvsock *hvs;
+
+ if (!(mode & SEND_SHUTDOWN))
+ return 0;
+
+ lock_sock(sk);
+
+ hvs = vsk->trans;
+ if (hvs->fin_sent)
+ goto out;
+
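+ /* The FIN is a zero-sized data packet carrying only the vmpipe header. */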
+ send_buf = (struct hvs_send_buf *)&hdr;
+
+ /* It can't fail: see hvs_channel_writable_bytes(). */
+ (void)hvs_send_data(hvs->chan, send_buf, 0);
+
+ hvs->fin_sent = true;
+out:
+ release_sock(sk);
+ return 0;
+}
+
+static void hvs_release(struct vsock_sock *vsk)
+{
+ struct hvsock *hvs = vsk->trans;
+ struct vmbus_channel *chan = hvs->chan;
+
+ if (chan)
+ hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
+
+ vsock_remove_sock(vsk);
+}
+
+static void hvs_destruct(struct vsock_sock *vsk)
+{
+ struct hvsock *hvs = vsk->trans;
+ struct vmbus_channel *chan = hvs->chan;
+
+ if (chan)
+ vmbus_hvsock_device_unregister(chan);
+
+ kfree(hvs);
+}
+
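+/* Hyper-V sockets are stream-only, so every datagram operation is rejected. */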
+static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
+{
+ return -EOPNOTSUPP;
+}
+
+static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
+ size_t len, int flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static int hvs_dgram_enqueue(struct vsock_sock *vsk,
+ struct sockaddr_vm *remote, struct msghdr *msg,
+ size_t dgram_len)
+{
+ return -EOPNOTSUPP;
+}
+
+static bool hvs_dgram_allow(u32 cid, u32 port)
+{
+ return false;
+}
+
+static int hvs_update_recv_data(struct hvsock *hvs)
+{
+ struct hvs_recv_buf *recv_buf;
+ u32 payload_len;
+
+ recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
+ payload_len = recv_buf->hdr.data_size;
+
+ if (payload_len > HVS_MTU_SIZE)
+ return -EIO;
+
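+ /* A zero-length payload is the peer's FIN: record its send-side shutdown. */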
+ if (payload_len == 0)
+ hvs->vsk->peer_shutdown |= SEND_SHUTDOWN;
+
+ hvs->recv_data_len = payload_len;
+ hvs->recv_data_off = 0;
+
+ return 0;
+}
+
+static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
+ size_t len, int flags)
+{
+ struct hvsock *hvs = vsk->trans;
+ bool need_refill = !hvs->recv_desc;
+ struct hvs_recv_buf *recv_buf;
+ u32 to_read;
+ int ret;
+
+ if (flags & MSG_PEEK)
+ return -EOPNOTSUPP;
+
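+ /* No descriptor cached from a previous read: fetch the next packet
+ * from the ring.
+ */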
+ if (need_refill) {
+ hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
+ ret = hvs_update_recv_data(hvs);
+ if (ret)
+ return ret;
+ }
+
+ recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
+ to_read = min_t(u32, len, hvs->recv_data_len);
+ ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read);
+ if (ret != 0)
+ return ret;
+
+ hvs->recv_data_len -= to_read;
+ if (hvs->recv_data_len == 0) {
+ hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
+ if (hvs->recv_desc) {
+ ret = hvs_update_recv_data(hvs);
+ if (ret)
+ return ret;
+ }
+ } else {
+ hvs->recv_data_off += to_read;
+ }
+
+ return to_read;
+}
+
+static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
+ size_t len)
+{
+ struct hvsock *hvs = vsk->trans;
+ struct vmbus_channel *chan = hvs->chan;
+ struct hvs_send_buf *send_buf;
+ ssize_t to_write, max_writable, ret;
+
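+ /* The send buffer (header plus HVS_SEND_BUF_SIZE of payload) must be
+ * exactly one 4K page.
+ */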
+ BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);
+
+ send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL);
+ if (!send_buf)
+ return -ENOMEM;
+
+ max_writable = hvs_channel_writable_bytes(chan);
+ to_write = min_t(ssize_t, len, max_writable);
+ to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
+
+ ret = memcpy_from_msg(send_buf->data, msg, to_write);
+ if (ret < 0)
+ goto out;
+
+ ret = hvs_send_data(hvs->chan, send_buf, to_write);
+ if (ret < 0)
+ goto out;
+
+ ret = to_write;
+out:
+ kfree(send_buf);
+ return ret;
+}
+
+static s64 hvs_stream_has_data(struct vsock_sock *vsk)
+{
+ struct hvsock *hvs = vsk->trans;
+ s64 ret;
+
+ if (hvs->recv_data_len > 0)
+ return 1;
+
+ switch (hvs_channel_readable_payload(hvs->chan)) {
+ case 1:
+ ret = 1;
+ break;
+ case 0:
+ vsk->peer_shutdown |= SEND_SHUTDOWN;
+ ret = 0;
+ break;
+ default: /* -1 */
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+static s64 hvs_stream_has_space(struct vsock_sock *vsk)
+{
+ struct hvsock *hvs = vsk->trans;
+ struct vmbus_channel *chan = hvs->chan;
+ s64 ret;
+
+ ret = hvs_channel_writable_bytes(chan);
+ if (ret > 0) {
+ hvs_clear_channel_pending_send_size(chan);
+ } else {
+ /* See hvs_channel_cb() */
+ hvs_set_channel_pending_send_size(chan);
+
+ /* Re-check the writable bytes to avoid race */
+ ret = hvs_channel_writable_bytes(chan);
+ if (ret > 0)
+ hvs_clear_channel_pending_send_size(chan);
+ }
+
+ return ret;
+}
+
+static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
+{
+ return HVS_MTU_SIZE + 1;
+}
+
+static bool hvs_stream_is_active(struct vsock_sock *vsk)
+{
+ struct hvsock *hvs = vsk->trans;
+
+ return hvs->chan != NULL;
+}
+
+static bool hvs_stream_allow(u32 cid, u32 port)
+{
+ /* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is
+ * reserved as ephemeral ports, which are used as the host's ports
+ * when the host initiates connections.
+ *
+ * Perform this check in the guest so an immediate error is produced
+ * instead of a timeout.
+ */
+ if (port > MAX_HOST_LISTEN_PORT)
+ return false;
+
+ if (cid == VMADDR_CID_HOST)
+ return true;
+
+ return false;
+}
+
+static
+int hvs_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *readable)
+{
+ struct hvsock *hvs = vsk->trans;
+
+ *readable = hvs_channel_readable(hvs->chan);
+ return 0;
+}
+
+static
+int hvs_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *writable)
+{
+ *writable = hvs_stream_has_space(vsk) > 0;
+
+ return 0;
+}
+
+static
+int hvs_notify_recv_init(struct vsock_sock *vsk, size_t target,
+ struct vsock_transport_recv_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_recv_pre_block(struct vsock_sock *vsk, size_t target,
+ struct vsock_transport_recv_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target,
+ struct vsock_transport_recv_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target,
+ ssize_t copied, bool data_read,
+ struct vsock_transport_recv_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_send_init(struct vsock_sock *vsk,
+ struct vsock_transport_send_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_send_pre_block(struct vsock_sock *vsk,
+ struct vsock_transport_send_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_send_pre_enqueue(struct vsock_sock *vsk,
+ struct vsock_transport_send_notify_data *d)
+{
+ return 0;
+}
+
+static
+int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
+ struct vsock_transport_send_notify_data *d)
+{
+ return 0;
+}
+
+static void hvs_set_buffer_size(struct vsock_sock *vsk, u64 val)
+{
+ /* Ignored. */
+}
+
+static void hvs_set_min_buffer_size(struct vsock_sock *vsk, u64 val)
+{
+ /* Ignored. */
+}
+
+static void hvs_set_max_buffer_size(struct vsock_sock *vsk, u64 val)
+{
+ /* Ignored. */
+}
+
+static u64 hvs_get_buffer_size(struct vsock_sock *vsk)
+{
+ return -ENOPROTOOPT;
+}
+
+static u64 hvs_get_min_buffer_size(struct vsock_sock *vsk)
+{
+ return -ENOPROTOOPT;
+}
+
+static u64 hvs_get_max_buffer_size(struct vsock_sock *vsk)
+{
+ return -ENOPROTOOPT;
+}
+
+static struct vsock_transport hvs_transport = {
+ .get_local_cid = hvs_get_local_cid,
+
+ .init = hvs_sock_init,
+ .destruct = hvs_destruct,
+ .release = hvs_release,
+ .connect = hvs_connect,
+ .shutdown = hvs_shutdown,
+
+ .dgram_bind = hvs_dgram_bind,
+ .dgram_dequeue = hvs_dgram_dequeue,
+ .dgram_enqueue = hvs_dgram_enqueue,
+ .dgram_allow = hvs_dgram_allow,
+
+ .stream_dequeue = hvs_stream_dequeue,
+ .stream_enqueue = hvs_stream_enqueue,
+ .stream_has_data = hvs_stream_has_data,
+ .stream_has_space = hvs_stream_has_space,
+ .stream_rcvhiwat = hvs_stream_rcvhiwat,
+ .stream_is_active = hvs_stream_is_active,
+ .stream_allow = hvs_stream_allow,
+
+ .notify_poll_in = hvs_notify_poll_in,
+ .notify_poll_out = hvs_notify_poll_out,
+ .notify_recv_init = hvs_notify_recv_init,
+ .notify_recv_pre_block = hvs_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = hvs_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = hvs_notify_recv_post_dequeue,
+ .notify_send_init = hvs_notify_send_init,
+ .notify_send_pre_block = hvs_notify_send_pre_block,
+ .notify_send_pre_enqueue = hvs_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = hvs_notify_send_post_enqueue,
+
+ .set_buffer_size = hvs_set_buffer_size,
+ .set_min_buffer_size = hvs_set_min_buffer_size,
+ .set_max_buffer_size = hvs_set_max_buffer_size,
+ .get_buffer_size = hvs_get_buffer_size,
+ .get_min_buffer_size = hvs_get_min_buffer_size,
+ .get_max_buffer_size = hvs_get_max_buffer_size,
+};
+
+static int hvs_probe(struct hv_device *hdev,
+ const struct hv_vmbus_device_id *dev_id)
+{
+ struct vmbus_channel *chan = hdev->channel;
+
+ hvs_open_connection(chan);
+
+ /* Always return success to suppress the unnecessary error message
+ * in vmbus_probe(): on error the host will rescind the device in
+ * 30 seconds and we can do cleanup at that time in
+ * vmbus_onoffer_rescind().
+ */
+ return 0;
+}
+
+static int hvs_remove(struct hv_device *hdev)
+{
+ struct vmbus_channel *chan = hdev->channel;
+
+ vmbus_close(chan);
+
+ return 0;
+}
+
+/* This isn't really used. See vmbus_match() and vmbus_probe() */
+static const struct hv_vmbus_device_id id_table[] = {
+ {},
+};
+
+static struct hv_driver hvs_drv = {
+ .name = "hv_sock",
+ .hvsock = true,
+ .id_table = id_table,
+ .probe = hvs_probe,
+ .remove = hvs_remove,
+};
+
+static int __init hvs_init(void)
+{
+ int ret;
+
+ if (vmbus_proto_version < VERSION_WIN10)
+ return -ENODEV;
+
+ ret = vmbus_driver_register(&hvs_drv);
+ if (ret != 0)
+ return ret;
+
+ ret = vsock_core_init(&hvs_transport);
+ if (ret) {
+ vmbus_driver_unregister(&hvs_drv);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit hvs_exit(void)
+{
+ vsock_core_exit();
+ vmbus_driver_unregister(&hvs_drv);
+}
+
+module_init(hvs_init);
+module_exit(hvs_exit);
+
+MODULE_DESCRIPTION("Hyper-V Sockets");
+MODULE_VERSION("1.0.0");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5a1a98d..ac09593 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -74,7 +74,7 @@ DEFINE_RWLOCK(x25_list_lock);
static const struct proto_ops x25_proto_ops;
-static struct x25_address null_x25_address = {" "};
+static const struct x25_address null_x25_address = {" "};
#ifdef CONFIG_COMPAT
struct compat_x25_subscrip_struct {
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 5f7e8bf..acf0010 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -63,7 +63,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
xfrm_address_t *daddr;
if (!x->type_offload)
- return 0;
+ return -EINVAL;
/* We don't yet support UDP encapsulation, TFC padding and ESN. */
if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN))
@@ -79,7 +79,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
daddr = &x->props.saddr;
}
- dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, x->props.family);
+ dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr,
+ x->props.family, x->props.output_mark);
if (IS_ERR(dst))
return 0;
@@ -153,6 +154,7 @@ static int xfrm_dev_register(struct net_device *dev)
static int xfrm_dev_unregister(struct net_device *dev)
{
+ xfrm_policy_cache_flush();
return NOTIFY_DONE;
}
@@ -175,8 +177,7 @@ static int xfrm_dev_down(struct net_device *dev)
if (dev->features & NETIF_F_HW_ESP)
xfrm_dev_state_flush(dev_net(dev), dev, true);
- xfrm_garbage_collect(dev_net(dev));
-
+ xfrm_policy_cache_flush();
return NOTIFY_DONE;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 923205e..2515cd2 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -247,6 +247,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
+ if (xo->status & CRYPTO_INVALID_PROTOCOL) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
+ goto drop;
+ }
+
XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
goto drop;
}
@@ -424,6 +429,7 @@ resume:
nf_reset(skb);
if (decaps) {
+ skb->sp->olen = 0;
skb_dst_drop(skb);
gro_cells_receive(&gro_cells, skb);
return 0;
@@ -434,6 +440,7 @@ resume:
err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
if (xfrm_gro) {
+ skb->sp->olen = 0;
skb_dst_drop(skb);
gro_cells_receive(&gro_cells, skb);
return err;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 8c0b672..31a2e6d 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -66,6 +66,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
goto error_nolock;
}
+ if (x->props.output_mark)
+ skb->mark = x->props.output_mark;
+
err = x->outer_mode->output(x, skb);
if (err) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 69b16ee..f062539 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -24,6 +24,7 @@
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
+#include <linux/cpu.h>
#include <linux/audit.h>
#include <net/dst.h>
#include <net/flow.h>
@@ -44,6 +45,8 @@ struct xfrm_flo {
u8 flags;
};
+static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
+static struct work_struct *xfrm_pcpu_work __read_mostly;
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
__read_mostly;
@@ -119,7 +122,7 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa
struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr,
- int family)
+ int family, u32 mark)
{
const struct xfrm_policy_afinfo *afinfo;
struct dst_entry *dst;
@@ -128,7 +131,7 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
if (unlikely(afinfo == NULL))
return ERR_PTR(-EAFNOSUPPORT);
- dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
+ dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark);
rcu_read_unlock();
@@ -140,7 +143,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
int tos, int oif,
xfrm_address_t *prev_saddr,
xfrm_address_t *prev_daddr,
- int family)
+ int family, u32 mark)
{
struct net *net = xs_net(x);
xfrm_address_t *saddr = &x->props.saddr;
@@ -156,7 +159,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
daddr = x->coaddr;
}
- dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family);
+ dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark);
if (!IS_ERR(dst)) {
if (prev_saddr != saddr)
@@ -246,36 +249,6 @@ expired:
xfrm_pol_put(xp);
}
-static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
-{
- struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
- if (unlikely(pol->walk.dead))
- flo = NULL;
- else
- xfrm_pol_hold(pol);
-
- return flo;
-}
-
-static int xfrm_policy_flo_check(struct flow_cache_object *flo)
-{
- struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
-
- return !pol->walk.dead;
-}
-
-static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
-{
- xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
-}
-
-static const struct flow_cache_ops xfrm_policy_fc_ops = {
- .get = xfrm_policy_flo_get,
- .check = xfrm_policy_flo_check,
- .delete = xfrm_policy_flo_delete,
-};
-
/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
* SPD calls.
*/
@@ -298,7 +271,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
(unsigned long)policy);
setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
(unsigned long)policy);
- policy->flo.ops = &xfrm_policy_fc_ops;
}
return policy;
}
@@ -798,7 +770,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
else
hlist_add_head(&policy->bydst, chain);
__xfrm_policy_link(policy, dir);
- atomic_inc(&net->xfrm.flow_cache_genid);
/* After previous checking, family can either be AF_INET or AF_INET6 */
if (policy->family == AF_INET)
@@ -1004,6 +975,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
}
if (!cnt)
err = -ESRCH;
+ else
+ xfrm_policy_cache_flush();
out:
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return err;
@@ -1175,7 +1148,7 @@ fail:
}
static struct xfrm_policy *
-__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
+xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
{
#ifdef CONFIG_XFRM_SUB_POLICY
struct xfrm_policy *pol;
@@ -1187,61 +1160,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}
-static int flow_to_policy_dir(int dir)
-{
- if (XFRM_POLICY_IN == FLOW_DIR_IN &&
- XFRM_POLICY_OUT == FLOW_DIR_OUT &&
- XFRM_POLICY_FWD == FLOW_DIR_FWD)
- return dir;
-
- switch (dir) {
- default:
- case FLOW_DIR_IN:
- return XFRM_POLICY_IN;
- case FLOW_DIR_OUT:
- return XFRM_POLICY_OUT;
- case FLOW_DIR_FWD:
- return XFRM_POLICY_FWD;
- }
-}
-
-static struct flow_cache_object *
-xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
- u8 dir, struct flow_cache_object *old_obj, void *ctx)
-{
- struct xfrm_policy *pol;
-
- if (old_obj)
- xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
-
- pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
- if (IS_ERR_OR_NULL(pol))
- return ERR_CAST(pol);
-
- /* Resolver returns two references:
- * one for cache and one for caller of flow_cache_lookup() */
- xfrm_pol_hold(pol);
-
- return &pol->flo;
-}
-
-static inline int policy_to_flow_dir(int dir)
-{
- if (XFRM_POLICY_IN == FLOW_DIR_IN &&
- XFRM_POLICY_OUT == FLOW_DIR_OUT &&
- XFRM_POLICY_FWD == FLOW_DIR_FWD)
- return dir;
- switch (dir) {
- default:
- case XFRM_POLICY_IN:
- return FLOW_DIR_IN;
- case XFRM_POLICY_OUT:
- return FLOW_DIR_OUT;
- case XFRM_POLICY_FWD:
- return FLOW_DIR_FWD;
- }
-}
-
static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
const struct flowi *fl, u16 family)
{
@@ -1261,7 +1179,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
}
err = security_xfrm_policy_lookup(pol->security,
fl->flowi_secid,
- policy_to_flow_dir(dir));
+ dir);
if (!err) {
if (!xfrm_pol_hold_rcu(pol))
goto again;
@@ -1422,14 +1340,14 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
static int
xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
- xfrm_address_t *remote, unsigned short family)
+ xfrm_address_t *remote, unsigned short family, u32 mark)
{
int err;
const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
if (unlikely(afinfo == NULL))
return -EINVAL;
- err = afinfo->get_saddr(net, oif, local, remote);
+ err = afinfo->get_saddr(net, oif, local, remote, mark);
rcu_read_unlock();
return err;
}
@@ -1460,7 +1378,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
if (xfrm_addr_any(local, tmpl->encap_family)) {
error = xfrm_get_saddr(net, fl->flowi_oif,
&tmp, remote,
- tmpl->encap_family);
+ tmpl->encap_family, 0);
if (error)
goto fail;
local = &tmp;
@@ -1545,58 +1463,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family)
return tos;
}
-static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
-{
- struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
- struct dst_entry *dst = &xdst->u.dst;
-
- if (xdst->route == NULL) {
- /* Dummy bundle - if it has xfrms we were not
- * able to build bundle as template resolution failed.
- * It means we need to try again resolving. */
- if (xdst->num_xfrms > 0)
- return NULL;
- } else if (dst->flags & DST_XFRM_QUEUE) {
- return NULL;
- } else {
- /* Real bundle */
- if (stale_bundle(dst))
- return NULL;
- }
-
- dst_hold(dst);
- return flo;
-}
-
-static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
-{
- struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
- struct dst_entry *dst = &xdst->u.dst;
-
- if (!xdst->route)
- return 0;
- if (stale_bundle(dst))
- return 0;
-
- return 1;
-}
-
-static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
-{
- struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
- struct dst_entry *dst = &xdst->u.dst;
-
- /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
- dst->obsolete = DST_OBSOLETE_DEAD;
- dst_release_immediate(dst);
-}
-
-static const struct flow_cache_ops xfrm_bundle_fc_ops = {
- .get = xfrm_bundle_flo_get,
- .check = xfrm_bundle_flo_check,
- .delete = xfrm_bundle_flo_delete,
-};
-
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{
const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1624,7 +1490,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
struct dst_entry *dst = &xdst->u.dst;
memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
- xdst->flo.ops = &xfrm_bundle_fc_ops;
} else
xdst = ERR_PTR(-ENOBUFS);
@@ -1733,7 +1598,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
family = xfrm[i]->props.family;
dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
- &saddr, &daddr, family);
+ &saddr, &daddr, family,
+ xfrm[i]->props.output_mark);
err = PTR_ERR(dst);
if (IS_ERR(dst))
goto put_states;
@@ -1840,6 +1706,102 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
}
+static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
+{
+ this_cpu_write(xfrm_last_dst, xdst);
+ if (old)
+ dst_release(&old->u.dst);
+}
+
+static void __xfrm_pcpu_work_fn(void)
+{
+ struct xfrm_dst *old;
+
+ old = this_cpu_read(xfrm_last_dst);
+ if (old && !xfrm_bundle_ok(old))
+ xfrm_last_dst_update(NULL, old);
+}
+
+static void xfrm_pcpu_work_fn(struct work_struct *work)
+{
+ local_bh_disable();
+ rcu_read_lock();
+ __xfrm_pcpu_work_fn();
+ rcu_read_unlock();
+ local_bh_enable();
+}
+
+void xfrm_policy_cache_flush(void)
+{
+ struct xfrm_dst *old;
+ bool found = false;
+ int cpu;
+
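+ /* First pass: cheap check for any CPU caching a stale bundle. */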
+ local_bh_disable();
+ rcu_read_lock();
+ for_each_possible_cpu(cpu) {
+ old = per_cpu(xfrm_last_dst, cpu);
+ if (old && !xfrm_bundle_ok(old)) {
+ if (smp_processor_id() == cpu) {
+ __xfrm_pcpu_work_fn();
+ continue;
+ }
+ found = true;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+ local_bh_enable();
+
+ if (!found)
+ return;
+
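+ /* Flush the stale bundles: per-CPU work for online CPUs, direct
+ * release for offline ones.
+ */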
+ get_online_cpus();
+
+ for_each_possible_cpu(cpu) {
+ bool bundle_release;
+
+ rcu_read_lock();
+ old = per_cpu(xfrm_last_dst, cpu);
+ bundle_release = old && !xfrm_bundle_ok(old);
+ rcu_read_unlock();
+
+ if (!bundle_release)
+ continue;
+
+ if (cpu_online(cpu)) {
+ schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
+ continue;
+ }
+
+ rcu_read_lock();
+ old = per_cpu(xfrm_last_dst, cpu);
+ if (old && !xfrm_bundle_ok(old)) {
+ per_cpu(xfrm_last_dst, cpu) = NULL;
+ dst_release(&old->u.dst);
+ }
+ rcu_read_unlock();
+ }
+
+ put_online_cpus();
+}
+
+static bool xfrm_pol_dead(struct xfrm_dst *xdst)
+{
+ unsigned int num_pols = xdst->num_pols;
+ unsigned int pol_dead = 0, i;
+
+ for (i = 0; i < num_pols; i++)
+ pol_dead |= xdst->pols[i]->walk.dead;
+
+ /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
+ if (pol_dead)
+ xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
+
+ return pol_dead;
+}
+
static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
const struct flowi *fl, u16 family,
@@ -1847,10 +1809,23 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
{
struct net *net = xp_net(pols[0]);
struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+ struct xfrm_dst *xdst, *old;
struct dst_entry *dst;
- struct xfrm_dst *xdst;
int err;
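+ /* Fast path: reuse the per-CPU cached bundle when its device and
+ * policies still match this request.
+ */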
+ xdst = this_cpu_read(xfrm_last_dst);
+ if (xdst &&
+ xdst->u.dst.dev == dst_orig->dev &&
+ xdst->num_pols == num_pols &&
+ !xfrm_pol_dead(xdst) &&
+ memcmp(xdst->pols, pols,
+ sizeof(struct xfrm_policy *) * num_pols) == 0 &&
+ xfrm_bundle_ok(xdst)) {
+ dst_hold(&xdst->u.dst);
+ return xdst;
+ }
+
+ old = xdst;
/* Try to instantiate a bundle */
err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
if (err <= 0) {
@@ -1871,6 +1846,9 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
xdst->policy_genid = atomic_read(&pols[0]->genid);
+ atomic_set(&xdst->u.dst.__refcnt, 2);
+ xfrm_last_dst_update(xdst, old);
+
return xdst;
}
@@ -2051,86 +2029,39 @@ free_dst:
goto out;
}
-static struct flow_cache_object *
-xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
- struct flow_cache_object *oldflo, void *ctx)
+static struct xfrm_dst *
+xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
{
- struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
- struct xfrm_dst *xdst, *new_xdst;
- int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
-
- /* Check if the policies from old bundle are usable */
- xdst = NULL;
- if (oldflo) {
- xdst = container_of(oldflo, struct xfrm_dst, flo);
- num_pols = xdst->num_pols;
- num_xfrms = xdst->num_xfrms;
- pol_dead = 0;
- for (i = 0; i < num_pols; i++) {
- pols[i] = xdst->pols[i];
- pol_dead |= pols[i]->walk.dead;
- }
- if (pol_dead) {
- /* Mark DST_OBSOLETE_DEAD to fail the next
- * xfrm_dst_check()
- */
- xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
- dst_release_immediate(&xdst->u.dst);
- xdst = NULL;
- num_pols = 0;
- num_xfrms = 0;
- oldflo = NULL;
- }
- }
+ int num_pols = 0, num_xfrms = 0, err;
+ struct xfrm_dst *xdst;
/* Resolve policies to use if we couldn't get them from
* previous cache entry */
- if (xdst == NULL) {
- num_pols = 1;
- pols[0] = __xfrm_policy_lookup(net, fl, family,
- flow_to_policy_dir(dir));
- err = xfrm_expand_policies(fl, family, pols,
+ num_pols = 1;
+ pols[0] = xfrm_policy_lookup(net, fl, family, dir);
+ err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
- if (err < 0)
- goto inc_error;
- if (num_pols == 0)
- return NULL;
- if (num_xfrms <= 0)
- goto make_dummy_bundle;
- }
+ if (err < 0)
+ goto inc_error;
+ if (num_pols == 0)
+ return NULL;
+ if (num_xfrms <= 0)
+ goto make_dummy_bundle;
- new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
+ xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
xflo->dst_orig);
- if (IS_ERR(new_xdst)) {
- err = PTR_ERR(new_xdst);
+ if (IS_ERR(xdst)) {
+ err = PTR_ERR(xdst);
if (err != -EAGAIN)
goto error;
- if (oldflo == NULL)
- goto make_dummy_bundle;
- dst_hold(&xdst->u.dst);
- return oldflo;
- } else if (new_xdst == NULL) {
+ goto make_dummy_bundle;
+ } else if (xdst == NULL) {
num_xfrms = 0;
- if (oldflo == NULL)
- goto make_dummy_bundle;
- xdst->num_xfrms = 0;
- dst_hold(&xdst->u.dst);
- return oldflo;
- }
-
- /* Kill the previous bundle */
- if (xdst) {
- /* The policies were stolen for newly generated bundle */
- xdst->num_pols = 0;
- /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
- xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
- dst_release_immediate(&xdst->u.dst);
+ goto make_dummy_bundle;
}
- /* We do need to return one reference for original caller */
- dst_hold(&new_xdst->u.dst);
- return &new_xdst->flo;
+ return xdst;
make_dummy_bundle:
/* We found policies, but there's no bundles to instantiate:
@@ -2146,17 +2077,12 @@ make_dummy_bundle:
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
dst_hold(&xdst->u.dst);
- return &xdst->flo;
+ return xdst;
inc_error:
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
- if (xdst != NULL) {
- /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */
- xdst->u.dst.obsolete = DST_OBSOLETE_DEAD;
- dst_release_immediate(&xdst->u.dst);
- } else
- xfrm_pols_put(pols, num_pols);
+ xfrm_pols_put(pols, num_pols);
return ERR_PTR(err);
}
@@ -2187,11 +2113,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
const struct sock *sk, int flags)
{
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
- struct flow_cache_object *flo;
struct xfrm_dst *xdst;
struct dst_entry *dst, *route;
u16 family = dst_orig->ops->family;
- u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+ u8 dir = XFRM_POLICY_OUT;
int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
dst = NULL;
@@ -2241,15 +2166,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
!net->xfrm.policy_count[XFRM_POLICY_OUT])
goto nopol;
- flo = flow_cache_lookup(net, fl, family, dir,
- xfrm_bundle_lookup, &xflo);
- if (flo == NULL)
+ xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
+ if (xdst == NULL)
goto nopol;
- if (IS_ERR(flo)) {
- err = PTR_ERR(flo);
+ if (IS_ERR(xdst)) {
+ err = PTR_ERR(xdst);
goto dropdst;
}
- xdst = container_of(flo, struct xfrm_dst, flo);
num_pols = xdst->num_pols;
num_xfrms = xdst->num_xfrms;
@@ -2448,12 +2371,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
int pi;
int reverse;
struct flowi fl;
- u8 fl_dir;
int xerr_idx = -1;
reverse = dir & ~XFRM_POLICY_MASK;
dir &= XFRM_POLICY_MASK;
- fl_dir = policy_to_flow_dir(dir);
if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
@@ -2485,16 +2406,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
}
- if (!pol) {
- struct flow_cache_object *flo;
-
- flo = flow_cache_lookup(net, &fl, family, fl_dir,
- xfrm_policy_lookup, NULL);
- if (IS_ERR_OR_NULL(flo))
- pol = ERR_CAST(flo);
- else
- pol = container_of(flo, struct xfrm_policy, flo);
- }
+ if (!pol)
+ pol = xfrm_policy_lookup(net, &fl, family, dir);
if (IS_ERR(pol)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
@@ -2640,11 +2553,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
* notice. That's what we are validating here via the
* stale_bundle() check.
*
- * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will
- * be marked on it.
* When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will
* be marked on it.
- * Both will force stable_bundle() to fail on any xdst bundle with
+ * This will force stale_bundle() to fail on any xdst bundle with
* this dst linked in it.
*/
if (dst->obsolete < 0 && !stale_bundle(dst))
@@ -2684,18 +2595,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
return dst;
}
-void xfrm_garbage_collect(struct net *net)
-{
- flow_cache_flush(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect);
-
-void xfrm_garbage_collect_deferred(struct net *net)
-{
- flow_cache_flush_deferred(net);
-}
-EXPORT_SYMBOL(xfrm_garbage_collect_deferred);
-
static void xfrm_init_pmtu(struct dst_entry *dst)
{
do {
@@ -3033,14 +2932,9 @@ static int __net_init xfrm_net_init(struct net *net)
rv = xfrm_sysctl_init(net);
if (rv < 0)
goto out_sysctl;
- rv = flow_cache_init(net);
- if (rv < 0)
- goto out;
return 0;
-out:
- xfrm_sysctl_fini(net);
out_sysctl:
xfrm_policy_fini(net);
out_policy:
@@ -3053,7 +2947,6 @@ out_statistics:
static void __net_exit xfrm_net_exit(struct net *net)
{
- flow_cache_fini(net);
xfrm_sysctl_fini(net);
xfrm_policy_fini(net);
xfrm_state_fini(net);
@@ -3067,7 +2960,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
void __init xfrm_init(void)
{
- flow_cache_hp_init();
+ int i;
+
+ xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
+ GFP_KERNEL);
+ BUG_ON(!xfrm_pcpu_work);
+
+ for (i = 0; i < NR_CPUS; i++)
+ INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
+
register_pernet_subsys(&xfrm_net_ops);
seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init();
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a792eff..0dab1cd 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -296,12 +296,14 @@ int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
}
EXPORT_SYMBOL(xfrm_unregister_type_offload);
-static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned short family)
+static const struct xfrm_type_offload *
+xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
{
struct xfrm_state_afinfo *afinfo;
const struct xfrm_type_offload **typemap;
const struct xfrm_type_offload *type;
+retry:
afinfo = xfrm_state_get_afinfo(family);
if (unlikely(afinfo == NULL))
return NULL;
@@ -311,6 +313,12 @@ static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned
if ((type && !try_module_get(type->owner)))
type = NULL;
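+ /* On a miss, try once to load a protocol-specific offload module,
+ * then retry the lookup.
+ */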
+ if (!type && try_load) {
+ request_module("xfrm-offload-%d-%d", family, proto);
+ try_load = 0;
+ goto retry;
+ }
+
rcu_read_unlock();
return type;
}
@@ -724,9 +732,10 @@ restart:
}
}
}
- if (cnt)
+ if (cnt) {
err = 0;
-
+ xfrm_policy_cache_flush();
+ }
out:
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return err;
@@ -2172,7 +2181,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu)
return mtu - x->props.header_len;
}
-int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
+int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
{
struct xfrm_state_afinfo *afinfo;
struct xfrm_mode *inner_mode;
@@ -2237,7 +2246,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
if (x->type == NULL)
goto error;
- x->type_offload = xfrm_get_type_offload(x->id.proto, family);
+ x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);
err = x->type->init_state(x);
if (err)
@@ -2265,7 +2274,7 @@ EXPORT_SYMBOL(__xfrm_init_state);
int xfrm_init_state(struct xfrm_state *x)
{
- return __xfrm_init_state(x, true);
+ return __xfrm_init_state(x, true, false);
}
EXPORT_SYMBOL(xfrm_init_state);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 9391ced..2bfbd91 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -584,7 +584,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
xfrm_mark_get(attrs, &x->mark);
- err = __xfrm_init_state(x, false);
+ if (attrs[XFRMA_OUTPUT_MARK])
+ x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);
+
+ err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
if (err)
goto error;
@@ -897,6 +900,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
ret = copy_user_offload(&x->xso, skb);
if (ret)
goto out;
+ if (x->props.output_mark) {
+ ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark);
+ if (ret)
+ goto out;
+ }
if (x->security)
ret = copy_sec_ctx(x->security, skb);
out:
@@ -1815,8 +1823,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
out:
xfrm_pol_put(xp);
- if (delete && err == 0)
- xfrm_garbage_collect(net);
return err;
}
@@ -2028,7 +2034,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
return err;
}
- xfrm_garbage_collect(net);
c.data.type = type;
c.event = nlh->nlmsg_type;
@@ -2458,6 +2463,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_PROTO] = { .type = NLA_U8 },
[XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) },
[XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) },
+ [XFRMA_OUTPUT_MARK] = { .type = NLA_U32 },
};
static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
@@ -2679,6 +2685,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
l += nla_total_size(sizeof(x->props.extra_flags));
if (x->xso.dev)
l += nla_total_size(sizeof(x->xso));
+ if (x->props.output_mark)
+ l += nla_total_size(sizeof(x->props.output_mark));
/* Must count x->lastused as it may become non-zero behind our back. */
l += nla_total_size_64bit(sizeof(u64));
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 87246be..cf17c79 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -37,6 +37,10 @@ hostprogs-y += xdp_tx_iptunnel
hostprogs-y += test_map_in_map
hostprogs-y += per_socket_stats_example
hostprogs-y += load_sock_ops
+hostprogs-y += xdp_redirect
+hostprogs-y += xdp_redirect_map
+hostprogs-y += xdp_monitor
+hostprogs-y += syscall_tp
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -78,6 +82,10 @@ lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o
xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
+xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
+xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
+xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
+syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -119,6 +127,10 @@ always += tcp_bufs_kern.o
always += tcp_cong_kern.o
always += tcp_iw_kern.o
always += tcp_clamp_kern.o
+always += xdp_redirect_kern.o
+always += xdp_redirect_map_kern.o
+always += xdp_monitor_kern.o
+always += syscall_tp_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -155,6 +167,10 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf
HOSTLOADLIBES_lwt_len_hist += -l elf
HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
HOSTLOADLIBES_test_map_in_map += -lelf
+HOSTLOADLIBES_xdp_redirect += -lelf
+HOSTLOADLIBES_xdp_redirect_map += -lelf
+HOSTLOADLIBES_xdp_monitor += -lelf
+HOSTLOADLIBES_syscall_tp += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 899f403..6aa5009 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -65,6 +65,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
bool is_sockops = strncmp(event, "sockops", 7) == 0;
+ bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type;
char buf[256];
@@ -92,6 +93,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
} else if (is_sockops) {
prog_type = BPF_PROG_TYPE_SOCK_OPS;
+ } else if (is_sk_skb) {
+ prog_type = BPF_PROG_TYPE_SK_SKB;
} else {
printf("Unknown event '%s'\n", event);
return -1;
@@ -109,7 +112,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
return 0;
- if (is_socket || is_sockops) {
+ if (is_socket || is_sockops || is_sk_skb) {
if (is_socket)
event += 6;
else
@@ -198,7 +201,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
static int load_maps(struct bpf_map_data *maps, int nr_maps,
fixup_map_cb fixup_map)
{
- int i;
+ int i, numa_node;
for (i = 0; i < nr_maps; i++) {
if (fixup_map) {
@@ -210,21 +213,26 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps,
}
}
+ numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ?
+ maps[i].def.numa_node : -1;
+
if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
- map_fd[i] = bpf_create_map_in_map(maps[i].def.type,
+ map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type,
maps[i].def.key_size,
inner_map_fd,
maps[i].def.max_entries,
- maps[i].def.map_flags);
+ maps[i].def.map_flags,
+ numa_node);
} else {
- map_fd[i] = bpf_create_map(maps[i].def.type,
- maps[i].def.key_size,
- maps[i].def.value_size,
- maps[i].def.max_entries,
- maps[i].def.map_flags);
+ map_fd[i] = bpf_create_map_node(maps[i].def.type,
+ maps[i].def.key_size,
+ maps[i].def.value_size,
+ maps[i].def.max_entries,
+ maps[i].def.map_flags,
+ numa_node);
}
if (map_fd[i] < 0) {
printf("failed to create a map: %d %s\n",
@@ -567,7 +575,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
memcmp(shname, "perf_event", 10) == 0 ||
memcmp(shname, "socket", 6) == 0 ||
memcmp(shname, "cgroup/", 7) == 0 ||
- memcmp(shname, "sockops", 7) == 0) {
+ memcmp(shname, "sockops", 7) == 0 ||
+ memcmp(shname, "sk_skb", 6) == 0) {
ret = load_and_attach(shname, data->d_buf,
data->d_size);
if (ret != 0)
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index ca0563d..453e322 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -13,6 +13,7 @@ struct bpf_map_def {
unsigned int max_entries;
unsigned int map_flags;
unsigned int inner_map_idx;
+ unsigned int numa_node;
};
struct bpf_map_data {
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index 2451658..098c857 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -40,6 +40,8 @@ struct bpf_map_def SEC("maps") inner_lru_hash_map = {
.key_size = sizeof(u32),
.value_size = sizeof(long),
.max_entries = MAX_ENTRIES,
+ .map_flags = BPF_F_NUMA_NODE,
+ .numa_node = 0,
};
struct bpf_map_def SEC("maps") array_of_lru_hashs = {
@@ -86,6 +88,13 @@ struct bpf_map_def SEC("maps") array_map = {
.max_entries = MAX_ENTRIES,
};
+struct bpf_map_def SEC("maps") lru_hash_lookup_map = {
+ .type = BPF_MAP_TYPE_LRU_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = MAX_ENTRIES,
+};
+
SEC("kprobe/sys_getuid")
int stress_hmap(struct pt_regs *ctx)
{
@@ -146,12 +155,23 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx)
SEC("kprobe/sys_connect")
int stress_lru_hmap_alloc(struct pt_regs *ctx)
{
+ char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%d\n";
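+ /* The user-space tester packs its parameters into the IPv6
+ * destination address.
+ */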
+ union {
+ u16 dst6[8];
+ struct {
+ u16 magic0;
+ u16 magic1;
+ u16 tcase;
+ u16 unused16;
+ u32 unused32;
+ u32 key;
+ };
+ } test_params;
struct sockaddr_in6 *in6;
- u16 test_case, dst6[8];
+ u16 test_case;
int addrlen, ret;
- char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%d\n";
long val = 1;
- u32 key = bpf_get_prandom_u32();
+ u32 key = 0;
in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
addrlen = (int)PT_REGS_PARM3(ctx);
@@ -159,14 +179,18 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
if (addrlen != sizeof(*in6))
return 0;
- ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
+ ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6),
+ &in6->sin6_addr);
if (ret)
goto done;
- if (dst6[0] != 0xdead || dst6[1] != 0xbeef)
+ if (test_params.magic0 != 0xdead ||
+ test_params.magic1 != 0xbeef)
return 0;
- test_case = dst6[7];
+ test_case = test_params.tcase;
+ if (test_case != 3)
+ key = bpf_get_prandom_u32();
if (test_case == 0) {
ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
@@ -186,6 +210,16 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
ret = bpf_map_update_elem(nolocal_lru_map, &key, &val,
BPF_ANY);
+ } else if (test_case == 3) {
+ u32 i;
+
+ key = test_params.key;
+
+#pragma clang loop unroll(full)
+ for (i = 0; i < 32; i++) {
+ bpf_map_lookup_elem(&lru_hash_lookup_map, &key);
+ key++;
+ }
} else {
ret = -EINVAL;
}
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 1a8894b..f388254 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -46,6 +46,7 @@ enum test_type {
HASH_LOOKUP,
ARRAY_LOOKUP,
INNER_LRU_HASH_PREALLOC,
+ LRU_HASH_LOOKUP,
NR_TESTS,
};
@@ -60,6 +61,7 @@ const char *test_map_names[NR_TESTS] = {
[HASH_LOOKUP] = "hash_map",
[ARRAY_LOOKUP] = "array_map",
[INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map",
+ [LRU_HASH_LOOKUP] = "lru_hash_lookup_map",
};
static int test_flags = ~0;
@@ -67,6 +69,8 @@ static uint32_t num_map_entries;
static uint32_t inner_lru_hash_size;
static int inner_lru_hash_idx = -1;
static int array_of_lru_hashs_idx = -1;
+static int lru_hash_lookup_idx = -1;
+static int lru_hash_lookup_test_entries = 32;
static uint32_t max_cnt = 1000000;
static int check_test_flags(enum test_type t)
@@ -86,6 +90,32 @@ static void test_hash_prealloc(int cpu)
cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
}
+static int pre_test_lru_hash_lookup(int tasks)
+{
+ int fd = map_fd[lru_hash_lookup_idx];
+ uint32_t key;
+ long val = 1;
+ int ret;
+
+ if (num_map_entries > lru_hash_lookup_test_entries)
+ lru_hash_lookup_test_entries = num_map_entries;
+
+ /* Populate the lru_hash_lookup_map for the LRU_HASH_LOOKUP perf test.
+ *
+ * It is fine for the user to request a map with
+ * num_map_entries < 32: some of the later lru hash lookups
+ * may simply return not-found. We are not interested in the
+ * performance of such a small LRU map.
+ */
+ for (key = 0; key < lru_hash_lookup_test_entries; key++) {
+ ret = bpf_map_update_elem(fd, &key, &val, BPF_NOEXIST);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static void do_test_lru(enum test_type test, int cpu)
{
static int inner_lru_map_fds[MAX_NR_CPUS];
@@ -97,14 +127,20 @@ static void do_test_lru(enum test_type test, int cpu)
if (test == INNER_LRU_HASH_PREALLOC) {
int outer_fd = map_fd[array_of_lru_hashs_idx];
+ unsigned int mycpu, mynode;
assert(cpu < MAX_NR_CPUS);
if (cpu) {
+ ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL);
+ assert(!ret);
+
inner_lru_map_fds[cpu] =
- bpf_create_map(BPF_MAP_TYPE_LRU_HASH,
- sizeof(uint32_t), sizeof(long),
- inner_lru_hash_size, 0);
+ bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH,
+ sizeof(uint32_t),
+ sizeof(long),
+ inner_lru_hash_size, 0,
+ mynode);
if (inner_lru_map_fds[cpu] == -1) {
printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n",
strerror(errno), errno);
@@ -129,13 +165,17 @@ static void do_test_lru(enum test_type test, int cpu)
if (test == LRU_HASH_PREALLOC) {
test_name = "lru_hash_map_perf";
- in6.sin6_addr.s6_addr16[7] = 0;
+ in6.sin6_addr.s6_addr16[2] = 0;
} else if (test == NOCOMMON_LRU_HASH_PREALLOC) {
test_name = "nocommon_lru_hash_map_perf";
- in6.sin6_addr.s6_addr16[7] = 1;
+ in6.sin6_addr.s6_addr16[2] = 1;
} else if (test == INNER_LRU_HASH_PREALLOC) {
test_name = "inner_lru_hash_map_perf";
- in6.sin6_addr.s6_addr16[7] = 2;
+ in6.sin6_addr.s6_addr16[2] = 2;
+ } else if (test == LRU_HASH_LOOKUP) {
+ test_name = "lru_hash_lookup_perf";
+ in6.sin6_addr.s6_addr16[2] = 3;
+ in6.sin6_addr.s6_addr32[3] = 0;
} else {
assert(0);
}
@@ -144,6 +184,11 @@ static void do_test_lru(enum test_type test, int cpu)
for (i = 0; i < max_cnt; i++) {
ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6));
assert(ret == -1 && errno == EBADF);
+ if (in6.sin6_addr.s6_addr32[3] <
+ lru_hash_lookup_test_entries - 32)
+ in6.sin6_addr.s6_addr32[3] += 32;
+ else
+ in6.sin6_addr.s6_addr32[3] = 0;
}
printf("%d:%s pre-alloc %lld events per sec\n",
cpu, test_name,
@@ -165,6 +210,11 @@ static void test_inner_lru_hash_prealloc(int cpu)
do_test_lru(INNER_LRU_HASH_PREALLOC, cpu);
}
+static void test_lru_hash_lookup(int cpu)
+{
+ do_test_lru(LRU_HASH_LOOKUP, cpu);
+}
+
static void test_percpu_hash_prealloc(int cpu)
{
__u64 start_time;
@@ -237,6 +287,11 @@ static void test_array_lookup(int cpu)
cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time));
}
+typedef int (*pre_test_func)(int tasks);
+const pre_test_func pre_test_funcs[] = {
+ [LRU_HASH_LOOKUP] = pre_test_lru_hash_lookup,
+};
+
typedef void (*test_func)(int cpu);
const test_func test_funcs[] = {
[HASH_PREALLOC] = test_hash_prealloc,
@@ -249,8 +304,25 @@ const test_func test_funcs[] = {
[HASH_LOOKUP] = test_hash_lookup,
[ARRAY_LOOKUP] = test_array_lookup,
[INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc,
+ [LRU_HASH_LOOKUP] = test_lru_hash_lookup,
};
+static int pre_test(int tasks)
+{
+ int i;
+
+ for (i = 0; i < NR_TESTS; i++) {
+ if (pre_test_funcs[i] && check_test_flags(i)) {
+ int ret = pre_test_funcs[i](tasks);
+
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
static void loop(int cpu)
{
cpu_set_t cpuset;
@@ -271,6 +343,8 @@ static void run_perf_test(int tasks)
pid_t pid[tasks];
int i;
+ assert(!pre_test(tasks));
+
for (i = 0; i < tasks; i++) {
pid[i] = fork();
if (pid[i] == 0) {
@@ -338,6 +412,9 @@ static void fixup_map(struct bpf_map_data *map, int idx)
array_of_lru_hashs_idx = idx;
}
+ if (!strcmp("lru_hash_lookup_map", map->name))
+ lru_hash_lookup_idx = idx;
+
if (num_map_entries <= 0)
return;
diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c
index 533dd11a6..05dcdf8 100644
--- a/samples/bpf/sock_flags_kern.c
+++ b/samples/bpf/sock_flags_kern.c
@@ -9,8 +9,13 @@ SEC("cgroup/sock1")
int bpf_prog1(struct bpf_sock *sk)
{
char fmt[] = "socket: family %d type %d protocol %d\n";
+ char fmt2[] = "socket: uid %u gid %u\n";
+ __u64 gid_uid = bpf_get_current_uid_gid();
+ __u32 uid = gid_uid & 0xffffffff;
+ __u32 gid = gid_uid >> 32;
bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
+ bpf_trace_printk(fmt2, sizeof(fmt2), uid, gid);
/* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets
* ie., make ping6 fail
diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c
new file mode 100644
index 0000000..9149c52
--- /dev/null
+++ b/samples/bpf/syscall_tp_kern.c
@@ -0,0 +1,62 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct syscalls_enter_open_args {
+ unsigned long long unused;
+ long syscall_nr;
+ long filename_ptr;
+ long flags;
+ long mode;
+};
+
+struct syscalls_exit_open_args {
+ unsigned long long unused;
+ long syscall_nr;
+ long ret;
+};
+
+struct bpf_map_def SEC("maps") enter_open_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") exit_open_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 1,
+};
+
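+/* Bump the per-map counter; the first event creates the entry. */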
+static __always_inline void count(void *map)
+{
+ u32 key = 0;
+ u32 *value, init_val = 1;
+
+ value = bpf_map_lookup_elem(map, &key);
+ if (value)
+ *value += 1;
+ else
+ bpf_map_update_elem(map, &key, &init_val, BPF_NOEXIST);
+}
+
+SEC("tracepoint/syscalls/sys_enter_open")
+int trace_enter_open(struct syscalls_enter_open_args *ctx)
+{
+ count((void *)&enter_open_map);
+ return 0;
+}
+
+SEC("tracepoint/syscalls/sys_exit_open")
+int trace_enter_exit(struct syscalls_exit_open_args *ctx)
+{
+ count((void *)&exit_open_map);
+ return 0;
+}
diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c
new file mode 100644
index 0000000..a3cb91e
--- /dev/null
+++ b/samples/bpf/syscall_tp_user.c
@@ -0,0 +1,71 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <linux/bpf.h>
+#include <string.h>
+#include <linux/perf_event.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <sys/resource.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+/* This program verifies bpf attachment to the sys_enter_* and sys_exit_*
+ * tracepoints. It requires the kernel to be built with
+ * CONFIG_FTRACE_SYSCALLS.
+ */
+
+static void verify_map(int map_id)
+{
+ __u32 key = 0;
+ __u32 val;
+
+ if (bpf_map_lookup_elem(map_id, &key, &val) != 0) {
+ fprintf(stderr, "map_lookup failed: %s\n", strerror(errno));
+ return;
+ }
+ if (val == 0)
+ fprintf(stderr, "failed: map #%d returns value 0\n", map_id);
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ char filename[256];
+ int fd;
+
+ setrlimit(RLIMIT_MEMLOCK, &r);
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ fprintf(stderr, "%s", bpf_log_buf);
+ return 1;
+ }
+
+ /* The current load_bpf_file() calls perf_event_open() with the
+ * default pid = -1 and cpu = 0, which permits the attached bpf
+ * program to execute on all cpus for all pids; bpf program
+ * execution ignores cpu affinity.
+ */
+ /* trigger some "open" operations */
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "open failed: %s\n", strerror(errno));
+ return 1;
+ }
+ close(fd);
+
+ /* verify the map */
+ verify_map(map_fd[0]);
+ verify_map(map_fd[1]);
+
+ return 0;
+}
diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index 270edcc..370b749 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -17,6 +17,7 @@
#include <uapi/linux/pkt_cls.h>
#include <net/ipv6.h>
#include "bpf_helpers.h"
+#include "bpf_endian.h"
#define _htonl __builtin_bswap32
#define ERROR(ret) do {\
@@ -38,6 +39,10 @@ struct vxlan_metadata {
u32 gbp;
};
+struct erspan_metadata {
+ __be32 index;
+};
+
SEC("gre_set_tunnel")
int _gre_set_tunnel(struct __sk_buff *skb)
{
@@ -76,6 +81,63 @@ int _gre_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("erspan_set_tunnel")
+int _erspan_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ md.index = htonl(123);
+ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("erspan_get_tunnel")
+int _erspan_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip 0x%x erspan index 0x%x\n";
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ u32 index;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ index = bpf_ntohl(md.index);
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, index);
+
+ return TC_ACT_OK;
+}
+
SEC("vxlan_set_tunnel")
int _vxlan_set_tunnel(struct __sk_buff *skb)
{
@@ -378,5 +440,4 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
-
char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c
index c3cfb23e..e79594d 100644
--- a/samples/bpf/test_cgrp2_sock.c
+++ b/samples/bpf/test_cgrp2_sock.c
@@ -19,68 +19,271 @@
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
+#include <inttypes.h>
#include <linux/bpf.h>
#include "libbpf.h"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
-static int prog_load(int idx)
+static int prog_load(__u32 idx, __u32 mark, __u32 prio)
{
- struct bpf_insn prog[] = {
+ /* save pointer to context */
+ struct bpf_insn prog_start[] = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ };
+ struct bpf_insn prog_end[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+
+ /* set sk_bound_dev_if on socket */
+ struct bpf_insn prog_dev[] = {
BPF_MOV64_IMM(BPF_REG_3, idx),
BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)),
- BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
- BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt,
+ /* set mark on socket */
+ struct bpf_insn prog_mark[] = {
+ /* get uid of process */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_current_uid_gid),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff),
+
+ /* if uid is 0, use given mark, else use the uid as the mark */
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, mark),
+
+ /* set the mark on the new socket */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, mark)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, mark)),
+ };
+
+ /* set priority on socket */
+ struct bpf_insn prog_prio[] = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_3, prio),
+ BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, priority)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, priority)),
+ };
+
+ struct bpf_insn *prog;
+ size_t insns_cnt;
+ void *p;
+ int ret;
+
+ insns_cnt = sizeof(prog_start) + sizeof(prog_end);
+ if (idx)
+ insns_cnt += sizeof(prog_dev);
+
+ if (mark)
+ insns_cnt += sizeof(prog_mark);
+
+ if (prio)
+ insns_cnt += sizeof(prog_prio);
+
+ p = prog = malloc(insns_cnt);
+ if (!prog) {
+ fprintf(stderr, "Failed to allocate memory for instructions\n");
+ return -1; /* caller checks for a negative fd */
+ }
+
+ memcpy(p, prog_start, sizeof(prog_start));
+ p += sizeof(prog_start);
+
+ if (idx) {
+ memcpy(p, prog_dev, sizeof(prog_dev));
+ p += sizeof(prog_dev);
+ }
+
+ if (mark) {
+ memcpy(p, prog_mark, sizeof(prog_mark));
+ p += sizeof(prog_mark);
+ }
+
+ if (prio) {
+ memcpy(p, prog_prio, sizeof(prog_prio));
+ p += sizeof(prog_prio);
+ }
+
+ memcpy(p, prog_end, sizeof(prog_end));
+ p += sizeof(prog_end);
+
+ insns_cnt /= sizeof(struct bpf_insn);
+
+ ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt,
"GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE);
+
+ free(prog);
+
+ return ret;
+}
+
+static int get_bind_to_device(int sd, char *name, size_t len)
+{
+ socklen_t optlen = len;
+ int rc;
+
+ name[0] = '\0';
+ rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
+ if (rc < 0)
+ perror("setsockopt(SO_BINDTODEVICE)");
+
+ return rc;
+}
+
+static unsigned int get_somark(int sd)
+{
+ unsigned int mark = 0;
+ socklen_t optlen = sizeof(mark);
+ int rc;
+
+ rc = getsockopt(sd, SOL_SOCKET, SO_MARK, &mark, &optlen);
+ if (rc < 0)
+ perror("getsockopt(SO_MARK)");
+
+ return mark;
+}
+
+static unsigned int get_priority(int sd)
+{
+ unsigned int prio = 0;
+ socklen_t optlen = sizeof(prio);
+ int rc;
+
+ rc = getsockopt(sd, SOL_SOCKET, SO_PRIORITY, &prio, &optlen);
+ if (rc < 0)
+ perror("getsockopt(SO_PRIORITY)");
+
+ return prio;
+}
+
+static int show_sockopts(int family)
+{
+ unsigned int mark, prio;
+ char name[16];
+ int sd;
+
+ sd = socket(family, SOCK_DGRAM, 17);
+ if (sd < 0) {
+ perror("socket");
+ return 1;
+ }
+
+ if (get_bind_to_device(sd, name, sizeof(name)) < 0)
+ return 1;
+
+ mark = get_somark(sd);
+ prio = get_priority(sd);
+
+ close(sd);
+
+ printf("sd %d: dev %s, mark %u, priority %u\n", sd, name, mark, prio);
+
+ return 0;
}
static int usage(const char *argv0)
{
- printf("Usage: %s cg-path device-index\n", argv0);
+ printf("Usage:\n");
+ printf(" Attach a program\n");
+ printf(" %s -b bind-to-dev -m mark -p prio cg-path\n", argv0);
+ printf("\n");
+ printf(" Detach a program\n");
+ printf(" %s -d cg-path\n", argv0);
+ printf("\n");
+ printf(" Show inherited socket settings (mark, priority, and device)\n");
+ printf(" %s [-6]\n", argv0);
return EXIT_FAILURE;
}
int main(int argc, char **argv)
{
+ __u32 idx = 0, mark = 0, prio = 0;
+ const char *cgrp_path = NULL;
int cg_fd, prog_fd, ret;
- unsigned int idx;
+ int family = PF_INET;
+ int do_attach = 1;
+ int rc;
+
+ while ((rc = getopt(argc, argv, "db:m:p:6")) != -1) {
+ switch (rc) {
+ case 'd':
+ do_attach = 0;
+ break;
+ case 'b':
+ idx = if_nametoindex(optarg);
+ if (!idx) {
+ idx = strtoumax(optarg, NULL, 0);
+ if (!idx) {
+ printf("Invalid device name\n");
+ return EXIT_FAILURE;
+ }
+ }
+ break;
+ case 'm':
+ mark = strtoumax(optarg, NULL, 0);
+ break;
+ case 'p':
+ prio = strtoumax(optarg, NULL, 0);
+ break;
+ case '6':
+ family = PF_INET6;
+ break;
+ default:
+ return usage(argv[0]);
+ }
+ }
- if (argc < 2)
- return usage(argv[0]);
+ if (optind == argc)
+ return show_sockopts(family);
- idx = if_nametoindex(argv[2]);
- if (!idx) {
- printf("Invalid device name\n");
+ cgrp_path = argv[optind];
+ if (!cgrp_path) {
+ fprintf(stderr, "cgroup path not given\n");
return EXIT_FAILURE;
}
- cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
- if (cg_fd < 0) {
- printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+ if (do_attach && !idx && !mark && !prio) {
+ fprintf(stderr,
+ "One of device, mark or priority must be given\n");
return EXIT_FAILURE;
}
- prog_fd = prog_load(idx);
- printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
-
- if (prog_fd < 0) {
- printf("Failed to load prog: '%s'\n", strerror(errno));
+ cg_fd = open(cgrp_path, O_DIRECTORY | O_RDONLY);
+ if (cg_fd < 0) {
+ printf("Failed to open cgroup path: '%s'\n", strerror(errno));
return EXIT_FAILURE;
}
- ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0);
- if (ret < 0) {
- printf("Failed to attach prog to cgroup: '%s'\n",
- strerror(errno));
- return EXIT_FAILURE;
+ if (do_attach) {
+ prog_fd = prog_load(idx, mark, prio);
+ if (prog_fd < 0) {
+ printf("Failed to load prog: '%s'\n", strerror(errno));
+ printf("Output from kernel verifier:\n%s\n-------\n",
+ bpf_log_buf);
+ return EXIT_FAILURE;
+ }
+
+ ret = bpf_prog_attach(prog_fd, cg_fd,
+ BPF_CGROUP_INET_SOCK_CREATE, 0);
+ if (ret < 0) {
+ printf("Failed to attach prog to cgroup: '%s'\n",
+ strerror(errno));
+ return EXIT_FAILURE;
+ }
+ } else {
+ ret = bpf_prog_detach(cg_fd, BPF_CGROUP_INET_SOCK_CREATE);
+ if (ret < 0) {
+ printf("Failed to detach prog from cgroup: '%s'\n",
+ strerror(errno));
+ return EXIT_FAILURE;
+ }
}
+ close(cg_fd);
return EXIT_SUCCESS;
}
diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh
index 925fd46..a81f38e 100755
--- a/samples/bpf/test_cgrp2_sock.sh
+++ b/samples/bpf/test_cgrp2_sock.sh
@@ -1,47 +1,133 @@
-#!/bin/bash
-
-function config_device {
- ip netns add at_ns0
- ip link add veth0 type veth peer name veth0b
- ip link set veth0b up
- ip link set veth0 netns at_ns0
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
- ip netns exec at_ns0 ip link set dev veth0 up
- ip link add foo type vrf table 1234
- ip link set foo up
- ip addr add 172.16.1.101/24 dev veth0b
- ip addr add 2401:db00::2/64 dev veth0b nodad
- ip link set veth0b master foo
+#!/bin/sh
+
+# Test various socket options that can be set by attaching programs to cgroups.
+
+CGRP_MNT="/tmp/cgroupv2-test_cgrp2_sock"
+
+################################################################################
+#
+print_result()
+{
+ local rc=$1
+ local status=" OK "
+
+ [ $rc -ne 0 ] && status="FAIL"
+
+ printf "%-50s [%4s]\n" "$2" "$status"
}
-function attach_bpf {
- rm -rf /tmp/cgroupv2
- mkdir -p /tmp/cgroupv2
- mount -t cgroup2 none /tmp/cgroupv2
- mkdir -p /tmp/cgroupv2/foo
- test_cgrp2_sock /tmp/cgroupv2/foo foo
- echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
+check_sock()
+{
+ out=$(test_cgrp2_sock)
+ echo $out | grep -q "$1"
+ if [ $? -ne 0 ]; then
+ print_result 1 "IPv4: $2"
+ echo " expected: $1"
+ echo " have: $out"
+ rc=1
+ else
+ print_result 0 "IPv4: $2"
+ fi
}
-function cleanup {
- set +ex
- ip netns delete at_ns0
- ip link del veth0
- ip link del foo
- umount /tmp/cgroupv2
- rm -rf /tmp/cgroupv2
- set -ex
+check_sock6()
+{
+ out=$(test_cgrp2_sock -6)
+ echo $out | grep -q "$1"
+ if [ $? -ne 0 ]; then
+ print_result 1 "IPv6: $2"
+ echo " expected: $1"
+ echo " have: $out"
+ rc=1
+ else
+ print_result 0 "IPv6: $2"
+ fi
}
-function do_test {
- ping -c1 -w1 172.16.1.100
- ping6 -c1 -w1 2401:db00::1
+################################################################################
+#
+
+cleanup()
+{
+ echo $$ >> ${CGRP_MNT}/cgroup.procs
+ rmdir ${CGRP_MNT}/sockopts
}
+cleanup_and_exit()
+{
+ local rc=$1
+ local msg="$2"
+
+ [ -n "$msg" ] && echo "ERROR: $msg"
+
+ ip li del cgrp2_sock
+ umount ${CGRP_MNT}
+
+ exit $rc
+}
+
+
+################################################################################
+# main
+
+rc=0
+
+ip li add cgrp2_sock type dummy 2>/dev/null
+
+set -e
+mkdir -p ${CGRP_MNT}
+mount -t cgroup2 none ${CGRP_MNT}
+set +e
+
+
+# make sure we have a known start point
cleanup 2>/dev/null
-config_device
-attach_bpf
-do_test
-cleanup
-echo "*** PASS ***"
+
+mkdir -p ${CGRP_MNT}/sockopts
+[ $? -ne 0 ] && cleanup_and_exit 1 "Failed to create cgroup hierarchy"
+
+
+# set pid into cgroup
+echo $$ > ${CGRP_MNT}/sockopts/cgroup.procs
+
+# no bpf program attached, so socket should show no settings
+check_sock "dev , mark 0, priority 0" "No programs attached"
+check_sock6 "dev , mark 0, priority 0" "No programs attached"
+
+# verify device is set
+#
+test_cgrp2_sock -b cgrp2_sock ${CGRP_MNT}/sockopts
+if [ $? -ne 0 ]; then
+ cleanup_and_exit 1 "Failed to install program to set device"
+fi
+check_sock "dev cgrp2_sock, mark 0, priority 0" "Device set"
+check_sock6 "dev cgrp2_sock, mark 0, priority 0" "Device set"
+
+# verify mark is set
+#
+test_cgrp2_sock -m 666 ${CGRP_MNT}/sockopts
+if [ $? -ne 0 ]; then
+ cleanup_and_exit 1 "Failed to install program to set mark"
+fi
+check_sock "dev , mark 666, priority 0" "Mark set"
+check_sock6 "dev , mark 666, priority 0" "Mark set"
+
+# verify priority is set
+#
+test_cgrp2_sock -p 123 ${CGRP_MNT}/sockopts
+if [ $? -ne 0 ]; then
+ cleanup_and_exit 1 "Failed to install program to set priority"
+fi
+check_sock "dev , mark 0, priority 123" "Priority set"
+check_sock6 "dev , mark 0, priority 123" "Priority set"
+
+# all 3 at once
+#
+test_cgrp2_sock -b cgrp2_sock -m 666 -p 123 ${CGRP_MNT}/sockopts
+if [ $? -ne 0 ]; then
+ cleanup_and_exit 1 "Failed to install program to set device, mark and priority"
+fi
+check_sock "dev cgrp2_sock, mark 666, priority 123" "Priority set"
+check_sock6 "dev cgrp2_sock, mark 666, priority 123" "Priority set"
+
+cleanup_and_exit $rc
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
index a70d2ea..410052d 100755
--- a/samples/bpf/test_tunnel_bpf.sh
+++ b/samples/bpf/test_tunnel_bpf.sh
@@ -32,6 +32,19 @@ function add_gre_tunnel {
ip addr add dev $DEV 10.1.1.200/24
}
+function add_erspan_tunnel {
+ # in namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 local 172.16.1.100 remote 172.16.1.200 erspan 123
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # out of namespace
+ ip link add dev $DEV type $TYPE external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
function add_vxlan_tunnel {
# Set static ARP entry here because iptables set-mark works
# on L3 packet, as a result not applying to ARP packets,
@@ -99,6 +112,18 @@ function test_gre {
cleanup
}
+function test_erspan {
+ TYPE=erspan
+ DEV_NS=erspan00
+ DEV=erspan11
+ config_device
+ add_erspan_tunnel
+ attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
+ ping -c 1 10.1.1.100
+ ip netns exec at_ns0 ping -c 1 10.1.1.200
+ cleanup
+}
+
function test_vxlan {
TYPE=vxlan
DEV_NS=vxlan00
@@ -151,14 +176,18 @@ function cleanup {
ip link del gretap11
ip link del vxlan11
ip link del geneve11
+ ip link del erspan11
pkill tcpdump
pkill cat
set -ex
}
+trap cleanup 0 2 3 6
cleanup
echo "Testing GRE tunnel..."
test_gre
+echo "Testing ERSPAN tunnel..."
+test_erspan
echo "Testing VXLAN tunnel..."
test_vxlan
echo "Testing GENEVE tunnel..."
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
new file mode 100644
index 0000000..74f3fd8
--- /dev/null
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -0,0 +1,88 @@
+/* XDP monitor tool, based on tracepoints
+ *
+ * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") redirect_err_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = 2,
+ /* TODO: have entries for all possible errno's */
+};
+
+/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct xdp_redirect_ctx {
+ unsigned short common_type; // offset:0; size:2; signed:0;
+ unsigned char common_flags; // offset:2; size:1; signed:0;
+ unsigned char common_preempt_count;// offset:3; size:1; signed:0;
+ int common_pid; // offset:4; size:4; signed:1;
+
+ int prog_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12 size:4; signed:0;
+ int ifindex; // offset:16 size:4; signed:1;
+ int err; // offset:20 size:4; signed:1;
+ int to_ifindex; // offset:24 size:4; signed:1;
+ u32 map_id; // offset:28 size:4; signed:0;
+ int map_index; // offset:32 size:4; signed:1;
+}; // offset:36
+
+enum {
+ XDP_REDIRECT_SUCCESS = 0,
+ XDP_REDIRECT_ERROR = 1
+};
+
+static __always_inline
+int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
+{
+ u32 key = XDP_REDIRECT_ERROR;
+ int err = ctx->err;
+ u64 *cnt;
+
+ if (!err)
+ key = XDP_REDIRECT_SUCCESS;
+
+ cnt = bpf_map_lookup_elem(&redirect_err_cnt, &key);
+ if (!cnt)
+ return 0;
+ *cnt += 1;
+
+ return 0; /* Indicate event was filtered (no further processing) */
+ /*
+ * Returning 1 here would allow e.g. a perf-record tracepoint
+ * to see and record these events, but it doesn't work well
+ * in practice as stopping perf-record also unloads this
+ * bpf_prog. Plus, there is the additional overhead of doing so.
+ */
+}
+
+SEC("tracepoint/xdp/xdp_redirect_err")
+int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
+{
+ return xdp_redirect_collect_stat(ctx);
+}
+
+
+SEC("tracepoint/xdp/xdp_redirect_map_err")
+int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
+{
+ return xdp_redirect_collect_stat(ctx);
+}
+
+/* Likely unloaded when prog starts */
+SEC("tracepoint/xdp/xdp_redirect")
+int trace_xdp_redirect(struct xdp_redirect_ctx *ctx)
+{
+ return xdp_redirect_collect_stat(ctx);
+}
+
+/* Likely unloaded when prog starts */
+SEC("tracepoint/xdp/xdp_redirect_map")
+int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx)
+{
+ return xdp_redirect_collect_stat(ctx);
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
new file mode 100644
index 0000000..b51b4f5
--- /dev/null
+++ b/samples/bpf/xdp_monitor_user.c
@@ -0,0 +1,295 @@
+/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+ */
+static const char *__doc__=
+ "XDP monitor tool, based on tracepoints\n"
+;
+
+static const char *__doc_err_only__=
+ " NOTICE: Only tracking XDP redirect errors\n"
+ " Enable TX success stats via '--stats'\n"
+ " (which comes with a per packet processing overhead)\n"
+;
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <locale.h>
+
+#include <getopt.h>
+#include <net/if.h>
+#include <time.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+
+static int verbose = 1;
+static bool debug = false;
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"debug", no_argument, NULL, 'D' },
+ {"stats", no_argument, NULL, 'S' },
+ {"sec", required_argument, NULL, 's' },
+ {0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+ int i;
+ printf("\nDOCUMENTATION:\n%s\n", __doc__);
+ printf("\n");
+ printf(" Usage: %s (options-see-below)\n",
+ argv[0]);
+ printf(" Listing options:\n");
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-15s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value:%d)",
+ *long_options[i].flag);
+ else
+ printf("(internal short-option: -%c)",
+ long_options[i].val);
+ printf("\n");
+ }
+ printf("\n");
+}
+
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+__u64 gettime(void)
+{
+ struct timespec t;
+ int res;
+
+ res = clock_gettime(CLOCK_MONOTONIC, &t);
+ if (res < 0) {
+ fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
+ exit(EXIT_FAILURE);
+ }
+ return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+enum {
+ REDIR_SUCCESS = 0,
+ REDIR_ERROR = 1,
+};
+#define REDIR_RES_MAX 2
+static const char *redir_names[REDIR_RES_MAX] = {
+ [REDIR_SUCCESS] = "Success",
+ [REDIR_ERROR] = "Error",
+};
+static const char *err2str(int err)
+{
+ if (err < REDIR_RES_MAX)
+ return redir_names[err];
+ return NULL;
+}
+
+struct record {
+ __u64 counter;
+ __u64 timestamp;
+};
+
+struct stats_record {
+ struct record xdp_redir[REDIR_RES_MAX];
+};
+
+static void stats_print_headers(bool err_only)
+{
+ if (err_only)
+ printf("\n%s\n", __doc_err_only__);
+
+ printf("%-14s %-10s %-18s %-9s\n",
+ "XDP_REDIRECT", "pps ", "pps-human-readable", "measure-period");
+}
+
+static void stats_print(struct stats_record *rec,
+ struct stats_record *prev,
+ bool err_only)
+{
+ int i = 0;
+
+ if (err_only)
+ i = REDIR_ERROR;
+
+ for (; i < REDIR_RES_MAX; i++) {
+ struct record *r = &rec->xdp_redir[i];
+ struct record *p = &prev->xdp_redir[i];
+ __u64 period = 0;
+ __u64 packets = 0;
+ double pps = 0;
+ double period_ = 0;
+
+ if (p->timestamp) {
+ packets = r->counter - p->counter;
+ period = r->timestamp - p->timestamp;
+ if (period > 0) {
+ period_ = ((double) period / NANOSEC_PER_SEC);
+ pps = packets / period_;
+ }
+ }
+
+ printf("%-14s %-10.0f %'-18.0f %f\n",
+ err2str(i), pps, pps, period_);
+ }
+}
+
+static __u64 get_key32_value64_percpu(int fd, __u32 key)
+{
+ /* For percpu maps, userspace gets a value per possible CPU */
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ __u64 values[nr_cpus];
+ __u64 sum = 0;
+ int i;
+
+ if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+ fprintf(stderr,
+ "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+ return 0;
+ }
+
+ /* Sum values from each CPU */
+ for (i = 0; i < nr_cpus; i++) {
+ sum += values[i];
+ }
+ return sum;
+}
+
+static bool stats_collect(int fd, struct stats_record *rec)
+{
+ int i;
+
+ /* TODO: Detect if someone unloaded the perf event_fd's, as
+ * this can happen when someone runs perf-record -e
+ */
+
+ for (i = 0; i < REDIR_RES_MAX; i++) {
+ rec->xdp_redir[i].timestamp = gettime();
+ rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i);
+ }
+ return true;
+}
+
+static void stats_poll(int interval, bool err_only)
+{
+ struct stats_record rec, prev;
+ int map_fd;
+
+ memset(&rec, 0, sizeof(rec));
+
+ /* Trick: use %' with setlocale() to printf with thousands separators */
+ setlocale(LC_NUMERIC, "en_US");
+
+ /* Header */
+ if (verbose)
+ printf("\n%s", __doc__);
+
+ /* TODO Need more advanced stats on error types */
+ if (verbose)
+ printf(" - Stats map: %s\n", map_data[0].name);
+ map_fd = map_data[0].fd;
+
+ stats_print_headers(err_only);
+ fflush(stdout);
+
+ while (1) {
+ memcpy(&prev, &rec, sizeof(rec));
+ stats_collect(map_fd, &rec);
+ stats_print(&rec, &prev, err_only);
+ fflush(stdout);
+ sleep(interval);
+ }
+}
+
+void print_bpf_prog_info(void)
+{
+ int i;
+
+ /* Prog info */
+ printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt);
+ for (i = 0; i < prog_cnt; i++) {
+ printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]);
+ }
+
+ /* Maps info */
+ printf("Loaded BPF prog have %d map(s)\n", map_data_count);
+ for (i = 0; i < map_data_count; i++) {
+ char *name = map_data[i].name;
+ int fd = map_data[i].fd;
+
+ printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
+ }
+
+ /* Event info */
+ printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
+ for (i = 0; i < prog_cnt; i++) {
+ if (event_fd[i] != -1)
+ printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int longindex = 0, opt;
+ int ret = EXIT_SUCCESS;
+ char bpf_obj_file[256];
+
+ /* Default settings: */
+ bool errors_only = true;
+ int interval = 2;
+
+ snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
+
+ /* Parse commands line args */
+ while ((opt = getopt_long(argc, argv, "h",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 'D':
+ debug = true;
+ break;
+ case 'S':
+ errors_only = false;
+ break;
+ case 's':
+ interval = atoi(optarg);
+ break;
+ case 'h':
+ default:
+ usage(argv);
+ return EXIT_FAILURE;
+ }
+ }
+
+ if (load_bpf_file(bpf_obj_file)) {
+ printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
+ return 1;
+ }
+ if (!prog_fd[0]) {
+ printf("ERROR - load_bpf_file: %s\n", strerror(errno));
+ return 1;
+ }
+
+ if (debug) {
+ print_bpf_prog_info();
+ }
+
+ /* Unload/stop tracepoint event by closing fd's */
+ if (errors_only) {
+ /* The prog_fd[i] and event_fd[i] depend on the
+ * order the functions were defined in _kern.c
+ */
+ close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
+ close(prog_fd[2]); /* func: trace_xdp_redirect */
+ close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
+ close(prog_fd[3]); /* func: trace_xdp_redirect_map */
+ }
+
+ stats_poll(interval, errors_only);
+
+ return ret;
+}
diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c
new file mode 100644
index 0000000..8abb151
--- /dev/null
+++ b/samples/bpf/xdp_redirect_kern.c
@@ -0,0 +1,90 @@
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") tx_port = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1,
+};
+
+/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
+ * feedback. Redirect TX errors can be caught via a tracepoint.
+ */
+struct bpf_map_def SEC("maps") rxcnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = 1,
+};
+
+static void swap_src_dst_mac(void *data)
+{
+ unsigned short *p = data;
+ unsigned short dst[3];
+
+ dst[0] = p[0];
+ dst[1] = p[1];
+ dst[2] = p[2];
+ p[0] = p[3];
+ p[1] = p[4];
+ p[2] = p[5];
+ p[3] = dst[0];
+ p[4] = dst[1];
+ p[5] = dst[2];
+}
+
+SEC("xdp_redirect")
+int xdp_redirect_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ int rc = XDP_DROP;
+ int *ifindex, port = 0;
+ long *value;
+ u32 key = 0;
+ u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return rc;
+
+ ifindex = bpf_map_lookup_elem(&tx_port, &port);
+ if (!ifindex)
+ return rc;
+
+ value = bpf_map_lookup_elem(&rxcnt, &key);
+ if (value)
+ *value += 1;
+
+ swap_src_dst_mac(data);
+ return bpf_redirect(*ifindex, 0);
+}
+
+/* Redirect requires an XDP bpf_prog loaded on the TX device */
+SEC("xdp_redirect_dummy")
+int xdp_redirect_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c
new file mode 100644
index 0000000..740a529
--- /dev/null
+++ b/samples/bpf/xdp_redirect_map_kern.c
@@ -0,0 +1,92 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") tx_port = {
+ .type = BPF_MAP_TYPE_DEVMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 100,
+};
+
+/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
+ * feedback. Redirect TX errors can be caught via a tracepoint.
+ */
+struct bpf_map_def SEC("maps") rxcnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = 1,
+};
+
+static void swap_src_dst_mac(void *data)
+{
+ unsigned short *p = data;
+ unsigned short dst[3];
+
+ dst[0] = p[0];
+ dst[1] = p[1];
+ dst[2] = p[2];
+ p[0] = p[3];
+ p[1] = p[4];
+ p[2] = p[5];
+ p[3] = dst[0];
+ p[4] = dst[1];
+ p[5] = dst[2];
+}
+
+SEC("xdp_redirect_map")
+int xdp_redirect_map_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ int rc = XDP_DROP;
+ int vport, port = 0, m = 0;
+ long *value;
+ u32 key = 0;
+ u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return rc;
+
+ /* constant virtual port */
+ vport = 0;
+
+ /* count packet in global counter */
+ value = bpf_map_lookup_elem(&rxcnt, &key);
+ if (value)
+ *value += 1;
+
+ swap_src_dst_mac(data);
+
+ /* send packet out physical port */
+ return bpf_redirect_map(&tx_port, vport, 0);
+}
+
+/* Redirect requires an XDP bpf_prog loaded on the TX device */
+SEC("xdp_redirect_dummy")
+int xdp_redirect_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
new file mode 100644
index 0000000..d4d86a2
--- /dev/null
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -0,0 +1,145 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgen.h>
+
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "libbpf.h"
+
+static int ifindex_in;
+static int ifindex_out;
+static bool ifindex_out_xdp_dummy_attached = true;
+
+static __u32 xdp_flags;
+
+static void int_exit(int sig)
+{
+ set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+ if (ifindex_out_xdp_dummy_attached)
+ set_link_xdp_fd(ifindex_out, -1, xdp_flags);
+ exit(0);
+}
+
+static void poll_stats(int interval, int ifindex)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ __u64 values[nr_cpus], prev[nr_cpus];
+
+ memset(prev, 0, sizeof(prev));
+
+ while (1) {
+ __u64 sum = 0;
+ __u32 key = 0;
+ int i;
+
+ sleep(interval);
+ assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
+ for (i = 0; i < nr_cpus; i++)
+ sum += (values[i] - prev[i]);
+ if (sum)
+ printf("ifindex %i: %10llu pkt/s\n",
+ ifindex, sum / interval);
+ memcpy(prev, values, sizeof(values));
+ }
+}
+
+static void usage(const char *prog)
+{
+ fprintf(stderr,
+ "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
+ "OPTS:\n"
+ " -S use skb-mode\n"
+ " -N enforce native mode\n",
+ prog);
+}
+
+int main(int argc, char **argv)
+{
+ const char *optstr = "SN";
+ char filename[256];
+ int ret, opt, key = 0;
+
+ while ((opt = getopt(argc, argv, optstr)) != -1) {
+ switch (opt) {
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'N':
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+ break;
+ default:
+ usage(basename(argv[0]));
+ return 1;
+ }
+ }
+
+ if (optind == argc) {
+ printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]);
+ return 1;
+ }
+
+ ifindex_in = strtoul(argv[optind], NULL, 0);
+ ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+ printf("input: %d output: %d\n", ifindex_in, ifindex_out);
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ if (!prog_fd[0]) {
+ printf("load_bpf_file: %s\n", strerror(errno));
+ return 1;
+ }
+
+ if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
+ printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
+ return 1;
+ }
+
+ /* Loading dummy XDP prog on out-device */
+ if (set_link_xdp_fd(ifindex_out, prog_fd[1],
+ (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
+ printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
+ ifindex_out_xdp_dummy_attached = false;
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ printf("map[0] (vports) = %i, map[1] (map) = %i, map[2] (count) = %i\n",
+ map_fd[0], map_fd[1], map_fd[2]);
+
+ /* populate virtual to physical port map */
+ ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0);
+ if (ret) {
+ perror("bpf_update_elem");
+ goto out;
+ }
+
+ poll_stats(2, ifindex_out);
+
+out:
+ return 0;
+}
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
new file mode 100644
index 0000000..4475d83
--- /dev/null
+++ b/samples/bpf/xdp_redirect_user.c
@@ -0,0 +1,143 @@
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgen.h>
+
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "libbpf.h"
+
+static int ifindex_in;
+static int ifindex_out;
+static bool ifindex_out_xdp_dummy_attached = true;
+
+static __u32 xdp_flags;
+
+static void int_exit(int sig)
+{
+ set_link_xdp_fd(ifindex_in, -1, xdp_flags);
+ if (ifindex_out_xdp_dummy_attached)
+ set_link_xdp_fd(ifindex_out, -1, xdp_flags);
+ exit(0);
+}
+
+static void poll_stats(int interval, int ifindex)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ __u64 values[nr_cpus], prev[nr_cpus];
+
+ memset(prev, 0, sizeof(prev));
+
+ while (1) {
+ __u64 sum = 0;
+ __u32 key = 0;
+ int i;
+
+ sleep(interval);
+ assert(bpf_map_lookup_elem(map_fd[1], &key, values) == 0);
+ for (i = 0; i < nr_cpus; i++)
+ sum += (values[i] - prev[i]);
+ if (sum)
+ printf("ifindex %i: %10llu pkt/s\n",
+ ifindex, sum / interval);
+ memcpy(prev, values, sizeof(values));
+ }
+}
+
+static void usage(const char *prog)
+{
+ fprintf(stderr,
+ "usage: %s [OPTS] IFINDEX_IN IFINDEX_OUT\n\n"
+ "OPTS:\n"
+ " -S use skb-mode\n"
+ " -N enforce native mode\n",
+ prog);
+}
+
+
+int main(int argc, char **argv)
+{
+ const char *optstr = "SN";
+ char filename[256];
+ int ret, opt, key = 0;
+
+ while ((opt = getopt(argc, argv, optstr)) != -1) {
+ switch (opt) {
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'N':
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+ break;
+ default:
+ usage(basename(argv[0]));
+ return 1;
+ }
+ }
+
+ if (optind == argc) {
+ printf("usage: %s IFINDEX_IN IFINDEX_OUT\n", argv[0]);
+ return 1;
+ }
+
+ ifindex_in = strtoul(argv[optind], NULL, 0);
+ ifindex_out = strtoul(argv[optind + 1], NULL, 0);
+ printf("input: %d output: %d\n", ifindex_in, ifindex_out);
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ if (!prog_fd[0]) {
+ printf("load_bpf_file: %s\n", strerror(errno));
+ return 1;
+ }
+
+ if (set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
+ printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
+ return 1;
+ }
+
+ /* Loading dummy XDP prog on out-device */
+ if (set_link_xdp_fd(ifindex_out, prog_fd[1],
+ (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
+ printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
+ ifindex_out_xdp_dummy_attached = false;
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ /* bpf redirect port */
+ ret = bpf_map_update_elem(map_fd[0], &key, &ifindex_out, 0);
+ if (ret) {
+ perror("bpf_update_elem");
+ goto out;
+ }
+
+ poll_stats(2, ifindex_out);
+
+out:
+ return 0;
+}
diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile
new file mode 100644
index 0000000..9291ab8
--- /dev/null
+++ b/samples/sockmap/Makefile
@@ -0,0 +1,78 @@
+# kbuild trick to avoid linker error. Can be omitted if a module is built.
+obj- := dummy.o
+
+# List of programs to build
+hostprogs-y := sockmap
+
+# Libbpf dependencies
+LIBBPF := ../../tools/lib/bpf/bpf.o
+
+HOSTCFLAGS += -I$(objtree)/usr/include
+HOSTCFLAGS += -I$(srctree)/tools/lib/
+HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
+HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
+HOSTCFLAGS += -I$(srctree)/tools/perf
+
+sockmap-objs := ../bpf/bpf_load.o $(LIBBPF) sockmap_user.o
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+always += sockmap_kern.o
+
+HOSTLOADLIBES_sockmap += -lelf -lpthread
+
+# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
+# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+LLC ?= llc
+CLANG ?= clang
+
+# Trick to allow make to be run from this directory
+all:
+ $(MAKE) -C ../../ $(CURDIR)/
+
+clean:
+ $(MAKE) -C ../../ M=$(CURDIR) clean
+ @rm -f *~
+
+$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c
+ $(call if_changed_dep,cc_s_c)
+
+$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE
+ $(call filechk,offsets,__SYSCALL_NRS_H__)
+
+clean-files += syscall_nrs.h
+
+FORCE:
+
+
+# Verify LLVM compiler tools are available and bpf target is supported by llc
+.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC)
+
+verify_cmds: $(CLANG) $(LLC)
+ @for TOOL in $^ ; do \
+ if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \
+ echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\
+ exit 1; \
+ else true; fi; \
+ done
+
+verify_target_bpf: verify_cmds
+ @if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \
+ echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\
+ echo " NOTICE: LLVM version >= 3.7.1 required" ;\
+ exit 2; \
+ else true; fi
+
+$(src)/*.c: verify_target_bpf
+
+# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
+# But there is no easy way to fix it, so just exclude it since it is
+# useless for BPF samples.
+$(obj)/%.o: $(src)/%.c
+ $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
+ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
+ -Wno-compare-distinct-pointer-types \
+ -Wno-gnu-variable-sized-type-not-at-end \
+ -Wno-address-of-packed-member -Wno-tautological-compare \
+ -Wno-unknown-warning-option \
+ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
new file mode 100644
index 0000000..f9b38ef
--- /dev/null
+++ b/samples/sockmap/sockmap_kern.c
@@ -0,0 +1,108 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include "../../tools/testing/selftests/bpf/bpf_helpers.h"
+#include "../../tools/testing/selftests/bpf/bpf_endian.h"
+
+/* Sockmap sample program connects a client and a backend together
+ * using cgroups.
+ *
+ * client:X <---> frontend:80 client:X <---> backend:80
+ *
+ * For simplicity we hard code values here and bind 1:1. The hard
+ * coded values are part of the setup in sockmap.sh script that
+ * is associated with this BPF program.
+ *
+ * The bpf_printk is verbose and prints information as connections
+ * are established and verdicts are decided.
+ */
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+struct bpf_map_def SEC("maps") sock_map = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+ __u32 lport = skb->local_port;
+ __u32 rport = skb->remote_port;
+ int ret = 0;
+
+ if (lport == 10000)
+ ret = 10;
+ else
+ ret = 1;
+
+ bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
+ return bpf_sk_redirect_map(&sock_map, ret, 0);
+}
+
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+ __u32 lport, rport;
+ int op, err = 0, ret;
+
+ op = (int) skops->op;
+
+ switch (op) {
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ lport = skops->local_port;
+ rport = skops->remote_port;
+
+ if (lport == 10000) {
+ ret = 1;
+ err = bpf_sock_map_update(skops, &sock_map, &ret,
+ BPF_NOEXIST);
+ bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
+ lport, bpf_ntohl(rport), err);
+ }
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ lport = skops->local_port;
+ rport = skops->remote_port;
+
+ if (bpf_ntohl(rport) == 10001) {
+ ret = 10;
+ err = bpf_sock_map_update(skops, &sock_map, &ret,
+ BPF_NOEXIST);
+ bpf_printk("active(%i -> %i) map ctx update err: %d\n",
+ lport, bpf_ntohl(rport), err);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
new file mode 100644
index 0000000..7cc9d22
--- /dev/null
+++ b/samples/sockmap/sockmap_user.c
@@ -0,0 +1,294 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <fcntl.h>
+
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <linux/netlink.h>
+#include <linux/socket.h>
+#include <linux/sock_diag.h>
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <libgen.h>
+
+#include "../bpf/bpf_load.h"
+#include "../bpf/bpf_util.h"
+#include "../bpf/libbpf.h"
+
+int running;
+void running_handler(int a);
+
+/* randomly selected ports for testing on lo */
+#define S1_PORT 10000
+#define S2_PORT 10001
+
+static int sockmap_test_sockets(int rate, int dot)
+{
+ int i, sc, err, max_fd, one = 1;
+ int s1, s2, c1, c2, p1, p2;
+ struct sockaddr_in addr;
+ struct timeval timeout;
+ char buf[1024] = {0};
+ int *fds[4] = {&s1, &s2, &c1, &c2};
+ fd_set w;
+
+ s1 = s2 = p1 = p2 = c1 = c2 = 0;
+
+ /* Init sockets */
+ for (i = 0; i < 4; i++) {
+ *fds[i] = socket(AF_INET, SOCK_STREAM, 0);
+ if (*fds[i] < 0) {
+ perror("socket s1 failed()");
+ err = *fds[i];
+ goto out;
+ }
+ }
+
+ /* Allow reuse */
+ for (i = 0; i < 2; i++) {
+ err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
+ (char *)&one, sizeof(one));
+ if (err) {
+ perror("setsockopt failed()");
+ goto out;
+ }
+ }
+
+ /* Non-blocking sockets */
+ for (i = 0; i < 4; i++) {
+ err = ioctl(*fds[i], FIONBIO, (char *)&one);
+ if (err < 0) {
+ perror("ioctl s1 failed()");
+ goto out;
+ }
+ }
+
+ /* Bind server sockets */
+ memset(&addr, 0, sizeof(struct sockaddr_in));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+ addr.sin_port = htons(S1_PORT);
+ err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0) {
+ perror("bind s1 failed()\n");
+ goto out;
+ }
+
+ addr.sin_port = htons(S2_PORT);
+ err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0) {
+ perror("bind s2 failed()\n");
+ goto out;
+ }
+
+ /* Listen server sockets */
+ addr.sin_port = htons(S1_PORT);
+ err = listen(s1, 32);
+ if (err < 0) {
+ perror("listen s1 failed()\n");
+ goto out;
+ }
+
+ addr.sin_port = htons(S2_PORT);
+ err = listen(s2, 32);
+ if (err < 0) {
+ perror("listen s1 failed()\n");
+ goto out;
+ }
+
+ /* Initiate Connect */
+ addr.sin_port = htons(S1_PORT);
+ err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0 && errno != EINPROGRESS) {
+ perror("connect c1 failed()\n");
+ goto out;
+ }
+
+ addr.sin_port = htons(S2_PORT);
+ err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0 && errno != EINPROGRESS) {
+ perror("connect c2 failed()\n");
+ goto out;
+ }
+
+ /* Accept connections */
+ p1 = accept(s1, NULL, NULL);
+ if (p1 < 0) {
+ perror("accept s1 failed()\n");
+ goto out;
+ }
+
+ p2 = accept(s2, NULL, NULL);
+ if (p2 < 0) {
+ perror("accept s1 failed()\n");
+ goto out;
+ }
+
+ max_fd = p2;
+ timeout.tv_sec = 10;
+ timeout.tv_usec = 0;
+
+ printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
+ printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
+ c1, s1, c2, s2);
+
+ /* Ping/Pong data from client to server */
+ sc = send(c1, buf, sizeof(buf), 0);
+ if (sc < 0) {
+ perror("send failed()\n");
+ goto out;
+ }
+
+ do {
+ int s, rc, i;
+
+ /* FD sets */
+ FD_ZERO(&w);
+ FD_SET(c1, &w);
+ FD_SET(c2, &w);
+ FD_SET(p1, &w);
+ FD_SET(p2, &w);
+
+ s = select(max_fd + 1, &w, NULL, NULL, &timeout);
+ if (s == -1) {
+ perror("select()");
+ break;
+ } else if (!s) {
+ fprintf(stderr, "unexpected timeout\n");
+ break;
+ }
+
+ for (i = 0; i <= max_fd && s > 0; ++i) {
+ if (!FD_ISSET(i, &w))
+ continue;
+
+ s--;
+
+ rc = recv(i, buf, sizeof(buf), 0);
+ if (rc < 0) {
+ if (errno != EWOULDBLOCK) {
+ perror("recv failed()\n");
+ break;
+ }
+ }
+
+ if (rc == 0) {
+ close(i);
+ break;
+ }
+
+ sc = send(i, buf, rc, 0);
+ if (sc < 0) {
+ perror("send failed()\n");
+ break;
+ }
+ }
+ sleep(rate);
+ if (dot) {
+ printf(".");
+ fflush(stdout);
+ }
+ } while (running);
+
+out:
+ close(s1);
+ close(s2);
+ close(p1);
+ close(p2);
+ close(c1);
+ close(c2);
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ int rate = 1, dot = 1;
+ char filename[256];
+ int err, cg_fd;
+ char *cg_path;
+
+ cg_path = argv[argc - 1];
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ running = 1;
+
+ /* catch SIGINT */
+ signal(SIGINT, running_handler);
+
+ if (load_bpf_file(filename)) {
+ fprintf(stderr, "load_bpf_file: (%s) %s\n",
+ filename, strerror(errno));
+ return 1;
+ }
+
+ /* Cgroup configuration */
+ cg_fd = open(cg_path, O_DIRECTORY | O_RDONLY);
+ if (cg_fd < 0) {
+ fprintf(stderr, "ERROR: (%i) open cg path failed: %s\n",
+ cg_fd, cg_path);
+ return cg_fd;
+ }
+
+ /* Attach programs to sockmap */
+ err = bpf_prog_attach(prog_fd[0], map_fd[0],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ err = bpf_prog_attach(prog_fd[1], map_fd[0],
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ /* Attach to cgroups */
+ err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ if (err) {
+ fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ err = sockmap_test_sockets(rate, dot);
+ if (err) {
+ fprintf(stderr, "ERROR: test socket failed: %d\n", err);
+ return err;
+ }
+ return 0;
+}
+
+void running_handler(int a)
+{
+ running = 0;
+}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 33fd061..2f2e133 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6530,7 +6530,7 @@ security_initcall(selinux_init);
#if defined(CONFIG_NETFILTER)
-static struct nf_hook_ops selinux_nf_ops[] = {
+static const struct nf_hook_ops selinux_nf_ops[] = {
{
.hook = selinux_ipv4_postroute,
.pf = NFPROTO_IPV4,
diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h
index 1450f85..36a7ce9 100644
--- a/security/selinux/include/xfrm.h
+++ b/security/selinux/include/xfrm.h
@@ -47,10 +47,8 @@ static inline void selinux_xfrm_notify_policyload(void)
struct net *net;
rtnl_lock();
- for_each_net(net) {
- atomic_inc(&net->xfrm.flow_cache_genid);
+ for_each_net(net)
rt_genid_bump_all(net);
- }
rtnl_unlock();
}
#else
diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c
index cdeb0f3..e36d178 100644
--- a/security/smack/smack_netfilter.c
+++ b/security/smack/smack_netfilter.c
@@ -58,7 +58,7 @@ static unsigned int smack_ipv4_output(void *priv,
return NF_ACCEPT;
}
-static struct nf_hook_ops smack_nf_ops[] = {
+static const struct nf_hook_ops smack_nf_ops[] = {
{
.hook = smack_ipv4_output,
.pf = NFPROTO_IPV4,
diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh
deleted file mode 100755
index 89b2506..0000000
--- a/tools/hv/bondvf.sh
+++ /dev/null
@@ -1,232 +0,0 @@
-#!/bin/bash
-
-# This example script creates bonding network devices based on synthetic NIC
-# (the virtual network adapter usually provided by Hyper-V) and the matching
-# VF NIC (SRIOV virtual function). So the synthetic NIC and VF NIC can
-# function as one network device, and fail over to the synthetic NIC if VF is
-# down.
-#
-# Usage:
-# - After configured vSwitch and vNIC with SRIOV, start Linux virtual
-# machine (VM)
-# - Run this scripts on the VM. It will create configuration files in
-# distro specific directory.
-# - Reboot the VM, so that the bonding config are enabled.
-#
-# The config files are DHCP by default. You may edit them if you need to change
-# to Static IP or change other settings.
-#
-
-sysdir=/sys/class/net
-netvsc_cls={f8615163-df3e-46c5-913f-f2d2f965ed0e}
-bondcnt=0
-
-# Detect Distro
-if [ -f /etc/redhat-release ];
-then
- cfgdir=/etc/sysconfig/network-scripts
- distro=redhat
-elif grep -q 'Ubuntu' /etc/issue
-then
- cfgdir=/etc/network
- distro=ubuntu
-elif grep -q 'SUSE' /etc/issue
-then
- cfgdir=/etc/sysconfig/network
- distro=suse
-else
- echo "Unsupported Distro"
- exit 1
-fi
-
-echo Detected Distro: $distro, or compatible
-
-# Get a list of ethernet names
-list_eth=(`cd $sysdir && ls -d */ | cut -d/ -f1 | grep -v bond`)
-eth_cnt=${#list_eth[@]}
-
-echo List of net devices:
-
-# Get the MAC addresses
-for (( i=0; i < $eth_cnt; i++ ))
-do
- list_mac[$i]=`cat $sysdir/${list_eth[$i]}/address`
- echo ${list_eth[$i]}, ${list_mac[$i]}
-done
-
-# Find NIC with matching MAC
-for (( i=0; i < $eth_cnt-1; i++ ))
-do
- for (( j=i+1; j < $eth_cnt; j++ ))
- do
- if [ "${list_mac[$i]}" = "${list_mac[$j]}" ]
- then
- list_match[$i]=${list_eth[$j]}
- break
- fi
- done
-done
-
-function create_eth_cfg_redhat {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo DEVICE=$1 >>$fn
- echo TYPE=Ethernet >>$fn
- echo BOOTPROTO=none >>$fn
- echo UUID=`uuidgen` >>$fn
- echo ONBOOT=yes >>$fn
- echo PEERDNS=yes >>$fn
- echo IPV6INIT=yes >>$fn
- echo MASTER=$2 >>$fn
- echo SLAVE=yes >>$fn
-}
-
-function create_eth_cfg_pri_redhat {
- create_eth_cfg_redhat $1 $2
-}
-
-function create_bond_cfg_redhat {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo DEVICE=$1 >>$fn
- echo TYPE=Bond >>$fn
- echo BOOTPROTO=dhcp >>$fn
- echo UUID=`uuidgen` >>$fn
- echo ONBOOT=yes >>$fn
- echo PEERDNS=yes >>$fn
- echo IPV6INIT=yes >>$fn
- echo BONDING_MASTER=yes >>$fn
- echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn
-}
-
-function del_eth_cfg_ubuntu {
- local mainfn=$cfgdir/interfaces
- local fnlist=( $mainfn )
-
- local dirlist=(`awk '/^[ \t]*source/{print $2}' $mainfn`)
-
- local i
- for i in "${dirlist[@]}"
- do
- fnlist+=(`ls $i 2>/dev/null`)
- done
-
- local tmpfl=$(mktemp)
-
- local nic_start='^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+'$1
- local nic_end='^[ \t]*(auto|iface|mapping|allow-.*|source)'
-
- local fn
- for fn in "${fnlist[@]}"
- do
- awk "/$nic_end/{x=0} x{next} /$nic_start/{x=1;next} 1" \
- $fn >$tmpfl
-
- cp $tmpfl $fn
- done
-
- rm $tmpfl
-}
-
-function create_eth_cfg_ubuntu {
- local fn=$cfgdir/interfaces
-
- del_eth_cfg_ubuntu $1
- echo $'\n'auto $1 >>$fn
- echo iface $1 inet manual >>$fn
- echo bond-master $2 >>$fn
-}
-
-function create_eth_cfg_pri_ubuntu {
- local fn=$cfgdir/interfaces
-
- del_eth_cfg_ubuntu $1
- echo $'\n'allow-hotplug $1 >>$fn
- echo iface $1 inet manual >>$fn
- echo bond-master $2 >>$fn
- echo bond-primary $1 >>$fn
-}
-
-function create_bond_cfg_ubuntu {
- local fn=$cfgdir/interfaces
-
- del_eth_cfg_ubuntu $1
-
- echo $'\n'auto $1 >>$fn
- echo iface $1 inet dhcp >>$fn
- echo bond-mode active-backup >>$fn
- echo bond-miimon 100 >>$fn
- echo bond-slaves none >>$fn
-}
-
-function create_eth_cfg_suse {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo BOOTPROTO=none >>$fn
- echo STARTMODE=auto >>$fn
-}
-
-function create_eth_cfg_pri_suse {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo BOOTPROTO=none >>$fn
- echo STARTMODE=hotplug >>$fn
-}
-
-function create_bond_cfg_suse {
- local fn=$cfgdir/ifcfg-$1
-
- rm -f $fn
- echo BOOTPROTO=dhcp >>$fn
- echo STARTMODE=auto >>$fn
- echo BONDING_MASTER=yes >>$fn
- echo BONDING_SLAVE_0=$2 >>$fn
- echo BONDING_SLAVE_1=$3 >>$fn
- echo BONDING_MODULE_OPTS=\'mode=active-backup miimon=100 primary=$2\' >>$fn
-}
-
-function create_bond {
- local bondname=bond$bondcnt
- local primary
- local secondary
-
- local class_id1=`cat $sysdir/$1/device/class_id 2>/dev/null`
- local class_id2=`cat $sysdir/$2/device/class_id 2>/dev/null`
-
- if [ "$class_id1" = "$netvsc_cls" ]
- then
- primary=$2
- secondary=$1
- elif [ "$class_id2" = "$netvsc_cls" ]
- then
- primary=$1
- secondary=$2
- else
- return 0
- fi
-
- echo $'\nBond name:' $bondname
-
- echo configuring $primary
- create_eth_cfg_pri_$distro $primary $bondname
-
- echo configuring $secondary
- create_eth_cfg_$distro $secondary $bondname
-
- echo creating: $bondname with primary slave: $primary
- create_bond_cfg_$distro $bondname $primary $secondary
-
- let bondcnt=bondcnt+1
-}
-
-for (( i=0; i < $eth_cnt-1; i++ ))
-do
- if [ -n "${list_match[$i]}" ]
- then
- create_bond ${list_eth[$i]} ${list_match[$i]}
- fi
-done
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e99e3e6..461811e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -30,9 +30,14 @@
#define BPF_FROM_LE BPF_TO_LE
#define BPF_FROM_BE BPF_TO_BE
+/* jmp encodings */
#define BPF_JNE 0x50 /* jump != */
+#define BPF_JLT 0xa0 /* LT is unsigned, '<' */
+#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */
#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
+#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
#define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */
@@ -104,6 +109,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_LPM_TRIE,
BPF_MAP_TYPE_ARRAY_OF_MAPS,
BPF_MAP_TYPE_HASH_OF_MAPS,
+ BPF_MAP_TYPE_DEVMAP,
+ BPF_MAP_TYPE_SOCKMAP,
};
enum bpf_prog_type {
@@ -121,6 +128,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_OUT,
BPF_PROG_TYPE_LWT_XMIT,
BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_SKB,
};
enum bpf_attach_type {
@@ -128,11 +136,19 @@ enum bpf_attach_type {
BPF_CGROUP_INET_EGRESS,
BPF_CGROUP_INET_SOCK_CREATE,
BPF_CGROUP_SOCK_OPS,
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+enum bpf_sockmap_flags {
+ BPF_SOCKMAP_UNSPEC,
+ BPF_SOCKMAP_STRPARSER,
+ __MAX_BPF_SOCKMAP_FLAG
+};
+
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
* to the given target_fd cgroup the descendent cgroup will be able to
* override effective bpf program that was inherited from this cgroup
@@ -153,6 +169,7 @@ enum bpf_attach_type {
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
#define BPF_EXIST 2 /* update existing element */
+/* flags for BPF_MAP_CREATE command */
#define BPF_F_NO_PREALLOC (1U << 0)
/* Instead of having one common LRU list in the
* BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
@@ -161,6 +178,8 @@ enum bpf_attach_type {
* across different LRU lists.
*/
#define BPF_F_NO_COMMON_LRU (1U << 1)
+/* Specify numa node during map creation */
+#define BPF_F_NUMA_NODE (1U << 2)
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
@@ -168,8 +187,13 @@ union bpf_attr {
__u32 key_size; /* size of key in bytes */
__u32 value_size; /* size of value in bytes */
__u32 max_entries; /* max number of entries in a map */
- __u32 map_flags; /* prealloc or not */
+ __u32 map_flags; /* BPF_MAP_CREATE related
+ * flags defined above.
+ */
__u32 inner_map_fd; /* fd pointing to the inner map */
+ __u32 numa_node; /* numa node (effective only if
+ * BPF_F_NUMA_NODE is set).
+ */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -539,6 +563,20 @@ union bpf_attr {
* @mode: operation mode (enum bpf_adj_room_mode)
* @flags: reserved for future use
* Return: 0 on success or negative error code
+ *
+ * int bpf_sk_redirect_map(map, key, flags)
+ * Redirect the skb to the socket found in map at the given key.
+ * @map: pointer to sockmap
+ * @key: key to lookup sock in map
+ * @flags: reserved for future use
+ * Return: SK_REDIRECT
+ *
+ * int bpf_sock_map_update(skops, map, key, flags)
+ * @skops: pointer to bpf_sock_ops
+ * @map: pointer to sockmap to update
+ * @key: key to insert/update sock in map
+ * @flags: same flags as map update elem
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -591,7 +629,10 @@ union bpf_attr {
FN(get_socket_uid), \
FN(set_hash), \
FN(setsockopt), \
- FN(skb_adjust_room),
+ FN(skb_adjust_room), \
+ FN(redirect_map), \
+ FN(sk_redirect_map), \
+ FN(sock_map_update),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -668,6 +709,15 @@ struct __sk_buff {
__u32 data;
__u32 data_end;
__u32 napi_id;
+
+ /* Accessed by BPF_PROG_TYPE_SK_SKB programs */
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
};
struct bpf_tunnel_key {
@@ -727,6 +777,12 @@ struct xdp_md {
__u32 data_end;
};
+enum sk_action {
+ SK_ABORTED = 0,
+ SK_DROP,
+ SK_REDIRECT,
+};
+
#define BPF_TAG_SIZE 8
struct bpf_prog_info {
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 4ed0257..d2441db 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -189,6 +189,10 @@ install_lib: all_cmd
$(call QUIET_INSTALL, $(LIB_FILE)) \
$(call do_install,$(LIB_FILE),$(libdir_SQ))
+install_headers:
+ $(call QUIET_INSTALL, headers) \
+ $(call do_install,bpf.h,$(prefix)/include/bpf,644)
+
install: install_lib
### Cleaning rules
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index e5bbb09..1d6907d 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -57,8 +57,9 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
return syscall(__NR_bpf, cmd, attr, size);
}
-int bpf_create_map(enum bpf_map_type map_type, int key_size,
- int value_size, int max_entries, __u32 map_flags)
+int bpf_create_map_node(enum bpf_map_type map_type, int key_size,
+ int value_size, int max_entries, __u32 map_flags,
+ int node)
{
union bpf_attr attr;
@@ -69,12 +70,24 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size,
attr.value_size = value_size;
attr.max_entries = max_entries;
attr.map_flags = map_flags;
+ if (node >= 0) {
+ attr.map_flags |= BPF_F_NUMA_NODE;
+ attr.numa_node = node;
+ }
return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
-int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
- int inner_map_fd, int max_entries, __u32 map_flags)
+int bpf_create_map(enum bpf_map_type map_type, int key_size,
+ int value_size, int max_entries, __u32 map_flags)
+{
+ return bpf_create_map_node(map_type, key_size, value_size,
+ max_entries, map_flags, -1);
+}
+
+int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size,
+ int inner_map_fd, int max_entries,
+ __u32 map_flags, int node)
{
union bpf_attr attr;
@@ -86,10 +99,21 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
attr.inner_map_fd = inner_map_fd;
attr.max_entries = max_entries;
attr.map_flags = map_flags;
+ if (node >= 0) {
+ attr.map_flags |= BPF_F_NUMA_NODE;
+ attr.numa_node = node;
+ }
return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
+int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
+ int inner_map_fd, int max_entries, __u32 map_flags)
+{
+ return bpf_create_map_in_map_node(map_type, key_size, inner_map_fd,
+ max_entries, map_flags, -1);
+}
+
int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
size_t insns_cnt, const char *license,
__u32 kern_version, char *log_buf, size_t log_buf_sz)
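On the caller side the new _node variants are strict supersets of the existing creators; a usage sketch (map type, sizes, and node choice are illustrative):

#include <linux/bpf.h>
#include <bpf/bpf.h>

static int create_map_on_node0(void)
{
	/* Allocate the map's memory on NUMA node 0. Passing a negative
	 * node gives the old, unpinned behaviour, which is exactly how
	 * bpf_create_map() is now implemented.
	 */
	return bpf_create_map_node(BPF_MAP_TYPE_HASH, sizeof(int),
				   sizeof(long), 1024, 0 /* flags */,
				   0 /* node */);
}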
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 418c86e..b8ea584 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -24,8 +24,14 @@
#include <linux/bpf.h>
#include <stddef.h>
+int bpf_create_map_node(enum bpf_map_type map_type, int key_size,
+ int value_size, int max_entries, __u32 map_flags,
+ int node);
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
int max_entries, __u32 map_flags);
+int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size,
+ int inner_map_fd, int max_entries,
+ __u32 map_flags, int node);
int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
int inner_map_fd, int max_entries, __u32 map_flags);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8c67a90..35f6dfc 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1745,3 +1745,32 @@ long libbpf_get_error(const void *ptr)
return PTR_ERR(ptr);
return 0;
}
+
+int bpf_prog_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd)
+{
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err;
+
+ obj = bpf_object__open(file);
+ if (IS_ERR(obj))
+ return -ENOENT;
+
+ prog = bpf_program__next(NULL, obj);
+ if (!prog) {
+ bpf_object__close(obj);
+ return -ENOENT;
+ }
+
+ bpf_program__set_type(prog, type);
+ err = bpf_object__load(obj);
+ if (err) {
+ bpf_object__close(obj);
+ return -EINVAL;
+ }
+
+ *pobj = obj;
+ *prog_fd = bpf_program__fd(prog);
+ return 0;
+}
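With bpf_prog_load() exported from libbpf, a caller can load an object and obtain a program fd in one step; note it loads the first program found in the object. A sketch, assuming 'file' names a compiled BPF ELF object:

#include <bpf/libbpf.h>

static int load_first_prog(const char *file, int *prog_fd)
{
	struct bpf_object *obj;

	/* Force the type, since the ELF section name may not encode it */
	return bpf_prog_load(file, BPF_PROG_TYPE_SK_SKB, &obj, prog_fd);
}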
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 32c7252..7959086 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -243,4 +243,6 @@ int bpf_map__pin(struct bpf_map *map, const char *path);
long libbpf_get_error(const void *ptr);
+int bpf_prog_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd);
#endif
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 153c3a1..f4b23d6 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -15,9 +15,9 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_align
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
- test_pkt_md_access.o
+ test_pkt_md_access.o test_xdp_redirect.o sockmap_parse_prog.o sockmap_verdict_prog.o
-TEST_PROGS := test_kmod.sh
+TEST_PROGS := test_kmod.sh test_xdp_redirect.sh
include ../lib.mk
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index d50ac34..36fb916 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -38,6 +38,8 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
(void *) BPF_FUNC_clone_redirect;
static int (*bpf_redirect)(int ifindex, int flags) =
(void *) BPF_FUNC_redirect;
+static int (*bpf_redirect_map)(void *map, int key, int flags) =
+ (void *) BPF_FUNC_redirect_map;
static int (*bpf_perf_event_output)(void *ctx, void *map,
unsigned long long flags, void *data,
int size) =
@@ -63,6 +65,12 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) =
(void *) BPF_FUNC_setsockopt;
+static int (*bpf_sk_redirect_map)(void *map, int key, int flags) =
+ (void *) BPF_FUNC_sk_redirect_map;
+static int (*bpf_sock_map_update)(void *skops, void *map, void *key,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_sock_map_update;
+
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -85,6 +93,7 @@ struct bpf_map_def {
unsigned int max_entries;
unsigned int map_flags;
unsigned int inner_map_idx;
+ unsigned int numa_node;
};
static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
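The new numa_node member mirrors the syscall attribute; whether it takes effect depends on the loader translating it into attr.numa_node, which is an assumption here rather than something this patch establishes. An illustrative definition:

struct bpf_map_def SEC("maps") node0_map = {
	.type = BPF_MAP_TYPE_HASH,
	.key_size = sizeof(int),
	.value_size = sizeof(long),
	.max_entries = 1024,
	.map_flags = BPF_F_NUMA_NODE, /* numa_node only applies with this flag */
	.numa_node = 0,
};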
diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c
new file mode 100644
index 0000000..fae3b96
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c
@@ -0,0 +1,38 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long) skb->data_end;
+ void *data = (void *)(long) skb->data;
+ __u32 lport = skb->local_port;
+ __u32 rport = skb->remote_port;
+ __u8 *d = data;
+
+ if (data + 10 > data_end)
+ return skb->len;
+
+ /* This write/read is a bit pointless but tests the verifier and
+ * strparser handler for read/write pkt data and access into sk
+ * fields.
+ */
+ d[7] = 1;
+
+ bpf_printk("parse: data[0] = (%u): local_port %i remote %i\n",
+ d[0], lport, bpf_ntohl(rport));
+ return skb->len;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
new file mode 100644
index 0000000..9b99bd1
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -0,0 +1,68 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+struct bpf_map_def SEC("maps") sock_map_rx = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_tx = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_break = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long) skb->data_end;
+ void *data = (void *)(long) skb->data;
+ __u32 lport = skb->local_port;
+ __u32 rport = skb->remote_port;
+ __u8 *d = data;
+ __u8 sk, map;
+
+ if (data + 8 > data_end)
+ return SK_DROP;
+
+ map = d[0];
+ sk = d[1];
+
+ d[0] = 0xd;
+ d[1] = 0xe;
+ d[2] = 0xa;
+ d[3] = 0xd;
+ d[4] = 0xb;
+ d[5] = 0xe;
+ d[6] = 0xe;
+ d[7] = 0xf;
+
+ bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);
+
+ if (!map)
+ return bpf_sk_redirect_map(&sock_map_rx, sk, 0);
+ return bpf_sk_redirect_map(&sock_map_tx, sk, 0);
+}
+
+char _license[] SEC("license") = "GPL";
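These two programs attach to a sockmap fd rather than to a cgroup; a sketch of the user-space wiring, with the fds assumed to come from bpf_prog_load() and bpf_create_map() as in test_sockmap() below:

#include <bpf/bpf.h>

static int wire_sockmap(int map_fd, int parse_fd, int verdict_fd)
{
	int err;

	err = bpf_prog_attach(parse_fd, map_fd,
			      BPF_SK_SKB_STREAM_PARSER, 0);
	if (err)
		return err;
	return bpf_prog_attach(verdict_fd, map_fd,
			       BPF_SK_SKB_STREAM_VERDICT, 0);
}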
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 2979369..8591c89 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -27,6 +27,11 @@
#define MAX_INSNS 512
#define MAX_MATCHES 16
+struct bpf_reg_match {
+ unsigned int line;
+ const char *match;
+};
+
struct bpf_align_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
@@ -36,10 +41,14 @@ struct bpf_align_test {
REJECT
} result;
enum bpf_prog_type prog_type;
- const char *matches[MAX_MATCHES];
+ /* Matches must be in order of increasing line */
+ struct bpf_reg_match matches[MAX_MATCHES];
};
static struct bpf_align_test tests[] = {
+ /* Four tests of known constants. These aren't staggeringly
+ * interesting since we track exact values now.
+ */
{
.descr = "mov",
.insns = {
@@ -53,11 +62,13 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- "1: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
- "2: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
- "3: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
- "4: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
- "5: R1=ctx R3=imm32,min_value=32,max_value=32,min_align=32 R10=fp",
+ {1, "R1=ctx(id=0,off=0,imm=0)"},
+ {1, "R10=fp0"},
+ {1, "R3=inv2"},
+ {2, "R3=inv4"},
+ {3, "R3=inv8"},
+ {4, "R3=inv16"},
+ {5, "R3=inv32"},
},
},
{
@@ -79,17 +90,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- "1: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp",
- "2: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
- "3: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
- "4: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
- "5: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
- "6: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp",
- "7: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm32,min_value=32,max_value=32,min_align=32 R10=fp",
- "8: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
- "9: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
- "10: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
- "11: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
+ {1, "R1=ctx(id=0,off=0,imm=0)"},
+ {1, "R10=fp0"},
+ {1, "R3=inv1"},
+ {2, "R3=inv2"},
+ {3, "R3=inv4"},
+ {4, "R3=inv8"},
+ {5, "R3=inv16"},
+ {6, "R3=inv1"},
+ {7, "R4=inv32"},
+ {8, "R4=inv16"},
+ {9, "R4=inv8"},
+ {10, "R4=inv4"},
+ {11, "R4=inv2"},
},
},
{
@@ -106,12 +119,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- "1: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
- "2: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=4 R10=fp",
- "3: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R10=fp",
- "4: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
- "5: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm12,min_value=12,max_value=12,min_align=4 R10=fp",
- "6: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm14,min_value=14,max_value=14,min_align=2 R10=fp",
+ {1, "R1=ctx(id=0,off=0,imm=0)"},
+ {1, "R10=fp0"},
+ {1, "R3=inv4"},
+ {2, "R3=inv8"},
+ {3, "R3=inv10"},
+ {4, "R4=inv8"},
+ {5, "R4=inv12"},
+ {6, "R4=inv14"},
},
},
{
@@ -126,13 +141,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- "1: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp",
- "2: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp",
- "3: R1=ctx R3=imm14,min_value=14,max_value=14,min_align=2 R10=fp",
- "4: R1=ctx R3=imm56,min_value=56,max_value=56,min_align=4 R10=fp",
+ {1, "R1=ctx(id=0,off=0,imm=0)"},
+ {1, "R10=fp0"},
+ {1, "R3=inv7"},
+ {2, "R3=inv7"},
+ {3, "R3=inv14"},
+ {4, "R3=inv56"},
},
},
+ /* Tests using unknown values */
#define PREP_PKT_POINTERS \
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \
offsetof(struct __sk_buff, data)), \
@@ -166,17 +184,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- "7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp",
- "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv55,min_align=2 R10=fp",
- "9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv54,min_align=4 R10=fp",
- "10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv53,min_align=8 R10=fp",
- "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv52,min_align=16 R10=fp",
- "18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv56 R10=fp",
- "19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv51,min_align=32 R10=fp",
- "20: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv52,min_align=16 R10=fp",
- "21: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv53,min_align=8 R10=fp",
- "22: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv54,min_align=4 R10=fp",
- "23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv55,min_align=2 R10=fp",
+ {7, "R0=pkt(id=0,off=8,r=8,imm=0)"},
+ {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {18, "R3=pkt_end(id=0,off=0,imm=0)"},
+ {18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
+ {20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
},
},
{
@@ -197,16 +217,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- "7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp",
- "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
- "9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv55,min_align=1 R10=fp",
- "10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
- "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv54,min_align=2 R10=fp",
- "12: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
- "13: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv53,min_align=4 R10=fp",
- "14: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
- "15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv52,min_align=8 R10=fp",
- "16: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv50,min_align=8 R10=fp"
+ {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
},
},
{
@@ -237,12 +257,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- "4: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=0,r=0) R10=fp",
- "5: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=14,r=0) R10=fp",
- "6: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R4=pkt(id=0,off=14,r=0) R5=pkt(id=0,off=14,r=0) R10=fp",
- "10: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv56 R5=pkt(id=0,off=14,r=18) R10=fp",
- "14: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp",
- "15: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp",
+ {4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
+ {5, "R5=pkt(id=0,off=14,r=0,imm=0)"},
+ {6, "R4=pkt(id=0,off=14,r=0,imm=0)"},
+ {10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
+ {10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
+ {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {14, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+ {15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
},
},
{
@@ -297,62 +319,286 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R6=inv54,min_align=4 R10=fp",
-
- /* Offset is added to packet pointer R5, resulting in known
- * auxiliary alignment and offset.
+ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+ {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Offset is added to packet pointer R5, resulting in
+ * known fixed offset, and variable offset from R6.
*/
- "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R5=pkt(id=1,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
-
+ {11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* At the time the word size load is performed from R5,
* its total offset is NET_IP_ALIGN + reg->off (0) +
* reg->aux_off (14) which is 16. Then the variable
* offset is considered using reg->aux_off_align which
* is 4 and meets the load's requirements.
*/
- "15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=1,off=4,r=4),aux_off=14,aux_off_align=4 R5=pkt(id=1,off=0,r=4),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
-
-
+ {15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Variable offset is added to R5 packet pointer,
* resulting in auxiliary alignment of 4.
*/
- "18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=0,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
-
+ {18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
- "19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=14,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
-
+ {19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* At the time the word size load is performed from R5,
- * it's total offset is NET_IP_ALIGN + reg->off (14) which
- * is 16. Then the variable offset is considered using
- * reg->aux_off_align which is 4 and meets the load's
- * requirements.
+ * its total fixed offset is NET_IP_ALIGN + reg->off
+ * (14) which is 16. Then the variable offset is 4-byte
+ * aligned, so the total offset is 4-byte aligned and
+ * meets the load's requirements.
*/
- "23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=2,off=18,r=18),aux_off_align=4 R5=pkt(id=2,off=14,r=18),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
-
+ {23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- "26: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=0,off=14,r=8) R6=inv54,min_align=4 R10=fp",
- /* Variable offset is added to R5, resulting in an
- * auxiliary offset of 14, and an auxiliary alignment of 4.
+ {26, "R5=pkt(id=0,off=14,r=8"},
+ /* Variable offset is added to R5, resulting in a
+ * variable offset of (4n).
*/
- "27: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
- /* Constant is added to R5 again, setting reg->off to 4. */
- "28: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=4,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
- /* And once more we add a variable, which causes an accumulation
- * of reg->off into reg->aux_off_align, with resulting value of
- * 18. The auxiliary alignment stays at 4.
+ {27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Constant is added to R5 again, setting reg->off to 18. */
+ {28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* And once more we add a variable; resulting var_off
+ * is still (4n), fixed offset is not changed.
+ * Also, we create a new reg->id.
*/
- "29: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=4,off=0,r=0),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+ {29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
/* At the time the word size load is performed from R5,
- * it's total offset is NET_IP_ALIGN + reg->off (0) +
- * reg->aux_off (18) which is 20. Then the variable offset
- * is considered using reg->aux_off_align which is 4 and meets
- * the load's requirements.
+ * its total fixed offset is NET_IP_ALIGN + reg->off (18)
+ * which is 20. Then the variable offset is (4n), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ },
+ },
+ {
+ .descr = "packet variable offset 2",
+ .insns = {
+ /* Create an unknown offset, (4n+2)-aligned */
+ LOAD_UNKNOWN(BPF_REG_6),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+ /* Add it to the packet pointer */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+ /* Make a (4n) offset from the value we just read */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xff),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+ /* Add it to the packet pointer */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ /* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+ {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Adding 14 makes R6 be (4n+2) */
+ {9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* Packet pointer has (4n+2) offset */
+ {11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* Newly read value in R6 was shifted left by 2, so has
+ * known alignment of 4.
+ */
+ {18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Added (4n) to packet pointer's (4n+2) var_off, giving
+ * another (4n+2).
+ */
+ {19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ },
+ },
+ {
+ .descr = "dubious pointer arithmetic",
+ .insns = {
+ PREP_PKT_POINTERS,
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* ptr & const => unknown & const */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 0x40),
+ /* ptr << const => unknown << const */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
+ /* We have a (4n) value. Let's make a packet offset
+ * out of it. First add 14, to make it a (4n+2)
+ */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ /* Then make sure it's nonnegative */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_5, 0, 1),
+ BPF_EXIT_INSN(),
+ /* Add it to packet pointer */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .matches = {
+ {4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
+ /* ptr & 0x40 == either 0 or 0x40 */
+ {5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
+ /* ptr << 2 == unknown, (4n) */
+ {7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+ /* (4n) + 14 == (4n+2). We blow our bounds, because
+ * the add could overflow.
+ */
+ {8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+ /* Checked s>=0 */
+ {10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ /* packet pointer + nonnegative (4n+2) */
+ {12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ {14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
+ * We checked the bounds, but it might have been able
+ * to overflow if the packet pointer started in the
+ * upper half of the address space.
+ * So we did not get a 'range' on R6, and the access
+ * attempt will fail.
+ */
+ {16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ }
+ },
+ {
+ .descr = "variable subtraction",
+ .insns = {
+ /* Create an unknown offset, (4n+2)-aligned */
+ LOAD_UNKNOWN(BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+ /* Create another unknown, (4n)-aligned, and subtract
+ * it from the first one
+ */
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_7),
+ /* Bounds-check the result */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 0, 1),
+ BPF_EXIT_INSN(),
+ /* Add it to the packet pointer */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ /* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+ {9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Adding 14 makes R6 be (4n+2) */
+ {10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* New unknown value in R7 is (4n) */
+ {11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Subtracting it from R6 blows our unsigned bounds */
+ {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
+ /* Checked s>= 0 */
+ {14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ },
+ },
+ {
+ .descr = "pointer variable subtraction",
+ .insns = {
+ /* Create an unknown offset, (4n+2)-aligned and bounded
+ * to [14,74]
+ */
+ LOAD_UNKNOWN(BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xf),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+ /* Subtract it from the packet pointer */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_6),
+ /* Create another unknown, (4n)-aligned and >= 74.
+ * That in fact means >= 76, since 74 % 4 == 2
*/
- "33: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=4,off=4,r=4),aux_off=18,aux_off_align=4 R5=pkt(id=4,off=0,r=4),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 76),
+ /* Add it to the packet pointer */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_7),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ /* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+ {10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+ /* Adding 14 makes R6 be (4n+2) */
+ {11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+ /* Subtracting from packet pointer overflows ubounds */
+ {13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
+ /* New unknown value in R7 is (4n), >= 76 */
+ {15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+ /* Adding it to packet pointer gives nice bounds again */
+ {16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
},
},
};
@@ -373,6 +619,9 @@ static int do_test_single(struct bpf_align_test *test)
{
struct bpf_insn *prog = test->insns;
int prog_type = test->prog_type;
+ char bpf_vlog_copy[32768];
+ const char *line_ptr;
+ int cur_line = -1;
int prog_len, i;
int fd_prog;
int ret;
@@ -381,26 +630,49 @@ static int do_test_single(struct bpf_align_test *test)
fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
prog, prog_len, 1, "GPL", 0,
bpf_vlog, sizeof(bpf_vlog), 2);
- if (fd_prog < 0) {
+ if (fd_prog < 0 && test->result != REJECT) {
printf("Failed to load program.\n");
printf("%s", bpf_vlog);
ret = 1;
+ } else if (fd_prog >= 0 && test->result == REJECT) {
+ printf("Unexpected success to load!\n");
+ printf("%s", bpf_vlog);
+ ret = 1;
+ close(fd_prog);
} else {
ret = 0;
+ /* We make a local copy so that we can strtok() it */
+ strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
+ line_ptr = strtok(bpf_vlog_copy, "\n");
for (i = 0; i < MAX_MATCHES; i++) {
- const char *t, *m = test->matches[i];
+ struct bpf_reg_match m = test->matches[i];
- if (!m)
+ if (!m.match)
break;
- t = strstr(bpf_vlog, m);
- if (!t) {
- printf("Failed to find match: %s\n", m);
+ while (line_ptr) {
+ cur_line = -1;
+ sscanf(line_ptr, "%u: ", &cur_line);
+ if (cur_line == m.line)
+ break;
+ line_ptr = strtok(NULL, "\n");
+ }
+ if (!line_ptr) {
+ printf("Failed to find line %u for match: %s\n",
+ m.line, m.match);
+ ret = 1;
+ printf("%s", bpf_vlog);
+ break;
+ }
+ if (!strstr(line_ptr, m.match)) {
+ printf("Failed to find match %u: %s\n",
+ m.line, m.match);
ret = 1;
printf("%s", bpf_vlog);
break;
}
}
- close(fd_prog);
+ if (fd_prog >= 0)
+ close(fd_prog);
}
return ret;
}
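The rewritten match strings rely on the verifier's var_off=(value; mask) notation: bits set in mask are unknown, all other bits are fixed to value. A small self-contained decoder of what such a tnum admits (illustrative, not part of the selftest):

#include <stdbool.h>
#include <stdint.h>

/* True iff x is admitted by tnum (value; mask): every bit outside
 * mask must equal the corresponding bit of value.
 */
static bool tnum_contains(uint64_t value, uint64_t mask, uint64_t x)
{
	return (x & ~mask) == value;
}

/* Example: tnum_contains(0x2, 0x7fc, 14) is true, since 14 == 4*3 + 2;
 * (0x2; 0x7fc) is the "(4n+2)" shape referenced in the comments above
 * (umin/umax bounds are tracked separately and tighten it further).
 */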
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 79601c8..4acc772 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -22,6 +22,7 @@
#include <linux/bpf.h>
#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
#include "bpf_util.h"
static int map_flags;
@@ -438,6 +439,395 @@ static void test_arraymap_percpu_many_keys(void)
close(fd);
}
+static void test_devmap(int task, void *data)
+{
+ int fd;
+ __u32 key, value;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value),
+ 2, 0);
+ if (fd < 0) {
+ printf("Failed to create arraymap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ close(fd);
+}
+
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <sys/select.h>
+#include <linux/err.h>
+#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
+#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
+static void test_sockmap(int tasks, void *data)
+{
+ int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
+ struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
+ int ports[] = {50200, 50201, 50202, 50204};
+ int err, i, fd, sfd[6] = {0xdeadbeef};
+ u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
+ int parse_prog, verdict_prog;
+ struct sockaddr_in addr;
+ struct bpf_object *obj;
+ struct timeval to;
+ __u32 key, value;
+ pid_t pid[tasks];
+ fd_set w;
+
+ /* Create some sockets to use with sockmap */
+ for (i = 0; i < 2; i++) {
+ sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
+ if (sfd[i] < 0)
+ goto out;
+ err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
+ (char *)&one, sizeof(one));
+ if (err) {
+ printf("failed to setsockopt\n");
+ goto out;
+ }
+ err = ioctl(sfd[i], FIONBIO, (char *)&one);
+ if (err < 0) {
+ printf("failed to ioctl\n");
+ goto out;
+ }
+ memset(&addr, 0, sizeof(struct sockaddr_in));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ addr.sin_port = htons(ports[i]);
+ err = bind(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0) {
+ printf("failed to bind: err %i: %i:%i\n",
+ err, i, sfd[i]);
+ goto out;
+ }
+ err = listen(sfd[i], 32);
+ if (err < 0) {
+ printf("failed to listen\n");
+ goto out;
+ }
+ }
+
+ for (i = 2; i < 4; i++) {
+ sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
+ if (sfd[i] < 0)
+ goto out;
+ err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
+ (char *)&one, sizeof(one));
+ if (err) {
+ printf("set sock opt\n");
+ goto out;
+ }
+ memset(&addr, 0, sizeof(struct sockaddr_in));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ addr.sin_port = htons(ports[i - 2]);
+ err = connect(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
+ if (err) {
+ printf("failed to connect\n");
+ goto out;
+ }
+ }
+
+ for (i = 4; i < 6; i++) {
+ sfd[i] = accept(sfd[i - 4], NULL, NULL);
+ if (sfd[i] < 0) {
+ printf("accept failed\n");
+ goto out;
+ }
+ }
+
+ /* Test sockmap with connected sockets */
+ fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP,
+ sizeof(key), sizeof(value),
+ 6, 0);
+ if (fd < 0) {
+ printf("Failed to create sockmap %i\n", fd);
+ goto out_sockmap;
+ }
+
+ /* Test update without programs */
+ for (i = 0; i < 6; i++) {
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+ if (err) {
+ printf("Failed noprog update sockmap '%i:%i'\n",
+ i, sfd[i]);
+ goto out_sockmap;
+ }
+ }
+
+ /* Test attaching bad fds */
+ err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!err) {
+ printf("Failed invalid parser prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!err) {
+ printf("Failed invalid verdict prog attach\n");
+ goto out_sockmap;
+ }
+
+ /* Load SK_SKB program and Attach */
+ err = bpf_prog_load(SOCKMAP_PARSE_PROG,
+ BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog);
+ if (err) {
+ printf("Failed to load SK_SKB parse prog\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
+ BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
+ if (err) {
+ printf("Failed to load SK_SKB verdict prog\n");
+ goto out_sockmap;
+ }
+
+ bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
+ if (IS_ERR(bpf_map_rx)) {
+ printf("Failed to load map rx from verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_rx = bpf_map__fd(bpf_map_rx);
+ if (map_fd_rx < 0) {
+ printf("Failed to get map fd\n");
+ goto out_sockmap;
+ }
+
+ bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
+ if (IS_ERR(bpf_map_tx)) {
+ printf("Failed to load map tx from verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_tx = bpf_map__fd(bpf_map_tx);
+ if (map_fd_tx < 0) {
+ printf("Failed to get map tx fd\n");
+ goto out_sockmap;
+ }
+
+ bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
+ if (IS_ERR(bpf_map_break)) {
+ printf("Failed to load map tx from verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_break = bpf_map__fd(bpf_map_break);
+ if (map_fd_break < 0) {
+ printf("Failed to get map tx fd\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(parse_prog, map_fd_break,
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!err) {
+ printf("Allowed attaching SK_SKB program to invalid map\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(parse_prog, map_fd_rx,
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ printf("Failed stream parser bpf prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(verdict_prog, map_fd_rx,
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ printf("Failed stream verdict bpf prog attach\n");
+ goto out_sockmap;
+ }
+
+ /* Test map update elem; afterwards the socket fds live in both fd and the map_fd_* maps */
+ for (i = 0; i < 6; i++) {
+ err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
+ if (err) {
+ printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_update_elem(map_fd_tx, &i, &sfd[i], BPF_ANY);
+ if (err) {
+ printf("Failed map_fd_tx update sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ }
+
+ /* Test map delete elem and remove send/recv sockets */
+ for (i = 2; i < 4; i++) {
+ err = bpf_map_delete_elem(map_fd_rx, &i);
+ if (err) {
+ printf("Failed delete sockmap rx %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_delete_elem(map_fd_tx, &i);
+ if (err) {
+ printf("Failed delete sockmap tx %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ }
+
+ /* Test map send/recv */
+ for (i = 0; i < 2; i++) {
+ buf[0] = i;
+ buf[1] = 0x5;
+ sc = send(sfd[2], buf, 20, 0);
+ if (sc < 0) {
+ printf("Failed sockmap send\n");
+ goto out_sockmap;
+ }
+
+ FD_ZERO(&w);
+ FD_SET(sfd[3], &w);
+ to.tv_sec = 1;
+ to.tv_usec = 0;
+ s = select(sfd[3] + 1, &w, NULL, NULL, &to);
+ if (s == -1) {
+ perror("Failed sockmap select()");
+ goto out_sockmap;
+ } else if (!s) {
+ printf("Failed sockmap unexpected timeout\n");
+ goto out_sockmap;
+ }
+
+ if (!FD_ISSET(sfd[3], &w)) {
+ printf("Failed sockmap select/recv\n");
+ goto out_sockmap;
+ }
+
+ rc = recv(sfd[3], buf, sizeof(buf), 0);
+ if (rc < 0) {
+ printf("Failed sockmap recv\n");
+ goto out_sockmap;
+ }
+ }
+
+ /* Negative test: a datapath lookup hitting a NULL entry should drop the skb */
+ buf[0] = 1;
+ buf[1] = 12;
+ sc = send(sfd[2], buf, 20, 0);
+ if (sc < 0) {
+ printf("Failed sockmap send\n");
+ goto out_sockmap;
+ }
+
+ /* Push fd into same slot */
+ i = 2;
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
+ if (!err) {
+ printf("Failed allowed sockmap dup slot BPF_NOEXIST\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+ if (err) {
+ printf("Failed sockmap update new slot BPF_ANY\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
+ if (err) {
+ printf("Failed sockmap update new slot BPF_EXIST\n");
+ goto out_sockmap;
+ }
+
+ /* Delete the elems without programs */
+ for (i = 0; i < 6; i++) {
+ err = bpf_map_delete_elem(fd, &i);
+ if (err) {
+ printf("Failed delete sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ }
+ }
+
+ /* Test multiple open maps with attached programs sharing the same socket fds */
+ err = bpf_prog_attach(parse_prog, fd,
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ printf("Failed fd bpf parse prog attach\n");
+ goto out_sockmap;
+ }
+ err = bpf_prog_attach(verdict_prog, fd,
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ printf("Failed fd bpf verdict prog attach\n");
+ goto out_sockmap;
+ }
+
+ for (i = 4; i < 6; i++) {
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+ if (!err) {
+ printf("Failed allowed duplicate programs in update ANY sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
+ if (!err) {
+ printf("Failed allowed duplicate program in update NOEXIST sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
+ if (!err) {
+ printf("Failed allowed duplicate program in update EXIST sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ }
+
+ /* Fork 'tasks' children that exercise map ops in parallel */
+ for (i = 0; i < tasks; i++) {
+ pid[i] = fork();
+ if (pid[i] == 0) {
+ for (i = 0; i < 6; i++) {
+ bpf_map_delete_elem(map_fd_tx, &i);
+ bpf_map_delete_elem(map_fd_rx, &i);
+ bpf_map_update_elem(map_fd_tx, &i,
+ &sfd[i], BPF_ANY);
+ bpf_map_update_elem(map_fd_rx, &i,
+ &sfd[i], BPF_ANY);
+ }
+ exit(0);
+ } else if (pid[i] == -1) {
+ printf("Couldn't spawn #%d process!\n", i);
+ exit(1);
+ }
+ }
+
+ for (i = 0; i < tasks; i++) {
+ int status;
+
+ assert(waitpid(pid[i], &status, 0) == pid[i]);
+ assert(status == 0);
+ }
+
+ /* Tear down: close the test sockets and maps */
+ for (i = 0; i < 6; i++)
+ close(sfd[i]);
+ close(fd);
+ close(map_fd_rx);
+ bpf_object__close(obj);
+ return;
+out:
+ for (i = 0; i < 6; i++)
+ close(sfd[i]);
+ printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno));
+ exit(1);
+out_sockmap:
+ for (i = 0; i < 6; i++)
+ close(sfd[i]);
+ close(fd);
+ exit(1);
+}
+
#define MAP_SIZE (32 * 1024)
static void test_map_large(void)
@@ -605,6 +995,9 @@ static void run_all_tests(void)
test_arraymap_percpu_many_keys();
+ test_devmap(0, NULL);
+ test_sockmap(0, NULL);
+
test_map_large();
test_map_parallel();
test_map_stress();
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 1f7dd35..11ee25c 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -75,39 +75,6 @@ static struct {
__ret; \
})
-static int bpf_prog_load(const char *file, enum bpf_prog_type type,
- struct bpf_object **pobj, int *prog_fd)
-{
- struct bpf_program *prog;
- struct bpf_object *obj;
- int err;
-
- obj = bpf_object__open(file);
- if (IS_ERR(obj)) {
- error_cnt++;
- return -ENOENT;
- }
-
- prog = bpf_program__next(NULL, obj);
- if (!prog) {
- bpf_object__close(obj);
- error_cnt++;
- return -ENOENT;
- }
-
- bpf_program__set_type(prog, type);
- err = bpf_object__load(obj);
- if (err) {
- bpf_object__close(obj);
- error_cnt++;
- return -EINVAL;
- }
-
- *pobj = obj;
- *prog_fd = bpf_program__fd(prog);
- return 0;
-}
-
static int bpf_find_map(const char *test, struct bpf_object *obj,
const char *name)
{
@@ -130,8 +97,10 @@ static void test_pkt_access(void)
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (err)
+ if (err) {
+ error_cnt++;
return;
+ }
err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, &retval, &duration);
@@ -162,8 +131,10 @@ static void test_xdp(void)
int err, prog_fd, map_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (err)
+ if (err) {
+ error_cnt++;
return;
+ }
map_fd = bpf_find_map(__func__, obj, "vip2tnl");
if (map_fd < 0)
@@ -223,8 +194,10 @@ static void test_l4lb(void)
u32 *magic = (u32 *)buf;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (err)
+ if (err) {
+ error_cnt++;
return;
+ }
map_fd = bpf_find_map(__func__, obj, "vip_map");
if (map_fd < 0)
@@ -280,8 +253,10 @@ static void test_tcp_estats(void)
err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
CHECK(err, "", "err %d errno %d\n", err, errno);
- if (err)
+ if (err) {
+ error_cnt++;
return;
+ }
bpf_object__close(obj);
}
@@ -304,7 +279,7 @@ static void test_bpf_obj_id(void)
/* +1 to test for the info_len returned by kernel */
struct bpf_prog_info prog_infos[nr_iters + 1];
struct bpf_map_info map_infos[nr_iters + 1];
- char jited_insns[128], xlated_insns[128];
+ char jited_insns[128], xlated_insns[128], zeros[128];
__u32 i, next_id, info_len, nr_id_found, duration = 0;
int sysctl_fd, jit_enabled = 0, err = 0;
__u64 array_value;
@@ -330,17 +305,22 @@ static void test_bpf_obj_id(void)
objs[i] = NULL;
/* Check bpf_obj_get_info_by_fd() */
+ bzero(zeros, sizeof(zeros));
for (i = 0; i < nr_iters; i++) {
err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER,
&objs[i], &prog_fds[i]);
/* test_obj_id.o is a dumb prog. It should never fail
* to load.
*/
+ if (err)
+ error_cnt++;
assert(!err);
/* Check getting prog info */
info_len = sizeof(struct bpf_prog_info) * 2;
bzero(&prog_infos[i], info_len);
+ bzero(jited_insns, sizeof(jited_insns));
+ bzero(xlated_insns, sizeof(xlated_insns));
prog_infos[i].jited_prog_insns = ptr_to_u64(jited_insns);
prog_infos[i].jited_prog_len = sizeof(jited_insns);
prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns);
@@ -351,15 +331,20 @@ static void test_bpf_obj_id(void)
prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
info_len != sizeof(struct bpf_prog_info) ||
(jit_enabled && !prog_infos[i].jited_prog_len) ||
- !prog_infos[i].xlated_prog_len,
+ (jit_enabled &&
+ !memcmp(jited_insns, zeros, sizeof(zeros))) ||
+ !prog_infos[i].xlated_prog_len ||
+ !memcmp(xlated_insns, zeros, sizeof(zeros)),
"get-prog-info(fd)",
- "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u\n",
+ "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d\n",
err, errno, i,
prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
info_len, sizeof(struct bpf_prog_info),
jit_enabled,
prog_infos[i].jited_prog_len,
- prog_infos[i].xlated_prog_len))
+ prog_infos[i].xlated_prog_len,
+ !!memcmp(jited_insns, zeros, sizeof(zeros)),
+ !!memcmp(xlated_insns, zeros, sizeof(zeros))))
goto done;
map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
@@ -496,8 +481,10 @@ static void test_pkt_md_access(void)
int err, prog_fd;
err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (err)
+ if (err) {
+ error_cnt++;
return;
+ }
err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, &retval, &duration);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index d3ed732..8eb0995 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -422,7 +422,7 @@ static struct bpf_test tests[] = {
BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R1 pointer arithmetic",
+ .errstr_unpriv = "R1 subtraction from stack pointer",
.result_unpriv = REJECT,
.errstr = "R1 invalid mem access",
.result = REJECT,
@@ -604,8 +604,9 @@ static struct bpf_test tests[] = {
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
BPF_EXIT_INSN(),
},
- .errstr = "misaligned access",
+ .errstr = "misaligned stack access",
.result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"invalid map_fd for function call",
@@ -651,8 +652,9 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr = "misaligned access",
+ .errstr = "misaligned value access",
.result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"sometimes access memory with incorrect alignment",
@@ -673,6 +675,7 @@ static struct bpf_test tests[] = {
.errstr = "R0 invalid mem access",
.errstr_unpriv = "R0 leaks addr",
.result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"jump test 1",
@@ -964,6 +967,256 @@ static struct bpf_test tests[] = {
.result = REJECT,
},
{
+ "invalid access __sk_buff family",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, family)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff remote_ip4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff local_ip4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff remote_ip6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff local_ip6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff remote_port",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_port)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff remote_port",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_port)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "valid access __sk_buff family",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, family)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff remote_ip4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff local_ip4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff remote_ip6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff local_ip6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff remote_port",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_port)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff remote_port",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_port)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "invalid access of tc_classid for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "check skb->mark is writeable by SK_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "check skb->tc_index is writeable by SK_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "check skb->priority is writeable by SK_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, priority)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "direct packet read for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "direct packet write for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "overlapping checks for direct packet access SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
"check skb->mark is not writeable by sockets",
.insns = {
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
@@ -1216,8 +1469,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[0]) + 1),
BPF_EXIT_INSN(),
},
- .errstr = "misaligned access",
+ .errstr = "misaligned context access",
.result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check __sk_buff->hash, offset 0, half store not permitted",
@@ -1320,8 +1574,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[0]) + 2),
BPF_EXIT_INSN(),
},
- .errstr = "misaligned access",
+ .errstr = "misaligned context access",
.result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check cb access: word, unaligned 2",
@@ -1331,8 +1586,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[4]) + 1),
BPF_EXIT_INSN(),
},
- .errstr = "misaligned access",
+ .errstr = "misaligned context access",
.result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check cb access: word, unaligned 3",
@@ -1342,8 +1598,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[4]) + 2),
BPF_EXIT_INSN(),
},
- .errstr = "misaligned access",
+ .errstr = "misaligned context access",
.result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check cb access: word, unaligned 4",
@@ -1353,8 +1610,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[4]) + 3),
BPF_EXIT_INSN(),
},
- .errstr = "misaligned access",
+ .errstr = "misaligned context access",
.result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check cb access: double",
@@ -1380,8 +1638,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[1])),
BPF_EXIT_INSN(),
},
- .errstr = "misaligned access",
+ .errstr = "misaligned context access",
.result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check cb access: double, unaligned 2",
@@ -1391,8 +1650,9 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, cb[3])),
BPF_EXIT_INSN(),
},
- .errstr = "misaligned access",
+ .errstr = "misaligned context access",
.result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"check cb access: double, oob 1",
@@ -1524,7 +1784,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "misaligned access off -6 size 8",
+ .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"PTR_TO_STACK store/load - bad alignment on reg",
@@ -1536,7 +1797,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "misaligned access off -2 size 8",
+ .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
},
{
"PTR_TO_STACK store/load - out of bounds low",
@@ -1580,8 +1842,6 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 pointer arithmetic",
},
{
"unpriv: add pointer to pointer",
@@ -1592,7 +1852,7 @@ static struct bpf_test tests[] = {
},
.result = ACCEPT,
.result_unpriv = REJECT,
- .errstr_unpriv = "R1 pointer arithmetic",
+ .errstr_unpriv = "R1 pointer += pointer",
},
{
"unpriv: neg pointer",
@@ -1933,10 +2193,7 @@ static struct bpf_test tests[] = {
BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "pointer arithmetic prohibited",
- .result_unpriv = REJECT,
- .errstr = "R1 invalid mem access",
- .result = REJECT,
+ .result = ACCEPT,
},
{
"unpriv: cmp of stack pointer",
@@ -2000,7 +2257,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "invalid stack type R3",
+ .errstr = "R4 min value is negative",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
@@ -2017,7 +2274,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "invalid stack type R3",
+ .errstr = "R4 min value is negative",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
@@ -2219,7 +2476,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "invalid stack type R3 off=-1 access_size=-1",
+ .errstr = "R4 min value is negative",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
@@ -2236,7 +2493,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "invalid stack type R3 off=-1 access_size=2147483647",
+ .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
@@ -2253,7 +2510,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "invalid stack type R3 off=-512 access_size=2147483647",
+ .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
@@ -2324,8 +2581,8 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, data)),
BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 48),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 48),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
@@ -2653,7 +2910,7 @@ static struct bpf_test tests[] = {
BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
BPF_JMP_A(-6),
},
- .errstr = "misaligned packet access off 2+15+-4 size 4",
+ .errstr = "misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
@@ -2704,11 +2961,11 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0x7fff),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -2730,10 +2987,10 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0xffff),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0x7fff),
BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -2759,7 +3016,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 48),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49),
BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
@@ -2796,7 +3053,7 @@ static struct bpf_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "cannot add integer value with 47 upper zero bits to ptr_to_packet",
+ .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)",
},
{
"direct packet access: test24 (x += pkt_ptr, 5)",
@@ -2814,7 +3071,7 @@ static struct bpf_test tests[] = {
BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7fff - 1),
BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -2824,6 +3081,79 @@ static struct bpf_test tests[] = {
.result = ACCEPT,
},
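+	/* test25..test28 verify that packet bounds are also learned from
+	 * BPF_JLT/BPF_JLE comparisons, in both the taken and fall-through
+	 * branches, including the forms with swapped operands.
+	 */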
{
+ "direct packet access: test25 (marking on <, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test26 (marking on <, bad access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test27 (marking on <=, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test28 (marking on <=, bad access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
"helper access to packet: test1, valid packet_ptr range",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -3113,7 +3443,7 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
- "helper access to packet: test14, cls helper fail sub",
+ "helper access to packet: test14, cls helper ok sub",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, data)),
@@ -3133,12 +3463,36 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test15, cls helper fail sub",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
.result = REJECT,
- .errstr = "type=inv expected=fp",
+ .errstr = "invalid access to packet",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
- "helper access to packet: test15, cls helper fail range 1",
+ "helper access to packet: test16, cls helper fail range 1",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, data)),
@@ -3163,7 +3517,7 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
- "helper access to packet: test16, cls helper fail range 2",
+ "helper access to packet: test17, cls helper fail range 2",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, data)),
@@ -3184,11 +3538,11 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "invalid access to packet",
+ .errstr = "R2 min value is negative",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
- "helper access to packet: test17, cls helper fail range 3",
+ "helper access to packet: test18, cls helper fail range 3",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, data)),
@@ -3209,11 +3563,11 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "invalid access to packet",
+ .errstr = "R2 min value is negative",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
- "helper access to packet: test18, cls helper fail range zero",
+ "helper access to packet: test19, cls helper fail range zero",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, data)),
@@ -3238,7 +3592,7 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
- "helper access to packet: test19, pkt end as input",
+ "helper access to packet: test20, pkt end as input",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, data)),
@@ -3263,7 +3617,7 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
- "helper access to packet: test20, wrong reg",
+ "helper access to packet: test21, wrong reg",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
offsetof(struct __sk_buff, data)),
@@ -3323,7 +3677,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr_unpriv = "R0 leaks addr",
.result_unpriv = REJECT,
.result = ACCEPT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -3347,7 +3701,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr_unpriv = "R0 leaks addr",
.result_unpriv = REJECT,
.result = ACCEPT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -3375,7 +3729,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr_unpriv = "R0 leaks addr",
.result_unpriv = REJECT,
.result = ACCEPT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -3416,9 +3770,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
.errstr = "R0 min value is outside of the array range",
- .result_unpriv = REJECT,
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -3440,9 +3792,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
- .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
- .result_unpriv = REJECT,
+ .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -3456,7 +3806,7 @@ static struct bpf_test tests[] = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
BPF_MOV32_IMM(BPF_REG_1, 0),
@@ -3467,8 +3817,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
- .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+ .errstr_unpriv = "R0 leaks addr",
+ .errstr = "R0 unbounded memory access",
.result_unpriv = REJECT,
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -3494,7 +3844,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr_unpriv = "R0 leaks addr",
.errstr = "invalid access to map value, value_size=48 off=44 size=8",
.result_unpriv = REJECT,
.result = REJECT,
@@ -3524,8 +3874,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3, 11 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
- .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+ .errstr_unpriv = "R0 pointer += pointer",
+ .errstr = "R0 invalid mem access 'inv'",
.result_unpriv = REJECT,
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -3667,34 +4017,6 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS
},
{
- "multiple registers share map_lookup_elem bad reg type",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_MOV64_IMM(BPF_REG_1, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 0),
- BPF_MOV64_IMM(BPF_REG_1, 3),
- BPF_EXIT_INSN(),
- },
- .fixup_map1 = { 4 },
- .result = REJECT,
- .errstr = "R3 invalid mem access 'inv'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
- },
- {
"invalid map access from else condition",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
@@ -3712,9 +4034,9 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
+ .errstr = "R0 unbounded memory access",
.result = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr_unpriv = "R0 leaks addr",
.result_unpriv = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -4092,7 +4414,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=0 size=-8",
+ .errstr = "R2 min value is negative",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -4158,7 +4480,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "invalid access to map value, value_size=48 off=4 size=0",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -4204,7 +4526,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=-8",
+ .errstr = "R2 min value is negative",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -4226,7 +4548,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R2 min value is negative",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -4342,7 +4664,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=-8",
+ .errstr = "R2 min value is negative",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -4365,7 +4687,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R2 min value is negative",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -4453,13 +4775,13 @@ static struct bpf_test tests[] = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
BPF_MOV64_IMM(BPF_REG_3, 0),
BPF_EMIT_CALL(BPF_FUNC_probe_read),
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr = "R1 min value is negative, either use unsigned index or do a if (index >=0) check",
+ .errstr = "R1 unbounded memory access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -4490,6 +4812,246 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
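+	/* The tests below bound a value loaded from a map element with <,
+	 * <=, s< or s<= before adding it to the map value pointer.  The
+	 * signed variants need an extra lower-bound check, since a signed
+	 * upper bound alone still leaves the minimum value negative.
+	 */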
{
+ "helper access to map: bounds check using <, good access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using <, bad access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = REJECT,
+ .errstr = "R1 unbounded memory access",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using <=, good access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using <=, bad access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = REJECT,
+ .errstr = "R1 unbounded memory access",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<, good access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 0, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<, good access 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<, bad access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = REJECT,
+ .errstr = "R1 min value is negative",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<=, good access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<=, good access 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<=, bad access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = REJECT,
+ .errstr = "R1 min value is negative",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
"map element value is preserved across register spilling",
.insns = {
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -4579,7 +5141,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr_unpriv = "R0 leaks addr",
.result = ACCEPT,
.result_unpriv = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4607,7 +5169,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr_unpriv = "R0 leaks addr",
.result = ACCEPT,
.result_unpriv = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4626,7 +5188,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr_unpriv = "R0 bitwise operator &= on pointer",
.errstr = "invalid mem access 'inv'",
.result = REJECT,
.result_unpriv = REJECT,
@@ -4645,7 +5207,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr_unpriv = "R0 32-bit pointer arithmetic prohibited",
.errstr = "invalid mem access 'inv'",
.result = REJECT,
.result_unpriv = REJECT,
@@ -4664,7 +5226,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr_unpriv = "R0 pointer arithmetic with /= operator",
.errstr = "invalid mem access 'inv'",
.result = REJECT,
.result_unpriv = REJECT,
@@ -4707,10 +5269,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 invalid mem access 'inv'",
.errstr = "R0 invalid mem access 'inv'",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
"map element value is preserved across register spilling",
@@ -4732,7 +5292,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr_unpriv = "R0 leaks addr",
.result = ACCEPT,
.result_unpriv = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4914,7 +5474,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "R2 unbounded memory access",
+ /* because max wasn't checked, signed min is negative */
+ .errstr = "R2 min value is negative, either use unsigned or 'var &= const'",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -5063,6 +5624,20 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
+ "helper access to variable memory: size = 0 allowed on NULL",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
"helper access to variable memory: size > 0 not allowed on NULL",
.insns = {
BPF_MOV64_IMM(BPF_REG_1, 0),
@@ -5076,7 +5651,7 @@ static struct bpf_test tests[] = {
BPF_EMIT_CALL(BPF_FUNC_csum_diff),
BPF_EXIT_INSN(),
},
- .errstr = "R1 type=imm expected=fp",
+ .errstr = "R1 type=inv expected=fp",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
@@ -5161,7 +5736,7 @@ static struct bpf_test tests[] = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4),
BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
@@ -5170,10 +5745,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
- .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+ .errstr = "R0 max value is outside of the array range",
.result = REJECT,
- .result_unpriv = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
@@ -5202,10 +5775,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map2 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
- .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+ .errstr = "R0 max value is outside of the array range",
.result = REJECT,
- .result_unpriv = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
@@ -5252,7 +5823,7 @@ static struct bpf_test tests[] = {
},
.fixup_map_in_map = { 3 },
.errstr = "R1 type=inv expected=map_ptr",
- .errstr_unpriv = "R1 pointer arithmetic prohibited",
+ .errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
.result = REJECT,
},
{
@@ -5532,10 +6103,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
.errstr = "R0 min value is negative",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
"bounds checks mixing signed and unsigned",
@@ -5558,10 +6127,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
.errstr = "R0 min value is negative",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
"bounds checks mixing signed and unsigned, variant 2",
@@ -5586,10 +6153,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
.errstr = "R8 invalid mem access 'inv'",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
"bounds checks mixing signed and unsigned, variant 3",
@@ -5613,10 +6178,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
.errstr = "R8 invalid mem access 'inv'",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
"bounds checks mixing signed and unsigned, variant 4",
@@ -5639,10 +6202,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
- .errstr = "R0 min value is negative",
- .result = REJECT,
- .result_unpriv = REJECT,
+ .result = ACCEPT,
},
{
"bounds checks mixing signed and unsigned, variant 5",
@@ -5666,10 +6226,8 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
- .errstr = "R0 invalid mem access",
+ .errstr = "R0 min value is negative",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
"bounds checks mixing signed and unsigned, variant 6",
@@ -5690,10 +6248,8 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R4 min value is negative, either use unsigned",
.errstr = "R4 min value is negative, either use unsigned",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
"bounds checks mixing signed and unsigned, variant 7",
@@ -5716,10 +6272,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
- .errstr = "R0 min value is negative",
- .result = REJECT,
- .result_unpriv = REJECT,
+ .result = ACCEPT,
},
{
"bounds checks mixing signed and unsigned, variant 8",
@@ -5730,32 +6283,6 @@ static struct bpf_test tests[] = {
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024 + 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
- .errstr = "R0 min value is negative",
- .result = REJECT,
- .result_unpriv = REJECT,
- },
- {
- "bounds checks mixing signed and unsigned, variant 9",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
@@ -5770,13 +6297,11 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
.errstr = "R0 min value is negative",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
- "bounds checks mixing signed and unsigned, variant 10",
+ "bounds checks mixing signed and unsigned, variant 9",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5798,13 +6323,10 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
- .errstr = "R0 min value is negative",
- .result = REJECT,
- .result_unpriv = REJECT,
+ .result = ACCEPT,
},
{
- "bounds checks mixing signed and unsigned, variant 11",
+ "bounds checks mixing signed and unsigned, variant 10",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5826,13 +6348,11 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
.errstr = "R0 min value is negative",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
- "bounds checks mixing signed and unsigned, variant 12",
+ "bounds checks mixing signed and unsigned, variant 11",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5855,13 +6375,11 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
.errstr = "R0 min value is negative",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
- "bounds checks mixing signed and unsigned, variant 13",
+ "bounds checks mixing signed and unsigned, variant 12",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5883,13 +6401,11 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
.errstr = "R0 min value is negative",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
- "bounds checks mixing signed and unsigned, variant 14",
+ "bounds checks mixing signed and unsigned, variant 13",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5914,13 +6430,11 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
.errstr = "R0 min value is negative",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
- "bounds checks mixing signed and unsigned, variant 15",
+ "bounds checks mixing signed and unsigned, variant 14",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
offsetof(struct __sk_buff, mark)),
@@ -5946,13 +6460,11 @@ static struct bpf_test tests[] = {
BPF_JMP_IMM(BPF_JA, 0, 0, -7),
},
.fixup_map1 = { 4 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
.errstr = "R0 min value is negative",
.result = REJECT,
- .result_unpriv = REJECT,
},
{
- "bounds checks mixing signed and unsigned, variant 16",
+ "bounds checks mixing signed and unsigned, variant 15",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5976,13 +6488,13 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr_unpriv = "R0 pointer comparison prohibited",
.errstr = "R0 min value is negative",
.result = REJECT,
.result_unpriv = REJECT,
},
{
- "subtraction bounds (map value)",
+ "subtraction bounds (map value) variant 1",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -6004,10 +6516,118 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.fixup_map1 = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr = "R0 max value is outside of the array range",
+ .result = REJECT,
+ },
+ {
+ "subtraction bounds (map value) variant 2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 6),
+ BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 4),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
.result = REJECT,
+ },
+ {
+ "variable-offset ctx access",
+ .insns = {
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ /* add it to skb. We now have either &skb->len or
+ * &skb->pkt_type, but we don't know which
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ /* dereference it */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "variable ctx access var_off=(0x0; 0x4)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "variable-offset stack access",
+ .insns = {
+ /* Fill the top 8 bytes of the stack */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+ /* add it to fp. We now have either fp-4 or fp-8, but
+ * we don't know which
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* dereference it */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "variable stack access var_off=(0xfffffffffffffff8; 0x4)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "liveness pruning and write screening",
+ .insns = {
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* branch conditions teach us nothing about R2 */
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "varlen_map_value_access pruning",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .errstr = "R0 unbounded memory access",
.result_unpriv = REJECT,
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
};
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.c b/tools/testing/selftests/bpf/test_xdp_redirect.c
new file mode 100644
index 0000000..ef9e704
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.c
@@ -0,0 +1,28 @@
+/* Copyright (c) 2017 VMware
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
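+/* bpf_redirect(ifindex, 0) tells the XDP core to transmit the frame out
+ * of the device with the given ifindex; the accompanying script attaches
+ * each section to the opposite veth peer (111 <-> 222).
+ */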
+SEC("redirect_to_111")
+int xdp_redirect_to_111(struct xdp_md *xdp)
+{
+ return bpf_redirect(111, 0);
+}
+SEC("redirect_to_222")
+int xdp_redirect_to_222(struct xdp_md *xdp)
+{
+ return bpf_redirect(222, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
new file mode 100755
index 0000000..344a365
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+# Create 2 namespaces with two veth peers, and
+# forward packets in-between using generic XDP
+#
+# NS1(veth11) NS2(veth22)
+# | |
+# | |
+# (veth1, ------ (veth2,
+# id:111) id:222)
+# | xdp forwarding |
+# ------------------
+
+cleanup()
+{
+ if [ "$?" = "0" ]; then
+ echo "selftests: test_xdp_redirect [PASS]";
+ else
+ echo "selftests: test_xdp_redirect [FAILED]";
+ fi
+
+ set +e
+ ip netns del ns1 2> /dev/null
+ ip netns del ns2 2> /dev/null
+}
+
+ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
+if [ $? -ne 0 ]; then
+	echo "selftests: [SKIP] Could not run test without ip xdpgeneric support"
+ exit 0
+fi
+set -e
+
+ip netns add ns1
+ip netns add ns2
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 index 111 type veth peer name veth11
+ip link add veth2 index 222 type veth peer name veth22
+
+ip link set veth11 netns ns1
+ip link set veth22 netns ns2
+
+ip link set veth1 up
+ip link set veth2 up
+
+ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11
+ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22
+
+ip netns exec ns1 ip link set dev veth11 up
+ip netns exec ns2 ip link set dev veth22 up
+
+ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222
+ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111
+
+ip netns exec ns1 ping -c 1 10.1.1.22
+ip netns exec ns2 ping -c 1 10.1.1.11
+
+exit 0
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index afe109e..9801253 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,3 +1,4 @@
+msg_zerocopy
socket
psock_fanout
psock_tpacket
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index f6c9dbf..de1f577 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -3,11 +3,11 @@
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/
-TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh
+TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket
TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_FILES += reuseport_dualstack
+TEST_GEN_FILES += reuseport_dualstack msg_zerocopy
include ../lib.mk
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
new file mode 100644
index 0000000..40232af
--- /dev/null
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -0,0 +1,697 @@
+/* Evaluate MSG_ZEROCOPY
+ *
+ * Send traffic between two processes over one of the supported
+ * protocols and modes:
+ *
+ * PF_INET/PF_INET6
+ * - SOCK_STREAM
+ * - SOCK_DGRAM
+ * - SOCK_DGRAM with UDP_CORK
+ * - SOCK_RAW
+ * - SOCK_RAW with IP_HDRINCL
+ *
+ * PF_PACKET
+ * - SOCK_DGRAM
+ * - SOCK_RAW
+ *
+ * Start this program on two connected hosts, one in send mode and
+ * the other with option '-r' to put it in receiver mode.
+ *
+ * If zerocopy mode ('-z') is enabled, the sender will verify that
+ * the kernel queues completions on the error queue for all zerocopy
+ * transfers.
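+ *
+ * The zerocopy path boils down to the following calls, all made below:
+ *
+ *   setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
+ *   sendmsg(fd, &msg, MSG_ZEROCOPY);
+ *   poll(&pfd, 1, timeout);            (pfd.events = POLLERR)
+ *   recvmsg(fd, &msg, MSG_ERRQUEUE);   (completion notification)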
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
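+/* Fallback definitions so the test also builds against uapi headers
+ * that predate the zerocopy additions.
+ */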
+#ifndef SO_EE_ORIGIN_ZEROCOPY
+#define SO_EE_ORIGIN_ZEROCOPY SO_EE_ORIGIN_UPAGE
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY 60
+#endif
+
+#ifndef SO_EE_CODE_ZEROCOPY_COPIED
+#define SO_EE_CODE_ZEROCOPY_COPIED 1
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
+static int cfg_cork;
+static bool cfg_cork_mixed;
+static int cfg_cpu = -1; /* default: pin to last cpu */
+static int cfg_family = PF_UNSPEC;
+static int cfg_ifindex = 1;
+static int cfg_payload_len;
+static int cfg_port = 8000;
+static bool cfg_rx;
+static int cfg_runtime_ms = 4200;
+static int cfg_verbose;
+static int cfg_waittime_ms = 500;
+static bool cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+static struct sockaddr_storage cfg_src_addr;
+
+static char payload[IP_MAXPACKET];
+static long packets, bytes, completions, expected_completions;
+static int zerocopied = -1;
+static uint32_t next_completion;
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
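+/* RFC 1071 Internet checksum: one's complement sum with the carries
+ * folded back in, returned inverted.
+ */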
+static uint16_t get_ip_csum(const uint16_t *start, int num_words)
+{
+ unsigned long sum = 0;
+ int i;
+
+ for (i = 0; i < num_words; i++)
+ sum += start[i];
+
+ while (sum >> 16)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+
+ return ~sum;
+}
+
+static int do_setcpu(int cpu)
+{
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ error(1, 0, "setaffinity %d", cpu);
+
+ if (cfg_verbose)
+ fprintf(stderr, "cpu: %u\n", cpu);
+
+ return 0;
+}
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+ if (setsockopt(fd, level, optname, &val, sizeof(val)))
+ error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
+}
+
+static int do_poll(int fd, int events)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.events = events;
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ ret = poll(&pfd, 1, cfg_waittime_ms);
+ if (ret == -1)
+ error(1, errno, "poll");
+
+ return ret && (pfd.revents & events);
+}
+
+static int do_accept(int fd)
+{
+ int fda = fd;
+
+ fd = accept(fda, NULL, NULL);
+ if (fd == -1)
+ error(1, errno, "accept");
+ if (close(fda))
+ error(1, errno, "close listen sock");
+
+ return fd;
+}
+
+static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
+{
+ int ret, len, i, flags;
+
+ len = 0;
+ for (i = 0; i < msg->msg_iovlen; i++)
+ len += msg->msg_iov[i].iov_len;
+
+ flags = MSG_DONTWAIT;
+ if (do_zerocopy)
+ flags |= MSG_ZEROCOPY;
+
+ ret = sendmsg(fd, msg, flags);
+ if (ret == -1 && errno == EAGAIN)
+ return false;
+ if (ret == -1)
+ error(1, errno, "send");
+ if (cfg_verbose && ret != len)
+ fprintf(stderr, "send: ret=%u != %u\n", ret, len);
+
+ if (len) {
+ packets++;
+ bytes += ret;
+ if (do_zerocopy && ret)
+ expected_completions++;
+ }
+
+ return true;
+}
+
+static void do_sendmsg_corked(int fd, struct msghdr *msg)
+{
+ bool do_zerocopy = cfg_zerocopy;
+ int i, payload_len, extra_len;
+
+	/* split up the packet; if it does not divide evenly over cfg_cork
+	 * sends, make the first buffer longer */
+ payload_len = cfg_payload_len / cfg_cork;
+ extra_len = cfg_payload_len - (cfg_cork * payload_len);
+
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
+
+ for (i = 0; i < cfg_cork; i++) {
+
+		/* in mixed-frags mode, alternate zerocopy and copy frags;
+		 * start with a copied frag, to verify that attaching
+		 * zerocopy frags to an skb later on works
+		 */
+ if (cfg_cork_mixed)
+ do_zerocopy = (i & 1);
+
+ msg->msg_iov[0].iov_len = payload_len + extra_len;
+ extra_len = 0;
+
+ do_sendmsg(fd, msg, do_zerocopy);
+ }
+
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
+}
+
+static int setup_iph(struct iphdr *iph, uint16_t payload_len)
+{
+ struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
+ struct sockaddr_in *saddr = (void *) &cfg_src_addr;
+
+ memset(iph, 0, sizeof(*iph));
+
+ iph->version = 4;
+ iph->tos = 0;
+ iph->ihl = 5;
+ iph->ttl = 2;
+ iph->saddr = saddr->sin_addr.s_addr;
+ iph->daddr = daddr->sin_addr.s_addr;
+ iph->protocol = IPPROTO_EGP;
+ iph->tot_len = htons(sizeof(*iph) + payload_len);
+ iph->check = get_ip_csum((void *) iph, iph->ihl << 1);
+
+ return sizeof(*iph);
+}
+
+static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
+{
+ struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
+ struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
+
+ memset(ip6h, 0, sizeof(*ip6h));
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(payload_len);
+ ip6h->nexthdr = IPPROTO_EGP;
+ ip6h->hop_limit = 2;
+ ip6h->saddr = saddr->sin6_addr;
+ ip6h->daddr = daddr->sin6_addr;
+
+ return sizeof(*ip6h);
+}
+
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+}
+
+static int do_setup_tx(int domain, int type, int protocol)
+{
+ int fd;
+
+ fd = socket(domain, type, protocol);
+ if (fd == -1)
+ error(1, errno, "socket t");
+
+ do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
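+	/* a socket must opt in via SO_ZEROCOPY before the kernel
+	 * honors MSG_ZEROCOPY on sendmsg
+	 */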
+ if (cfg_zerocopy)
+ do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
+
+ if (domain != PF_PACKET)
+ if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
+ error(1, errno, "connect");
+
+ return fd;
+}
+
+static bool do_recv_completion(int fd)
+{
+ struct sock_extended_err *serr;
+ struct msghdr msg = {};
+ struct cmsghdr *cm;
+ uint32_t hi, lo, range;
+ int ret, zerocopy;
+ char control[100];
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno == EAGAIN)
+ return false;
+ if (ret == -1)
+ error(1, errno, "recvmsg notification");
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, errno, "recvmsg notification: truncated");
+
+ cm = CMSG_FIRSTHDR(&msg);
+ if (!cm)
+ error(1, 0, "cmsg: no cmsg");
+ if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
+ (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
+ (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
+ error(1, 0, "serr: wrong type: %d.%d",
+ cm->cmsg_level, cm->cmsg_type);
+
+ serr = (void *) CMSG_DATA(cm);
+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+ error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
+ if (serr->ee_errno != 0)
+ error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
+
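+	/* each notification carries an inclusive range of zerocopy send
+	 * counters: lower bound in ee_info, upper bound in ee_data
+	 */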
+ hi = serr->ee_data;
+ lo = serr->ee_info;
+ range = hi - lo + 1;
+
+ /* Detect notification gaps. These should not happen often, if at all.
+ * Gaps can occur due to drops, reordering and retransmissions.
+ */
+ if (lo != next_completion)
+ fprintf(stderr, "gap: %u..%u does not append to %u\n",
+ lo, hi, next_completion);
+ next_completion = hi + 1;
+
+ zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
+ if (zerocopied == -1)
+ zerocopied = zerocopy;
+ else if (zerocopied != zerocopy) {
+ fprintf(stderr, "serr: inconsistent\n");
+ zerocopied = zerocopy;
+ }
+
+ if (cfg_verbose >= 2)
+ fprintf(stderr, "completed: %u (h=%u l=%u)\n",
+ range, hi, lo);
+
+ completions += range;
+ return true;
+}
+
+/* Read all outstanding messages on the errqueue */
+static void do_recv_completions(int fd)
+{
+ while (do_recv_completion(fd)) {}
+}
+
+/* Wait for all remaining completions on the errqueue */
+static void do_recv_remaining_completions(int fd)
+{
+ int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
+
+ while (completions < expected_completions &&
+ gettimeofday_ms() < tstop) {
+ if (do_poll(fd, POLLERR))
+ do_recv_completions(fd);
+ }
+
+ if (completions < expected_completions)
+ fprintf(stderr, "missing notifications: %lu < %lu\n",
+ completions, expected_completions);
+}
+
+static void do_tx(int domain, int type, int protocol)
+{
+ struct iovec iov[3] = { {0} };
+ struct sockaddr_ll laddr;
+ struct msghdr msg = {0};
+ struct ethhdr eth;
+ union {
+ struct ipv6hdr ip6h;
+ struct iphdr iph;
+ } nh;
+ uint64_t tstop;
+ int fd;
+
+ fd = do_setup_tx(domain, type, protocol);
+
+ if (domain == PF_PACKET) {
+ uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
+
+ /* sock_raw passes ll header as data */
+ if (type == SOCK_RAW) {
+ memset(eth.h_dest, 0x06, ETH_ALEN);
+ memset(eth.h_source, 0x02, ETH_ALEN);
+ eth.h_proto = htons(proto);
+ iov[0].iov_base = &eth;
+ iov[0].iov_len = sizeof(eth);
+ msg.msg_iovlen++;
+ }
+
+ /* both sock_raw and sock_dgram expect name */
+ memset(&laddr, 0, sizeof(laddr));
+ laddr.sll_family = AF_PACKET;
+ laddr.sll_ifindex = cfg_ifindex;
+ laddr.sll_protocol = htons(proto);
+ laddr.sll_halen = ETH_ALEN;
+
+ memset(laddr.sll_addr, 0x06, ETH_ALEN);
+
+ msg.msg_name = &laddr;
+ msg.msg_namelen = sizeof(laddr);
+ }
+
+ /* packet and raw sockets with hdrincl must pass network header */
+ if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
+ if (cfg_family == PF_INET)
+ iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
+ else
+ iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
+
+ iov[1].iov_base = (void *) &nh;
+ msg.msg_iovlen++;
+ }
+
+ iov[2].iov_base = payload;
+ iov[2].iov_len = cfg_payload_len;
+ msg.msg_iovlen++;
+ msg.msg_iov = &iov[3 - msg.msg_iovlen];
+
+ tstop = gettimeofday_ms() + cfg_runtime_ms;
+ do {
+ if (cfg_cork)
+ do_sendmsg_corked(fd, &msg);
+ else
+ do_sendmsg(fd, &msg, cfg_zerocopy);
+
+ while (!do_poll(fd, POLLOUT)) {
+ if (cfg_zerocopy)
+ do_recv_completions(fd);
+ }
+
+ } while (gettimeofday_ms() < tstop);
+
+ if (cfg_zerocopy)
+ do_recv_remaining_completions(fd);
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
+ packets, bytes >> 20, completions,
+ zerocopied == 1 ? 'y' : 'n');
+}
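+
+/* Note on the poll loop in do_tx() above: completions are reaped while
+ * waiting for POLLOUT because notification skbuffs are charged against
+ * the socket's optmem allowance; the companion script raises
+ * net.core.optmem_max for the same reason.
+ */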
+
+static int do_setup_rx(int domain, int type, int protocol)
+{
+ int fd;
+
+ /* If tx is over PF_PACKET, rx must be over PF_INET(6)/SOCK_RAW
+ * to receive the only copy of the packet, not a clone.
+ */
+ if (domain == PF_PACKET)
+ error(1, 0, "Use PF_INET/SOCK_RAW to read");
+
+ if (type == SOCK_RAW && protocol == IPPROTO_RAW)
+ error(1, 0, "IPPROTO_RAW: not supported on Rx");
+
+ fd = socket(domain, type, protocol);
+ if (fd == -1)
+ error(1, errno, "socket r");
+
+ do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
+ do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
+ do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
+
+ if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
+ error(1, errno, "bind");
+
+ if (type == SOCK_STREAM) {
+ if (listen(fd, 1))
+ error(1, errno, "listen");
+ fd = do_accept(fd);
+ }
+
+ return fd;
+}
+
+/* Flush all outstanding bytes from the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+ int ret;
+
+ /* MSG_TRUNC flushes up to len bytes */
+ ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "flush");
+ if (!ret)
+ return;
+
+ packets++;
+ bytes += ret;
+}
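+
+/* The NULL buffer plus MSG_TRUNC combination above is a tcp recv idiom:
+ * the kernel discards up to len bytes from the receive queue without
+ * copying anything to userspace and returns the number of bytes
+ * discarded, which is all this throughput test needs.
+ */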
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_datagram(int fd, int type)
+{
+ int ret, off = 0;
+ char buf[64];
+
+ /* MSG_TRUNC will return full datagram length */
+ ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "recv");
+
+ /* raw ipv4 returns with the ip header, raw ipv6 without */
+ if (cfg_family == PF_INET && type == SOCK_RAW) {
+ off += sizeof(struct iphdr);
+ ret -= sizeof(struct iphdr);
+ }
+ if (ret != cfg_payload_len)
+ error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
+ if (ret > sizeof(buf) - off)
+ ret = sizeof(buf) - off;
+ if (memcmp(buf + off, payload, ret))
+ error(1, 0, "recv: data mismatch");
+
+ packets++;
+ bytes += cfg_payload_len;
+}
+
+static void do_rx(int domain, int type, int protocol)
+{
+ uint64_t tstop;
+ int fd;
+
+ fd = do_setup_rx(domain, type, protocol);
+
+ tstop = gettimeofday_ms() + cfg_runtime_ms;
+ do {
+ if (type == SOCK_STREAM)
+ do_flush_tcp(fd);
+ else
+ do_flush_datagram(fd, type);
+
+ do_poll(fd, POLLIN);
+
+ } while (gettimeofday_ms() < tstop);
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
+}
+
+static void do_test(int domain, int type, int protocol)
+{
+ int i;
+
+ if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
+ error(1, 0, "can only cork udp sockets");
+
+ do_setcpu(cfg_cpu);
+
+ for (i = 0; i < IP_MAXPACKET; i++)
+ payload[i] = 'a' + (i % 26);
+
+ if (cfg_rx)
+ do_rx(domain, type, protocol);
+ else
+ do_tx(domain, type, protocol);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [options] <test>", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ const int max_payload_len = sizeof(payload) -
+ sizeof(struct ipv6hdr) -
+ sizeof(struct tcphdr) -
+ 40 /* max tcp options */;
+ int c;
+
+ cfg_payload_len = max_payload_len;
+
+ while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'c':
+ cfg_cork = strtol(optarg, NULL, 0);
+ break;
+ case 'C':
+ cfg_cpu = strtol(optarg, NULL, 0);
+ break;
+ case 'D':
+ setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+ break;
+ case 'i':
+ cfg_ifindex = if_nametoindex(optarg);
+ if (cfg_ifindex == 0)
+ error(1, errno, "invalid iface: %s", optarg);
+ break;
+ case 'm':
+ cfg_cork_mixed = true;
+ break;
+ case 'p':
+ cfg_port = htons(strtoul(optarg, NULL, 0));
+ break;
+ case 'r':
+ cfg_rx = true;
+ break;
+ case 's':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'S':
+ setup_sockaddr(cfg_family, optarg, &cfg_src_addr);
+ break;
+ case 't':
+ cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
+ break;
+ case 'v':
+ cfg_verbose++;
+ break;
+ case 'z':
+ cfg_zerocopy = true;
+ break;
+ }
+ }
+
+ if (cfg_payload_len > max_payload_len)
+ error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
+ if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
+ error(1, 0, "-m: cork_mixed requires corking and zerocopy");
+
+ if (optind != argc - 1)
+ usage(argv[0]);
+}
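+
+/* The 200 ms added on top of -t in parse_opts() above presumably
+ * absorbs the startup skew between sender and receiver, which the
+ * wrapper script launches a moment apart.
+ */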
+
+int main(int argc, char **argv)
+{
+ const char *cfg_test;
+
+ parse_opts(argc, argv);
+
+ cfg_test = argv[argc - 1];
+
+ if (!strcmp(cfg_test, "packet"))
+ do_test(PF_PACKET, SOCK_RAW, 0);
+ else if (!strcmp(cfg_test, "packet_dgram"))
+ do_test(PF_PACKET, SOCK_DGRAM, 0);
+ else if (!strcmp(cfg_test, "raw"))
+ do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
+ else if (!strcmp(cfg_test, "raw_hdrincl"))
+ do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
+ else if (!strcmp(cfg_test, "tcp"))
+ do_test(cfg_family, SOCK_STREAM, 0);
+ else if (!strcmp(cfg_test, "udp"))
+ do_test(cfg_family, SOCK_DGRAM, 0);
+ else
+ error(1, 0, "unknown cfg_test %s", cfg_test);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
new file mode 100755
index 0000000..d571d21
--- /dev/null
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+#
+# Send data between two processes across namespaces
+# Run twice: once without and once with zerocopy
+
+set -e
+
+readonly DEV="veth0"
+readonly DEV_MTU=65535
+readonly BIN="./msg_zerocopy"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+readonly path_sysctl_mem="net.core.optmem_max"
+
+# Argument parsing
+if [[ "$#" -lt "2" ]]; then
+ echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
+ exit 1
+fi
+
+readonly IP="$1"
+shift
+readonly TXMODE="$1"
+shift
+readonly EXTRA_ARGS="$@"
+
+# Argument parsing: configure addresses
+if [[ "${IP}" == "4" ]]; then
+ readonly SADDR="${SADDR4}"
+ readonly DADDR="${DADDR4}"
+elif [[ "${IP}" == "6" ]]; then
+ readonly SADDR="${SADDR6}"
+ readonly DADDR="${DADDR6}"
+else
+ echo "Invalid IP version ${IP}"
+ exit 1
+fi
+
+# Argument parsing: select receive mode
+#
+# This differs from send mode for
+# - packet: use raw recv, because packet receives skb clones
+# - raw_hdrincl: use raw recv, because hdrincl is a tx-only option
+case "${TXMODE}" in
+'packet' | 'packet_dgram' | 'raw_hdrincl')
+ RXMODE='raw'
+ ;;
+*)
+ RXMODE="${TXMODE}"
+ ;;
+esac
+
+# Start of state changes: install cleanup handler
+save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
+
+cleanup() {
+ ip netns del "${NS2}"
+ ip netns del "${NS1}"
+ sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
+}
+
+trap cleanup EXIT
+
+# Configure system settings
+sysctl -w -q "${path_sysctl_mem}=1000000"
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
+ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
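+
+# A single "ip link add ... netns ... type veth peer ... netns ..." call
+# creates the pair with each end already inside its namespace, so no
+# separate "ip link set ... netns" moves are needed.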
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+
+# Optionally disable sg or csum offload to test edge cases
+# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+
+do_test() {
+ local -r ARGS="$1"
+
+ echo "ipv${IP} ${TXMODE} ${ARGS}"
+ ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+ sleep 0.2
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+ wait
+}
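+
+# The receiver gets the longer runtime (-t 2 vs -t 1) so it outlives the
+# sender and drains everything still in flight; the sleep gives it time
+# to bind before the sender starts.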
+
+do_test "${EXTRA_ARGS}"
+do_test "-z ${EXTRA_ARGS}"
+echo ok
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
new file mode 100755
index 0000000..57b5ff5
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -0,0 +1,272 @@
+#!/bin/sh
+#
+# This test checks rtnetlink callpaths, aiming for as much coverage as possible.
+#
+# set -e
+
+devdummy="test-dummy0"
+ret=0
+
+# set the global exit status, but never reset a nonzero one.
+check_err()
+{
+ if [ $ret -eq 0 ]; then
+ ret=$1
+ fi
+}
+
+kci_add_dummy()
+{
+ ip link add name "$devdummy" type dummy
+ check_err $?
+ ip link set "$devdummy" up
+ check_err $?
+}
+
+kci_del_dummy()
+{
+ ip link del dev "$devdummy"
+ check_err $?
+}
+
+# add a bridge with vlans on top
+kci_test_bridge()
+{
+ devbr="test-br0"
+ vlandev="testbr-vlan1"
+
+ ret=0
+ ip link add name "$devbr" type bridge
+ check_err $?
+
+ ip link set dev "$devdummy" master "$devbr"
+ check_err $?
+
+ ip link set "$devbr" up
+ check_err $?
+
+ ip link add link "$devbr" name "$vlandev" type vlan id 1
+ check_err $?
+ ip addr add dev "$vlandev" 10.200.7.23/30
+ check_err $?
+ ip -6 addr add dev "$vlandev" dead:42::1234/64
+ check_err $?
+ ip -d link > /dev/null
+ check_err $?
+ ip r s t all > /dev/null
+ check_err $?
+ ip -6 addr del dev "$vlandev" dead:42::1234/64
+ check_err $?
+
+ ip link del dev "$vlandev"
+ check_err $?
+ ip link del dev "$devbr"
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: bridge setup"
+ return 1
+ fi
+ echo "PASS: bridge setup"
+
+}
+
+kci_test_gre()
+{
+ gredev=neta
+ rem=10.42.42.1
+ loc=10.0.0.1
+
+ ret=0
+ ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
+ check_err $?
+ ip link set $gredev up
+ check_err $?
+ ip addr add 10.23.7.10 dev $gredev
+ check_err $?
+ ip route add 10.23.8.0/30 dev $gredev
+ check_err $?
+ ip addr add dev "$devdummy" 10.23.7.11/24
+ check_err $?
+ ip link > /dev/null
+ check_err $?
+ ip addr > /dev/null
+ check_err $?
+ ip addr del dev "$devdummy" 10.23.7.11/24
+ check_err $?
+
+ ip link del $gredev
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: gre tunnel endpoint"
+ return 1
+ fi
+ echo "PASS: gre tunnel endpoint"
+}
+
+# tc uses rtnetlink too; for full tc testing,
+# see tools/testing/selftests/tc-testing.
+kci_test_tc()
+{
+ dev=lo
+ ret=0
+
+ tc qdisc add dev "$dev" root handle 1: htb
+ check_err $?
+ tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
+ check_err $?
+ tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
+ check_err $?
+ tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
+ check_err $?
+ tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
+ check_err $?
+ tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
+ check_err $?
+ tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
+ check_err $?
+ tc filter show dev "$dev" parent 1:0 > /dev/null
+ check_err $?
+ tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
+ check_err $?
+ tc filter show dev "$dev" parent 1:0 > /dev/null
+ check_err $?
+ tc qdisc del dev "$dev" root handle 1: htb
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: tc htb hierarchy"
+ return 1
+ fi
+ echo "PASS: tc htb hierarchy"
+
+}
+
+kci_test_polrouting()
+{
+ ret=0
+ ip rule add fwmark 1 lookup 100
+ check_err $?
+ ip route add local 0.0.0.0/0 dev lo table 100
+ check_err $?
+ ip r s t all > /dev/null
+ check_err $?
+ ip rule del fwmark 1 lookup 100
+ check_err $?
+ ip route del local 0.0.0.0/0 dev lo table 100
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: policy route test"
+ return 1
+ fi
+ echo "PASS: policy routing"
+}
+
+kci_test_route_get()
+{
+ ret=0
+
+ ip route get 127.0.0.1 > /dev/null
+ check_err $?
+ ip route get 127.0.0.1 dev "$devdummy" > /dev/null
+ check_err $?
+ ip route get ::1 > /dev/null
+ check_err $?
+ ip route get fe80::1 dev "$devdummy" > /dev/null
+ check_err $?
+ ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x1 mark 0x1 > /dev/null
+ check_err $?
+ ip route get ::1 from ::1 iif lo oif lo tos 0x1 mark 0x1 > /dev/null
+ check_err $?
+ ip addr add dev "$devdummy" 10.23.7.11/24
+ check_err $?
+ ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null
+ check_err $?
+ ip addr del dev "$devdummy" 10.23.7.11/24
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: route get"
+ return 1
+ fi
+
+ echo "PASS: route get"
+}
+
+kci_test_addrlabel()
+{
+ ret=0
+
+ ip addrlabel add prefix dead::/64 dev lo label 1
+ check_err $?
+
+ ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1"
+ check_err $?
+
+ ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null
+ check_err $?
+
+ ip addrlabel add prefix dead::/64 label 1 2> /dev/null
+ check_err $?
+
+ ip addrlabel del prefix dead::/64 label 1 2> /dev/null
+ check_err $?
+
+ # concurrent add/delete
+ for i in $(seq 1 1000); do
+ ip addrlabel add prefix 1c3::/64 label 12345 2>/dev/null
+ done &
+
+ for i in $(seq 1 1000); do
+ ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
+ done
+
+ wait
+
+ ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: ipv6 addrlabel"
+ return 1
+ fi
+
+ echo "PASS: ipv6 addrlabel"
+}
+
+kci_test_rtnl()
+{
+ kci_add_dummy
+ if [ $ret -ne 0 ];then
+ echo "FAIL: cannot add dummy interface"
+ return 1
+ fi
+
+ kci_test_polrouting
+ kci_test_route_get
+ kci_test_tc
+ kci_test_gre
+ kci_test_bridge
+ kci_test_addrlabel
+
+ kci_del_dummy
+}
+
+# check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+for x in ip tc;do
+ $x -Version 2>/dev/null >/dev/null
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without the $x tool"
+ exit 0
+ fi
+done
+
+kci_test_rtnl
+
+exit $ret
diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore
index 9e69e98..d935503 100644
--- a/tools/testing/selftests/networking/timestamping/.gitignore
+++ b/tools/testing/selftests/networking/timestamping/.gitignore
@@ -1,3 +1,4 @@
timestamping
+rxtimestamp
txtimestamp
hwtstamp_config
diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile
index ccbb9ed..92fb8ee 100644
--- a/tools/testing/selftests/networking/timestamping/Makefile
+++ b/tools/testing/selftests/networking/timestamping/Makefile
@@ -1,4 +1,6 @@
-TEST_PROGS := hwtstamp_config timestamping txtimestamp
+CFLAGS += -I../../../../../usr/include
+
+TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp
all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/networking/timestamping/rxtimestamp.c b/tools/testing/selftests/networking/timestamping/rxtimestamp.c
new file mode 100644
index 0000000..00f2866
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/rxtimestamp.c
@@ -0,0 +1,389 @@
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <asm/types.h>
+#include <linux/net_tstamp.h>
+#include <linux/errqueue.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+struct options {
+ int so_timestamp;
+ int so_timestampns;
+ int so_timestamping;
+};
+
+struct tstamps {
+ bool tstamp;
+ bool tstampns;
+ bool swtstamp;
+ bool hwtstamp;
+};
+
+struct socket_type {
+ char *friendly_name;
+ int type;
+ int protocol;
+ bool enabled;
+};
+
+struct test_case {
+ struct options sockopt;
+ struct tstamps expected;
+ bool enabled;
+};
+
+struct sof_flag {
+ int mask;
+ char *name;
+};
+
+static struct sof_flag sof_flags[] = {
+#define SOF_FLAG(f) { f, #f }
+ SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE),
+ SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE),
+ SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE),
+};
+
+static struct socket_type socket_types[] = {
+ { "ip", SOCK_RAW, IPPROTO_EGP },
+ { "udp", SOCK_DGRAM, IPPROTO_UDP },
+ { "tcp", SOCK_STREAM, IPPROTO_TCP },
+};
+
+static struct test_case test_cases[] = {
+ { {}, {} },
+ {
+ { so_timestamp: 1 },
+ { tstamp: true }
+ },
+ {
+ { so_timestampns: 1 },
+ { tstampns: true }
+ },
+ {
+ { so_timestamp: 1, so_timestampns: 1 },
+ { tstampns: true }
+ },
+ {
+ { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE },
+ {}
+ },
+ {
+ /* Loopback device does not support hw timestamps. */
+ { so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE },
+ {}
+ },
+ {
+ { so_timestamping: SOF_TIMESTAMPING_SOFTWARE },
+ {}
+ },
+ {
+ { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE
+ | SOF_TIMESTAMPING_RX_HARDWARE },
+ {}
+ },
+ {
+ { so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ | SOF_TIMESTAMPING_RX_SOFTWARE },
+ { swtstamp: true }
+ },
+ {
+ { so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ | SOF_TIMESTAMPING_RX_SOFTWARE },
+ { tstamp: true, swtstamp: true }
+ },
+};
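+
+/* Note: the "field: value" form above is the old GCC designated
+ * initializer syntax, equivalent to the C99 spelling, e.g.
+ * { .so_timestamp = 1 }.
+ */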
+
+static struct option long_options[] = {
+ { "list_tests", no_argument, 0, 'l' },
+ { "test_num", required_argument, 0, 'n' },
+ { "op_size", required_argument, 0, 's' },
+ { "tcp", no_argument, 0, 't' },
+ { "udp", no_argument, 0, 'u' },
+ { "ip", no_argument, 0, 'i' },
+};
+
+static int next_port = 19999;
+static int op_size = 10 * 1024;
+
+void print_test_case(struct test_case *t)
+{
+ int f = 0;
+
+ printf("sockopts {");
+ if (t->sockopt.so_timestamp)
+ printf(" SO_TIMESTAMP ");
+ if (t->sockopt.so_timestampns)
+ printf(" SO_TIMESTAMPNS ");
+ if (t->sockopt.so_timestamping) {
+ printf(" SO_TIMESTAMPING: {");
+ for (f = 0; f < ARRAY_SIZE(sof_flags); f++)
+ if (t->sockopt.so_timestamping & sof_flags[f].mask)
+ printf(" %s |", sof_flags[f].name);
+ printf("}");
+ }
+ printf("} expected cmsgs: {");
+ if (t->expected.tstamp)
+ printf(" SCM_TIMESTAMP ");
+ if (t->expected.tstampns)
+ printf(" SCM_TIMESTAMPNS ");
+ if (t->expected.swtstamp || t->expected.hwtstamp) {
+ printf(" SCM_TIMESTAMPING {");
+ if (t->expected.swtstamp)
+ printf("0");
+ if (t->expected.swtstamp && t->expected.hwtstamp)
+ printf(",");
+ if (t->expected.hwtstamp)
+ printf("2");
+ printf("}");
+ }
+ printf("}\n");
+}
+
+void do_send(int src)
+{
+ int r;
+ char *buf = malloc(op_size);
+
+ memset(buf, 'z', op_size);
+ r = write(src, buf, op_size);
+ if (r < 0)
+ error(1, errno, "Failed to sendmsg");
+
+ free(buf);
+}
+
+bool do_recv(int rcv, int read_size, struct tstamps expected)
+{
+ const int CMSG_SIZE = 1024;
+
+ struct scm_timestamping *ts;
+ struct tstamps actual = {};
+ char cmsg_buf[CMSG_SIZE];
+ struct iovec recv_iov;
+ struct cmsghdr *cmsg;
+ bool failed = false;
+ struct msghdr hdr;
+ int flags = 0;
+ int r;
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_iov = &recv_iov;
+ hdr.msg_iovlen = 1;
+ recv_iov.iov_base = malloc(read_size);
+ recv_iov.iov_len = read_size;
+
+ hdr.msg_control = cmsg_buf;
+ hdr.msg_controllen = sizeof(cmsg_buf);
+
+ r = recvmsg(rcv, &hdr, flags);
+ if (r < 0)
+ error(1, errno, "Failed to recvmsg");
+ if (r != read_size)
+ error(1, 0, "Only received %d bytes of payload.", r);
+
+ if (hdr.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
+ error(1, 0, "Message was truncated.");
+
+ for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ error(1, 0, "Unexpected cmsg_level %d",
+ cmsg->cmsg_level);
+ switch (cmsg->cmsg_type) {
+ case SCM_TIMESTAMP:
+ actual.tstamp = true;
+ break;
+ case SCM_TIMESTAMPNS:
+ actual.tstampns = true;
+ break;
+ case SCM_TIMESTAMPING:
+ ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
+ actual.swtstamp = !!ts->ts[0].tv_sec;
+ if (ts->ts[1].tv_sec != 0)
+ error(0, 0, "ts[1] should not be set.");
+ actual.hwtstamp = !!ts->ts[2].tv_sec;
+ break;
+ default:
+ error(1, 0, "Unexpected cmsg_type %d", cmsg->cmsg_type);
+ }
+ }
+
+#define VALIDATE(field) \
+ do { \
+ if (expected.field != actual.field) { \
+ if (expected.field) \
+ error(0, 0, "Expected " #field " to be set."); \
+ else \
+ error(0, 0, \
+ "Expected " #field " to not be set."); \
+ failed = true; \
+ } \
+ } while (0)
+
+ VALIDATE(tstamp);
+ VALIDATE(tstampns);
+ VALIDATE(swtstamp);
+ VALIDATE(hwtstamp);
+#undef VALIDATE
+
+ free(recv_iov.iov_base);
+
+ return failed;
+}
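+
+/* struct scm_timestamping carries three timespecs: ts[0] is the
+ * software timestamp and ts[2] the raw hardware timestamp; ts[1] is a
+ * deprecated transformed-hardware slot, which is why do_recv() above
+ * warns if it is ever set.
+ */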
+
+void config_so_flags(int rcv, struct options o)
+{
+ int on = 1;
+
+ if (setsockopt(rcv, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
+ error(1, errno, "Failed to enable SO_REUSEADDR");
+
+ if (o.so_timestamp &&
+ setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMP,
+ &o.so_timestamp, sizeof(o.so_timestamp)) < 0)
+ error(1, errno, "Failed to enable SO_TIMESTAMP");
+
+ if (o.so_timestampns &&
+ setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPNS,
+ &o.so_timestampns, sizeof(o.so_timestampns)) < 0)
+ error(1, errno, "Failed to enable SO_TIMESTAMPNS");
+
+ if (o.so_timestamping &&
+ setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPING,
+ &o.so_timestamping, sizeof(o.so_timestamping)) < 0)
+ error(1, errno, "Failed to set SO_TIMESTAMPING");
+}
+
+bool run_test_case(struct socket_type s, struct test_case t)
+{
+ int port = (s.type == SOCK_RAW) ? 0 : next_port++;
+ int read_size = op_size;
+ struct sockaddr_in addr;
+ bool failed = false;
+ int src, dst, rcv;
+
+ src = socket(AF_INET, s.type, s.protocol);
+ if (src < 0)
+ error(1, errno, "Failed to open src socket");
+
+ dst = socket(AF_INET, s.type, s.protocol);
+ if (dst < 0)
+ error(1, errno, "Failed to open dst socket");
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr.sin_port = htons(port);
+
+ if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ error(1, errno, "Failed to bind to port %d", port);
+
+ if (s.type == SOCK_STREAM && (listen(dst, 1) < 0))
+ error(1, errno, "Failed to listen");
+
+ if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ error(1, errno, "Failed to connect");
+
+ if (s.type == SOCK_STREAM) {
+ rcv = accept(dst, NULL, NULL);
+ if (rcv < 0)
+ error(1, errno, "Failed to accept");
+ close(dst);
+ } else {
+ rcv = dst;
+ }
+
+ config_so_flags(rcv, t.sockopt);
+ usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */
+ do_send(src);
+
+ if (s.type == SOCK_RAW)
+ read_size += 20; /* for IP header */
+ failed = do_recv(rcv, read_size, t.expected);
+
+ close(rcv);
+ close(src);
+
+ return failed;
+}
+
+int main(int argc, char **argv)
+{
+ bool all_protocols = true;
+ bool all_tests = true;
+ int arg_index = 0;
+ int failures = 0;
+ int s, t;
+ int opt; /* getopt_long() returns int; char is unsigned on some arches */
+
+ while ((opt = getopt_long(argc, argv, "", long_options,
+ &arg_index)) != -1) {
+ switch (opt) {
+ case 'l':
+ for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
+ printf("%d\t", t);
+ print_test_case(&test_cases[t]);
+ }
+ return 0;
+ case 'n':
+ t = atoi(optarg);
+ if (t < 0 || t >= (int)ARRAY_SIZE(test_cases))
+ error(1, 0, "Invalid test case: %d", t);
+ all_tests = false;
+ test_cases[t].enabled = true;
+ break;
+ case 's':
+ op_size = atoi(optarg);
+ break;
+ case 't':
+ all_protocols = false;
+ socket_types[2].enabled = true;
+ break;
+ case 'u':
+ all_protocols = false;
+ socket_types[1].enabled = true;
+ break;
+ case 'i':
+ all_protocols = false;
+ socket_types[0].enabled = true;
+ break;
+ default:
+ error(1, 0, "Failed to parse parameters.");
+ }
+ }
+
+ for (s = 0; s < ARRAY_SIZE(socket_types); s++) {
+ if (!all_protocols && !socket_types[s].enabled)
+ continue;
+
+ printf("Testing %s...\n", socket_types[s].friendly_name);
+ for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
+ if (!all_tests && !test_cases[t].enabled)
+ continue;
+
+ printf("Starting testcase %d...\n", t);
+ if (run_test_case(socket_types[s], test_cases[t])) {
+ failures++;
+ printf("FAILURE in test case ");
+ print_test_case(&test_cases[t]);
+ }
+ }
+ }
+ if (!failures)
+ printf("PASSED.\n");
+ return failures;
+}
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json
index af519bc..6973bdc 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json
@@ -1111,5 +1111,55 @@
"teardown": [
"$TC actions flush action gact"
]
+ },
+ {
+ "id": "a568",
+ "name": "Add action with ife type",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action ife encode type 0xDEAD index 1"
+ ],
+ "cmdUnderTest": "$TC actions get action ife index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "type 0xDEAD",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "b983",
+ "name": "Add action without ife type",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action ife encode index 1"
+ ],
+ "cmdUnderTest": "$TC actions get action ife index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "type 0xED3E",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
}
] \ No newline at end of file